path: root/gcc-4.8/gcc/config/rs6000/rs6000.c
Diffstat (limited to 'gcc-4.8/gcc/config/rs6000/rs6000.c')
-rw-r--r-- gcc-4.8/gcc/config/rs6000/rs6000.c | 5895
1 file changed, 5129 insertions, 766 deletions
diff --git a/gcc-4.8/gcc/config/rs6000/rs6000.c b/gcc-4.8/gcc/config/rs6000/rs6000.c
index f8579fd98..9cea1f112 100644
--- a/gcc-4.8/gcc/config/rs6000/rs6000.c
+++ b/gcc-4.8/gcc/config/rs6000/rs6000.c
@@ -1,5 +1,5 @@
/* Subroutines used for code generation on IBM RS/6000.
- Copyright (C) 1991-2013 Free Software Foundation, Inc.
+ Copyright (C) 1991-2014 Free Software Foundation, Inc.
Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
This file is part of GCC.
@@ -56,6 +56,7 @@
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
+#include "ira.h"
#include "opts.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
@@ -96,6 +97,7 @@ typedef struct rs6000_stack {
int spe_gp_save_offset; /* offset to save spe 64-bit gprs */
int varargs_save_offset; /* offset to save the varargs registers */
int ehrd_offset; /* offset to EH return data */
+ int ehcr_offset; /* offset to EH CR field data */
int reg_size; /* register size (4 or 8) */
HOST_WIDE_INT vars_size; /* variable save area size */
int parm_size; /* outgoing parameter size */
@@ -139,6 +141,8 @@ typedef struct GTY(()) machine_function
64-bits wide and is allocated early enough so that the offset
does not overflow the 16-bit load/store offset field. */
rtx sdmode_stack_slot;
+ /* Flag if r2 setup is needed with ELFv2 ABI. */
+ bool r2_setup_needed;
} machine_function;
/* Support targetm.vectorize.builtin_mask_for_load. */
@@ -189,9 +193,6 @@ unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];
/* Map register number to register class. */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];
-/* Reload functions based on the type and the vector unit. */
-static enum insn_code rs6000_vector_reload[NUM_MACHINE_MODES][2];
-
static int dbg_cost_ctrl;
/* Built in types. */
@@ -289,6 +290,105 @@ static struct
don't link in rs6000-c.c, so we can't call it directly. */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
+/* Simplify register classes into simpler classifications. We assume
+ GPR_REG_TYPE through FPR_REG_TYPE are ordered so that we can use a simple range
+ check for standard register classes (gpr/floating/altivec/vsx) and
+ floating/vector classes (float/altivec/vsx). */
+
+enum rs6000_reg_type {
+ NO_REG_TYPE,
+ PSEUDO_REG_TYPE,
+ GPR_REG_TYPE,
+ VSX_REG_TYPE,
+ ALTIVEC_REG_TYPE,
+ FPR_REG_TYPE,
+ SPR_REG_TYPE,
+ CR_REG_TYPE,
+ SPE_ACC_TYPE,
+ SPEFSCR_REG_TYPE
+};
+
+/* Map register class to register type. */
+static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
+
+/* First/last register type for the 'normal' register types (i.e. general
+ purpose, floating point, altivec, and VSX registers). */
+#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
+
+#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
+
+
+/* Register classes we care about in secondary reload or in the legitimate
+ address support. We only need to worry about GPR, FPR, and Altivec
+ registers here, along with an ANY field that is the OR of the 3 register
+ classes. */
+
+enum rs6000_reload_reg_type {
+ RELOAD_REG_GPR, /* General purpose registers. */
+ RELOAD_REG_FPR, /* Traditional floating point regs. */
+ RELOAD_REG_VMX, /* Altivec (VMX) registers. */
+ RELOAD_REG_ANY, /* OR of GPR, FPR, Altivec masks. */
+ N_RELOAD_REG
+};
+
+/* For setting up register classes, loop through the 3 register classes that
+ map onto real registers, and skip the ANY class, which is just an OR of the
+ bits. */
+#define FIRST_RELOAD_REG_CLASS RELOAD_REG_GPR
+#define LAST_RELOAD_REG_CLASS RELOAD_REG_VMX
+
+/* Map reload register type to a register in the register class. */
+struct reload_reg_map_type {
+ const char *name; /* Register class name. */
+ int reg; /* Register in the register class. */
+};
+
+static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
+ { "Gpr", FIRST_GPR_REGNO }, /* RELOAD_REG_GPR. */
+ { "Fpr", FIRST_FPR_REGNO }, /* RELOAD_REG_FPR. */
+ { "VMX", FIRST_ALTIVEC_REGNO }, /* RELOAD_REG_VMX. */
+ { "Any", -1 }, /* RELOAD_REG_ANY. */
+};
+
+/* Mask bits for each register class, indexed per mode. Historically the
+ compiler has been more restrictive about which types can do PRE_MODIFY
+ instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
+ two. */
+typedef unsigned char addr_mask_type;
+
+#define RELOAD_REG_VALID 0x01 /* Mode valid in register. */
+#define RELOAD_REG_MULTIPLE 0x02 /* Mode takes multiple registers. */
+#define RELOAD_REG_INDEXED 0x04 /* Reg+reg addressing. */
+#define RELOAD_REG_OFFSET 0x08 /* Reg+offset addressing. */
+#define RELOAD_REG_PRE_INCDEC 0x10 /* PRE_INC/PRE_DEC valid. */
+#define RELOAD_REG_PRE_MODIFY 0x20 /* PRE_MODIFY valid. */
+
+/* For each mode, the reload insns to use and the mask of valid addressing
+ modes for each register type. */
+struct rs6000_reg_addr {
+ enum insn_code reload_load; /* INSN to reload for loading. */
+ enum insn_code reload_store; /* INSN to reload for storing. */
+ enum insn_code reload_fpr_gpr; /* INSN to move from FPR to GPR. */
+ enum insn_code reload_gpr_vsx; /* INSN to move from GPR to VSX. */
+ enum insn_code reload_vsx_gpr; /* INSN to move from VSX to GPR. */
+ addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks. */
+};
+
+static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
+
+/* Helper function to say whether a mode supports PRE_INC or PRE_DEC. */
+static inline bool
+mode_supports_pre_incdec_p (enum machine_mode mode)
+{
+ return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
+ != 0);
+}
+
+/* Helper function to say whether a mode supports PRE_MODIFY. */
+static inline bool
+mode_supports_pre_modify_p (enum machine_mode mode)
+{
+ return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
+ != 0);
+}
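In the same spirit as the two helpers above, the other RELOAD_REG_* bits are meant to be queried per reload register class. A minimal sketch of such a query, using a hypothetical helper name that is not part of this patch:

/* Sketch only -- not in the patch.  Query whether MODE supports reg+offset
   addressing when it lives in reload register class RC (RELOAD_REG_GPR,
   RELOAD_REG_FPR, RELOAD_REG_VMX, or RELOAD_REG_ANY).  */
static inline bool
example_mode_supports_offset_p (enum machine_mode mode, int rc)
{
  return (reg_addr[mode].addr_mask[rc] & RELOAD_REG_OFFSET) != 0;
}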
+
/* Target cpu costs. */
@@ -828,6 +928,25 @@ struct processor_costs power7_cost = {
12, /* prefetch streams */
};
+/* Instruction costs on POWER8 processors. */
+static const
+struct processor_costs power8_cost = {
+ COSTS_N_INSNS (3), /* mulsi */
+ COSTS_N_INSNS (3), /* mulsi_const */
+ COSTS_N_INSNS (3), /* mulsi_const9 */
+ COSTS_N_INSNS (3), /* muldi */
+ COSTS_N_INSNS (19), /* divsi */
+ COSTS_N_INSNS (35), /* divdi */
+ COSTS_N_INSNS (3), /* fp */
+ COSTS_N_INSNS (3), /* dmul */
+ COSTS_N_INSNS (14), /* sdiv */
+ COSTS_N_INSNS (17), /* ddiv */
+ 128, /* cache line size */
+ 32, /* l1 cache */
+ 256, /* l2 cache */
+ 12, /* prefetch streams */
+};
+
/* Instruction costs on POWER A2 processors. */
static const
struct processor_costs ppca2_cost = {
@@ -855,6 +974,7 @@ struct processor_costs ppca2_cost = {
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -878,6 +998,9 @@ struct processor_costs ppca2_cost = {
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
{ NAME, ICODE, MASK, ATTR },
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
+ { NAME, ICODE, MASK, ATTR },
+
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
{ NAME, ICODE, MASK, ATTR },
@@ -908,6 +1031,7 @@ static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -948,6 +1072,7 @@ static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
+static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
@@ -1020,6 +1145,13 @@ static void rs6000_print_isa_options (FILE *, int, const char *,
static void rs6000_print_builtin_options (FILE *, int, const char *,
HOST_WIDE_INT);
+static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
+static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
+ enum rs6000_reg_type,
+ enum machine_mode,
+ secondary_reload_info *,
+ bool);
+
/* Hash table stuff for keeping track of TOC entries. */
struct GTY(()) toc_hash_struct
@@ -1068,7 +1200,9 @@ char rs6000_reg_names[][8] =
/* SPE registers. */
"spe_acc", "spefscr",
/* Soft frame pointer. */
- "sfp"
+ "sfp",
+ /* HTM SPR registers. */
+ "tfhar", "tfiar", "texasr"
};
#ifdef TARGET_REGNAMES
@@ -1094,7 +1228,9 @@ static const char alt_reg_names[][8] =
/* SPE registers. */
"spe_acc", "spefscr",
/* Soft frame pointer. */
- "sfp"
+ "sfp",
+ /* HTM SPR registers. */
+ "tfhar", "tfiar", "texasr"
};
#endif
@@ -1316,6 +1452,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
+#undef TARGET_RETURN_IN_MSB
+#define TARGET_RETURN_IN_MSB rs6000_return_in_msb
+
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
@@ -1425,6 +1564,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
+#undef TARGET_LRA_P
+#define TARGET_LRA_P rs6000_lra_p
+
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate
@@ -1513,8 +1655,9 @@ rs6000_hard_regno_nregs_internal (int regno, enum machine_mode mode)
{
unsigned HOST_WIDE_INT reg_size;
+ /* TF/TD modes are special in that they always take 2 registers. */
if (FP_REGNO_P (regno))
- reg_size = (VECTOR_MEM_VSX_P (mode)
+ reg_size = ((VECTOR_MEM_VSX_P (mode) && mode != TDmode && mode != TFmode)
? UNITS_PER_VSX_WORD
: UNITS_PER_FP_WORD);
@@ -1546,17 +1689,40 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
+ /* PTImode can only go in GPRs. Quad word memory operations require even/odd
+ register combinations, and PTImode is used where we need to deal with such
+ operations. Don't allow quad words in the argument or frame pointer
+ registers, just registers 0..31. */
+ if (mode == PTImode)
+ return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
+ && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
+ && ((regno & 1) == 0));
+
/* VSX registers that overlap the FPR registers are larger than for non-VSX
implementations. Don't allow an item to be split between a FP register
- and an Altivec register. */
- if (VECTOR_MEM_VSX_P (mode))
+ and an Altivec register. Allow TImode in all VSX registers if the user
+ asked for it. */
+ if (TARGET_VSX && VSX_REGNO_P (regno)
+ && (VECTOR_MEM_VSX_P (mode)
+ || (TARGET_VSX_SCALAR_FLOAT && mode == SFmode)
+ || (TARGET_VSX_SCALAR_DOUBLE && (mode == DFmode || mode == DImode))
+ || (TARGET_VSX_TIMODE && mode == TImode)
+ || (TARGET_VADDUQM && mode == V1TImode)))
{
if (FP_REGNO_P (regno))
return FP_REGNO_P (last_regno);
if (ALTIVEC_REGNO_P (regno))
+ {
+ if (mode == SFmode && !TARGET_UPPER_REGS_SF)
+ return 0;
+
+ if ((mode == DFmode || mode == DImode) && !TARGET_UPPER_REGS_DF)
+ return 0;
+
return ALTIVEC_REGNO_P (last_regno);
}
+ }
/* The GPRs can hold any mode, but values bigger than one register
cannot go past R31. */
@@ -1564,8 +1730,7 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode)
return INT_REGNO_P (last_regno);
/* The float registers (except for VSX vector modes) can only hold floating
- modes and DImode. This excludes the 32-bit decimal float mode for
- now. */
+ modes and DImode. */
if (FP_REGNO_P (regno))
{
if (SCALAR_FLOAT_MODE_P (mode)
@@ -1593,15 +1758,15 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode)
/* AltiVec only in AltiVec registers. */
if (ALTIVEC_REGNO_P (regno))
- return VECTOR_MEM_ALTIVEC_OR_VSX_P (mode);
+ return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
+ || mode == V1TImode);
/* ...but GPRs can hold SIMD data on the SPE in one register. */
if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
return 1;
- /* We cannot put TImode anywhere except general register and it must be able
- to fit within the register set. In the future, allow TImode in the
- Altivec or VSX registers. */
+ /* We cannot put non-VSX TImode or PTImode anywhere except general register
+ and it must be able to fit within the register set. */
return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
}
@@ -1674,10 +1839,77 @@ rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
comma = "";
}
+ len += fprintf (stderr, "%sreg-class = %s", comma,
+ reg_class_names[(int)rs6000_regno_regclass[r]]);
+ comma = ", ";
+
+ if (len > 70)
+ {
+ fprintf (stderr, ",\n\t");
+ comma = "";
+ }
+
fprintf (stderr, "%sregno = %d\n", comma, r);
}
}
+static const char *
+rs6000_debug_vector_unit (enum rs6000_vector v)
+{
+ const char *ret;
+
+ switch (v)
+ {
+ case VECTOR_NONE: ret = "none"; break;
+ case VECTOR_ALTIVEC: ret = "altivec"; break;
+ case VECTOR_VSX: ret = "vsx"; break;
+ case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
+ case VECTOR_PAIRED: ret = "paired"; break;
+ case VECTOR_SPE: ret = "spe"; break;
+ case VECTOR_OTHER: ret = "other"; break;
+ default: ret = "unknown"; break;
+ }
+
+ return ret;
+}
+
+/* Print the address masks in a human readable fashion. */
+DEBUG_FUNCTION void
+rs6000_debug_print_mode (ssize_t m)
+{
+ ssize_t rc;
+
+ fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
+ for (rc = 0; rc < N_RELOAD_REG; rc++)
+ {
+ addr_mask_type mask = reg_addr[m].addr_mask[rc];
+ fprintf (stderr,
+ " %s: %c%c%c%c%c%c",
+ reload_reg_map[rc].name,
+ (mask & RELOAD_REG_VALID) != 0 ? 'v' : ' ',
+ (mask & RELOAD_REG_MULTIPLE) != 0 ? 'm' : ' ',
+ (mask & RELOAD_REG_INDEXED) != 0 ? 'i' : ' ',
+ (mask & RELOAD_REG_OFFSET) != 0 ? 'o' : ' ',
+ (mask & RELOAD_REG_PRE_INCDEC) != 0 ? '+' : ' ',
+ (mask & RELOAD_REG_PRE_MODIFY) != 0 ? '+' : ' ');
+ }
+
+ if (rs6000_vector_unit[m] != VECTOR_NONE
+ || rs6000_vector_mem[m] != VECTOR_NONE
+ || (reg_addr[m].reload_store != CODE_FOR_nothing)
+ || (reg_addr[m].reload_load != CODE_FOR_nothing))
+ {
+ fprintf (stderr,
+ " Vector-arith=%-10s Vector-mem=%-10s Reload=%c%c",
+ rs6000_debug_vector_unit (rs6000_vector_unit[m]),
+ rs6000_debug_vector_unit (rs6000_vector_mem[m]),
+ (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
+ (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*');
+ }
+
+ fputs ("\n", stderr);
+}
+
#define DEBUG_FMT_ID "%-32s= "
#define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
#define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
@@ -1690,6 +1922,7 @@ rs6000_debug_reg_global (void)
static const char *const tf[2] = { "false", "true" };
const char *nl = (const char *)0;
int m;
+ size_t m1, m2, v;
char costly_num[20];
char nop_num[20];
char flags_buffer[40];
@@ -1700,20 +1933,69 @@ rs6000_debug_reg_global (void)
const char *cmodel_str;
struct cl_target_option cl_opts;
- /* Map enum rs6000_vector to string. */
- static const char *rs6000_debug_vector_unit[] = {
- "none",
- "altivec",
- "vsx",
- "paired",
- "spe",
- "other"
+ /* Modes we want tieable information on. */
+ static const enum machine_mode print_tieable_modes[] = {
+ QImode,
+ HImode,
+ SImode,
+ DImode,
+ TImode,
+ PTImode,
+ SFmode,
+ DFmode,
+ TFmode,
+ SDmode,
+ DDmode,
+ TDmode,
+ V8QImode,
+ V4HImode,
+ V2SImode,
+ V16QImode,
+ V8HImode,
+ V4SImode,
+ V2DImode,
+ V1TImode,
+ V32QImode,
+ V16HImode,
+ V8SImode,
+ V4DImode,
+ V2TImode,
+ V2SFmode,
+ V4SFmode,
+ V2DFmode,
+ V8SFmode,
+ V4DFmode,
+ CCmode,
+ CCUNSmode,
+ CCEQmode,
+ };
+
+ /* Virtual regs we are interested in. */
+ static const struct {
+ int regno; /* register number. */
+ const char *name; /* register name. */
+ } virtual_regs[] = {
+ { STACK_POINTER_REGNUM, "stack pointer:" },
+ { TOC_REGNUM, "toc: " },
+ { STATIC_CHAIN_REGNUM, "static chain: " },
+ { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
+ { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
+ { ARG_POINTER_REGNUM, "arg pointer: " },
+ { FRAME_POINTER_REGNUM, "frame pointer:" },
+ { FIRST_PSEUDO_REGISTER, "first pseudo: " },
+ { FIRST_VIRTUAL_REGISTER, "first virtual:" },
+ { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
+ { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
+ { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
+ { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
+ { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
+ { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
+ { LAST_VIRTUAL_REGISTER, "last virtual: " },
};
- fprintf (stderr, "Register information: (last virtual reg = %d)\n",
- LAST_VIRTUAL_REGISTER);
- rs6000_debug_reg_print (0, 31, "gr");
- rs6000_debug_reg_print (32, 63, "fp");
+ fputs ("\nHard register information:\n", stderr);
+ rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
+ rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
LAST_ALTIVEC_REGNO,
"vs");
@@ -1726,6 +2008,10 @@ rs6000_debug_reg_global (void)
rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
+ fputs ("\nVirtual/stack/frame registers:\n", stderr);
+ for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
+ fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
+
fprintf (stderr,
"\n"
"d reg_class = %s\n"
@@ -1734,23 +2020,68 @@ rs6000_debug_reg_global (void)
"wa reg_class = %s\n"
"wd reg_class = %s\n"
"wf reg_class = %s\n"
- "ws reg_class = %s\n\n",
+ "wg reg_class = %s\n"
+ "wl reg_class = %s\n"
+ "wm reg_class = %s\n"
+ "wr reg_class = %s\n"
+ "ws reg_class = %s\n"
+ "wt reg_class = %s\n"
+ "wu reg_class = %s\n"
+ "wv reg_class = %s\n"
+ "ww reg_class = %s\n"
+ "wx reg_class = %s\n"
+ "wy reg_class = %s\n"
+ "wz reg_class = %s\n"
+ "\n",
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
- reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]]);
-
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
+
+ nl = "\n";
for (m = 0; m < NUM_MACHINE_MODES; ++m)
- if (rs6000_vector_unit[m] || rs6000_vector_mem[m])
+ rs6000_debug_print_mode (m);
+
+ fputs ("\n", stderr);
+
+ for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
+ {
+ enum machine_mode mode1 = print_tieable_modes[m1];
+ bool first_time = true;
+
+ nl = (const char *)0;
+ for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
+ {
+ enum machine_mode mode2 = print_tieable_modes[m2];
+ if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
{
+ if (first_time)
+ {
+ fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
nl = "\n";
- fprintf (stderr, "Vector mode: %-5s arithmetic: %-8s move: %-8s\n",
- GET_MODE_NAME (m),
- rs6000_debug_vector_unit[ rs6000_vector_unit[m] ],
- rs6000_debug_vector_unit[ rs6000_vector_mem[m] ]);
+ first_time = false;
+ }
+
+ fprintf (stderr, " %s", GET_MODE_NAME (mode2));
+ }
+ }
+
+ if (!first_time)
+ fputs ("\n", stderr);
}
if (nl)
@@ -1913,6 +2244,7 @@ rs6000_debug_reg_global (void)
{
case ABI_NONE: abi_str = "none"; break;
case ABI_AIX: abi_str = "aix"; break;
+ case ABI_ELFv2: abi_str = "ELFv2"; break;
case ABI_V4: abi_str = "V4"; break;
case ABI_DARWIN: abi_str = "darwin"; break;
default: abi_str = "unknown"; break;
@@ -1932,9 +2264,34 @@ rs6000_debug_reg_global (void)
if (rs6000_float_gprs)
fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
+ fprintf (stderr, DEBUG_FMT_S, "fprs",
+ (TARGET_FPRS ? "true" : "false"));
+
+ fprintf (stderr, DEBUG_FMT_S, "single_float",
+ (TARGET_SINGLE_FLOAT ? "true" : "false"));
+
+ fprintf (stderr, DEBUG_FMT_S, "double_float",
+ (TARGET_DOUBLE_FLOAT ? "true" : "false"));
+
+ fprintf (stderr, DEBUG_FMT_S, "soft_float",
+ (TARGET_SOFT_FLOAT ? "true" : "false"));
+
+ fprintf (stderr, DEBUG_FMT_S, "e500_single",
+ (TARGET_E500_SINGLE ? "true" : "false"));
+
+ fprintf (stderr, DEBUG_FMT_S, "e500_double",
+ (TARGET_E500_DOUBLE ? "true" : "false"));
+
if (TARGET_LINK_STACK)
fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
+ if (targetm.lra_p ())
+ fprintf (stderr, DEBUG_FMT_S, "lra", "true");
+
+ if (TARGET_P8_FUSION)
+ fprintf (stderr, DEBUG_FMT_S, "p8 fusion",
+ (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero");
+
fprintf (stderr, DEBUG_FMT_S, "plt-format",
TARGET_SECURE_PLT ? "secure" : "bss");
fprintf (stderr, DEBUG_FMT_S, "struct-return",
@@ -1954,11 +2311,106 @@ rs6000_debug_reg_global (void)
(int)RS6000_BUILTIN_COUNT);
}
+
+/* Update the addr mask bits in reg_addr to help the secondary reload and
+ legitimate address support figure out the appropriate addressing to
+ use. */
+
+static void
+rs6000_setup_reg_addr_masks (void)
+{
+ ssize_t rc, reg, m, nregs;
+ addr_mask_type any_addr_mask, addr_mask;
+
+ for (m = 0; m < NUM_MACHINE_MODES; ++m)
+ {
+ /* SDmode is special in that we want to access it only via REG+REG
+ addressing on power7 and above, since we want to use the LFIWZX and
+ STFIWZX instructions to load it. */
+ bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
+
+ any_addr_mask = 0;
+ for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
+ {
+ addr_mask = 0;
+ reg = reload_reg_map[rc].reg;
+
+ /* Can mode values go in the GPR/FPR/Altivec registers? */
+ if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
+ {
+ nregs = rs6000_hard_regno_nregs[m][reg];
+ addr_mask |= RELOAD_REG_VALID;
+
+ /* Indicate if the mode takes more than 1 physical register. If
+ it takes a single register, indicate it can do REG+REG
+ addressing. */
+ if (nregs > 1 || m == BLKmode)
+ addr_mask |= RELOAD_REG_MULTIPLE;
+ else
+ addr_mask |= RELOAD_REG_INDEXED;
+
+ /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
+ addressing. Restrict addressing on SPE for 64-bit types
+ because of the SUBREG hackery used to address 64-bit floats in
+ '32-bit' GPRs. To simplify secondary reload, don't allow
+ update forms on scalar floating point types that can go in the
+ upper registers. */
+
+ if (TARGET_UPDATE
+ && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
+ && GET_MODE_SIZE (m) <= 8
+ && !VECTOR_MODE_P (m)
+ && !COMPLEX_MODE_P (m)
+ && !indexed_only_p
+ && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m) == 8)
+ && !(m == DFmode && TARGET_UPPER_REGS_DF)
+ && !(m == SFmode && TARGET_UPPER_REGS_SF))
+ {
+ addr_mask |= RELOAD_REG_PRE_INCDEC;
+
+ /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
+ we don't allow PRE_MODIFY for some multi-register
+ operations. */
+ switch (m)
+ {
+ default:
+ addr_mask |= RELOAD_REG_PRE_MODIFY;
+ break;
+
+ case DImode:
+ if (TARGET_POWERPC64)
+ addr_mask |= RELOAD_REG_PRE_MODIFY;
+ break;
+
+ case DFmode:
+ case DDmode:
+ if (TARGET_DF_INSN)
+ addr_mask |= RELOAD_REG_PRE_MODIFY;
+ break;
+ }
+ }
+ }
+
+ /* GPR and FPR registers can do REG+OFFSET addressing, except
+ possibly for SDmode. */
+ if ((addr_mask != 0) && !indexed_only_p
+ && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR))
+ addr_mask |= RELOAD_REG_OFFSET;
+
+ reg_addr[m].addr_mask[rc] = addr_mask;
+ any_addr_mask |= addr_mask;
+ }
+
+ reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
+ }
+}
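As a worked illustration of what the loop above computes (an assumption about a typical configuration, not something the patch asserts): on a 64-bit server target with -mupdate in effect, DImode fits in a single GPR, so every scalar addressing form should end up enabled for the GPR class.

/* Sketch only -- not in the patch.  */
static void
example_check_dimode_gpr_mask (void)
{
  const addr_mask_type expected = (RELOAD_REG_VALID | RELOAD_REG_INDEXED
                                   | RELOAD_REG_OFFSET | RELOAD_REG_PRE_INCDEC
                                   | RELOAD_REG_PRE_MODIFY);
  gcc_checking_assert ((reg_addr[DImode].addr_mask[RELOAD_REG_GPR] & expected)
                       == expected);
}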
+
+
/* Initialize the various global tables that are based on register size. */
static void
rs6000_init_hard_regno_mode_ok (bool global_init_p)
{
- int r, m, c;
+ ssize_t r, m, c;
int align64;
int align32;
@@ -1987,20 +2439,54 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
+ rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
+ rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
+ rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
- /* Precalculate vector information, this must be set up before the
- rs6000_hard_regno_nregs_internal below. */
- for (m = 0; m < NUM_MACHINE_MODES; ++m)
+ /* Precalculate the mapping from register class to the simpler register
+ types. We don't need all of the register classes that are combinations of
+ different classes, just the simple ones that have constraint letters. */
+ for (c = 0; c < N_REG_CLASSES; c++)
+ reg_class_to_reg_type[c] = NO_REG_TYPE;
+
+ reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
+ reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
+ reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
+ reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
+ reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
+ reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
+ reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
+
+ if (TARGET_VSX)
{
- rs6000_vector_unit[m] = rs6000_vector_mem[m] = VECTOR_NONE;
- rs6000_vector_reload[m][0] = CODE_FOR_nothing;
- rs6000_vector_reload[m][1] = CODE_FOR_nothing;
+ reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
+ reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
}
+ else
+ {
+ reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
+ reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
+ }
+
+ /* Precalculate the valid memory formats as well as the vector information;
+ this must be set up before the rs6000_hard_regno_nregs_internal calls
+ below. */
+ gcc_assert ((int)VECTOR_NONE == 0);
+ memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
+ memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
+
+ gcc_assert ((int)CODE_FOR_nothing == 0);
+ memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
- for (c = 0; c < (int)(int)RS6000_CONSTRAINT_MAX; c++)
- rs6000_constraints[c] = NO_REGS;
+ gcc_assert ((int)NO_REGS == 0);
+ memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
/* The VSX hardware allows native alignment for vectors, but we control whether
the compiler believes it can use native alignment or still uses 128-bit
alignment. */
@@ -2062,13 +2548,19 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
}
}
- /* V2DImode, only allow under VSX, which can do V2DI insert/splat/extract.
- Altivec doesn't have 64-bit support. */
+ /* V2DImode: full arithmetic support depends on the ISA 2.07 vector
+ instructions. Allow it under VSX to do insert/splat/extract. Altivec
+ doesn't have 64-bit integer support. */
if (TARGET_VSX)
{
rs6000_vector_mem[V2DImode] = VECTOR_VSX;
- rs6000_vector_unit[V2DImode] = VECTOR_NONE;
+ rs6000_vector_unit[V2DImode]
+ = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
rs6000_vector_align[V2DImode] = align64;
+
+ rs6000_vector_mem[V1TImode] = VECTOR_VSX;
+ rs6000_vector_unit[V1TImode]
+ = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
+ rs6000_vector_align[V1TImode] = 128;
}
/* DFmode, see if we want to use the VSX unit. */
@@ -2076,14 +2568,48 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
{
rs6000_vector_unit[DFmode] = VECTOR_VSX;
rs6000_vector_mem[DFmode]
- = (TARGET_VSX_SCALAR_MEMORY ? VECTOR_VSX : VECTOR_NONE);
+ = (TARGET_UPPER_REGS_DF ? VECTOR_VSX : VECTOR_NONE);
rs6000_vector_align[DFmode] = align64;
}
+ /* Allow TImode in VSX registers and set the VSX memory macros. */
+ if (TARGET_VSX && TARGET_VSX_TIMODE)
+ {
+ rs6000_vector_mem[TImode] = VECTOR_VSX;
+ rs6000_vector_align[TImode] = align64;
+ }
+
/* TODO add SPE and paired floating point vector support. */
/* Register class constraints for the constraints that depend on compile
- switches. */
+ switches. When the VSX code was added, different constraints were added
+ based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
+ of the VSX registers are used. The register classes for scalar floating
+ point types are set based on whether we allow that type into the upper
+ (Altivec) registers. GCC has register classes to target the Altivec
+ registers for load/store operations, to select using a VSX memory
+ operation instead of the traditional floating point operation. The
+ constraints are:
+
+ d - Register class to use with traditional DFmode instructions.
+ f - Register class to use with traditional SFmode instructions.
+ v - Altivec register.
+ wa - Any VSX register.
+ wd - Preferred register class for V2DFmode.
+ wf - Preferred register class for V4SFmode.
+ wg - Float register for power6x move insns.
+ wl - Float register if we can do 32-bit signed int loads.
+ wm - VSX register for ISA 2.07 direct move operations.
+ wr - GPR if 64-bit mode is permitted.
+ ws - Register class to do ISA 2.06 DF operations.
+ wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
+ wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
+ wt - VSX register for TImode in VSX registers.
+ ww - Register class to do SF conversions in with VSX operations.
+ wx - Float register if we can do 32-bit int stores.
+ wy - Register class to do ISA 2.07 SF operations.
+ wz - Float register if we can do 32-bit unsigned int loads. */
+
if (TARGET_HARD_FLOAT && TARGET_FPRS)
rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS;
@@ -2092,63 +2618,163 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
if (TARGET_VSX)
{
- /* At present, we just use VSX_REGS, but we have different constraints
- based on the use, in case we want to fine tune the default register
- class used. wa = any VSX register, wf = register class to use for
- V4SF, wd = register class to use for V2DF, and ws = register classs to
- use for DF scalars. */
rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
- rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS;
rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS;
- rs6000_constraints[RS6000_CONSTRAINT_ws] = (TARGET_VSX_SCALAR_MEMORY
- ? VSX_REGS
- : FLOAT_REGS);
+ rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS;
+
+ if (TARGET_VSX_TIMODE)
+ rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS;
+
+ if (TARGET_UPPER_REGS_DF)
+ {
+ rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
+ rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
+ }
+ else
+ rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
}
+ /* Add conditional constraints based on various options, to allow us to
+ collapse multiple insn patterns. */
if (TARGET_ALTIVEC)
rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
- /* Set up the reload helper functions. */
+ if (TARGET_MFPGPR)
+ rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
+
+ if (TARGET_LFIWAX)
+ rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS;
+
+ if (TARGET_DIRECT_MOVE)
+ rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
+
+ if (TARGET_POWERPC64)
+ rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
+
+ if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF)
+ {
+ rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
+ rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
+ rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
+ }
+ else if (TARGET_P8_VECTOR)
+ {
+ rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
+ rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
+ }
+ else if (TARGET_VSX)
+ rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
+
+ if (TARGET_STFIWX)
+ rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS;
+
+ if (TARGET_LFIWZX)
+ rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS;
+
+ /* Set up the reload helper and direct move functions. */
if (TARGET_VSX || TARGET_ALTIVEC)
{
if (TARGET_64BIT)
{
- rs6000_vector_reload[V16QImode][0] = CODE_FOR_reload_v16qi_di_store;
- rs6000_vector_reload[V16QImode][1] = CODE_FOR_reload_v16qi_di_load;
- rs6000_vector_reload[V8HImode][0] = CODE_FOR_reload_v8hi_di_store;
- rs6000_vector_reload[V8HImode][1] = CODE_FOR_reload_v8hi_di_load;
- rs6000_vector_reload[V4SImode][0] = CODE_FOR_reload_v4si_di_store;
- rs6000_vector_reload[V4SImode][1] = CODE_FOR_reload_v4si_di_load;
- rs6000_vector_reload[V2DImode][0] = CODE_FOR_reload_v2di_di_store;
- rs6000_vector_reload[V2DImode][1] = CODE_FOR_reload_v2di_di_load;
- rs6000_vector_reload[V4SFmode][0] = CODE_FOR_reload_v4sf_di_store;
- rs6000_vector_reload[V4SFmode][1] = CODE_FOR_reload_v4sf_di_load;
- rs6000_vector_reload[V2DFmode][0] = CODE_FOR_reload_v2df_di_store;
- rs6000_vector_reload[V2DFmode][1] = CODE_FOR_reload_v2df_di_load;
- if (TARGET_VSX && TARGET_VSX_SCALAR_MEMORY)
+ reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
+ reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
+ reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
+ reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
+ reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
+ reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
+ reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
+ reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
+ reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
+ reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
+ reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
+ reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
+ reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
+ reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
+ if (TARGET_VSX && TARGET_UPPER_REGS_DF)
{
- rs6000_vector_reload[DFmode][0] = CODE_FOR_reload_df_di_store;
- rs6000_vector_reload[DFmode][1] = CODE_FOR_reload_df_di_load;
+ reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
+ reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
+ reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
+ reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
+ }
+ if (TARGET_P8_VECTOR)
+ {
+ reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
+ reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
+ reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
+ reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
+ }
+ if (TARGET_VSX_TIMODE)
+ {
+ reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
+ reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
+ }
+ if (TARGET_DIRECT_MOVE)
+ {
+ if (TARGET_POWERPC64)
+ {
+ reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
+ reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
+ reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
+ reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
+ reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
+ reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
+ reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
+ reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
+ reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
+
+ reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
+ reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
+ reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
+ reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
+ reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
+ reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
+ reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
+ reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
+ reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
+ }
+ else
+ {
+ reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
+ reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
+ reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
+ }
}
}
else
{
- rs6000_vector_reload[V16QImode][0] = CODE_FOR_reload_v16qi_si_store;
- rs6000_vector_reload[V16QImode][1] = CODE_FOR_reload_v16qi_si_load;
- rs6000_vector_reload[V8HImode][0] = CODE_FOR_reload_v8hi_si_store;
- rs6000_vector_reload[V8HImode][1] = CODE_FOR_reload_v8hi_si_load;
- rs6000_vector_reload[V4SImode][0] = CODE_FOR_reload_v4si_si_store;
- rs6000_vector_reload[V4SImode][1] = CODE_FOR_reload_v4si_si_load;
- rs6000_vector_reload[V2DImode][0] = CODE_FOR_reload_v2di_si_store;
- rs6000_vector_reload[V2DImode][1] = CODE_FOR_reload_v2di_si_load;
- rs6000_vector_reload[V4SFmode][0] = CODE_FOR_reload_v4sf_si_store;
- rs6000_vector_reload[V4SFmode][1] = CODE_FOR_reload_v4sf_si_load;
- rs6000_vector_reload[V2DFmode][0] = CODE_FOR_reload_v2df_si_store;
- rs6000_vector_reload[V2DFmode][1] = CODE_FOR_reload_v2df_si_load;
- if (TARGET_VSX && TARGET_VSX_SCALAR_MEMORY)
+ reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
+ reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
+ reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
+ reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
+ reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
+ reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
+ reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
+ reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
+ reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
+ reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
+ reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
+ reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
+ reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
+ reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
+ if (TARGET_VSX && TARGET_UPPER_REGS_DF)
+ {
+ reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
+ reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
+ reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
+ reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
+ }
+ if (TARGET_P8_VECTOR)
+ {
+ reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
+ reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
+ reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
+ reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
+ }
+ if (TARGET_VSX_TIMODE)
{
- rs6000_vector_reload[DFmode][0] = CODE_FOR_reload_df_si_store;
- rs6000_vector_reload[DFmode][1] = CODE_FOR_reload_df_si_load;
+ reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
+ reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
}
}
}
@@ -2267,6 +2893,11 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
}
}
+ /* Update the addr mask bits in reg_addr to help the secondary reload and
+ legitimate address support figure out the appropriate addressing to
+ use. */
+ rs6000_setup_reg_addr_masks ();
+
if (global_init_p || TARGET_DEBUG_TARGET)
{
if (TARGET_DEBUG_REG)
@@ -2378,7 +3009,12 @@ rs6000_builtin_mask_calculate (void)
| ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
| ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
| ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
- | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0));
+ | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
+ | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
+ | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
+ | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
+ | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
+ | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0));
}
/* Override command line options. Mostly we process the processor type and
@@ -2415,7 +3051,8 @@ rs6000_option_override_internal (bool global_init_p)
calculation works better for RTL loop invariant motion on targets
with enough (>= 32) registers. It is an expensive optimization.
So it is on only for peak performance. */
- if (optimize >= 3 && global_init_p)
+ if (optimize >= 3 && global_init_p
+ && !global_options_set.x_flag_ira_loop_pressure)
flag_ira_loop_pressure = 1;
/* Set the pointer size. */
@@ -2609,6 +3246,24 @@ rs6000_option_override_internal (bool global_init_p)
}
}
+ /* If little-endian, default to -mstrict-align on older processors.
+ Testing for htm matches power8 and later. */
+ if (!BYTES_BIG_ENDIAN
+ && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
+ rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
+
+ /* -maltivec={le,be} implies -maltivec. */
+ if (rs6000_altivec_element_order != 0)
+ rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
+
+ /* Disallow -maltivec=le in big endian mode for now. This is not
+ known to be useful for anyone. */
+ if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
+ {
+ warning (0, N_("-maltivec=le not allowed for big-endian targets"));
+ rs6000_altivec_element_order = 0;
+ }
+
/* Add some warnings for VSX. */
if (TARGET_VSX)
{
@@ -2619,15 +3274,13 @@ rs6000_option_override_internal (bool global_init_p)
if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
msg = N_("-mvsx requires hardware floating point");
else
+ {
rs6000_isa_flags &= ~ OPTION_MASK_VSX;
+ rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
+ }
}
else if (TARGET_PAIRED_FLOAT)
msg = N_("-mvsx and -mpaired are incompatible");
- /* The hardware will allow VSX and little endian, but until we make sure
- things like vector select, etc. work don't allow VSX on little endian
- systems at this point. */
- else if (!BYTES_BIG_ENDIAN)
- msg = N_("-mvsx used with little endian code");
else if (TARGET_AVOID_XFORM > 0)
msg = N_("-mvsx needs indexed addressing");
else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
@@ -2647,9 +3300,24 @@ rs6000_option_override_internal (bool global_init_p)
}
}
+ /* If hard-float/altivec/vsx were explicitly turned off then don't allow
+ the -mcpu setting to enable options that conflict. */
+ if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
+ && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
+ | OPTION_MASK_ALTIVEC
+ | OPTION_MASK_VSX)) != 0)
+ rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
+ | OPTION_MASK_DIRECT_MOVE)
+ & ~rs6000_isa_flags_explicit);
+
+ if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
+ rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
+
/* For the newer switches (vsx, dfp, etc.) set some of the older options,
unless the user explicitly used the -mno-<option> to disable the code. */
- if (TARGET_VSX)
+ if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
+ rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
+ else if (TARGET_VSX)
rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
else if (TARGET_POPCNTD)
rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
@@ -2664,6 +3332,99 @@ rs6000_option_override_internal (bool global_init_p)
else if (TARGET_ALTIVEC)
rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
+ if (TARGET_CRYPTO && !TARGET_ALTIVEC)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
+ error ("-mcrypto requires -maltivec");
+ rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
+ }
+
+ if (TARGET_DIRECT_MOVE && !TARGET_VSX)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
+ error ("-mdirect-move requires -mvsx");
+ rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
+ }
+
+ if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
+ error ("-mpower8-vector requires -maltivec");
+ rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
+ }
+
+ if (TARGET_P8_VECTOR && !TARGET_VSX)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
+ error ("-mpower8-vector requires -mvsx");
+ rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
+ }
+
+ if (TARGET_VSX_TIMODE && !TARGET_VSX)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
+ error ("-mvsx-timode requires -mvsx");
+ rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
+ }
+
+ if (TARGET_DFP && !TARGET_HARD_FLOAT)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
+ error ("-mhard-dfp requires -mhard-float");
+ rs6000_isa_flags &= ~OPTION_MASK_DFP;
+ }
+
+ /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
+ silently turn off quad memory mode. */
+ if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
+ {
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
+ warning (0, N_("-mquad-memory requires 64-bit mode"));
+
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
+ warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
+
+ rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
+ | OPTION_MASK_QUAD_MEMORY_ATOMIC);
+ }
+
+ /* Non-atomic quad memory loads/stores are disabled for little endian, since
+ the words are reversed, but atomic operations can still be done by
+ swapping the words. */
+ if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
+ {
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
+ warning (0, N_("-mquad-memory is not available in little endian mode"));
+
+ rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
+ }
+
+ /* Assume if the user asked for normal quad memory instructions, they want
+ the atomic versions as well, unless they explicitly told us not to use quad
+ word atomic instructions. */
+ if (TARGET_QUAD_MEMORY
+ && !TARGET_QUAD_MEMORY_ATOMIC
+ && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
+ rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
+
+ /* Enable power8 fusion if we are tuning for power8, even if we aren't
+ generating power8 instructions. */
+ if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
+ rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
+ & OPTION_MASK_P8_FUSION);
+
+ /* Power8 does not fuse sign extended loads with the addis. If we are
+ optimizing at high levels for speed, convert a sign extended load into a
+ zero extending load, and an explicit sign extension. */
+ if (TARGET_P8_FUSION
+ && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
+ && optimize_function_for_speed_p (cfun)
+ && optimize >= 3)
+ rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
+
+ if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
+ rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
+
/* E500mc does "better" if we inline more aggressively. Respect the
user's opinion, though. */
if (rs6000_block_move_inline_limit == 0
@@ -2787,9 +3548,13 @@ rs6000_option_override_internal (bool global_init_p)
/* Place FP constants in the constant pool instead of TOC
if section anchors enabled. */
- if (flag_section_anchors)
+ if (flag_section_anchors
+ && !global_options_set.x_TARGET_NO_FP_IN_TOC)
TARGET_NO_FP_IN_TOC = 1;
+ if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
+ rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
+
#ifdef SUBTARGET_OVERRIDE_OPTIONS
SUBTARGET_OVERRIDE_OPTIONS;
#endif
@@ -2800,6 +3565,9 @@ rs6000_option_override_internal (bool global_init_p)
SUB3TARGET_OVERRIDE_OPTIONS;
#endif
+ if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
+ rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
+
/* For the E500 family of cores, reset the single/double FP flags to let us
check that they remain constant across attributes or pragmas. Also,
clear a possible request for string instructions, not supported and which
@@ -2849,16 +3617,19 @@ rs6000_option_override_internal (bool global_init_p)
&& rs6000_cpu != PROCESSOR_POWER5
&& rs6000_cpu != PROCESSOR_POWER6
&& rs6000_cpu != PROCESSOR_POWER7
+ && rs6000_cpu != PROCESSOR_POWER8
&& rs6000_cpu != PROCESSOR_PPCA2
&& rs6000_cpu != PROCESSOR_CELL
&& rs6000_cpu != PROCESSOR_PPC476);
rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
|| rs6000_cpu == PROCESSOR_POWER5
- || rs6000_cpu == PROCESSOR_POWER7);
+ || rs6000_cpu == PROCESSOR_POWER7
+ || rs6000_cpu == PROCESSOR_POWER8);
rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
|| rs6000_cpu == PROCESSOR_POWER5
|| rs6000_cpu == PROCESSOR_POWER6
|| rs6000_cpu == PROCESSOR_POWER7
+ || rs6000_cpu == PROCESSOR_POWER8
|| rs6000_cpu == PROCESSOR_PPCE500MC
|| rs6000_cpu == PROCESSOR_PPCE500MC64
|| rs6000_cpu == PROCESSOR_PPCE5500
@@ -2988,7 +3759,7 @@ rs6000_option_override_internal (bool global_init_p)
/* We should always be splitting complex arguments, but we can't break
Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
- if (DEFAULT_ABI != ABI_AIX)
+ if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
targetm.calls.split_complex_arg = NULL;
}
@@ -3102,6 +3873,10 @@ rs6000_option_override_internal (bool global_init_p)
rs6000_cost = &power7_cost;
break;
+ case PROCESSOR_POWER8:
+ rs6000_cost = &power8_cost;
+ break;
+
case PROCESSOR_PPCA2:
rs6000_cost = &ppca2_cost;
break;
@@ -3274,7 +4049,8 @@ rs6000_loop_align (rtx label)
&& (rs6000_cpu == PROCESSOR_POWER4
|| rs6000_cpu == PROCESSOR_POWER5
|| rs6000_cpu == PROCESSOR_POWER6
- || rs6000_cpu == PROCESSOR_POWER7))
+ || rs6000_cpu == PROCESSOR_POWER7
+ || rs6000_cpu == PROCESSOR_POWER8))
return 5;
else
return align_loops_log;
@@ -3493,6 +4269,8 @@ rs6000_preferred_simd_mode (enum machine_mode mode)
{
case SFmode:
return V4SFmode;
+ case TImode:
+ return V1TImode;
case DImode:
return V2DImode;
case SImode:
@@ -3813,6 +4591,22 @@ rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
switch (fn)
{
+ case BUILT_IN_CLZIMAX:
+ case BUILT_IN_CLZLL:
+ case BUILT_IN_CLZL:
+ case BUILT_IN_CLZ:
+ if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
+ {
+ if (out_mode == QImode && out_n == 16)
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZB];
+ else if (out_mode == HImode && out_n == 8)
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZH];
+ else if (out_mode == SImode && out_n == 4)
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZW];
+ else if (out_mode == DImode && out_n == 2)
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZD];
+ }
+ break;
case BUILT_IN_COPYSIGN:
if (VECTOR_UNIT_VSX_P (V2DFmode)
&& out_mode == DFmode && out_n == 2
@@ -3828,6 +4622,22 @@ rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
break;
+ case BUILT_IN_POPCOUNTIMAX:
+ case BUILT_IN_POPCOUNTLL:
+ case BUILT_IN_POPCOUNTL:
+ case BUILT_IN_POPCOUNT:
+ if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
+ {
+ if (out_mode == QImode && out_n == 16)
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB];
+ else if (out_mode == HImode && out_n == 8)
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH];
+ else if (out_mode == SImode && out_n == 4)
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW];
+ else if (out_mode == DImode && out_n == 2)
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD];
+ }
+ break;
case BUILT_IN_SQRT:
if (VECTOR_UNIT_VSX_P (V2DFmode)
&& out_mode == DFmode && out_n == 2
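For context, a hedged illustration of the kind of user code the new CLZ/POPCOUNT entries target (not part of the patch): with -mcpu=power8 -O3 the vectorizer can now map __builtin_clz in a loop like this onto vclzw, and __builtin_popcount similarly onto vpopcntw.

/* Illustration only -- not part of the patch.  */
void
example_clz_loop (unsigned int *out, const unsigned int *in, int n)
{
  int i;
  for (i = 0; i < n; i++)
    out[i] = __builtin_clz (in[i]);   /* candidate for vclzw with -mcpu=power8 */
}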
@@ -4043,7 +4853,11 @@ rs6000_file_start (void)
putc ('\n', file);
}
- if (DEFAULT_ABI == ABI_AIX || (TARGET_ELF && flag_pic == 2))
+ if (DEFAULT_ABI == ABI_ELFv2)
+ fprintf (file, "\t.abiversion 2\n");
+
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2
+ || (TARGET_ELF && flag_pic == 2))
{
switch_to_section (toc_section);
switch_to_section (text_section);
@@ -4230,7 +5044,7 @@ vspltis_constant (rtx op, unsigned step, unsigned copies)
HOST_WIDE_INT splat_val;
HOST_WIDE_INT msb_val;
- if (mode == V2DImode || mode == V2DFmode)
+ if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
return false;
nunits = GET_MODE_NUNITS (mode);
@@ -4239,7 +5053,7 @@ vspltis_constant (rtx op, unsigned step, unsigned copies)
val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
splat_val = val;
- msb_val = val > 0 ? 0 : -1;
+ msb_val = val >= 0 ? 0 : -1;
/* Construct the value to be splatted, if possible. If not, return 0. */
for (i = 2; i <= copies; i *= 2)
@@ -4274,15 +5088,16 @@ vspltis_constant (rtx op, unsigned step, unsigned copies)
/* Check if VAL is present in every STEP-th element, and the
other elements are filled with its most significant bit. */
- for (i = 0; i < nunits - 1; ++i)
+ for (i = 1; i < nunits; ++i)
{
HOST_WIDE_INT desired_val;
- if (((BYTES_BIG_ENDIAN ? i + 1 : i) & (step - 1)) == 0)
+ unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
+ if ((i & (step - 1)) == 0)
desired_val = val;
else
desired_val = msb_val;
- if (desired_val != const_vector_elt_as_int (op, i))
+ if (desired_val != const_vector_elt_as_int (op, elt))
return false;
}
@@ -4308,7 +5123,7 @@ easy_altivec_constant (rtx op, enum machine_mode mode)
if (mode == V2DFmode)
return zero_constant (op, mode);
- if (mode == V2DImode)
+ else if (mode == V2DImode)
{
/* In case the compiler is built 32-bit, CONST_DOUBLE constants are not
easy. */
@@ -4326,6 +5141,10 @@ easy_altivec_constant (rtx op, enum machine_mode mode)
return false;
}
+ /* V1TImode is a special container for TImode. Ignore for now. */
+ else if (mode == V1TImode)
+ return false;
+
/* Start with a vspltisw. */
step = GET_MODE_NUNITS (mode) / 4;
copies = 1;
@@ -4407,7 +5226,7 @@ output_vec_const_move (rtx *operands)
if (zero_constant (vec, mode))
return "xxlxor %x0,%x0,%x0";
- if (mode == V2DImode
+ if ((mode == V2DImode || mode == V1TImode)
&& INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
&& INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
return "vspltisw %0,-1";
@@ -4698,9 +5517,12 @@ rs6000_expand_vector_init (rtx target, rtx vals)
{
rtx freg = gen_reg_rtx (V4SFmode);
rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
+ rtx cvt = ((TARGET_XSCVDPSPN)
+ ? gen_vsx_xscvdpspn_scalar (freg, sreg)
+ : gen_vsx_xscvdpsp_scalar (freg, sreg));
- emit_insn (gen_vsx_xscvdpsp_scalar (freg, sreg));
- emit_insn (gen_vsx_xxspltw_v4sf (target, freg, const0_rtx));
+ emit_insn (cvt);
+ emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, const0_rtx));
}
else
{
@@ -4790,6 +5612,13 @@ rs6000_expand_vector_set (rtx target, rtx val, int elt)
return;
}
+ /* Simplify setting single element vectors like V1TImode. */
+ if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
+ {
+ emit_move_insn (target, gen_lowpart (mode, val));
+ return;
+ }
+
/* Load single variable value. */
mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
@@ -4811,10 +5640,29 @@ rs6000_expand_vector_set (rtx target, rtx val, int elt)
XVECEXP (mask, 0, elt*width + i)
= GEN_INT (i + 0x10);
x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
+
+ if (BYTES_BIG_ENDIAN)
x = gen_rtx_UNSPEC (mode,
gen_rtvec (3, target, reg,
force_reg (V16QImode, x)),
UNSPEC_VPERM);
+ else
+ {
+ /* Invert selector. We prefer to generate VNAND on P8 so
+ that future fusion opportunities can kick in, but must
+ generate VNOR elsewhere. */
+ rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
+ rtx iorx = (TARGET_P8_VECTOR
+ ? gen_rtx_IOR (V16QImode, notx, notx)
+ : gen_rtx_AND (V16QImode, notx, notx));
+ rtx tmp = gen_reg_rtx (V16QImode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, iorx));
+
+ /* Permute with operands reversed and adjusted selector. */
+ x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
+ UNSPEC_VPERM);
+ }
+
emit_insn (gen_rtx_SET (VOIDmode, target, x));
}
@@ -4833,6 +5681,10 @@ rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
{
default:
break;
+ case V1TImode:
+ gcc_assert (elt == 0 && inner_mode == TImode);
+ emit_move_insn (target, gen_lowpart (TImode, vec));
+ break;
case V2DFmode:
emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt)));
return;
@@ -4938,7 +5790,7 @@ invalid_e500_subreg (rtx op, enum machine_mode mode)
purpose. */
if (GET_CODE (op) == SUBREG
&& (mode == SImode || mode == DImode || mode == TImode
- || mode == DDmode || mode == TDmode)
+ || mode == DDmode || mode == TDmode || mode == PTImode)
&& REG_P (SUBREG_REG (op))
&& (GET_MODE (SUBREG_REG (op)) == DFmode
|| GET_MODE (SUBREG_REG (op)) == TFmode))
@@ -4951,6 +5803,7 @@ invalid_e500_subreg (rtx op, enum machine_mode mode)
&& REG_P (SUBREG_REG (op))
&& (GET_MODE (SUBREG_REG (op)) == DImode
|| GET_MODE (SUBREG_REG (op)) == TImode
+ || GET_MODE (SUBREG_REG (op)) == PTImode
|| GET_MODE (SUBREG_REG (op)) == DDmode
|| GET_MODE (SUBREG_REG (op)) == TDmode))
return true;
@@ -4966,6 +5819,48 @@ invalid_e500_subreg (rtx op, enum machine_mode mode)
return false;
}
+/* Return the alignment of TYPE.  The existing alignment is ALIGN.  HOW
+ selects whether the alignment is ABI mandated, optional, or
+ both ABI mandated and optional. */
+
+unsigned int
+rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
+{
+ if (how != align_opt)
+ {
+ if (TREE_CODE (type) == VECTOR_TYPE)
+ {
+ if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
+ || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
+ {
+ if (align < 64)
+ align = 64;
+ }
+ else if (align < 128)
+ align = 128;
+ }
+ else if (TARGET_E500_DOUBLE
+ && TREE_CODE (type) == REAL_TYPE
+ && TYPE_MODE (type) == DFmode)
+ {
+ if (align < 64)
+ align = 64;
+ }
+ }
+
+ if (how != align_abi)
+ {
+ if (TREE_CODE (type) == ARRAY_TYPE
+ && TYPE_MODE (TREE_TYPE (type)) == QImode)
+ {
+ if (align < BITS_PER_WORD)
+ align = BITS_PER_WORD;
+ }
+ }
+
+ return align;
+}
+
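As a rough illustration of how this hook behaves (an editorial sketch, not part of the patch; the declarations below are made up):

typedef int v4si __attribute__ ((vector_size (16)));

v4si v;          /* Vector type: alignment raised to 128 bits whenever the
                    query includes the ABI-mandated part (how != align_opt). */
char buf[32];    /* QImode array: raised to BITS_PER_WORD only as an
                    optimization (how != align_abi), never as an ABI rule. */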
/* AIX increases natural record alignment to doubleword if the first
field is an FP double while the FP fields remain word aligned. */
@@ -5087,6 +5982,73 @@ gpr_or_gpr_p (rtx op0, rtx op1)
|| (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
}
+/* Return true if this is a direct move operation between GPR registers and
+ floating point/VSX registers. */
+
+bool
+direct_move_p (rtx op0, rtx op1)
+{
+ int regno0, regno1;
+
+ if (!REG_P (op0) || !REG_P (op1))
+ return false;
+
+ if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
+ return false;
+
+ regno0 = REGNO (op0);
+ regno1 = REGNO (op1);
+ if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
+ return false;
+
+ if (INT_REGNO_P (regno0))
+ return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
+
+ else if (INT_REGNO_P (regno1))
+ {
+ if (TARGET_MFPGPR && FP_REGNO_P (regno0))
+ return true;
+
+ else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
+ return true;
+ }
+
+ return false;
+}
+
+/* Return true if this is a load or store quad operation. This function does
+ not handle the atomic quad memory instructions. */
+
+bool
+quad_load_store_p (rtx op0, rtx op1)
+{
+ bool ret;
+
+ if (!TARGET_QUAD_MEMORY)
+ ret = false;
+
+ else if (REG_P (op0) && MEM_P (op1))
+ ret = (quad_int_reg_operand (op0, GET_MODE (op0))
+ && quad_memory_operand (op1, GET_MODE (op1))
+ && !reg_overlap_mentioned_p (op0, op1));
+
+ else if (MEM_P (op0) && REG_P (op1))
+ ret = (quad_memory_operand (op0, GET_MODE (op0))
+ && quad_int_reg_operand (op1, GET_MODE (op1)));
+
+ else
+ ret = false;
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "\n========== quad_load_store, return %s\n",
+ ret ? "true" : "false");
+ debug_rtx (gen_rtx_SET (VOIDmode, op0, op1));
+ }
+
+ return ret;
+}
+
/* Given an address, return a constant offset term if one exists. */
static rtx
@@ -5170,7 +6132,12 @@ reg_offset_addressing_ok_p (enum machine_mode mode)
case V4SImode:
case V2DFmode:
case V2DImode:
- /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. */
+ case V1TImode:
+ case TImode:
+ /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. While
+ TImode is not a vector mode, if we want to use the VSX registers to
+ move it around, we need to restrict ourselves to reg+reg
+ addressing. */
if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
return false;
break;
@@ -5184,6 +6151,13 @@ reg_offset_addressing_ok_p (enum machine_mode mode)
return false;
break;
+ case SDmode:
+ /* If we can do direct load/stores of SDmode, restrict it to reg+reg
+ addressing for the LFIWZX and STFIWX instructions. */
+ if (TARGET_NO_SDMODE_STACK)
+ return false;
+ break;
+
default:
break;
}
@@ -5211,59 +6185,56 @@ virtual_stack_registers_memory_p (rtx op)
&& regnum <= LAST_VIRTUAL_POINTER_REGISTER);
}
-/* Return true if memory accesses to OP are known to never straddle
- a 32k boundary. */
+/* Return true if a MODE-sized memory access to OP plus OFFSET
+ is known not to straddle a 32k boundary. */
static bool
offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
enum machine_mode mode)
{
tree decl, type;
- unsigned HOST_WIDE_INT dsize, dalign;
+ unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
if (GET_CODE (op) != SYMBOL_REF)
return false;
+ dsize = GET_MODE_SIZE (mode);
decl = SYMBOL_REF_DECL (op);
if (!decl)
{
- if (GET_MODE_SIZE (mode) == 0)
+ if (dsize == 0)
return false;
/* -fsection-anchors loses the original SYMBOL_REF_DECL when
replacing memory addresses with an anchor plus offset. We
could find the decl by rummaging around in the block->objects
VEC for the given offset but that seems like too much work. */
- dalign = 1;
+ dalign = BITS_PER_UNIT;
if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
&& SYMBOL_REF_ANCHOR_P (op)
&& SYMBOL_REF_BLOCK (op) != NULL)
{
struct object_block *block = SYMBOL_REF_BLOCK (op);
- HOST_WIDE_INT lsb, mask;
- /* Given the alignment of the block.. */
dalign = block->alignment;
- mask = dalign / BITS_PER_UNIT - 1;
-
- /* ..and the combined offset of the anchor and any offset
- to this block object.. */
offset += SYMBOL_REF_BLOCK_OFFSET (op);
- lsb = offset & -offset;
+ }
+ else if (CONSTANT_POOL_ADDRESS_P (op))
+ {
+ /* It would be nice to have get_pool_align ()... */
+ enum machine_mode cmode = get_pool_mode (op);
- /* ..find how many bits of the alignment we know for the
- object. */
- mask &= lsb - 1;
- dalign = mask + 1;
+ dalign = GET_MODE_ALIGNMENT (cmode);
}
- return dalign >= GET_MODE_SIZE (mode);
}
-
- if (DECL_P (decl))
+ else if (DECL_P (decl))
{
- if (TREE_CODE (decl) == FUNCTION_DECL)
- return true;
+ dalign = DECL_ALIGN (decl);
+ if (dsize == 0)
+ {
+ /* Allow BLKmode when the entire object is known not to
+ cross a 32k boundary. */
if (!DECL_SIZE_UNIT (decl))
return false;
@@ -5274,12 +6245,22 @@ offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
if (dsize > 32768)
return false;
- dalign = DECL_ALIGN_UNIT (decl);
- return dalign >= dsize;
+ return dalign / BITS_PER_UNIT >= dsize;
}
-
+ }
+ else
+ {
type = TREE_TYPE (decl);
+ dalign = TYPE_ALIGN (type);
+ if (CONSTANT_CLASS_P (decl))
+ dalign = CONSTANT_ALIGNMENT (decl, dalign);
+ else
+ dalign = DATA_ALIGNMENT (decl, dalign);
+
+ if (dsize == 0)
+ {
+ /* BLKmode, check the entire object. */
if (TREE_CODE (decl) == STRING_CST)
dsize = TREE_STRING_LENGTH (decl);
else if (TYPE_SIZE_UNIT (type)
@@ -5290,12 +6271,16 @@ offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
if (dsize > 32768)
return false;
- dalign = TYPE_ALIGN (type);
- if (CONSTANT_CLASS_P (decl))
- dalign = CONSTANT_ALIGNMENT (decl, dalign);
- else
- dalign = DATA_ALIGNMENT (decl, dalign);
- dalign /= BITS_PER_UNIT;
+ return dalign / BITS_PER_UNIT >= dsize;
+ }
+ }
+
+ /* Find how many bits of the alignment we know for this access. */
+ mask = dalign / BITS_PER_UNIT - 1;
+ lsb = offset & -offset;
+ mask &= lsb - 1;
+ dalign = mask + 1;
+
return dalign >= dsize;
}
@@ -5387,7 +6372,7 @@ rs6000_legitimate_offset_address_p (enum machine_mode mode, rtx x,
return false;
if (!reg_offset_addressing_ok_p (mode))
return virtual_stack_registers_memory_p (x);
- if (legitimate_constant_pool_address_p (x, mode, strict))
+ if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
return true;
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
return false;
@@ -5416,7 +6401,7 @@ rs6000_legitimate_offset_address_p (enum machine_mode mode, rtx x,
/* If we are using VSX scalar loads, restrict ourselves to reg+reg
addressing. */
- if (mode == DFmode && VECTOR_MEM_VSX_P (DFmode))
+ if (VECTOR_MEM_VSX_P (mode))
return false;
if (!worst_case)
@@ -5428,12 +6413,14 @@ rs6000_legitimate_offset_address_p (enum machine_mode mode, rtx x,
break;
case TFmode:
- case TDmode:
- case TImode:
if (TARGET_E500_DOUBLE)
return (SPE_CONST_OFFSET_OK (offset)
&& SPE_CONST_OFFSET_OK (offset + 8));
+ /* fall through */
+ case TDmode:
+ case TImode:
+ case PTImode:
extra = 8;
if (!worst_case)
break;
@@ -5526,9 +6513,21 @@ legitimate_lo_sum_address_p (enum machine_mode mode, rtx x, int strict)
if (TARGET_ELF || TARGET_MACHO)
{
- if (DEFAULT_ABI != ABI_AIX && DEFAULT_ABI != ABI_DARWIN && flag_pic)
+ bool large_toc_ok;
+
+ if (DEFAULT_ABI == ABI_V4 && flag_pic)
return false;
- if (TARGET_TOC)
+ /* LRA does not use LEGITIMIZE_RELOAD_ADDRESS, since that hook
+ usually calls push_reload from reload pass code.
+ LEGITIMIZE_RELOAD_ADDRESS recognizes some LO_SUM addresses as
+ valid although this function says the opposite. In most cases
+ LRA can generate correct code for such address reloads through
+ other transformations; it cannot handle only some LO_SUM cases.
+ So add code here, analogous to the LO_SUM handling in
+ rs6000_legitimize_reload_address, saying that some addresses
+ are still valid. */
+ large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
+ && small_toc_ref (x, VOIDmode));
+ if (TARGET_TOC && ! large_toc_ok)
return false;
if (GET_MODE_NUNITS (mode) != 1)
return false;
@@ -5538,7 +6537,7 @@ legitimate_lo_sum_address_p (enum machine_mode mode, rtx x, int strict)
&& (mode == DFmode || mode == DDmode)))
return false;
- return CONSTANT_P (x);
+ return CONSTANT_P (x) || large_toc_ok;
}
return false;
@@ -5582,8 +6581,11 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
return force_reg (Pmode, XEXP (x, 0));
+ /* For TImode with load/store quad, restrict addresses to just a single
+ pointer, so it works with both GPRs and VSX registers. */
/* Make sure both operands are registers. */
- else if (GET_CODE (x) == PLUS)
+ else if (GET_CODE (x) == PLUS
+ && (mode != TImode || !TARGET_QUAD_MEMORY))
return gen_rtx_PLUS (Pmode,
force_reg (Pmode, XEXP (x, 0)),
force_reg (Pmode, XEXP (x, 1)));
@@ -5603,11 +6605,12 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
case TFmode:
case TDmode:
case TImode:
+ case PTImode:
/* As in legitimate_offset_address_p we do not assume
worst-case. The mode here is just a hint as to the registers
used. A TImode is usually in gprs, but may actually be in
fprs. Leave worst-case scenario for reload to handle via
- insn constraints. */
+ insn constraints. PTImode is only GPRs. */
extra = 8;
break;
default:
@@ -6099,10 +7102,13 @@ rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
1, const0_rtx, Pmode);
r3 = gen_rtx_REG (Pmode, 3);
- if (DEFAULT_ABI == ABI_AIX && TARGET_64BIT)
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
+ {
+ if (TARGET_64BIT)
insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
- else if (DEFAULT_ABI == ABI_AIX && !TARGET_64BIT)
+ else
insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
+ }
else if (DEFAULT_ABI == ABI_V4)
insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
else
@@ -6121,10 +7127,13 @@ rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
1, const0_rtx, Pmode);
r3 = gen_rtx_REG (Pmode, 3);
- if (DEFAULT_ABI == ABI_AIX && TARGET_64BIT)
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
+ {
+ if (TARGET_64BIT)
insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
- else if (DEFAULT_ABI == ABI_AIX && !TARGET_64BIT)
+ else
insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
+ }
else if (DEFAULT_ABI == ABI_V4)
insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
else
@@ -6239,7 +7248,6 @@ use_toc_relative_ref (rtx sym)
&& ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
get_pool_mode (sym)))
|| (TARGET_CMODEL == CMODEL_MEDIUM
- && !CONSTANT_POOL_ADDRESS_P (sym)
&& SYMBOL_REF_LOCAL_P (sym)));
}
@@ -6338,7 +7346,7 @@ rs6000_legitimize_reload_address (rtx x, enum machine_mode mode,
&& !(TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
|| mode == DDmode || mode == TDmode
|| mode == DImode))
- && VECTOR_MEM_NONE_P (mode))
+ && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
{
HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
@@ -6369,7 +7377,7 @@ rs6000_legitimize_reload_address (rtx x, enum machine_mode mode,
if (GET_CODE (x) == SYMBOL_REF
&& reg_offset_p
- && VECTOR_MEM_NONE_P (mode)
+ && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
&& !SPE_VECTOR_MODE (mode)
#if TARGET_MACHO
&& DEFAULT_ABI == ABI_DARWIN
@@ -6395,6 +7403,8 @@ rs6000_legitimize_reload_address (rtx x, enum machine_mode mode,
mem is sufficiently aligned. */
&& mode != TFmode
&& mode != TDmode
+ && (mode != TImode || !TARGET_VSX_TIMODE)
+ && mode != PTImode
&& (mode != DImode || TARGET_POWERPC64)
&& ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
|| (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
@@ -6515,15 +7525,9 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict)
return 0;
if (legitimate_indirect_address_p (x, reg_ok_strict))
return 1;
- if ((GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
- && !VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
- && !SPE_VECTOR_MODE (mode)
- && mode != TFmode
- && mode != TDmode
- /* Restrict addressing for DI because of our SUBREG hackery. */
- && !(TARGET_E500_DOUBLE
- && (mode == DFmode || mode == DDmode || mode == DImode))
- && TARGET_UPDATE
+ if (TARGET_UPDATE
+ && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
+ && mode_supports_pre_incdec_p (mode)
&& legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
return 1;
if (virtual_stack_registers_memory_p (x))
@@ -6531,8 +7535,16 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict)
if (reg_offset_p && legitimate_small_data_p (mode, x))
return 1;
if (reg_offset_p
- && legitimate_constant_pool_address_p (x, mode, reg_ok_strict))
+ && legitimate_constant_pool_address_p (x, mode,
+ reg_ok_strict || lra_in_progress))
return 1;
+ /* For TImode, if we have load/store quad and TImode in VSX registers, only
+ allow register indirect addresses. This will allow the values to go in
+ either GPRs or VSX registers without reloading. The vector types would
+ tend to go into VSX registers, so we allow REG+REG, while TImode seems
+ somewhat split, in that some uses are GPR based, and some VSX based. */
+ if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
+ return 0;
/* If not REG_OK_STRICT (before reload) let pass any stack offset. */
if (! reg_ok_strict
&& reg_offset_p
@@ -6544,31 +7556,20 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict)
return 1;
if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
return 1;
- if (mode != TImode
- && mode != TFmode
+ if (mode != TFmode
&& mode != TDmode
&& ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
|| TARGET_POWERPC64
|| (mode != DFmode && mode != DDmode)
|| (TARGET_E500_DOUBLE && mode != DDmode))
&& (TARGET_POWERPC64 || mode != DImode)
+ && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
+ && mode != PTImode
&& !avoiding_indexed_address_p (mode)
&& legitimate_indexed_address_p (x, reg_ok_strict))
return 1;
- if (GET_CODE (x) == PRE_MODIFY
- && mode != TImode
- && mode != TFmode
- && mode != TDmode
- && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
- || TARGET_POWERPC64
- || ((mode != DFmode && mode != DDmode) || TARGET_E500_DOUBLE))
- && (TARGET_POWERPC64 || mode != DImode)
- && !VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
- && !SPE_VECTOR_MODE (mode)
- /* Restrict addressing for DI because of our SUBREG hackery. */
- && !(TARGET_E500_DOUBLE
- && (mode == DFmode || mode == DDmode || mode == DImode))
- && TARGET_UPDATE
+ if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
+ && mode_supports_pre_modify_p (mode)
&& legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
&& (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
reg_ok_strict, false)
@@ -6589,10 +7590,13 @@ rs6000_debug_legitimate_address_p (enum machine_mode mode, rtx x,
bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
fprintf (stderr,
"\nrs6000_legitimate_address_p: return = %s, mode = %s, "
- "strict = %d, code = %s\n",
+ "strict = %d, reload = %s, code = %s\n",
ret ? "true" : "false",
GET_MODE_NAME (mode),
reg_ok_strict,
+ (reload_completed
+ ? "after"
+ : (reload_in_progress ? "progress" : "before")),
GET_RTX_NAME (GET_CODE (x)));
debug_rtx (x);
@@ -6758,7 +7762,7 @@ rs6000_conditional_register_usage (void)
/* The TOC register is not killed across calls in a way that is
visible to the compiler. */
- if (DEFAULT_ABI == ABI_AIX)
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
call_really_used_regs[2] = 0;
if (DEFAULT_ABI == ABI_V4
@@ -6819,6 +7823,7 @@ rs6000_conditional_register_usage (void)
}
}
+
/* Try to output insns to set TARGET equal to the constant C if it can
be done in less than N insns. Do all computations in MODE.
Returns the place where the output has been placed if it can be
@@ -7006,7 +8011,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c1, HOST_WIDE_INT c2)
}
/* Helper for the following. Get rid of [r+r] memory refs
- in cases where it won't work (TImode, TFmode, TDmode). */
+ in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
static void
rs6000_eliminate_indexed_memrefs (rtx operands[2])
@@ -7031,6 +8036,131 @@ rs6000_eliminate_indexed_memrefs (rtx operands[2])
copy_addr_to_reg (XEXP (operands[1], 0)));
}
+/* Generate a vector of constants to permute MODE for a little-endian
+ storage operation by swapping the two halves of a vector. */
+static rtvec
+rs6000_const_vec (enum machine_mode mode)
+{
+ int i, subparts;
+ rtvec v;
+
+ switch (mode)
+ {
+ case V1TImode:
+ subparts = 1;
+ break;
+ case V2DFmode:
+ case V2DImode:
+ subparts = 2;
+ break;
+ case V4SFmode:
+ case V4SImode:
+ subparts = 4;
+ break;
+ case V8HImode:
+ subparts = 8;
+ break;
+ case V16QImode:
+ subparts = 16;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ v = rtvec_alloc (subparts);
+
+ for (i = 0; i < subparts / 2; ++i)
+ RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
+ for (i = subparts / 2; i < subparts; ++i)
+ RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
+
+ return v;
+}
+
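For reference (an editorial note, not part of the patch), the selector built by rs6000_const_vec simply exchanges the two doublewords of the vector:

/* V2DImode  : { 1, 0 }
   V4SImode  : { 2, 3, 0, 1 }
   V16QImode : { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 }
   which matches the element reordering performed by lxvd2x/stxvd2x on a
   little-endian memory image.  */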
+/* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
+ for a VSX load or store operation. */
+rtx
+rs6000_gen_le_vsx_permute (rtx source, enum machine_mode mode)
+{
+ rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
+ return gen_rtx_VEC_SELECT (mode, source, par);
+}
+
+/* Emit a little-endian load from vector memory location SOURCE to VSX
+ register DEST in mode MODE. The load is done with two permuting
+ insns that represent an lxvd2x and an xxpermdi. */
+void
+rs6000_emit_le_vsx_load (rtx dest, rtx source, enum machine_mode mode)
+{
+ rtx tmp, permute_mem, permute_reg;
+
+ /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
+ V1TImode). */
+ if (mode == TImode || mode == V1TImode)
+ {
+ mode = V2DImode;
+ dest = gen_lowpart (V2DImode, dest);
+ source = adjust_address (source, V2DImode, 0);
+ }
+
+ tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
+ permute_mem = rs6000_gen_le_vsx_permute (source, mode);
+ permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_mem));
+ emit_insn (gen_rtx_SET (VOIDmode, dest, permute_reg));
+}
+
+/* Emit a little-endian store to vector memory location DEST from VSX
+ register SOURCE in mode MODE. The store is done with two permuting
+ insns that represent an xxpermdi and an stxvd2x. */
+void
+rs6000_emit_le_vsx_store (rtx dest, rtx source, enum machine_mode mode)
+{
+ rtx tmp, permute_src, permute_tmp;
+
+ /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
+ V1TImode). */
+ if (mode == TImode || mode == V1TImode)
+ {
+ mode = V2DImode;
+ dest = adjust_address (dest, V2DImode, 0);
+ source = gen_lowpart (V2DImode, source);
+ }
+
+ tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
+ permute_src = rs6000_gen_le_vsx_permute (source, mode);
+ permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_src));
+ emit_insn (gen_rtx_SET (VOIDmode, dest, permute_tmp));
+}
+
+/* Emit a sequence representing a little-endian VSX load or store,
+ moving data from SOURCE to DEST in mode MODE. This is done
+ separately from rs6000_emit_move to ensure it is called only
+ during expand. LE VSX loads and stores introduced later are
+ handled with a split. The expand-time RTL generation allows
+ us to optimize away redundant pairs of register-permutes. */
+void
+rs6000_emit_le_vsx_move (rtx dest, rtx source, enum machine_mode mode)
+{
+ gcc_assert (!BYTES_BIG_ENDIAN
+ && VECTOR_MEM_VSX_P (mode)
+ && !gpr_or_gpr_p (dest, source)
+ && (MEM_P (source) ^ MEM_P (dest)));
+
+ if (MEM_P (source))
+ {
+ gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
+ rs6000_emit_le_vsx_load (dest, source, mode);
+ }
+ else
+ {
+ if (!REG_P (source))
+ source = force_reg (mode, source);
+ rs6000_emit_le_vsx_store (dest, source, mode);
+ }
+}
+
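An editorial note (not part of the patch) on why these sequences are emitted at expand time: both the load and the store use the same doubleword-swapping VEC_SELECT, so a load/store pair expands to

/* load : tmp = swap (mem);  dest = swap (tmp);
   store: tmp = swap (src);  mem  = swap (tmp);
   and because swap (swap (x)) == x, later RTL passes can cancel adjacent
   permutes, which is the redundancy elimination the comment above
   rs6000_emit_le_vsx_move refers to.  */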
/* Emit a move from SOURCE to DEST in mode MODE. */
void
rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
@@ -7149,8 +8279,71 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
cfun->machine->sdmode_stack_slot =
eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
+
+ if (lra_in_progress
+ && mode == SDmode
+ && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
+ && reg_preferred_class (REGNO (operands[0])) == NO_REGS
+ && (REG_P (operands[1])
+ || (GET_CODE (operands[1]) == SUBREG
+ && REG_P (SUBREG_REG (operands[1])))))
+ {
+ int regno = REGNO (GET_CODE (operands[1]) == SUBREG
+ ? SUBREG_REG (operands[1]) : operands[1]);
+ enum reg_class cl;
+
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ {
+ cl = reg_preferred_class (regno);
+ gcc_assert (cl != NO_REGS);
+ regno = ira_class_hard_regs[cl][0];
+ }
+ if (FP_REGNO_P (regno))
+ {
+ if (GET_MODE (operands[0]) != DDmode)
+ operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
+ emit_insn (gen_movsd_store (operands[0], operands[1]));
+ }
+ else if (INT_REGNO_P (regno))
+ emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
+ else
+ gcc_unreachable ();
+ return;
+ }
+ if (lra_in_progress
+ && mode == SDmode
+ && (REG_P (operands[0])
+ || (GET_CODE (operands[0]) == SUBREG
+ && REG_P (SUBREG_REG (operands[0]))))
+ && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
+ && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
+ {
+ int regno = REGNO (GET_CODE (operands[0]) == SUBREG
+ ? SUBREG_REG (operands[0]) : operands[0]);
+ enum reg_class cl;
+
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ {
+ cl = reg_preferred_class (regno);
+ gcc_assert (cl != NO_REGS);
+ regno = ira_class_hard_regs[cl][0];
+ }
+ if (FP_REGNO_P (regno))
+ {
+ if (GET_MODE (operands[1]) != DDmode)
+ operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
+ emit_insn (gen_movsd_load (operands[0], operands[1]));
+ }
+ else if (INT_REGNO_P (regno))
+ emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
+ else
+ gcc_unreachable ();
+ return;
+ }
+
if (reload_in_progress
&& mode == SDmode
+ && cfun->machine->sdmode_stack_slot != NULL_RTX
&& MEM_P (operands[0])
&& rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
&& REG_P (operands[1]))
@@ -7163,7 +8356,9 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
}
else if (INT_REGNO_P (REGNO (operands[1])))
{
- rtx mem = adjust_address_nv (operands[0], mode, 4);
+ rtx mem = operands[0];
+ if (BYTES_BIG_ENDIAN)
+ mem = adjust_address_nv (mem, mode, 4);
mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
emit_insn (gen_movsd_hardfloat (mem, operands[1]));
}
@@ -7175,6 +8370,7 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
&& mode == SDmode
&& REG_P (operands[0])
&& MEM_P (operands[1])
+ && cfun->machine->sdmode_stack_slot != NULL_RTX
&& rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
{
if (FP_REGNO_P (REGNO (operands[0])))
@@ -7185,7 +8381,9 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
}
else if (INT_REGNO_P (REGNO (operands[0])))
{
- rtx mem = adjust_address_nv (operands[1], mode, 4);
+ rtx mem = operands[1];
+ if (BYTES_BIG_ENDIAN)
+ mem = adjust_address_nv (mem, mode, 4);
mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
emit_insn (gen_movsd_hardfloat (operands[0], mem));
}
@@ -7230,6 +8428,7 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
case V1DImode:
case V2DFmode:
case V2DImode:
+ case V1TImode:
if (CONSTANT_P (operands[1])
&& !easy_vector_constant (operands[1], mode))
operands[1] = force_const_mem (mode, operands[1]);
@@ -7388,6 +8587,11 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
break;
case TImode:
+ if (!VECTOR_MEM_VSX_P (TImode))
+ rs6000_eliminate_indexed_memrefs (operands);
+ break;
+
+ case PTImode:
rs6000_eliminate_indexed_memrefs (operands);
break;
@@ -7426,18 +8630,231 @@ rs6000_member_type_forces_blk (const_tree field, enum machine_mode mode)
}
/* Nonzero if we can use a floating-point register to pass this arg. */
-#define USE_FP_FOR_ARG_P(CUM,MODE,TYPE) \
+#define USE_FP_FOR_ARG_P(CUM,MODE) \
(SCALAR_FLOAT_MODE_P (MODE) \
&& (CUM)->fregno <= FP_ARG_MAX_REG \
&& TARGET_HARD_FLOAT && TARGET_FPRS)
/* Nonzero if we can use an AltiVec register to pass this arg. */
-#define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,TYPE,NAMED) \
+#define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
(ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
&& (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
&& TARGET_ALTIVEC_ABI \
&& (NAMED))
+/* Walk down the type tree of TYPE counting consecutive base elements.
+ If *MODEP is VOIDmode, then set it to the first valid floating point
+ or vector type. If a non-floating point or vector type is found, or
+ if a floating point or vector type that doesn't match a non-VOIDmode
+ *MODEP is found, then return -1, otherwise return the count in the
+ sub-tree. */
+
+static int
+rs6000_aggregate_candidate (const_tree type, enum machine_mode *modep)
+{
+ enum machine_mode mode;
+ HOST_WIDE_INT size;
+
+ switch (TREE_CODE (type))
+ {
+ case REAL_TYPE:
+ mode = TYPE_MODE (type);
+ if (!SCALAR_FLOAT_MODE_P (mode))
+ return -1;
+
+ if (*modep == VOIDmode)
+ *modep = mode;
+
+ if (*modep == mode)
+ return 1;
+
+ break;
+
+ case COMPLEX_TYPE:
+ mode = TYPE_MODE (TREE_TYPE (type));
+ if (!SCALAR_FLOAT_MODE_P (mode))
+ return -1;
+
+ if (*modep == VOIDmode)
+ *modep = mode;
+
+ if (*modep == mode)
+ return 2;
+
+ break;
+
+ case VECTOR_TYPE:
+ if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
+ return -1;
+
+ /* Use V4SImode as representative of all 128-bit vector types. */
+ size = int_size_in_bytes (type);
+ switch (size)
+ {
+ case 16:
+ mode = V4SImode;
+ break;
+ default:
+ return -1;
+ }
+
+ if (*modep == VOIDmode)
+ *modep = mode;
+
+ /* Vector modes are considered to be opaque: two vectors are
+ equivalent for the purposes of being homogeneous aggregates
+ if they are the same size. */
+ if (*modep == mode)
+ return 1;
+
+ break;
+
+ case ARRAY_TYPE:
+ {
+ int count;
+ tree index = TYPE_DOMAIN (type);
+
+ /* Can't handle incomplete types. */
+ if (!COMPLETE_TYPE_P (type))
+ return -1;
+
+ count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
+ if (count == -1
+ || !index
+ || !TYPE_MAX_VALUE (index)
+ || !host_integerp (TYPE_MAX_VALUE (index), 1)
+ || !TYPE_MIN_VALUE (index)
+ || !host_integerp (TYPE_MIN_VALUE (index), 1)
+ || count < 0)
+ return -1;
+
+ count *= (1 + tree_low_cst (TYPE_MAX_VALUE (index), 1)
+ - tree_low_cst (TYPE_MIN_VALUE (index), 1));
+
+ /* There must be no padding. */
+ if (!host_integerp (TYPE_SIZE (type), 1)
+ || (tree_low_cst (TYPE_SIZE (type), 1)
+ != count * GET_MODE_BITSIZE (*modep)))
+ return -1;
+
+ return count;
+ }
+
+ case RECORD_TYPE:
+ {
+ int count = 0;
+ int sub_count;
+ tree field;
+
+ /* Can't handle incomplete types. */
+ if (!COMPLETE_TYPE_P (type))
+ return -1;
+
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ {
+ if (TREE_CODE (field) != FIELD_DECL)
+ continue;
+
+ sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
+ if (sub_count < 0)
+ return -1;
+ count += sub_count;
+ }
+
+ /* There must be no padding. */
+ if (!host_integerp (TYPE_SIZE (type), 1)
+ || (tree_low_cst (TYPE_SIZE (type), 1)
+ != count * GET_MODE_BITSIZE (*modep)))
+ return -1;
+
+ return count;
+ }
+
+ case UNION_TYPE:
+ case QUAL_UNION_TYPE:
+ {
+ /* These aren't very interesting except in a degenerate case. */
+ int count = 0;
+ int sub_count;
+ tree field;
+
+ /* Can't handle incomplete types. */
+ if (!COMPLETE_TYPE_P (type))
+ return -1;
+
+ for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+ {
+ if (TREE_CODE (field) != FIELD_DECL)
+ continue;
+
+ sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
+ if (sub_count < 0)
+ return -1;
+ count = count > sub_count ? count : sub_count;
+ }
+
+ /* There must be no padding. */
+ if (!host_integerp (TYPE_SIZE (type), 1)
+ || (tree_low_cst (TYPE_SIZE (type), 1)
+ != count * GET_MODE_BITSIZE (*modep)))
+ return -1;
+
+ return count;
+ }
+
+ default:
+ break;
+ }
+
+ return -1;
+}
+
+/* If an argument, whose type is described by TYPE and MODE, is a homogeneous
+ float or vector aggregate that shall be passed in FP/vector registers
+ according to the ELFv2 ABI, return the homogeneous element mode in
+ *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
+
+ Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
+
+static bool
+rs6000_discover_homogeneous_aggregate (enum machine_mode mode, const_tree type,
+ enum machine_mode *elt_mode,
+ int *n_elts)
+{
+ /* Note that we do not accept complex types at the top level as
+ homogeneous aggregates; these types are handled via the
+ targetm.calls.split_complex_arg mechanism. Complex types
+ can be elements of homogeneous aggregates, however. */
+ if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
+ {
+ enum machine_mode field_mode = VOIDmode;
+ int field_count = rs6000_aggregate_candidate (type, &field_mode);
+
+ if (field_count > 0)
+ {
+ int n_regs = (SCALAR_FLOAT_MODE_P (field_mode)
+ ? (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
+
+ /* The ELFv2 ABI allows homogeneous aggregates to occupy
+ up to AGGR_ARG_NUM_REG registers. */
+ if (field_count * n_regs <= AGGR_ARG_NUM_REG)
+ {
+ if (elt_mode)
+ *elt_mode = field_mode;
+ if (n_elts)
+ *n_elts = field_count;
+ return true;
+ }
+ }
+ }
+
+ if (elt_mode)
+ *elt_mode = mode;
+ if (n_elts)
+ *n_elts = 1;
+ return false;
+}
+
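A hypothetical example (editorial, not part of the patch) of what rs6000_aggregate_candidate accepts and rejects under the ELFv2 homogeneous-aggregate rules:

struct hfa               /* field_mode == DFmode, field_count == 3:           */
{                        /* passed and returned in three FPRs under ELFv2.    */
  double x, y, z;
};

struct hva               /* field_mode == V4SImode, field_count == 2:         */
{                        /* passed in two vector registers (needs -maltivec). */
  __attribute__ ((vector_size (16))) int a, b;
};

struct mixed             /* double + int: the candidate walk returns -1,      */
{                        /* so the normal aggregate rules apply instead.      */
  double d;
  int i;
};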
/* Return a nonzero value to say to return the function value in
memory, just as large structures are always returned. TYPE will be
the data type of the value, and FNTYPE will be the type of the
@@ -7490,6 +8907,16 @@ rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
/* Otherwise fall through to more conventional ABI rules. */
}
+ /* The ELFv2 ABI returns homogeneous FP/vector aggregates in registers. */
+ if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
+ NULL, NULL))
+ return false;
+
+ /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
+ if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
+ && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
+ return false;
+
if (AGGREGATE_TYPE_P (type)
&& (aix_struct_return
|| (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
@@ -7521,6 +8948,19 @@ rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
return false;
}
+/* Specify whether values returned in registers should be at the most
+ significant end of a register. We want aggregates returned by
+ value to match the way aggregates are passed to functions. */
+
+static bool
+rs6000_return_in_msb (const_tree valtype)
+{
+ return (DEFAULT_ABI == ABI_ELFv2
+ && BYTES_BIG_ENDIAN
+ && AGGREGATE_TYPE_P (valtype)
+ && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
+}
+
#ifdef HAVE_AS_GNU_ATTRIBUTE
/* Return TRUE if a call to function FNDECL may be one that
potentially affects the function calling ABI of the object file. */
@@ -7657,7 +9097,7 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
static bool
rs6000_must_pass_in_stack (enum machine_mode mode, const_tree type)
{
- if (DEFAULT_ABI == ABI_AIX || TARGET_64BIT)
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
return must_pass_in_stack_var_size (mode, type);
else
return must_pass_in_stack_var_size_or_pad (mode, type);
@@ -7738,6 +9178,11 @@ function_arg_padding (enum machine_mode mode, const_tree type)
static unsigned int
rs6000_function_arg_boundary (enum machine_mode mode, const_tree type)
{
+ enum machine_mode elt_mode;
+ int n_elts;
+
+ rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
+
if (DEFAULT_ABI == ABI_V4
&& (GET_MODE_SIZE (mode) == 8
|| (TARGET_HARD_FLOAT
@@ -7749,12 +9194,13 @@ rs6000_function_arg_boundary (enum machine_mode mode, const_tree type)
&& int_size_in_bytes (type) >= 8
&& int_size_in_bytes (type) < 16))
return 64;
- else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
+ else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
|| (type && TREE_CODE (type) == VECTOR_TYPE
&& int_size_in_bytes (type) >= 16))
return 128;
- else if (TARGET_MACHO
- && rs6000_darwin64_abi
+ else if (((TARGET_MACHO && rs6000_darwin64_abi)
+ || DEFAULT_ABI == ABI_ELFv2
+ || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
&& mode == BLKmode
&& type && TYPE_ALIGN (type) > 64)
return 128;
@@ -7762,6 +9208,16 @@ rs6000_function_arg_boundary (enum machine_mode mode, const_tree type)
return PARM_BOUNDARY;
}
+/* The offset in words to the start of the parameter save area. */
+
+static unsigned int
+rs6000_parm_offset (void)
+{
+ return (DEFAULT_ABI == ABI_V4 ? 2
+ : DEFAULT_ABI == ABI_ELFv2 ? 4
+ : 6);
+}
+
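In bytes (an editorial note, not part of the patch), these word counts give the familiar start offsets of the parameter save area:

/* V4 (32-bit)       : 2 words * 4 =  8
   AIX/ELFv1, 32-bit : 6 words * 4 = 24
   AIX/ELFv1, 64-bit : 6 words * 8 = 48
   ELFv2, 64-bit     : 4 words * 8 = 32  (smaller fixed stack header)  */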
/* For a function parm of MODE and TYPE, return the starting word in
the parameter area. NWORDS of the parameter area are already used. */
@@ -7770,11 +9226,9 @@ rs6000_parm_start (enum machine_mode mode, const_tree type,
unsigned int nwords)
{
unsigned int align;
- unsigned int parm_offset;
align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
- parm_offset = DEFAULT_ABI == ABI_V4 ? 2 : 6;
- return nwords + (-(parm_offset + nwords) & align);
+ return nwords + (-(rs6000_parm_offset () + nwords) & align);
}
/* Compute the size (in words) of a function argument. */
@@ -7881,7 +9335,7 @@ rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
if (TREE_CODE (ftype) == RECORD_TYPE)
rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
- else if (USE_FP_FOR_ARG_P (cum, mode, ftype))
+ else if (USE_FP_FOR_ARG_P (cum, mode))
{
unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
@@ -7922,7 +9376,7 @@ rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
else
cum->words += n_fpregs;
}
- else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, 1))
+ else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
{
rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
cum->vregno++;
@@ -7959,6 +9413,11 @@ static void
rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
const_tree type, bool named, int depth)
{
+ enum machine_mode elt_mode;
+ int n_elts;
+
+ rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
+
/* Only tick off an argument if we're not recursing. */
if (depth == 0)
cum->nargs_prototype--;
@@ -7979,15 +9438,16 @@ rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
#endif
if (TARGET_ALTIVEC_ABI
- && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
+ && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
|| (type && TREE_CODE (type) == VECTOR_TYPE
&& int_size_in_bytes (type) == 16)))
{
bool stack = false;
- if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, named))
+ if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
{
- cum->vregno++;
+ cum->vregno += n_elts;
+
if (!TARGET_ALTIVEC)
error ("cannot pass argument in vector register because"
" altivec instructions are disabled, use -maltivec"
@@ -7996,7 +9456,8 @@ rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
/* PowerPC64 Linux and AIX allocate GPRs for a vector argument
even if it is going to be passed in a vector register.
Darwin does the same for variable-argument functions. */
- if ((DEFAULT_ABI == ABI_AIX && TARGET_64BIT)
+ if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
+ && TARGET_64BIT)
|| (cum->stdarg && DEFAULT_ABI != ABI_V4))
stack = true;
}
@@ -8007,15 +9468,13 @@ rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
{
int align;
- /* Vector parameters must be 16-byte aligned. This places
- them at 2 mod 4 in terms of words in 32-bit mode, since
- the parameter save area starts at offset 24 from the
- stack. In 64-bit mode, they just have to start on an
- even word, since the parameter save area is 16-byte
- aligned. Space for GPRs is reserved even if the argument
- will be passed in memory. */
+ /* Vector parameters must be 16-byte aligned. In 32-bit
+ mode this means we need to take into account the offset
+ to the parameter save area. In 64-bit mode, they just
+ have to start on an even word, since the parameter save
+ area is 16-byte aligned. */
if (TARGET_32BIT)
- align = (2 - cum->words) & 3;
+ align = -(rs6000_parm_offset () + cum->words) & 3;
else
align = cum->words & 1;
cum->words += align + rs6000_arg_size (mode, type);
@@ -8140,15 +9599,15 @@ rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
cum->words = align_words + n_words;
- if (SCALAR_FLOAT_MODE_P (mode)
+ if (SCALAR_FLOAT_MODE_P (elt_mode)
&& TARGET_HARD_FLOAT && TARGET_FPRS)
{
/* _Decimal128 must be passed in an even/odd float register pair.
This assumes that the register number is odd when fregno is
odd. */
- if (mode == TDmode && (cum->fregno % 2) == 1)
+ if (elt_mode == TDmode && (cum->fregno % 2) == 1)
cum->fregno++;
- cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
+ cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
}
if (TARGET_DEBUG_ARG)
@@ -8358,7 +9817,7 @@ rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
if (TREE_CODE (ftype) == RECORD_TYPE)
rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
- else if (cum->named && USE_FP_FOR_ARG_P (cum, mode, ftype))
+ else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
{
unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
#if 0
@@ -8386,7 +9845,7 @@ rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
if (mode == TFmode || mode == TDmode)
cum->fregno++;
}
- else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, ftype, 1))
+ else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
{
rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
rvec[(*k)++]
@@ -8503,6 +9962,84 @@ rs6000_mixed_function_arg (enum machine_mode mode, const_tree type,
return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
}
+/* We have an argument of MODE and TYPE that goes into FPRs or VRs,
+ but must also be copied into the parameter save area starting at
+ offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
+ to the GPRs and/or memory. Return the number of elements used. */
+
+static int
+rs6000_psave_function_arg (enum machine_mode mode, const_tree type,
+ int align_words, rtx *rvec)
+{
+ int k = 0;
+
+ if (align_words < GP_ARG_NUM_REG)
+ {
+ int n_words = rs6000_arg_size (mode, type);
+
+ if (align_words + n_words > GP_ARG_NUM_REG
+ || mode == BLKmode
+ || (TARGET_32BIT && TARGET_POWERPC64))
+ {
+ /* If this is partially on the stack, then we only
+ include the portion actually in registers here. */
+ enum machine_mode rmode = TARGET_32BIT ? SImode : DImode;
+ int i = 0;
+
+ if (align_words + n_words > GP_ARG_NUM_REG)
+ {
+ /* Not all of the arg fits in gprs. Say that it goes in memory
+ too, using a magic NULL_RTX component. Also see comment in
+ rs6000_mixed_function_arg for why the normal
+ function_arg_partial_nregs scheme doesn't work in this case. */
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
+ }
+
+ do
+ {
+ rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
+ rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
+ }
+ while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
+ }
+ else
+ {
+ /* The whole arg fits in gprs. */
+ rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
+ }
+ }
+ else
+ {
+ /* It's entirely in memory. */
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
+ }
+
+ return k;
+}
+
+/* RVEC is a vector of K components of an argument of mode MODE.
+ Construct the final function_arg return value from it. */
+
+static rtx
+rs6000_finish_function_arg (enum machine_mode mode, rtx *rvec, int k)
+{
+ gcc_assert (k >= 1);
+
+ /* Avoid returning a PARALLEL in the trivial cases. */
+ if (k == 1)
+ {
+ if (XEXP (rvec[0], 0) == NULL_RTX)
+ return NULL_RTX;
+
+ if (GET_MODE (XEXP (rvec[0], 0)) == mode)
+ return XEXP (rvec[0], 0);
+ }
+
+ return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
+}
+
/* Determine where to put an argument to a function.
Value is zero to push the argument on the stack,
or a hard register in which to store the argument.
@@ -8537,6 +10074,8 @@ rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
{
CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
enum rs6000_abi abi = DEFAULT_ABI;
+ enum machine_mode elt_mode;
+ int n_elts;
/* Return a marker to indicate whether CR1 needs to set or clear the
bit that V.4 uses to say fp args were passed in registers.
@@ -8563,6 +10102,8 @@ rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
}
+ rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
+
if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
{
rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
@@ -8571,33 +10112,30 @@ rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
/* Else fall through to usual handling. */
}
- if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, named))
- if (TARGET_64BIT && ! cum->prototype)
+ if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
{
- /* Vector parameters get passed in vector register
- and also in GPRs or memory, in absence of prototype. */
- int align_words;
- rtx slot;
- align_words = (cum->words + 1) & ~1;
+ rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
+ rtx r, off;
+ int i, k = 0;
- if (align_words >= GP_ARG_NUM_REG)
+ /* Do we also need to pass this argument in the parameter
+ save area? */
+ if (TARGET_64BIT && ! cum->prototype)
{
- slot = NULL_RTX;
+ int align_words = (cum->words + 1) & ~1;
+ k = rs6000_psave_function_arg (mode, type, align_words, rvec);
}
- else
+
+ /* Describe where this argument goes in the vector registers. */
+ for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
{
- slot = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
+ r = gen_rtx_REG (elt_mode, cum->vregno + i);
+ off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
}
- return gen_rtx_PARALLEL (mode,
- gen_rtvec (2,
- gen_rtx_EXPR_LIST (VOIDmode,
- slot, const0_rtx),
- gen_rtx_EXPR_LIST (VOIDmode,
- gen_rtx_REG (mode, cum->vregno),
- const0_rtx)));
+
+ return rs6000_finish_function_arg (mode, rvec, k);
}
- else
- return gen_rtx_REG (mode, cum->vregno);
else if (TARGET_ALTIVEC_ABI
&& (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
|| (type && TREE_CODE (type) == VECTOR_TYPE
@@ -8612,13 +10150,13 @@ rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
int align, align_words, n_words;
enum machine_mode part_mode;
- /* Vector parameters must be 16-byte aligned. This places them at
- 2 mod 4 in terms of words in 32-bit mode, since the parameter
- save area starts at offset 24 from the stack. In 64-bit mode,
- they just have to start on an even word, since the parameter
- save area is 16-byte aligned. */
+ /* Vector parameters must be 16-byte aligned. In 32-bit
+ mode this means we need to take into account the offset
+ to the parameter save area. In 64-bit mode, they just
+ have to start on an even word, since the parameter save
+ area is 16-byte aligned. */
if (TARGET_32BIT)
- align = (2 - cum->words) & 3;
+ align = -(rs6000_parm_offset () + cum->words) & 3;
else
align = cum->words & 1;
align_words = cum->words + align;
@@ -8696,101 +10234,50 @@ rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
/* _Decimal128 must be passed in an even/odd float register pair.
This assumes that the register number is odd when fregno is odd. */
- if (mode == TDmode && (cum->fregno % 2) == 1)
+ if (elt_mode == TDmode && (cum->fregno % 2) == 1)
cum->fregno++;
- if (USE_FP_FOR_ARG_P (cum, mode, type))
+ if (USE_FP_FOR_ARG_P (cum, elt_mode))
{
- rtx rvec[GP_ARG_NUM_REG + 1];
- rtx r;
- int k;
- bool needs_psave;
- enum machine_mode fmode = mode;
- unsigned long n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
+ rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
+ rtx r, off;
+ int i, k = 0;
+ unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
- if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
- {
- /* Currently, we only ever need one reg here because complex
- doubles are split. */
- gcc_assert (cum->fregno == FP_ARG_MAX_REG
- && (fmode == TFmode || fmode == TDmode));
+ /* Do we also need to pass this argument in the parameter
+ save area? */
+ if (type && (cum->nargs_prototype <= 0
+ || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
+ && TARGET_XL_COMPAT
+ && align_words >= GP_ARG_NUM_REG)))
+ k = rs6000_psave_function_arg (mode, type, align_words, rvec);
- /* Long double or _Decimal128 split over regs and memory. */
+ /* Describe where this argument goes in the fprs. */
+ for (i = 0; i < n_elts
+ && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
+ {
+ /* Check if the argument is split over registers and memory.
+ This can only ever happen for long double or _Decimal128;
+ complex types are handled via split_complex_arg. */
+ enum machine_mode fmode = elt_mode;
+ if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
+ {
+ gcc_assert (fmode == TFmode || fmode == TDmode);
fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
}
- /* Do we also need to pass this arg in the parameter save
- area? */
- needs_psave = (type
- && (cum->nargs_prototype <= 0
- || (DEFAULT_ABI == ABI_AIX
- && TARGET_XL_COMPAT
- && align_words >= GP_ARG_NUM_REG)));
-
- if (!needs_psave && mode == fmode)
- return gen_rtx_REG (fmode, cum->fregno);
-
- k = 0;
- if (needs_psave)
- {
- /* Describe the part that goes in gprs or the stack.
- This piece must come first, before the fprs. */
- if (align_words < GP_ARG_NUM_REG)
- {
- unsigned long n_words = rs6000_arg_size (mode, type);
-
- if (align_words + n_words > GP_ARG_NUM_REG
- || (TARGET_32BIT && TARGET_POWERPC64))
- {
- /* If this is partially on the stack, then we only
- include the portion actually in registers here. */
- enum machine_mode rmode = TARGET_32BIT ? SImode : DImode;
- rtx off;
- int i = 0;
- if (align_words + n_words > GP_ARG_NUM_REG)
- /* Not all of the arg fits in gprs. Say that it
- goes in memory too, using a magic NULL_RTX
- component. Also see comment in
- rs6000_mixed_function_arg for why the normal
- function_arg_partial_nregs scheme doesn't work
- in this case. */
- rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX,
- const0_rtx);
- do
- {
- r = gen_rtx_REG (rmode,
- GP_ARG_MIN_REG + align_words);
- off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
- rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
- }
- while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
- }
- else
- {
- /* The whole arg fits in gprs. */
- r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
- rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
- }
- }
- else
- /* It's entirely in memory. */
- rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
+ r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
+ off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
+ rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
}
- /* Describe where this piece goes in the fprs. */
- r = gen_rtx_REG (fmode, cum->fregno);
- rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
-
- return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
+ return rs6000_finish_function_arg (mode, rvec, k);
}
else if (align_words < GP_ARG_NUM_REG)
- {
+ {
if (TARGET_32BIT && TARGET_POWERPC64)
return rs6000_mixed_function_arg (mode, type, align_words);
- if (mode == BLKmode)
- mode = Pmode;
-
return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
}
else
@@ -8809,42 +10296,62 @@ rs6000_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
tree type, bool named)
{
CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
+ bool passed_in_gprs = true;
int ret = 0;
int align_words;
+ enum machine_mode elt_mode;
+ int n_elts;
+
+ rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
if (DEFAULT_ABI == ABI_V4)
return 0;
- if (USE_ALTIVEC_FOR_ARG_P (cum, mode, type, named)
- && cum->nargs_prototype >= 0)
+ if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
+ {
+ /* If we are passing this arg in the fixed parameter save area
+ (gprs or memory) as well as VRs, we do not use the partial
+ bytes mechanism; instead, rs6000_function_arg will return a
+ PARALLEL including a memory element as necessary. */
+ if (TARGET_64BIT && ! cum->prototype)
return 0;
+ /* Otherwise, we pass in VRs only. Check for partial copies. */
+ passed_in_gprs = false;
+ if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
+ ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
+ }
+
/* In this complicated case we just disable the partial_nregs code. */
if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
return 0;
align_words = rs6000_parm_start (mode, type, cum->words);
- if (USE_FP_FOR_ARG_P (cum, mode, type))
+ if (USE_FP_FOR_ARG_P (cum, elt_mode))
{
+ unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
+
/* If we are passing this arg in the fixed parameter save area
- (gprs or memory) as well as fprs, then this function should
- return the number of partial bytes passed in the parameter
- save area rather than partial bytes passed in fprs. */
+ (gprs or memory) as well as FPRs, we do not use the partial
+ bytes mechanism; instead, rs6000_function_arg will return a
+ PARALLEL including a memory element as necessary. */
if (type
&& (cum->nargs_prototype <= 0
- || (DEFAULT_ABI == ABI_AIX
+ || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
&& TARGET_XL_COMPAT
&& align_words >= GP_ARG_NUM_REG)))
return 0;
- else if (cum->fregno + ((GET_MODE_SIZE (mode) + 7) >> 3)
- > FP_ARG_MAX_REG + 1)
- ret = (FP_ARG_MAX_REG + 1 - cum->fregno) * 8;
- else if (cum->nargs_prototype >= 0)
- return 0;
+
+ /* Otherwise, we pass in FPRs only. Check for partial copies. */
+ passed_in_gprs = false;
+ if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
+ ret = ((FP_ARG_MAX_REG + 1 - cum->fregno)
+ * MIN (8, GET_MODE_SIZE (elt_mode)));
}
- if (align_words < GP_ARG_NUM_REG
+ if (passed_in_gprs
+ && align_words < GP_ARG_NUM_REG
&& GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
@@ -8925,6 +10432,139 @@ rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
return 0;
}
+/* Process a parameter of type TYPE after ARGS_SO_FAR parameters have
+ already been processed. Return true if the parameter must be passed
+ (fully or partially) on the stack. */
+
+static bool
+rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
+{
+ enum machine_mode mode;
+ int unsignedp;
+ rtx entry_parm;
+
+ /* Catch errors. */
+ if (type == NULL || type == error_mark_node)
+ return true;
+
+ /* Handle types with no storage requirement. */
+ if (TYPE_MODE (type) == VOIDmode)
+ return false;
+
+ /* Handle complex types. A complex argument is split into two parts of
+ the component type (see split_complex_arg above), so check that type
+ twice; each call also advances ARGS_SO_FAR past one half. */
+ if (TREE_CODE (type) == COMPLEX_TYPE)
+ return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
+ || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
+
+ /* Handle transparent aggregates. */
+ if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
+ && TYPE_TRANSPARENT_AGGR (type))
+ type = TREE_TYPE (first_field (type));
+
+ /* See if this arg was passed by invisible reference. */
+ if (pass_by_reference (get_cumulative_args (args_so_far),
+ TYPE_MODE (type), type, true))
+ type = build_pointer_type (type);
+
+ /* Find mode as it is passed by the ABI. */
+ unsignedp = TYPE_UNSIGNED (type);
+ mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
+
+ /* If we must pass this argument on the stack, we need the parameter
+ save area. */
+ if (rs6000_must_pass_in_stack (mode, type))
+ return true;
+
+ /* If there is no incoming register, we need the parameter save area. */
+ entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
+ if (entry_parm == NULL)
+ return true;
+
+ /* Likewise if we need to pass both in registers and on the stack. */
+ if (GET_CODE (entry_parm) == PARALLEL
+ && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
+ return true;
+
+ /* Also true if we're partially in registers and partially not. */
+ if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
+ return true;
+
+ /* Update info on where next arg arrives in registers. */
+ rs6000_function_arg_advance (args_so_far, mode, type, true);
+ return false;
+}
+
+/* Return true if FUN has no prototype, has a variable argument
+ list, or passes any parameter in memory. */
+
+static bool
+rs6000_function_parms_need_stack (tree fun)
+{
+ function_args_iterator args_iter;
+ tree arg_type;
+ CUMULATIVE_ARGS args_so_far_v;
+ cumulative_args_t args_so_far;
+
+ if (!fun)
+ /* Must be a libcall; libcalls only use register parameters. */
+ return false;
+ if (!TYPE_P (fun))
+ fun = TREE_TYPE (fun);
+
+ /* Varargs functions need the parameter save area. */
+ if (!prototype_p (fun) || stdarg_p (fun))
+ return true;
+
+ INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fun, NULL_RTX);
+ args_so_far = pack_cumulative_args (&args_so_far_v);
+
+ if (aggregate_value_p (TREE_TYPE (fun), fun))
+ {
+ tree type = build_pointer_type (TREE_TYPE (fun));
+ rs6000_parm_needs_stack (args_so_far, type);
+ }
+
+ FOREACH_FUNCTION_ARGS (fun, arg_type, args_iter)
+ if (rs6000_parm_needs_stack (args_so_far, arg_type))
+ return true;
+
+ return false;
+}
+
+/* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
+ usually a constant depending on the ABI. However, in the ELFv2 ABI
+ the register parameter area is optional when calling a function that
+ has a prototype in scope, has no variable argument list, and passes
+ all parameters in registers. */
+
+int
+rs6000_reg_parm_stack_space (tree fun)
+{
+ int reg_parm_stack_space;
+
+ switch (DEFAULT_ABI)
+ {
+ default:
+ reg_parm_stack_space = 0;
+ break;
+
+ case ABI_AIX:
+ case ABI_DARWIN:
+ reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
+ break;
+
+ case ABI_ELFv2:
+ /* ??? Recomputing this every time is a bit expensive. Is there
+ a place to cache this information? */
+ if (rs6000_function_parms_need_stack (fun))
+ reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
+ else
+ reg_parm_stack_space = 0;
+ break;
+ }
+
+ return reg_parm_stack_space;
+}
+
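Two hypothetical callee declarations (editorial, not part of the patch) illustrating the ELFv2 case above:

extern double dot2 (double x0, double x1, double y0, double y1);
  /* Prototyped, no varargs, all arguments fit in FPRs: the caller may
     omit the register parameter save area (0 bytes).  */

extern int printf (const char *fmt, ...);
  /* Varargs: rs6000_function_parms_need_stack returns true, so the full
     64-byte area is required on 64-bit.  */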
static void
rs6000_move_block_from_reg (int regno, rtx x, int nregs)
{
@@ -9306,8 +10946,10 @@ rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
We don't need to check for pass-by-reference because of the test above.
We can return a simplifed answer, since we know there's no offset to add. */
- if (TARGET_MACHO
- && rs6000_darwin64_abi
+ if (((TARGET_MACHO
+ && rs6000_darwin64_abi)
+ || DEFAULT_ABI == ABI_ELFv2
+ || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
&& integer_zerop (TYPE_SIZE (type)))
{
unsigned HOST_WIDE_INT align, boundary;
@@ -9602,6 +11244,7 @@ def_builtin (const char *name, tree type, enum rs6000_builtins code)
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -9615,6 +11258,7 @@ def_builtin (const char *name, tree type, enum rs6000_builtins code)
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
@@ -9633,6 +11277,7 @@ static const struct builtin_description bdesc_3arg[] =
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -9646,6 +11291,7 @@ static const struct builtin_description bdesc_3arg[] =
{ MASK, ICODE, NAME, ENUM },
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
@@ -9664,6 +11310,7 @@ static const struct builtin_description bdesc_dst[] =
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -9677,6 +11324,7 @@ static const struct builtin_description bdesc_dst[] =
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
@@ -9693,6 +11341,7 @@ static const struct builtin_description bdesc_2arg[] =
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -9704,6 +11353,7 @@ static const struct builtin_description bdesc_2arg[] =
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
{ MASK, ICODE, NAME, ENUM },
@@ -9725,6 +11375,7 @@ static const struct builtin_description bdesc_altivec_preds[] =
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -9736,6 +11387,7 @@ static const struct builtin_description bdesc_altivec_preds[] =
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
@@ -9755,6 +11407,7 @@ static const struct builtin_description bdesc_spe_predicates[] =
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -9768,6 +11421,7 @@ static const struct builtin_description bdesc_spe_predicates[] =
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
{ MASK, ICODE, NAME, ENUM },
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
@@ -9785,6 +11439,7 @@ static const struct builtin_description bdesc_spe_evsel[] =
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -9796,6 +11451,7 @@ static const struct builtin_description bdesc_spe_evsel[] =
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
{ MASK, ICODE, NAME, ENUM },
@@ -9816,6 +11472,7 @@ static const struct builtin_description bdesc_paired_preds[] =
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -9829,6 +11486,7 @@ static const struct builtin_description bdesc_paired_preds[] =
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
@@ -9846,8 +11504,9 @@ static const struct builtin_description bdesc_abs[] =
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
-#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_D
+#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
@@ -9861,6 +11520,7 @@ static const struct builtin_description bdesc_abs[] =
#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
@@ -9871,17 +11531,49 @@ static const struct builtin_description bdesc_1arg[] =
#include "rs6000-builtin.def"
};
+/* HTM builtins. */
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
+#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
+ { MASK, ICODE, NAME, ENUM },
+
+#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
+#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
+
+static const struct builtin_description bdesc_htm[] =
+{
+#include "rs6000-builtin.def"
+};
+
+#undef RS6000_BUILTIN_1
+#undef RS6000_BUILTIN_2
+#undef RS6000_BUILTIN_3
+#undef RS6000_BUILTIN_A
+#undef RS6000_BUILTIN_D
+#undef RS6000_BUILTIN_E
+#undef RS6000_BUILTIN_H
+#undef RS6000_BUILTIN_P
+#undef RS6000_BUILTIN_Q
+#undef RS6000_BUILTIN_S
+
/* Return true if a builtin function is overloaded. */
bool
rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
@@ -10189,6 +11881,100 @@ paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
return target;
}
+/* Return a constant vector for use as a little-endian permute control vector
+ to reverse the order of elements of the given vector mode. */
+static rtx
+swap_selector_for_mode (enum machine_mode mode)
+{
+ /* These are little endian vectors, so their elements are reversed
+ from what you would normally expect for a permute control vector. */
+ unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
+ unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
+ unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
+ unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
+ unsigned int *swaparray, i;
+ rtx perm[16];
+
+ switch (mode)
+ {
+ case V2DFmode:
+ case V2DImode:
+ swaparray = swap2;
+ break;
+ case V4SFmode:
+ case V4SImode:
+ swaparray = swap4;
+ break;
+ case V8HImode:
+ swaparray = swap8;
+ break;
+ case V16QImode:
+ swaparray = swap16;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ for (i = 0; i < 16; ++i)
+ perm[i] = GEN_INT (swaparray[i]);
+
+ return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
+}
+
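To make the selector's effect concrete, a minimal scalar model of the vperm that the helpers below emit, assuming both permute inputs are the same vector (as they are here); the function name is made up for illustration.

/* Illustrative sketch, not part of the patch: result byte i of the permute
   is taken from input byte sel[i]; with the V4SImode selector above this
   reverses the four 32-bit elements.  */
static void
model_vperm_same_input (const unsigned char in[16], unsigned char out[16])
{
  static const unsigned char sel[16] =
    { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
  int i;
  for (i = 0; i < 16; i++)
    out[i] = in[sel[i]];
}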
+/* Generate code for an "lvx", "lvxl", or "lve*x" built-in for a little endian target
+ with -maltivec=be specified. Issue the load followed by an element-reversing
+ permute. */
+void
+altivec_expand_lvx_be (rtx op0, rtx op1, enum machine_mode mode, unsigned unspec)
+{
+ rtx tmp = gen_reg_rtx (mode);
+ rtx load = gen_rtx_SET (VOIDmode, tmp, op1);
+ rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
+ rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
+ rtx sel = swap_selector_for_mode (mode);
+ rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
+
+ gcc_assert (REG_P (op0));
+ emit_insn (par);
+ emit_insn (gen_rtx_SET (VOIDmode, op0, vperm));
+}
+
+/* Generate code for a "stvx" or "stvxl" built-in for a little endian target
+ with -maltivec=be specified. Issue the store preceded by an element-reversing
+ permute. */
+void
+altivec_expand_stvx_be (rtx op0, rtx op1, enum machine_mode mode, unsigned unspec)
+{
+ rtx tmp = gen_reg_rtx (mode);
+ rtx store = gen_rtx_SET (VOIDmode, op0, tmp);
+ rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
+ rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
+ rtx sel = swap_selector_for_mode (mode);
+ rtx vperm;
+
+ gcc_assert (REG_P (op1));
+ vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
+ emit_insn (par);
+}
+
+/* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
+ specified. Issue the store preceded by an element-reversing permute. */
+void
+altivec_expand_stvex_be (rtx op0, rtx op1, enum machine_mode mode, unsigned unspec)
+{
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ rtx tmp = gen_reg_rtx (mode);
+ rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
+ rtx sel = swap_selector_for_mode (mode);
+ rtx vperm;
+
+ gcc_assert (REG_P (op1));
+ vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
+ emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
+ emit_insn (gen_rtx_SET (VOIDmode, op0, stvx));
+}
+
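As a usage illustration (hypothetical user code, assuming a little-endian target compiled with -maltivec=be), an AltiVec load such as the one below is expanded through altivec_expand_lvx_be above, i.e. an lvx followed by the element-reversing vperm:

#include <altivec.h>

/* Made-up helper name, for illustration only.  */
vector signed int
load_in_be_element_order (const int *p)
{
  return vec_ld (0, p);   /* lvx + element-reversing permute */
}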
static rtx
altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
{
@@ -10351,6 +12137,197 @@ altivec_expand_stv_builtin (enum insn_code icode, tree exp)
return NULL_RTX;
}
+/* Return the appropriate SPR number associated with the given builtin. */
+static inline HOST_WIDE_INT
+htm_spr_num (enum rs6000_builtins code)
+{
+ if (code == HTM_BUILTIN_GET_TFHAR
+ || code == HTM_BUILTIN_SET_TFHAR)
+ return TFHAR_SPR;
+ else if (code == HTM_BUILTIN_GET_TFIAR
+ || code == HTM_BUILTIN_SET_TFIAR)
+ return TFIAR_SPR;
+ else if (code == HTM_BUILTIN_GET_TEXASR
+ || code == HTM_BUILTIN_SET_TEXASR)
+ return TEXASR_SPR;
+ gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
+ || code == HTM_BUILTIN_SET_TEXASRU);
+ return TEXASRU_SPR;
+}
+
+/* Return the appropriate SPR regno associated with the given builtin. */
+static inline HOST_WIDE_INT
+htm_spr_regno (enum rs6000_builtins code)
+{
+ if (code == HTM_BUILTIN_GET_TFHAR
+ || code == HTM_BUILTIN_SET_TFHAR)
+ return TFHAR_REGNO;
+ else if (code == HTM_BUILTIN_GET_TFIAR
+ || code == HTM_BUILTIN_SET_TFIAR)
+ return TFIAR_REGNO;
+ gcc_assert (code == HTM_BUILTIN_GET_TEXASR
+ || code == HTM_BUILTIN_SET_TEXASR
+ || code == HTM_BUILTIN_GET_TEXASRU
+ || code == HTM_BUILTIN_SET_TEXASRU);
+ return TEXASR_REGNO;
+}
+
+/* Return the correct ICODE value depending on whether we are
+ setting or reading the HTM SPRs. */
+static inline enum insn_code
+rs6000_htm_spr_icode (bool nonvoid)
+{
+ if (nonvoid)
+ return (TARGET_64BIT) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
+ else
+ return (TARGET_64BIT) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
+}
+
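For context, a brief illustration of the SPR-accessing builtins these helpers serve (hypothetical user code, assuming -mhtm on a 64-bit target, where the accesses expand via the htm_mfspr_di/htm_mtspr_di patterns selected above):

/* Illustrative user code, not part of the patch.  */
unsigned long texasr = __builtin_get_texasr ();   /* mfspr from TEXASR */
__builtin_set_tfhar (0UL);                        /* mtspr to TFHAR    */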
+/* Expand the HTM builtin in EXP and store the result in TARGET.
+ Store true in *EXPANDEDP if we found a builtin to expand. */
+static rtx
+htm_expand_builtin (tree exp, rtx target, bool * expandedp)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
+ enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
+ const struct builtin_description *d;
+ size_t i;
+
+ *expandedp = false;
+
+ /* Expand the HTM builtins. */
+ d = bdesc_htm;
+ for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
+ if (d->code == fcode)
+ {
+ rtx op[MAX_HTM_OPERANDS], pat;
+ int nopnds = 0;
+ tree arg;
+ call_expr_arg_iterator iter;
+ unsigned attr = rs6000_builtin_info[fcode].attr;
+ enum insn_code icode = d->icode;
+
+ if (attr & RS6000_BTC_SPR)
+ icode = rs6000_htm_spr_icode (nonvoid);
+
+ if (nonvoid)
+ {
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ if (!target
+ || GET_MODE (target) != tmode
+ || !(*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ op[nopnds++] = target;
+ }
+
+ FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
+ {
+ const struct insn_operand_data *insn_op;
+
+ if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
+ return NULL_RTX;
+
+ insn_op = &insn_data[icode].operand[nopnds];
+
+ op[nopnds] = expand_normal (arg);
+
+ if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
+ {
+ if (!strcmp (insn_op->constraint, "n"))
+ {
+ int arg_num = (nonvoid) ? nopnds : nopnds + 1;
+ if (!CONST_INT_P (op[nopnds]))
+ error ("argument %d must be an unsigned literal", arg_num);
+ else
+ error ("argument %d is an unsigned literal that is "
+ "out of range", arg_num);
+ return const0_rtx;
+ }
+ op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
+ }
+
+ nopnds++;
+ }
+
+ /* Handle the builtins for extended mnemonics. These accept
+ no arguments, but map to builtins that take arguments. */
+ switch (fcode)
+ {
+ case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
+ case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
+ op[nopnds++] = GEN_INT (1);
+#ifdef ENABLE_CHECKING
+ attr |= RS6000_BTC_UNARY;
+#endif
+ break;
+ case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
+ op[nopnds++] = GEN_INT (0);
+#ifdef ENABLE_CHECKING
+ attr |= RS6000_BTC_UNARY;
+#endif
+ break;
+ default:
+ break;
+ }
+
+ /* If this builtin accesses SPRs, then pass in the appropriate
+ SPR number and SPR regno as the last two operands. */
+ if (attr & RS6000_BTC_SPR)
+ {
+ op[nopnds++] = gen_rtx_CONST_INT (Pmode, htm_spr_num (fcode));
+ op[nopnds++] = gen_rtx_REG (Pmode, htm_spr_regno (fcode));
+ }
+
+#ifdef ENABLE_CHECKING
+ int expected_nopnds = 0;
+ if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
+ expected_nopnds = 1;
+ else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
+ expected_nopnds = 2;
+ else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
+ expected_nopnds = 3;
+ if (!(attr & RS6000_BTC_VOID))
+ expected_nopnds += 1;
+ if (attr & RS6000_BTC_SPR)
+ expected_nopnds += 2;
+
+ gcc_assert (nopnds == expected_nopnds && nopnds <= MAX_HTM_OPERANDS);
+#endif
+
+ switch (nopnds)
+ {
+ case 0:
+ pat = GEN_FCN (icode) (NULL_RTX);
+ break;
+ case 1:
+ pat = GEN_FCN (icode) (op[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (op[0], op[1]);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2]);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (!pat)
+ return NULL_RTX;
+ emit_insn (pat);
+
+ *expandedp = true;
+ if (nonvoid)
+ return target;
+ return const0_rtx;
+ }
+
+ return NULL_RTX;
+}
+
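A short illustration of the extended-mnemonic handling above (hypothetical user code, assuming -mhtm): the no-argument builtins are expanded as if the corresponding argument-taking builtin had been called.

__builtin_tendall ();    /* handled like __builtin_tend (1)  */
__builtin_tresume ();    /* handled like __builtin_tsr (1)   */
__builtin_tsuspend ();   /* handled like __builtin_tsr (0)   */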
static rtx
rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
{
@@ -10416,7 +12393,15 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
}
}
else if (icode == CODE_FOR_vsx_set_v2df
- || icode == CODE_FOR_vsx_set_v2di)
+ || icode == CODE_FOR_vsx_set_v2di
+ || icode == CODE_FOR_bcdadd
+ || icode == CODE_FOR_bcdadd_lt
+ || icode == CODE_FOR_bcdadd_eq
+ || icode == CODE_FOR_bcdadd_gt
+ || icode == CODE_FOR_bcdsub
+ || icode == CODE_FOR_bcdsub_lt
+ || icode == CODE_FOR_bcdsub_eq
+ || icode == CODE_FOR_bcdsub_gt)
{
/* Only allow 1-bit unsigned literals. */
STRIP_NOPS (arg2);
@@ -10427,6 +12412,65 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
return const0_rtx;
}
}
+ else if (icode == CODE_FOR_dfp_ddedpd_dd
+ || icode == CODE_FOR_dfp_ddedpd_td)
+ {
+ /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
+ STRIP_NOPS (arg0);
+ if (TREE_CODE (arg0) != INTEGER_CST
+ || TREE_INT_CST_LOW (arg0) & ~0x3)
+ {
+ error ("argument 1 must be 0 or 2");
+ return const0_rtx;
+ }
+ }
+ else if (icode == CODE_FOR_dfp_denbcd_dd
+ || icode == CODE_FOR_dfp_denbcd_td)
+ {
+ /* Only allow 1-bit unsigned literals. */
+ STRIP_NOPS (arg0);
+ if (TREE_CODE (arg0) != INTEGER_CST
+ || TREE_INT_CST_LOW (arg0) & ~0x1)
+ {
+ error ("argument 1 must be a 1-bit unsigned literal");
+ return const0_rtx;
+ }
+ }
+ else if (icode == CODE_FOR_dfp_dscli_dd
+ || icode == CODE_FOR_dfp_dscli_td
+ || icode == CODE_FOR_dfp_dscri_dd
+ || icode == CODE_FOR_dfp_dscri_td)
+ {
+ /* Only allow 6-bit unsigned literals. */
+ STRIP_NOPS (arg1);
+ if (TREE_CODE (arg1) != INTEGER_CST
+ || TREE_INT_CST_LOW (arg1) & ~0x3f)
+ {
+ error ("argument 2 must be a 6-bit unsigned literal");
+ return const0_rtx;
+ }
+ }
+ else if (icode == CODE_FOR_crypto_vshasigmaw
+ || icode == CODE_FOR_crypto_vshasigmad)
+ {
+ /* Check whether the 2nd and 3rd arguments are integer constants and in
+ range and prepare arguments. */
+ STRIP_NOPS (arg1);
+ if (TREE_CODE (arg1) != INTEGER_CST
+ || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 1))
+ {
+ error ("argument 2 must be 0 or 1");
+ return const0_rtx;
+ }
+
+ STRIP_NOPS (arg2);
+ if (TREE_CODE (arg2) != INTEGER_CST
+ || !IN_RANGE (TREE_INT_CST_LOW (arg2), 0, 15))
+ {
+ error ("argument 3 must be in the range 0..15");
+ return const0_rtx;
+ }
+ }
if (target == 0
|| GET_MODE (target) != tmode
@@ -10481,6 +12525,8 @@ altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
break;
case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
 icode = CODE_FOR_vector_altivec_load_v2di;
+ break;
+ case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
+ icode = CODE_FOR_vector_altivec_load_v1ti;
break;
default:
*expandedp = false;
@@ -10540,6 +12586,8 @@ altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
break;
case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
 icode = CODE_FOR_vector_altivec_store_v2di;
+ break;
+ case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
+ icode = CODE_FOR_vector_altivec_store_v1ti;
break;
default:
*expandedp = false;
@@ -10632,21 +12680,33 @@ altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
enum machine_mode tmode = TYPE_MODE (type);
enum machine_mode inner_mode = GET_MODE_INNER (tmode);
int i, n_elt = GET_MODE_NUNITS (tmode);
- rtvec v = rtvec_alloc (n_elt);
gcc_assert (VECTOR_MODE_P (tmode));
gcc_assert (n_elt == call_expr_nargs (exp));
+ if (!target || !register_operand (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ /* If we have a vector composed of a single element, such as V1TImode, do
+ the initialization directly. */
+ if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
+ {
+ rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
+ emit_move_insn (target, gen_lowpart (tmode, x));
+ }
+ else
+ {
+ rtvec v = rtvec_alloc (n_elt);
+
for (i = 0; i < n_elt; ++i)
{
rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
}
- if (!target || !register_operand (target, tmode))
- target = gen_reg_rtx (tmode);
-
rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
+ }
+
return target;
}
@@ -10769,16 +12829,38 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
switch (fcode)
{
+ case ALTIVEC_BUILTIN_STVX_V2DF:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
+ case ALTIVEC_BUILTIN_STVX_V2DI:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
+ case ALTIVEC_BUILTIN_STVX_V4SF:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
case ALTIVEC_BUILTIN_STVX:
+ case ALTIVEC_BUILTIN_STVX_V4SI:
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
+ case ALTIVEC_BUILTIN_STVX_V8HI:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
+ case ALTIVEC_BUILTIN_STVX_V16QI:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
case ALTIVEC_BUILTIN_STVEBX:
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
case ALTIVEC_BUILTIN_STVEHX:
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
case ALTIVEC_BUILTIN_STVEWX:
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
+ case ALTIVEC_BUILTIN_STVXL_V2DF:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
+ case ALTIVEC_BUILTIN_STVXL_V2DI:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
+ case ALTIVEC_BUILTIN_STVXL_V4SF:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
case ALTIVEC_BUILTIN_STVXL:
- return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl, exp);
+ case ALTIVEC_BUILTIN_STVXL_V4SI:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
+ case ALTIVEC_BUILTIN_STVXL_V8HI:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
+ case ALTIVEC_BUILTIN_STVXL_V16QI:
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
case ALTIVEC_BUILTIN_STVLX:
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
@@ -10789,6 +12871,8 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
case ALTIVEC_BUILTIN_STVRXL:
return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
+ case VSX_BUILTIN_STXVD2X_V1TI:
+ return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
case VSX_BUILTIN_STXVD2X_V2DF:
return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
case VSX_BUILTIN_STXVD2X_V2DI:
@@ -10869,6 +12953,7 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
case VSX_BUILTIN_VEC_INIT_V2DF:
case VSX_BUILTIN_VEC_INIT_V2DI:
+ case VSX_BUILTIN_VEC_INIT_V1TI:
return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
case ALTIVEC_BUILTIN_VEC_SET_V4SI:
@@ -10877,6 +12962,7 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
case ALTIVEC_BUILTIN_VEC_SET_V4SF:
case VSX_BUILTIN_VEC_SET_V2DF:
case VSX_BUILTIN_VEC_SET_V2DI:
+ case VSX_BUILTIN_VEC_SET_V1TI:
return altivec_expand_vec_set_builtin (exp);
case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
@@ -10885,6 +12971,7 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
case VSX_BUILTIN_VEC_EXT_V2DF:
case VSX_BUILTIN_VEC_EXT_V2DI:
+ case VSX_BUILTIN_VEC_EXT_V1TI:
return altivec_expand_vec_ext_builtin (exp, target);
default:
@@ -10922,12 +13009,44 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
case ALTIVEC_BUILTIN_LVEWX:
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
exp, target, false);
+ case ALTIVEC_BUILTIN_LVXL_V2DF:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
+ exp, target, false);
+ case ALTIVEC_BUILTIN_LVXL_V2DI:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
+ exp, target, false);
+ case ALTIVEC_BUILTIN_LVXL_V4SF:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
+ exp, target, false);
case ALTIVEC_BUILTIN_LVXL:
- return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl,
+ case ALTIVEC_BUILTIN_LVXL_V4SI:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
+ exp, target, false);
+ case ALTIVEC_BUILTIN_LVXL_V8HI:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
+ exp, target, false);
+ case ALTIVEC_BUILTIN_LVXL_V16QI:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
+ exp, target, false);
+ case ALTIVEC_BUILTIN_LVX_V2DF:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
+ exp, target, false);
+ case ALTIVEC_BUILTIN_LVX_V2DI:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
+ exp, target, false);
+ case ALTIVEC_BUILTIN_LVX_V4SF:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
exp, target, false);
case ALTIVEC_BUILTIN_LVX:
+ case ALTIVEC_BUILTIN_LVX_V4SI:
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
exp, target, false);
+ case ALTIVEC_BUILTIN_LVX_V8HI:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
+ exp, target, false);
+ case ALTIVEC_BUILTIN_LVX_V16QI:
+ return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
+ exp, target, false);
case ALTIVEC_BUILTIN_LVLX:
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
exp, target, true);
@@ -10940,6 +13059,9 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
case ALTIVEC_BUILTIN_LVRXL:
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
exp, target, true);
+ case VSX_BUILTIN_LXVD2X_V1TI:
+ return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
+ exp, target, false);
case VSX_BUILTIN_LXVD2X_V2DF:
return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
exp, target, false);
@@ -11411,12 +13533,24 @@ rs6000_invalid_builtin (enum rs6000_builtins fncode)
error ("Builtin function %s is only valid for the cell processor", name);
else if ((fnmask & RS6000_BTM_VSX) != 0)
error ("Builtin function %s requires the -mvsx option", name);
+ else if ((fnmask & RS6000_BTM_HTM) != 0)
+ error ("Builtin function %s requires the -mhtm option", name);
else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
error ("Builtin function %s requires the -maltivec option", name);
else if ((fnmask & RS6000_BTM_PAIRED) != 0)
error ("Builtin function %s requires the -mpaired option", name);
else if ((fnmask & RS6000_BTM_SPE) != 0)
error ("Builtin function %s requires the -mspe option", name);
+ else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
+ == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
+ error ("Builtin function %s requires the -mhard-dfp and"
+ "-mpower8-vector options", name);
+ else if ((fnmask & RS6000_BTM_DFP) != 0)
+ error ("Builtin function %s requires the -mhard-dfp option", name);
+ else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
+ error ("Builtin function %s requires the -mpower8-vector option", name);
+ else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
+ error ("Builtin function %s requires the -mhard-float option", name);
else
error ("Builtin function %s is not supported with the current options",
name);
@@ -11515,7 +13649,8 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
case ALTIVEC_BUILTIN_MASK_FOR_STORE:
{
- int icode = (int) CODE_FOR_altivec_lvsr;
+ int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr
+ : (int) CODE_FOR_altivec_lvsl);
enum machine_mode tmode = insn_data[icode].operand[0].mode;
enum machine_mode mode = insn_data[icode].operand[1].mode;
tree arg;
@@ -11590,8 +13725,18 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
if (success)
return ret;
}
+ if (TARGET_HTM)
+ {
+ ret = htm_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
- gcc_assert (TARGET_ALTIVEC || TARGET_VSX || TARGET_SPE || TARGET_PAIRED_FLOAT);
+ unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
+ gcc_assert (attr == RS6000_BTC_UNARY
+ || attr == RS6000_BTC_BINARY
+ || attr == RS6000_BTC_TERNARY);
/* Handle simple unary operations. */
d = bdesc_1arg;
@@ -11648,6 +13793,14 @@ rs6000_init_builtins (void)
opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
+ /* We use V1TI mode as a special container to hold __int128_t items that
+ must live in VSX registers. */
+ if (intTI_type_node)
+ {
+ V1TI_type_node = build_vector_type (intTI_type_node, 1);
+ unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
+ }
+
/* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
types, especially in C++ land. Similarly, 'vector pixel' is distinct from
'vector unsigned short'. */
@@ -11670,8 +13823,13 @@ rs6000_init_builtins (void)
uintSI_type_internal_node = unsigned_intSI_type_node;
intDI_type_internal_node = intDI_type_node;
uintDI_type_internal_node = unsigned_intDI_type_node;
+ intTI_type_internal_node = intTI_type_node;
+ uintTI_type_internal_node = unsigned_intTI_type_node;
float_type_internal_node = float_type_node;
double_type_internal_node = double_type_node;
+ long_double_type_internal_node = long_double_type_node;
+ dfloat64_type_internal_node = dfloat64_type_node;
+ dfloat128_type_internal_node = dfloat128_type_node;
void_type_internal_node = void_type_node;
/* Initialize the modes for builtin_function_type, mapping a machine mode to
@@ -11682,8 +13840,15 @@ rs6000_init_builtins (void)
builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
builtin_mode_to_type[DImode][0] = intDI_type_node;
builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
+ builtin_mode_to_type[TImode][0] = intTI_type_node;
+ builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
builtin_mode_to_type[SFmode][0] = float_type_node;
builtin_mode_to_type[DFmode][0] = double_type_node;
+ builtin_mode_to_type[TFmode][0] = long_double_type_node;
+ builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
+ builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
+ builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
+ builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
@@ -11752,14 +13917,41 @@ rs6000_init_builtins (void)
tdecl = add_builtin_type ("__vector double", V2DF_type_node);
TYPE_NAME (V2DF_type_node) = tdecl;
+ if (TARGET_POWERPC64)
+ {
tdecl = add_builtin_type ("__vector long", V2DI_type_node);
TYPE_NAME (V2DI_type_node) = tdecl;
- tdecl = add_builtin_type ("__vector unsigned long", unsigned_V2DI_type_node);
+ tdecl = add_builtin_type ("__vector unsigned long",
+ unsigned_V2DI_type_node);
TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
TYPE_NAME (bool_V2DI_type_node) = tdecl;
+ }
+ else
+ {
+ tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
+ TYPE_NAME (V2DI_type_node) = tdecl;
+
+ tdecl = add_builtin_type ("__vector unsigned long long",
+ unsigned_V2DI_type_node);
+ TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
+
+ tdecl = add_builtin_type ("__vector __bool long long",
+ bool_V2DI_type_node);
+ TYPE_NAME (bool_V2DI_type_node) = tdecl;
+ }
+
+ if (V1TI_type_node)
+ {
+ tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
+ TYPE_NAME (V1TI_type_node) = tdecl;
+
+ tdecl = add_builtin_type ("__vector unsigned __int128",
+ unsigned_V1TI_type_node);
+ TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
+ }
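A minimal sketch of the new container type in user code (illustrative only, assuming a 64-bit VSX target where V1TImode and __int128 are available):

/* Hypothetical user code, not part of the patch.  */
__vector __int128 one = { 1 };
__vector unsigned __int128 two = { 2 };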
/* Paired and SPE builtins are only available if you build a compiler with
the appropriate options, so only create those builtins with the
@@ -11772,6 +13964,9 @@ rs6000_init_builtins (void)
spe_init_builtins ();
if (TARGET_EXTRA_BUILTINS)
altivec_init_builtins ();
+ if (TARGET_HTM)
+ htm_init_builtins ();
+
if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
rs6000_common_init_builtins ();
@@ -12117,6 +14312,10 @@ altivec_init_builtins (void)
= build_function_type_list (integer_type_node,
integer_type_node, V4SI_type_node,
V4SI_type_node, NULL_TREE);
+ tree int_ftype_int_v2di_v2di
+ = build_function_type_list (integer_type_node,
+ integer_type_node, V2DI_type_node,
+ V2DI_type_node, NULL_TREE);
tree void_ftype_v4si
= build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
tree v8hi_ftype_void
@@ -12199,6 +14398,8 @@ altivec_init_builtins (void)
= build_function_type_list (integer_type_node,
integer_type_node, V2DF_type_node,
V2DF_type_node, NULL_TREE);
+ tree v2di_ftype_v2di
+ = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
tree v4si_ftype_v4si
= build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
tree v8hi_ftype_v8hi
@@ -12224,10 +14425,58 @@ altivec_init_builtins (void)
def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
+ def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVXL_V2DF);
+ def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVXL_V2DI);
+ def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVXL_V4SF);
+ def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVXL_V4SI);
+ def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVXL_V8HI);
+ def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVXL_V16QI);
def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
+ def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVX_V2DF);
+ def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVX_V2DI);
+ def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVX_V4SF);
+ def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVX_V4SI);
+ def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVX_V8HI);
+ def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_LVX_V16QI);
def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
+ def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
+ ALTIVEC_BUILTIN_STVX_V2DF);
+ def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
+ ALTIVEC_BUILTIN_STVX_V2DI);
+ def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
+ ALTIVEC_BUILTIN_STVX_V4SF);
+ def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
+ ALTIVEC_BUILTIN_STVX_V4SI);
+ def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
+ ALTIVEC_BUILTIN_STVX_V8HI);
+ def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
+ ALTIVEC_BUILTIN_STVX_V16QI);
def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
+ def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
+ ALTIVEC_BUILTIN_STVXL_V2DF);
+ def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
+ ALTIVEC_BUILTIN_STVXL_V2DI);
+ def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
+ ALTIVEC_BUILTIN_STVXL_V4SF);
+ def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
+ ALTIVEC_BUILTIN_STVXL_V4SI);
+ def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
+ ALTIVEC_BUILTIN_STVXL_V8HI);
+ def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
+ ALTIVEC_BUILTIN_STVXL_V16QI);
def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
@@ -12334,6 +14583,9 @@ altivec_init_builtins (void)
case VOIDmode:
type = int_ftype_int_opaque_opaque;
break;
+ case V2DImode:
+ type = int_ftype_int_v2di_v2di;
+ break;
case V4SImode:
type = int_ftype_int_v4si_v4si;
break;
@@ -12367,6 +14619,9 @@ altivec_init_builtins (void)
switch (mode0)
{
+ case V2DImode:
+ type = v2di_ftype_v2di;
+ break;
case V4SImode:
type = v4si_ftype_v4si;
break;
@@ -12497,6 +14752,107 @@ altivec_init_builtins (void)
ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
integer_type_node, NULL_TREE);
def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
+
+
+ if (V1TI_type_node)
+ {
+ tree v1ti_ftype_long_pcvoid
+ = build_function_type_list (V1TI_type_node,
+ long_integer_type_node, pcvoid_type_node,
+ NULL_TREE);
+ tree void_ftype_v1ti_long_pvoid
+ = build_function_type_list (void_type_node,
+ V1TI_type_node, long_integer_type_node,
+ pvoid_type_node, NULL_TREE);
+ def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
+ VSX_BUILTIN_LXVD2X_V1TI);
+ def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
+ VSX_BUILTIN_STXVD2X_V1TI);
+ ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
+ NULL_TREE, NULL_TREE);
+ def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
+ ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
+ intTI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
+ ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
+ integer_type_node, NULL_TREE);
+ def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
+ }
+
+}
+
+static void
+htm_init_builtins (void)
+{
+ HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
+ const struct builtin_description *d;
+ size_t i;
+
+ d = bdesc_htm;
+ for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
+ {
+ tree op[MAX_HTM_OPERANDS], type;
+ HOST_WIDE_INT mask = d->mask;
+ unsigned attr = rs6000_builtin_info[d->code].attr;
+ bool void_func = (attr & RS6000_BTC_VOID);
+ int attr_args = (attr & RS6000_BTC_TYPE_MASK);
+ int nopnds = 0;
+ tree argtype = (attr & RS6000_BTC_SPR) ? long_unsigned_type_node
+ : unsigned_type_node;
+
+ if ((mask & builtin_mask) != mask)
+ {
+ if (TARGET_DEBUG_BUILTIN)
+ fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
+ continue;
+ }
+
+ if (d->name == 0)
+ {
+ if (TARGET_DEBUG_BUILTIN)
+ fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n",
+ (long unsigned) i);
+ continue;
+ }
+
+ op[nopnds++] = (void_func) ? void_type_node : argtype;
+
+ if (attr_args == RS6000_BTC_UNARY)
+ op[nopnds++] = argtype;
+ else if (attr_args == RS6000_BTC_BINARY)
+ {
+ op[nopnds++] = argtype;
+ op[nopnds++] = argtype;
+ }
+ else if (attr_args == RS6000_BTC_TERNARY)
+ {
+ op[nopnds++] = argtype;
+ op[nopnds++] = argtype;
+ op[nopnds++] = argtype;
+ }
+
+ switch (nopnds)
+ {
+ case 1:
+ type = build_function_type_list (op[0], NULL_TREE);
+ break;
+ case 2:
+ type = build_function_type_list (op[0], op[1], NULL_TREE);
+ break;
+ case 3:
+ type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
+ break;
+ case 4:
+ type = build_function_type_list (op[0], op[1], op[2], op[3],
+ NULL_TREE);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ def_builtin (d->name, type, d->code);
+ }
}
/* Hash function for builtin functions with up to 3 arguments and a return
@@ -12572,11 +14928,34 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0,
are type correct. */
switch (builtin)
{
+ /* unsigned 1 argument functions. */
+ case CRYPTO_BUILTIN_VSBOX:
+ case P8V_BUILTIN_VGBBD:
+ case MISC_BUILTIN_CDTBCD:
+ case MISC_BUILTIN_CBCDTD:
+ h.uns_p[0] = 1;
+ h.uns_p[1] = 1;
+ break;
+
/* unsigned 2 argument functions. */
case ALTIVEC_BUILTIN_VMULEUB_UNS:
case ALTIVEC_BUILTIN_VMULEUH_UNS:
case ALTIVEC_BUILTIN_VMULOUB_UNS:
case ALTIVEC_BUILTIN_VMULOUH_UNS:
+ case CRYPTO_BUILTIN_VCIPHER:
+ case CRYPTO_BUILTIN_VCIPHERLAST:
+ case CRYPTO_BUILTIN_VNCIPHER:
+ case CRYPTO_BUILTIN_VNCIPHERLAST:
+ case CRYPTO_BUILTIN_VPMSUMB:
+ case CRYPTO_BUILTIN_VPMSUMH:
+ case CRYPTO_BUILTIN_VPMSUMW:
+ case CRYPTO_BUILTIN_VPMSUMD:
+ case CRYPTO_BUILTIN_VPMSUM:
+ case MISC_BUILTIN_ADDG6S:
+ case MISC_BUILTIN_DIVWEU:
+ case MISC_BUILTIN_DIVWEUO:
+ case MISC_BUILTIN_DIVDEU:
+ case MISC_BUILTIN_DIVDEUO:
h.uns_p[0] = 1;
h.uns_p[1] = 1;
h.uns_p[2] = 1;
@@ -12599,6 +14978,14 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0,
case VSX_BUILTIN_XXSEL_8HI_UNS:
case VSX_BUILTIN_XXSEL_4SI_UNS:
case VSX_BUILTIN_XXSEL_2DI_UNS:
+ case CRYPTO_BUILTIN_VPERMXOR:
+ case CRYPTO_BUILTIN_VPERMXOR_V2DI:
+ case CRYPTO_BUILTIN_VPERMXOR_V4SI:
+ case CRYPTO_BUILTIN_VPERMXOR_V8HI:
+ case CRYPTO_BUILTIN_VPERMXOR_V16QI:
+ case CRYPTO_BUILTIN_VSHASIGMAW:
+ case CRYPTO_BUILTIN_VSHASIGMAD:
+ case CRYPTO_BUILTIN_VSHASIGMA:
h.uns_p[0] = 1;
h.uns_p[1] = 1;
h.uns_p[2] = 1;
@@ -12630,9 +15017,18 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0,
/* signed args, unsigned return. */
case VSX_BUILTIN_XVCVDPUXDS_UNS:
case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
+ case MISC_BUILTIN_UNPACK_TD:
+ case MISC_BUILTIN_UNPACK_V1TI:
h.uns_p[0] = 1;
break;
+ /* unsigned arguments for 128-bit pack instructions. */
+ case MISC_BUILTIN_PACK_TD:
+ case MISC_BUILTIN_PACK_V1TI:
+ h.uns_p[1] = 1;
+ h.uns_p[2] = 1;
+ break;
+
default:
break;
}
@@ -12740,8 +15136,23 @@ rs6000_common_init_builtins (void)
else
{
enum insn_code icode = d->icode;
- if (d->name == 0 || icode == CODE_FOR_nothing)
+ if (d->name == 0)
+ {
+ if (TARGET_DEBUG_BUILTIN)
+ fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
+ (long unsigned)i);
+
+ continue;
+ }
+
+ if (icode == CODE_FOR_nothing)
+ {
+ if (TARGET_DEBUG_BUILTIN)
+ fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
+ d->name);
+
continue;
+ }
type = builtin_function_type (insn_data[icode].operand[0].mode,
insn_data[icode].operand[1].mode,
@@ -12780,8 +15191,23 @@ rs6000_common_init_builtins (void)
else
{
enum insn_code icode = d->icode;
- if (d->name == 0 || icode == CODE_FOR_nothing)
+ if (d->name == 0)
+ {
+ if (TARGET_DEBUG_BUILTIN)
+ fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
+ (long unsigned)i);
+
+ continue;
+ }
+
+ if (icode == CODE_FOR_nothing)
+ {
+ if (TARGET_DEBUG_BUILTIN)
+ fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
+ d->name);
+
continue;
+ }
mode0 = insn_data[icode].operand[0].mode;
mode1 = insn_data[icode].operand[1].mode;
@@ -12842,8 +15268,23 @@ rs6000_common_init_builtins (void)
else
{
enum insn_code icode = d->icode;
- if (d->name == 0 || icode == CODE_FOR_nothing)
+ if (d->name == 0)
+ {
+ if (TARGET_DEBUG_BUILTIN)
+ fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
+ (long unsigned)i);
+
+ continue;
+ }
+
+ if (icode == CODE_FOR_nothing)
+ {
+ if (TARGET_DEBUG_BUILTIN)
+ fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
+ d->name);
+
continue;
+ }
mode0 = insn_data[icode].operand[0].mode;
mode1 = insn_data[icode].operand[1].mode;
@@ -13631,7 +16072,7 @@ rs6000_secondary_memory_needed_rtx (enum machine_mode mode)
static bool eliminated = false;
rtx ret;
- if (mode != SDmode)
+ if (mode != SDmode || TARGET_NO_SDMODE_STACK)
ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
else
{
@@ -13660,6 +16101,17 @@ rs6000_secondary_memory_needed_rtx (enum machine_mode mode)
return ret;
}
+/* Return the mode to be used for memory when a secondary memory
+ location is needed. For SDmode values we need to use DDmode; in
+ all other cases we can use the same mode. */
+enum machine_mode
+rs6000_secondary_memory_needed_mode (enum machine_mode mode)
+{
+ if (mode == SDmode)
+ return DDmode;
+ return mode;
+}
+
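A quick sanity sketch of the hook above (illustrative assertions only, assuming the usual GCC checking macros): SDmode is the only mode that is widened, everything else passes through unchanged.

/* Illustration only, not part of the patch.  */
gcc_checking_assert (rs6000_secondary_memory_needed_mode (SDmode) == DDmode);
gcc_checking_assert (rs6000_secondary_memory_needed_mode (SImode) == SImode);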
static tree
rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
{
@@ -13690,29 +16142,226 @@ rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
return NULL_TREE;
}
-enum reload_reg_type {
- GPR_REGISTER_TYPE,
- VECTOR_REGISTER_TYPE,
- OTHER_REGISTER_TYPE
-};
+/* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
+ on traditional floating point registers, and the VMRGOW/VMRGEW instructions
+ only work on the traditional altivec registers, note if an altivec register
+ was chosen. */
-static enum reload_reg_type
-rs6000_reload_register_type (enum reg_class rclass)
+static enum rs6000_reg_type
+register_to_reg_type (rtx reg, bool *is_altivec)
{
- switch (rclass)
+ HOST_WIDE_INT regno;
+ enum reg_class rclass;
+
+ if (GET_CODE (reg) == SUBREG)
+ reg = SUBREG_REG (reg);
+
+ if (!REG_P (reg))
+ return NO_REG_TYPE;
+
+ regno = REGNO (reg);
+ if (regno >= FIRST_PSEUDO_REGISTER)
{
- case GENERAL_REGS:
- case BASE_REGS:
- return GPR_REGISTER_TYPE;
+ if (!lra_in_progress && !reload_in_progress && !reload_completed)
+ return PSEUDO_REG_TYPE;
- case FLOAT_REGS:
- case ALTIVEC_REGS:
- case VSX_REGS:
- return VECTOR_REGISTER_TYPE;
+ regno = true_regnum (reg);
+ if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
+ return PSEUDO_REG_TYPE;
+ }
- default:
- return OTHER_REGISTER_TYPE;
+ gcc_assert (regno >= 0);
+
+ if (is_altivec && ALTIVEC_REGNO_P (regno))
+ *is_altivec = true;
+
+ rclass = rs6000_regno_regclass[regno];
+ return reg_class_to_reg_type[(int)rclass];
+}
+
+/* Helper function for rs6000_secondary_reload to return true if a move to a
+ different register class is really a simple move. */
+
+static bool
+rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
+ enum rs6000_reg_type from_type,
+ enum machine_mode mode)
+{
+ int size;
+
+ /* Add support for various direct moves available. In this function, we only
+ look at cases where we don't need any extra registers, and one or more
+ simple move insns are issued. At present, 32-bit integers are not allowed
+ in FPR/VSX registers. Single precision binary floating is not a simple
+ move because we need to convert to the single precision memory layout.
+ The 4-byte SDmode can be moved. */
+ size = GET_MODE_SIZE (mode);
+ if (TARGET_DIRECT_MOVE
+ && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
+ && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
+ || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
+ return true;
+
+ else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
+ && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
+ || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
+ return true;
+
+ else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
+ && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
+ || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
+ return true;
+
+ return false;
+}
+
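To make the predicate above concrete, a sketch of its behaviour on a 64-bit power8 configuration (TARGET_DIRECT_MOVE and TARGET_POWERPC64 set); the calls are illustrative only and not part of the patch.

rs6000_secondary_reload_simple_move (VSX_REG_TYPE, GPR_REG_TYPE, DImode); /* true: one mtvsrd                        */
rs6000_secondary_reload_simple_move (VSX_REG_TYPE, GPR_REG_TYPE, SDmode); /* true: SDmode is explicitly allowed      */
rs6000_secondary_reload_simple_move (VSX_REG_TYPE, GPR_REG_TYPE, SImode); /* false: 32-bit ints stay out of FPR/VSX  */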
+/* Power8 helper function for rs6000_secondary_reload.  Handle all of the
+ special direct moves that involve allocating an extra register; return
+ true if such a move is needed and, if SRI is non-null, record the insn
+ code and extra cost of the helper sequence in SRI. */
+
+static bool
+rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
+ enum rs6000_reg_type from_type,
+ enum machine_mode mode,
+ secondary_reload_info *sri,
+ bool altivec_p)
+{
+ bool ret = false;
+ enum insn_code icode = CODE_FOR_nothing;
+ int cost = 0;
+ int size = GET_MODE_SIZE (mode);
+
+ if (TARGET_POWERPC64)
+ {
+ if (size == 16)
+ {
+ /* Handle moving 128-bit values from GPRs to VSX point registers on
+ power8 when running in 64-bit mode using XXPERMDI to glue the two
+ 64-bit values back together. */
+ if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
+ {
+ cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
+ icode = reg_addr[mode].reload_vsx_gpr;
+ }
+
+ /* Handle moving 128-bit values from VSX point registers to GPRs on
+ power8 when running in 64-bit mode using XXPERMDI to get access to the
+ bottom 64-bit value. */
+ else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
+ {
+ cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
+ icode = reg_addr[mode].reload_gpr_vsx;
+ }
+ }
+
+ else if (mode == SFmode)
+ {
+ if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
+ {
+ cost = 3; /* xscvdpspn, mfvsrd, and. */
+ icode = reg_addr[mode].reload_gpr_vsx;
+ }
+
+ else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
+ {
+ cost = 2; /* mtvsrz, xscvspdpn. */
+ icode = reg_addr[mode].reload_vsx_gpr;
+ }
+ }
+ }
+
+ if (TARGET_POWERPC64 && size == 16)
+ {
+ /* Handle moving 128-bit values from GPRs to VSX point registers on
+ power8 when running in 64-bit mode using XXPERMDI to glue the two
+ 64-bit values back together. */
+ if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
+ {
+ cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
+ icode = reg_addr[mode].reload_vsx_gpr;
+ }
+
+ /* Handle moving 128-bit values from VSX point registers to GPRs on
+ power8 when running in 64-bit mode using XXPERMDI to get access to the
+ bottom 64-bit value. */
+ else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
+ {
+ cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
+ icode = reg_addr[mode].reload_gpr_vsx;
+ }
+ }
+
+ else if (!TARGET_POWERPC64 && size == 8)
+ {
+ /* Handle moving 64-bit values from GPRs to floating point registers on
+ power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit
+ values back together. Altivec register classes must be handled
+ specially since a different instruction is used, and the secondary
+ reload support requires a single instruction class in the scratch
+ register constraint. However, right now TFmode is not allowed in
+ Altivec registers, so the pattern will never match. */
+ if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
+ {
+ cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
+ icode = reg_addr[mode].reload_fpr_gpr;
+ }
}
+
+ if (icode != CODE_FOR_nothing)
+ {
+ ret = true;
+ if (sri)
+ {
+ sri->icode = icode;
+ sri->extra_cost = cost;
+ }
+ }
+
+ return ret;
+}
+
+/* Return whether a move between two register classes can be done either
+ directly (simple move) or via a pattern that uses a single extra temporary
+ (using power8's direct move in this case). */
+
+static bool
+rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
+ enum rs6000_reg_type from_type,
+ enum machine_mode mode,
+ secondary_reload_info *sri,
+ bool altivec_p)
+{
+ /* Fall back to load/store reloads if either type is not a register. */
+ if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
+ return false;
+
+ /* If we haven't allocated registers yet, assume the move can be done for the
+ standard register types. */
+ if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
+ || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
+ || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
+ return true;
+
+ /* Moves within the same set of registers are simple moves for
+ non-specialized registers. */
+ if (to_type == from_type && IS_STD_REG_TYPE (to_type))
+ return true;
+
+ /* Check whether a simple move can be done directly. */
+ if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
+ {
+ if (sri)
+ {
+ sri->icode = CODE_FOR_nothing;
+ sri->extra_cost = 0;
+ }
+ return true;
+ }
+
+ /* Now check if we can do it in a few steps. */
+ return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
+ altivec_p);
}
/* Inform reload about cases where moving X with a mode MODE to a register in
@@ -13738,11 +16387,35 @@ rs6000_secondary_reload (bool in_p,
bool default_p = false;
sri->icode = CODE_FOR_nothing;
+ icode = ((in_p)
+ ? reg_addr[mode].reload_load
+ : reg_addr[mode].reload_store);
- /* Convert vector loads and stores into gprs to use an additional base
- register. */
- icode = rs6000_vector_reload[mode][in_p != false];
- if (icode != CODE_FOR_nothing)
+ if (REG_P (x) || register_operand (x, mode))
+ {
+ enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
+ bool altivec_p = (rclass == ALTIVEC_REGS);
+ enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
+
+ if (!in_p)
+ {
+ enum rs6000_reg_type exchange = to_type;
+ to_type = from_type;
+ from_type = exchange;
+ }
+
+ /* Can we do a direct move of some sort? */
+ if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
+ altivec_p))
+ {
+ icode = (enum insn_code)sri->icode;
+ default_p = false;
+ ret = NO_REGS;
+ }
+ }
+
+ /* Handle vector moves with reload helper functions. */
+ if (ret == ALL_REGS && icode != CODE_FOR_nothing)
{
ret = NO_REGS;
sri->icode = CODE_FOR_nothing;
@@ -13754,11 +16427,20 @@ rs6000_secondary_reload (bool in_p,
/* Loads to and stores from gprs can do reg+offset, and wouldn't need
an extra register in that case, but it would need an extra
- register if the addressing is reg+reg or (reg+reg)&(-16). */
+ register if the addressing is reg+reg or (reg+reg)&(-16). Special
+ case load/store quad. */
if (rclass == GENERAL_REGS || rclass == BASE_REGS)
{
- if (!legitimate_indirect_address_p (addr, false)
- && !rs6000_legitimate_offset_address_p (TImode, addr,
+ if (TARGET_POWERPC64 && TARGET_QUAD_MEMORY
+ && GET_MODE_SIZE (mode) == 16
+ && quad_memory_operand (x, mode))
+ {
+ sri->icode = icode;
+ sri->extra_cost = 2;
+ }
+
+ else if (!legitimate_indirect_address_p (addr, false)
+ && !rs6000_legitimate_offset_address_p (PTImode, addr,
false, true))
{
sri->icode = icode;
@@ -13768,8 +16450,20 @@ rs6000_secondary_reload (bool in_p,
+ ((GET_CODE (addr) == AND) ? 1 : 0));
}
}
+ /* Allow scalar loads to/from the traditional floating point
+ registers, even if VSX memory is set. */
+ else if ((rclass == FLOAT_REGS || rclass == NO_REGS)
+ && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
+ && (legitimate_indirect_address_p (addr, false)
+ || legitimate_indexed_address_p (addr, false)
+ || rs6000_legitimate_offset_address_p (mode, addr,
+ false, true)))
+
+ ;
/* Loads to and stores from vector registers can only do reg+reg
- addressing. Altivec registers can also do (reg+reg)&(-16). */
+ addressing. Altivec registers can also do (reg+reg)&(-16). Allow
+ scalar modes loading up the traditional floating point registers
+ to use offset addresses. */
else if (rclass == VSX_REGS || rclass == ALTIVEC_REGS
|| rclass == FLOAT_REGS || rclass == NO_REGS)
{
@@ -13813,12 +16507,12 @@ rs6000_secondary_reload (bool in_p,
else
{
enum reg_class xclass = REGNO_REG_CLASS (regno);
- enum reload_reg_type rtype1 = rs6000_reload_register_type (rclass);
- enum reload_reg_type rtype2 = rs6000_reload_register_type (xclass);
+ enum rs6000_reg_type rtype1 = reg_class_to_reg_type[(int)rclass];
+ enum rs6000_reg_type rtype2 = reg_class_to_reg_type[(int)xclass];
/* If memory is needed, use default_secondary_reload to create the
stack slot. */
- if (rtype1 != rtype2 || rtype1 == OTHER_REGISTER_TYPE)
+ if (rtype1 != rtype2 || !IS_STD_REG_TYPE (rtype1))
default_p = true;
else
ret = NO_REGS;
@@ -13828,7 +16522,7 @@ rs6000_secondary_reload (bool in_p,
default_p = true;
}
else if (TARGET_POWERPC64
- && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE
+ && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
&& MEM_P (x)
&& GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
{
@@ -13867,7 +16561,7 @@ rs6000_secondary_reload (bool in_p,
default_p = true;
}
else if (!TARGET_POWERPC64
- && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE
+ && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
&& MEM_P (x)
&& GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
{
@@ -13945,6 +16639,36 @@ rs6000_secondary_reload (bool in_p,
return ret;
}
+/* Better tracing for rs6000_secondary_reload_inner. */
+
+static void
+rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
+ bool store_p)
+{
+ rtx set, clobber;
+
+ gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
+
+ fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
+ store_p ? "store" : "load");
+
+ if (store_p)
+ set = gen_rtx_SET (VOIDmode, mem, reg);
+ else
+ set = gen_rtx_SET (VOIDmode, reg, mem);
+
+ clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
+ debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
+}
+
+static void
+rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
+ bool store_p)
+{
+ rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
+ gcc_unreachable ();
+}
+
/* Fixup reload addresses for Altivec or VSX loads/stores to change SP+offset
to SP+reg addressing. */
@@ -13963,21 +16687,16 @@ rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
rtx cc_clobber;
if (TARGET_DEBUG_ADDR)
- {
- fprintf (stderr, "\nrs6000_secondary_reload_inner, type = %s\n",
- store_p ? "store" : "load");
- fprintf (stderr, "reg:\n");
- debug_rtx (reg);
- fprintf (stderr, "mem:\n");
- debug_rtx (mem);
- fprintf (stderr, "scratch:\n");
- debug_rtx (scratch);
- }
+ rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
+
+ if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
+
+ if (GET_CODE (mem) != MEM)
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
- gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
- gcc_assert (GET_CODE (mem) == MEM);
rclass = REGNO_REG_CLASS (regno);
- addr = XEXP (mem, 0);
+ addr = find_replacement (&XEXP (mem, 0));
switch (rclass)
{
@@ -13988,25 +16707,31 @@ rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
if (GET_CODE (addr) == AND)
{
and_op2 = XEXP (addr, 1);
- addr = XEXP (addr, 0);
+ addr = find_replacement (&XEXP (addr, 0));
}
if (GET_CODE (addr) == PRE_MODIFY)
{
- scratch_or_premodify = XEXP (addr, 0);
- gcc_assert (REG_P (scratch_or_premodify));
- gcc_assert (GET_CODE (XEXP (addr, 1)) == PLUS);
- addr = XEXP (addr, 1);
+ scratch_or_premodify = find_replacement (&XEXP (addr, 0));
+ if (!REG_P (scratch_or_premodify))
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
+
+ addr = find_replacement (&XEXP (addr, 1));
+ if (GET_CODE (addr) != PLUS)
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
}
if (GET_CODE (addr) == PLUS
&& (and_op2 != NULL_RTX
- || !rs6000_legitimate_offset_address_p (TImode, addr,
+ || !rs6000_legitimate_offset_address_p (PTImode, addr,
false, true)))
{
+ /* find_replacement already recurses into both operands of
+ PLUS so we don't need to call it here. */
addr_op1 = XEXP (addr, 0);
addr_op2 = XEXP (addr, 1);
- gcc_assert (legitimate_indirect_address_p (addr_op1, false));
+ if (!legitimate_indirect_address_p (addr_op1, false))
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
if (!REG_P (addr_op2)
&& (GET_CODE (addr_op2) != CONST_INT
@@ -14034,7 +16759,7 @@ rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
scratch_or_premodify = scratch;
}
else if (!legitimate_indirect_address_p (addr, false)
- && !rs6000_legitimate_offset_address_p (TImode, addr,
+ && !rs6000_legitimate_offset_address_p (PTImode, addr,
false, true))
{
if (TARGET_DEBUG_ADDR)
@@ -14050,9 +16775,21 @@ rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
}
break;
- /* Float/Altivec registers can only handle reg+reg addressing. Move
- other addresses into a scratch register. */
+ /* Float registers can do offset+reg addressing for scalar types. */
case FLOAT_REGS:
+ if (legitimate_indirect_address_p (addr, false) /* reg */
+ || legitimate_indexed_address_p (addr, false) /* reg+reg */
+ || ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
+ && and_op2 == NULL_RTX
+ && scratch_or_premodify == scratch
+ && rs6000_legitimate_offset_address_p (mode, addr, false, false)))
+ break;
+
+ /* If this isn't a legacy floating point load/store, fall through to the
+ VSX defaults. */
+
+ /* VSX/Altivec registers can only handle reg+reg addressing. Move other
+ addresses into a scratch register. */
case VSX_REGS:
case ALTIVEC_REGS:
@@ -14066,42 +16803,43 @@ rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
|| !VECTOR_MEM_ALTIVEC_P (mode)))
{
and_op2 = XEXP (addr, 1);
- addr = XEXP (addr, 0);
+ addr = find_replacement (&XEXP (addr, 0));
}
/* If we aren't using a VSX load, save the PRE_MODIFY register and use it
as the address later. */
if (GET_CODE (addr) == PRE_MODIFY
- && (!VECTOR_MEM_VSX_P (mode)
+ && ((ALTIVEC_OR_VSX_VECTOR_MODE (mode)
+ && (rclass != FLOAT_REGS
+ || (GET_MODE_SIZE (mode) != 4 && GET_MODE_SIZE (mode) != 8)))
|| and_op2 != NULL_RTX
|| !legitimate_indexed_address_p (XEXP (addr, 1), false)))
{
- scratch_or_premodify = XEXP (addr, 0);
- gcc_assert (legitimate_indirect_address_p (scratch_or_premodify,
- false));
- gcc_assert (GET_CODE (XEXP (addr, 1)) == PLUS);
- addr = XEXP (addr, 1);
+ scratch_or_premodify = find_replacement (&XEXP (addr, 0));
+ if (!legitimate_indirect_address_p (scratch_or_premodify, false))
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
+
+ addr = find_replacement (&XEXP (addr, 1));
+ if (GET_CODE (addr) != PLUS)
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
}
if (legitimate_indirect_address_p (addr, false) /* reg */
|| legitimate_indexed_address_p (addr, false) /* reg+reg */
- || GET_CODE (addr) == PRE_MODIFY /* VSX pre-modify */
|| (GET_CODE (addr) == AND /* Altivec memory */
+ && rclass == ALTIVEC_REGS
&& GET_CODE (XEXP (addr, 1)) == CONST_INT
&& INTVAL (XEXP (addr, 1)) == -16
- && VECTOR_MEM_ALTIVEC_P (mode))
- || (rclass == FLOAT_REGS /* legacy float mem */
- && GET_MODE_SIZE (mode) == 8
- && and_op2 == NULL_RTX
- && scratch_or_premodify == scratch
- && rs6000_legitimate_offset_address_p (mode, addr, false, false)))
+ && (legitimate_indirect_address_p (XEXP (addr, 0), false)
+ || legitimate_indexed_address_p (XEXP (addr, 0), false))))
;
else if (GET_CODE (addr) == PLUS)
{
addr_op1 = XEXP (addr, 0);
addr_op2 = XEXP (addr, 1);
- gcc_assert (REG_P (addr_op1));
+ if (!REG_P (addr_op1))
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
if (TARGET_DEBUG_ADDR)
{
@@ -14120,7 +16858,8 @@ rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
}
else if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == CONST
- || GET_CODE (addr) == CONST_INT || REG_P (addr))
+ || GET_CODE (addr) == CONST_INT || GET_CODE (addr) == LO_SUM
+ || REG_P (addr))
{
if (TARGET_DEBUG_ADDR)
{
@@ -14136,12 +16875,12 @@ rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
}
else
- gcc_unreachable ();
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
break;
default:
- gcc_unreachable ();
+ rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
}
/* If the original address involved a pre-modify that we couldn't use the VSX
@@ -14188,7 +16927,7 @@ rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
/* Adjust the address if it changed. */
if (addr != XEXP (mem, 0))
{
- mem = change_address (mem, mode, addr);
+ mem = replace_equiv_address_nv (mem, addr);
if (TARGET_DEBUG_ADDR)
fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
}
@@ -14253,8 +16992,10 @@ rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
return;
}
-/* Allocate a 64-bit stack slot to be used for copying SDmode
- values through if this function has any SDmode references. */
+/* Allocate a 64-bit stack slot to be used for copying SDmode values through if
+ this function has any SDmode references. If we are on a power7 or later, we
+ don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
+ can load/store the value. */
static void
rs6000_alloc_sdmode_stack_slot (void)
@@ -14264,6 +17005,13 @@ rs6000_alloc_sdmode_stack_slot (void)
gimple_stmt_iterator gsi;
gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
+ /* We use a different approach for dealing with the secondary
+ memory in LRA. */
+ if (ira_use_lra_p)
+ return;
+
+ if (TARGET_NO_SDMODE_STACK)
+ return;
FOR_EACH_BB (bb)
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
@@ -14325,8 +17073,7 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
{
enum machine_mode mode = GET_MODE (x);
- if (VECTOR_UNIT_VSX_P (mode)
- && x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
+ if (TARGET_VSX && x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
return rclass;
if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
@@ -14334,8 +17081,14 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
&& easy_vector_constant (x, mode))
return ALTIVEC_REGS;
- if (CONSTANT_P (x) && reg_classes_intersect_p (rclass, FLOAT_REGS))
+ if ((CONSTANT_P (x) || GET_CODE (x) == PLUS))
+ {
+ if (reg_class_subset_p (GENERAL_REGS, rclass))
+ return GENERAL_REGS;
+ if (reg_class_subset_p (BASE_REGS, rclass))
+ return BASE_REGS;
return NO_REGS;
+ }
if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
return GENERAL_REGS;
@@ -14349,7 +17102,8 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
if (GET_MODE_SIZE (mode) <= 8)
return FLOAT_REGS;
- if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode))
+ if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
+ || mode == V1TImode)
return ALTIVEC_REGS;
return rclass;
@@ -14381,42 +17135,25 @@ rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
set and vice versa. */
static bool
-rs6000_secondary_memory_needed (enum reg_class class1,
- enum reg_class class2,
+rs6000_secondary_memory_needed (enum reg_class from_class,
+ enum reg_class to_class,
enum machine_mode mode)
{
- if (class1 == class2)
- return false;
-
- /* Under VSX, there are 3 register classes that values could be in (VSX_REGS,
- ALTIVEC_REGS, and FLOAT_REGS). We don't need to use memory to copy
- between these classes. But we need memory for other things that can go in
- FLOAT_REGS like SFmode. */
- if (TARGET_VSX
- && (VECTOR_MEM_VSX_P (mode) || VECTOR_UNIT_VSX_P (mode))
- && (class1 == VSX_REGS || class1 == ALTIVEC_REGS
- || class1 == FLOAT_REGS))
- return (class2 != VSX_REGS && class2 != ALTIVEC_REGS
- && class2 != FLOAT_REGS);
+ enum rs6000_reg_type from_type, to_type;
+ bool altivec_p = ((from_class == ALTIVEC_REGS)
+ || (to_class == ALTIVEC_REGS));
- if (class1 == VSX_REGS || class2 == VSX_REGS)
- return true;
-
- if (class1 == FLOAT_REGS
- && (!TARGET_MFPGPR || !TARGET_POWERPC64
- || ((mode != DFmode)
- && (mode != DDmode)
- && (mode != DImode))))
- return true;
+ /* If a simple/direct move is available, we don't need secondary memory. */
+ from_type = reg_class_to_reg_type[(int)from_class];
+ to_type = reg_class_to_reg_type[(int)to_class];
- if (class2 == FLOAT_REGS
- && (!TARGET_MFPGPR || !TARGET_POWERPC64
- || ((mode != DFmode)
- && (mode != DDmode)
- && (mode != DImode))))
- return true;
+ if (rs6000_secondary_reload_move (to_type, from_type, mode,
+ (secondary_reload_info *)0, altivec_p))
+ return false;
- if (class1 == ALTIVEC_REGS || class2 == ALTIVEC_REGS)
+ /* If we have a floating point or vector register class, we need to use
+ memory to transfer the data. */
+ if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
return true;
return false;
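Reduced to its decision sequence, the rewritten predicate reads roughly as the sketch below (illustrative only; it reuses the reg-type helpers introduced by this patch rather than quoting code from the tree):

/* Illustrative restatement of the new secondary-memory test.  */
static bool
needs_secondary_memory (enum rs6000_reg_type from_type,
                        enum rs6000_reg_type to_type,
                        bool direct_move_available)
{
  /* A direct or simple move (e.g. power8 GPR<->VSX moves) needs no slot.  */
  if (direct_move_available)
    return false;

  /* Otherwise anything touching an FP/Altivec/VSX register goes via memory.  */
  if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
    return true;

  return false;
}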
@@ -14424,17 +17161,19 @@ rs6000_secondary_memory_needed (enum reg_class class1,
/* Debug version of rs6000_secondary_memory_needed. */
static bool
-rs6000_debug_secondary_memory_needed (enum reg_class class1,
- enum reg_class class2,
+rs6000_debug_secondary_memory_needed (enum reg_class from_class,
+ enum reg_class to_class,
enum machine_mode mode)
{
- bool ret = rs6000_secondary_memory_needed (class1, class2, mode);
+ bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
fprintf (stderr,
- "rs6000_secondary_memory_needed, return: %s, class1 = %s, "
- "class2 = %s, mode = %s\n",
- ret ? "true" : "false", reg_class_names[class1],
- reg_class_names[class2], GET_MODE_NAME (mode));
+ "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
+ "to_class = %s, mode = %s\n",
+ ret ? "true" : "false",
+ reg_class_names[from_class],
+ reg_class_names[to_class],
+ GET_MODE_NAME (mode));
return ret;
}
@@ -14498,14 +17237,20 @@ rs6000_secondary_reload_class (enum reg_class rclass, enum machine_mode mode,
/* Constants, memory, and FP registers can go into FP registers. */
if ((regno == -1 || FP_REGNO_P (regno))
&& (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
- return (mode != SDmode) ? NO_REGS : GENERAL_REGS;
+ return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
/* Memory, and FP/altivec registers can go into fp/altivec registers under
- VSX. */
+ VSX. However, for scalar variables, use the traditional floating point
+ registers so that we can use offset+register addressing. */
if (TARGET_VSX
&& (regno == -1 || VSX_REGNO_P (regno))
&& VSX_REG_CLASS_P (rclass))
+ {
+ if (GET_MODE_SIZE (mode) < 16)
+ return FLOAT_REGS;
+
return NO_REGS;
+ }
/* Memory, and AltiVec registers can go into AltiVec registers. */
if ((regno == -1 || ALTIVEC_REGNO_P (regno))
@@ -14550,8 +17295,42 @@ rs6000_cannot_change_mode_class (enum machine_mode from,
if (from_size != to_size)
{
enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
- return ((from_size < 8 || to_size < 8 || TARGET_IEEEQUAD)
- && reg_classes_intersect_p (xclass, rclass));
+
+ if (reg_classes_intersect_p (xclass, rclass))
+ {
+ unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
+ unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
+
+ /* Don't allow 64-bit types to overlap with 128-bit types that take a
+ single register under VSX because the scalar part of the register
+ is in the upper 64-bits, and not the lower 64-bits. Types like
+ TFmode/TDmode that take 2 scalar register can overlap. 128-bit
+ IEEE floating point can't overlap, and neither can small
+ values. */
+
+ if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode))
+ return true;
+
+ /* TDmode in floating-mode registers must always go into a register
+ pair with the most significant word in the even-numbered register
+ to match ISA requirements. In little-endian mode, this does not
+ match subreg numbering, so we cannot allow subregs. */
+ if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
+ return true;
+
+ if (from_size < 8 || to_size < 8)
+ return true;
+
+ if (from_size == 8 && (8 * to_nregs) != to_size)
+ return true;
+
+ if (to_size == 8 && (8 * from_nregs) != from_size)
+ return true;
+
+ return false;
+ }
+ else
+ return false;
}
if (TARGET_E500_DOUBLE
@@ -14565,9 +17344,18 @@ rs6000_cannot_change_mode_class (enum machine_mode from,
/* Since the VSX register set includes traditional floating point registers
and altivec registers, just check for the size being different instead of
trying to check whether the modes are vector modes. Otherwise it won't
- allow say DF and DI to change classes. */
+ allow say DF and DI to change classes. For types like TFmode and TDmode
+ that take 2 64-bit registers, rather than a single 128-bit register, don't
+ allow subregs of those types to other 128 bit types. */
if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
+ {
+ unsigned num_regs = (from_size + 15) / 16;
+ if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
+ || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
+ return true;
+
return (from_size != 8 && from_size != 16);
+ }
if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
&& (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
@@ -14598,6 +17386,186 @@ rs6000_debug_cannot_change_mode_class (enum machine_mode from,
return ret;
}
+/* Return a string to do a move operation of 128 bits of data. */
+
+const char *
+rs6000_output_move_128bit (rtx operands[])
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ enum machine_mode mode = GET_MODE (dest);
+ int dest_regno;
+ int src_regno;
+ bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
+ bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
+
+ if (REG_P (dest))
+ {
+ dest_regno = REGNO (dest);
+ dest_gpr_p = INT_REGNO_P (dest_regno);
+ dest_fp_p = FP_REGNO_P (dest_regno);
+ dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
+ dest_vsx_p = dest_fp_p | dest_vmx_p;
+ }
+ else
+ {
+ dest_regno = -1;
+ dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
+ }
+
+ if (REG_P (src))
+ {
+ src_regno = REGNO (src);
+ src_gpr_p = INT_REGNO_P (src_regno);
+ src_fp_p = FP_REGNO_P (src_regno);
+ src_vmx_p = ALTIVEC_REGNO_P (src_regno);
+ src_vsx_p = src_fp_p | src_vmx_p;
+ }
+ else
+ {
+ src_regno = -1;
+ src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
+ }
+
+ /* Register moves. */
+ if (dest_regno >= 0 && src_regno >= 0)
+ {
+ if (dest_gpr_p)
+ {
+ if (src_gpr_p)
+ return "#";
+
+ else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
+ return "#";
+ }
+
+ else if (TARGET_VSX && dest_vsx_p)
+ {
+ if (src_vsx_p)
+ return "xxlor %x0,%x1,%x1";
+
+ else if (TARGET_DIRECT_MOVE && src_gpr_p)
+ return "#";
+ }
+
+ else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
+ return "vor %0,%1,%1";
+
+ else if (dest_fp_p && src_fp_p)
+ return "#";
+ }
+
+ /* Loads. */
+ else if (dest_regno >= 0 && MEM_P (src))
+ {
+ if (dest_gpr_p)
+ {
+ if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
+ return "lq %0,%1";
+ else
+ return "#";
+ }
+
+ else if (TARGET_ALTIVEC && dest_vmx_p
+ && altivec_indexed_or_indirect_operand (src, mode))
+ return "lvx %0,%y1";
+
+ else if (TARGET_VSX && dest_vsx_p)
+ {
+ if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
+ return "lxvw4x %x0,%y1";
+ else
+ return "lxvd2x %x0,%y1";
+ }
+
+ else if (TARGET_ALTIVEC && dest_vmx_p)
+ return "lvx %0,%y1";
+
+ else if (dest_fp_p)
+ return "#";
+ }
+
+ /* Stores. */
+ else if (src_regno >= 0 && MEM_P (dest))
+ {
+ if (src_gpr_p)
+ {
+ if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
+ return "stq %1,%0";
+ else
+ return "#";
+ }
+
+ else if (TARGET_ALTIVEC && src_vmx_p
+ && altivec_indexed_or_indirect_operand (src, mode))
+ return "stvx %1,%y0";
+
+ else if (TARGET_VSX && src_vsx_p)
+ {
+ if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
+ return "stxvw4x %x1,%y0";
+ else
+ return "stxvd2x %x1,%y0";
+ }
+
+ else if (TARGET_ALTIVEC && src_vmx_p)
+ return "stvx %1,%y0";
+
+ else if (src_fp_p)
+ return "#";
+ }
+
+ /* Constants. */
+ else if (dest_regno >= 0
+ && (GET_CODE (src) == CONST_INT
+ || GET_CODE (src) == CONST_DOUBLE
+ || GET_CODE (src) == CONST_VECTOR))
+ {
+ if (dest_gpr_p)
+ return "#";
+
+ else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
+ return "xxlxor %x0,%x0,%x0";
+
+ else if (TARGET_ALTIVEC && dest_vmx_p)
+ return output_vec_const_move (operands);
+ }
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "\n===== Bad 128 bit move:\n");
+ debug_rtx (gen_rtx_SET (VOIDmode, dest, src));
+ }
+
+ gcc_unreachable ();
+}
+
+/* Validate a 128-bit move. */
+bool
+rs6000_move_128bit_ok_p (rtx operands[])
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+ return (gpc_reg_operand (operands[0], mode)
+ || gpc_reg_operand (operands[1], mode));
+}
+
+/* Return true if a 128-bit move needs to be split. */
+bool
+rs6000_split_128bit_ok_p (rtx operands[])
+{
+ if (!reload_completed)
+ return false;
+
+ if (!gpr_or_gpr_p (operands[0], operands[1]))
+ return false;
+
+ if (quad_load_store_p (operands[0], operands[1]))
+ return false;
+
+ return true;
+}
+
+
/* Given a comparison operation, return the bit number in CCR to test. We
know this is a valid comparison.
@@ -14823,6 +17791,7 @@ rs6000_output_function_entry (FILE *file, const char *fname)
ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
break;
+ case ABI_ELFv2:
case ABI_V4:
case ABI_DARWIN:
break;
@@ -15302,7 +18271,7 @@ print_operand (FILE *file, rtx x, int code)
return;
case 'Y':
- /* Like 'L', for third word of TImode */
+ /* Like 'L', for third word of TImode/PTImode */
if (REG_P (x))
fputs (reg_names[REGNO (x) + 2], file);
else if (MEM_P (x))
@@ -15352,7 +18321,7 @@ print_operand (FILE *file, rtx x, int code)
return;
case 'Z':
- /* Like 'L', for last word of TImode. */
+ /* Like 'L', for last word of TImode/PTImode. */
if (REG_P (x))
fputs (reg_names[REGNO (x) + 3], file);
else if (MEM_P (x))
@@ -15383,7 +18352,8 @@ print_operand (FILE *file, rtx x, int code)
if ((TARGET_SPE || TARGET_E500_DOUBLE)
&& (GET_MODE_SIZE (GET_MODE (x)) == 8
|| GET_MODE (x) == TFmode
- || GET_MODE (x) == TImode))
+ || GET_MODE (x) == TImode
+ || GET_MODE (x) == PTImode))
{
/* Handle [reg]. */
if (REG_P (tmp))
@@ -16766,7 +19736,8 @@ emit_unlikely_jump (rtx cond, rtx label)
}
/* A subroutine of the atomic operation splitters. Emit a load-locked
- instruction in MODE. */
+ instruction in MODE. For QI/HImode, possibly use a pattern that includes
+ the zero_extend operation. */
static void
emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
@@ -16775,12 +19746,26 @@ emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
switch (mode)
{
+ case QImode:
+ fn = gen_load_lockedqi;
+ break;
+ case HImode:
+ fn = gen_load_lockedhi;
+ break;
case SImode:
+ if (GET_MODE (mem) == QImode)
+ fn = gen_load_lockedqi_si;
+ else if (GET_MODE (mem) == HImode)
+ fn = gen_load_lockedhi_si;
+ else
fn = gen_load_lockedsi;
break;
case DImode:
fn = gen_load_lockeddi;
break;
+ case TImode:
+ fn = gen_load_lockedti;
+ break;
default:
gcc_unreachable ();
}
@@ -16797,12 +19782,21 @@ emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val)
switch (mode)
{
+ case QImode:
+ fn = gen_store_conditionalqi;
+ break;
+ case HImode:
+ fn = gen_store_conditionalhi;
+ break;
case SImode:
fn = gen_store_conditionalsi;
break;
case DImode:
fn = gen_store_conditionaldi;
break;
+ case TImode:
+ fn = gen_store_conditionalti;
+ break;
default:
gcc_unreachable ();
}
@@ -16949,7 +19943,7 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
{
rtx boolval, retval, mem, oldval, newval, cond;
rtx label1, label2, x, mask, shift;
- enum machine_mode mode;
+ enum machine_mode mode, orig_mode;
enum memmodel mod_s, mod_f;
bool is_weak;
@@ -16961,15 +19955,21 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
is_weak = (INTVAL (operands[5]) != 0);
mod_s = (enum memmodel) INTVAL (operands[6]);
mod_f = (enum memmodel) INTVAL (operands[7]);
- mode = GET_MODE (mem);
+ orig_mode = mode = GET_MODE (mem);
mask = shift = NULL_RTX;
if (mode == QImode || mode == HImode)
{
+ /* Before power8, we didn't have access to lbarx/lharx, so generate a
+ lwarx and use shift/mask operations. With power8, we need to do the
+ comparison in SImode, but the store is still done in QI/HImode. */
+ oldval = convert_modes (SImode, mode, oldval, 1);
+
+ if (!TARGET_SYNC_HI_QI)
+ {
mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
/* Shift and mask OLDVAL into position with the word. */
- oldval = convert_modes (SImode, mode, oldval, 1);
oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
NULL_RTX, 1, OPTAB_LIB_WIDEN);
@@ -16977,6 +19977,7 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
newval = convert_modes (SImode, mode, newval, 1);
newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ }
/* Prepare to adjust the return value. */
retval = gen_reg_rtx (SImode);
@@ -17005,7 +20006,25 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
}
cond = gen_reg_rtx (CCmode);
+ /* If we have TImode, synthesize a comparison. */
+ if (mode != TImode)
x = gen_rtx_COMPARE (CCmode, x, oldval);
+ else
+ {
+ rtx xor1_result = gen_reg_rtx (DImode);
+ rtx xor2_result = gen_reg_rtx (DImode);
+ rtx or_result = gen_reg_rtx (DImode);
+ rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
+ rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
+ rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
+ rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
+
+ emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
+ emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
+ emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
+ x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
+ }
+
emit_insn (gen_rtx_SET (VOIDmode, cond, x));
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
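The synthesized TImode comparison uses the identity that two 128-bit values are equal exactly when the OR of the XORs of their 64-bit halves is zero; a stand-alone sketch of the same test (illustrative, not part of the patch):

/* Illustrative: 128-bit equality built from 64-bit halves, mirroring the
   xordi3/xordi3/iordi3 sequence compared against zero.  */
static int
ti_values_equal (unsigned long long new_hi, unsigned long long new_lo,
                 unsigned long long old_hi, unsigned long long old_lo)
{
  return ((new_hi ^ old_hi) | (new_lo ^ old_lo)) == 0;
}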
@@ -17015,7 +20034,7 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
if (mask)
x = rs6000_mask_atomic_subword (retval, newval, mask);
- emit_store_conditional (mode, cond, mem, x);
+ emit_store_conditional (orig_mode, cond, mem, x);
if (!is_weak)
{
@@ -17033,6 +20052,8 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
if (shift)
rs6000_finish_atomic_subword (operands[1], retval, shift);
+ else if (mode != GET_MODE (operands[1]))
+ convert_move (operands[1], retval, 1);
/* In all cases, CR0 contains EQ on success, and NE on failure. */
x = gen_rtx_EQ (SImode, cond, const0_rtx);
@@ -17056,7 +20077,7 @@ rs6000_expand_atomic_exchange (rtx operands[])
mode = GET_MODE (mem);
mask = shift = NULL_RTX;
- if (mode == QImode || mode == HImode)
+ if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
{
mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
@@ -17105,12 +20126,28 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
{
enum memmodel model = (enum memmodel) INTVAL (model_rtx);
enum machine_mode mode = GET_MODE (mem);
+ enum machine_mode store_mode = mode;
rtx label, x, cond, mask, shift;
rtx before = orig_before, after = orig_after;
mask = shift = NULL_RTX;
+ /* On power8, we want to use SImode for the operation. On previous systems,
+ use the operation in a subword and shift/mask to get the proper byte or
+ halfword. */
if (mode == QImode || mode == HImode)
{
+ if (TARGET_SYNC_HI_QI)
+ {
+ val = convert_modes (SImode, mode, val, 1);
+
+ /* Prepare to adjust the return value. */
+ before = gen_reg_rtx (SImode);
+ if (after)
+ after = gen_reg_rtx (SImode);
+ mode = SImode;
+ }
+ else
+ {
mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
/* Shift and mask VAL into position with the word. */
@@ -17151,7 +20188,8 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
before = gen_reg_rtx (SImode);
if (after)
after = gen_reg_rtx (SImode);
- mode = SImode;
+ store_mode = mode = SImode;
+ }
}
mem = rs6000_pre_atomic_barrier (mem, model);
@@ -17184,9 +20222,11 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
NULL_RTX, 1, OPTAB_LIB_WIDEN);
x = rs6000_mask_atomic_subword (before, x, mask);
}
+ else if (store_mode != mode)
+ x = convert_modes (store_mode, mode, x, 1);
cond = gen_reg_rtx (CCmode);
- emit_store_conditional (mode, cond, mem, x);
+ emit_store_conditional (store_mode, cond, mem, x);
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
emit_unlikely_jump (x, label);
@@ -17195,11 +20235,22 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
if (shift)
{
+ /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
+ then do the calculations in a SImode register. */
if (orig_before)
rs6000_finish_atomic_subword (orig_before, before, shift);
if (orig_after)
rs6000_finish_atomic_subword (orig_after, after, shift);
}
+ else if (store_mode != mode)
+ {
+ /* QImode/HImode on machines with lbarx/lharx where we do the native
+ operation and then do the calculations in a SImode register. */
+ if (orig_before)
+ convert_move (orig_before, before, 1);
+ if (orig_after)
+ convert_move (orig_after, after, 1);
+ }
else if (orig_after && after != orig_after)
emit_move_insn (orig_after, after);
}
@@ -17239,6 +20290,39 @@ rs6000_split_multireg_move (rtx dst, rtx src)
gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
+ /* TDmode residing in FP registers is special, since the ISA requires that
+ the lower-numbered word of a register pair is always the most significant
+ word, even in little-endian mode. This does not match the usual subreg
+ semantics, so we cannot use simplify_gen_subreg in those cases. Access
+ the appropriate constituent registers "by hand" in little-endian mode.
+
+ Note we do not need to check for destructive overlap here since TDmode
+ can only reside in even/odd register pairs. */
+ if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
+ {
+ rtx p_src, p_dst;
+ int i;
+
+ for (i = 0; i < nregs; i++)
+ {
+ if (REG_P (src) && FP_REGNO_P (REGNO (src)))
+ p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
+ else
+ p_src = simplify_gen_subreg (reg_mode, src, mode,
+ i * reg_mode_size);
+
+ if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
+ p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
+ else
+ p_dst = simplify_gen_subreg (reg_mode, dst, mode,
+ i * reg_mode_size);
+
+ emit_insn (gen_rtx_SET (VOIDmode, p_dst, p_src));
+ }
+
+ return;
+ }
+
if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
{
/* Move register range backwards, if we might have destructive
@@ -17693,7 +20777,7 @@ rs6000_savres_strategy (rs6000_stack_t *info,
}
else
{
- gcc_checking_assert (DEFAULT_ABI == ABI_AIX);
+ gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
if (info->first_fp_reg_save > 61)
strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
@@ -17704,7 +20788,8 @@ rs6000_savres_strategy (rs6000_stack_t *info,
by the static chain. It would require too much fiddling and the
static chain is rarely used anyway. FPRs are saved w.r.t the stack
pointer on Darwin, and AIX uses r1 or r12. */
- if (using_static_chain_p && DEFAULT_ABI != ABI_AIX)
+ if (using_static_chain_p
+ && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
| SAVE_INLINE_GPRS
| SAVE_INLINE_VRS | REST_INLINE_VRS);
@@ -17837,6 +20922,34 @@ rs6000_savres_strategy (rs6000_stack_t *info,
The required alignment for AIX configurations is two words (i.e., 8
or 16 bytes).
+ The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
+
+ SP----> +---------------------------------------+
+ | Back chain to caller | 0
+ +---------------------------------------+
+ | Save area for CR | 8
+ +---------------------------------------+
+ | Saved LR | 16
+ +---------------------------------------+
+ | Saved TOC pointer | 24
+ +---------------------------------------+
+ | Parameter save area (P) | 32
+ +---------------------------------------+
+ | Alloca space (A) | 32+P
+ +---------------------------------------+
+ | Local variable space (L) | 32+P+A
+ +---------------------------------------+
+ | Save area for AltiVec registers (W) | 32+P+A+L
+ +---------------------------------------+
+ | AltiVec alignment padding (Y) | 32+P+A+L+W
+ +---------------------------------------+
+ | Save area for GP registers (G) | 32+P+A+L+W+Y
+ +---------------------------------------+
+ | Save area for FP registers (F) | 32+P+A+L+W+Y+G
+ +---------------------------------------+
+ old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
+ +---------------------------------------+
+
V.4 stack frames look like:
@@ -17897,6 +21010,7 @@ rs6000_stack_info (void)
rs6000_stack_t *info_ptr = &stack_info;
int reg_size = TARGET_32BIT ? 4 : 8;
int ehrd_size;
+ int ehcr_size;
int save_align;
int first_gp;
HOST_WIDE_INT non_fixed_size;
@@ -17990,6 +21104,18 @@ rs6000_stack_info (void)
else
ehrd_size = 0;
+ /* In the ELFv2 ABI, we also need to allocate space for separate
+ CR field save areas if the function calls __builtin_eh_return. */
+ if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
+ {
+ /* This hard-codes that we have three call-saved CR fields. */
+ ehcr_size = 3 * reg_size;
+ /* We do *not* use the regular CR save mechanism. */
+ info_ptr->cr_save_p = 0;
+ }
+ else
+ ehcr_size = 0;
+
/* Determine various sizes. */
info_ptr->reg_size = reg_size;
info_ptr->fixed_size = RS6000_SAVE_AREA;
@@ -18029,6 +21155,7 @@ rs6000_stack_info (void)
gcc_unreachable ();
case ABI_AIX:
+ case ABI_ELFv2:
case ABI_DARWIN:
info_ptr->fp_save_offset = - info_ptr->fp_size;
info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
@@ -18058,6 +21185,8 @@ rs6000_stack_info (void)
}
else
info_ptr->ehrd_offset = info_ptr->gp_save_offset - ehrd_size;
+
+ info_ptr->ehcr_offset = info_ptr->ehrd_offset - ehcr_size;
info_ptr->cr_save_offset = reg_size; /* first word when 64-bit. */
info_ptr->lr_save_offset = 2*reg_size;
break;
@@ -18120,6 +21249,7 @@ rs6000_stack_info (void)
+ info_ptr->spe_gp_size
+ info_ptr->spe_padding_size
+ ehrd_size
+ + ehcr_size
+ info_ptr->cr_size
+ info_ptr->vrsave_size,
save_align);
@@ -18133,7 +21263,7 @@ rs6000_stack_info (void)
/* Determine if we need to save the link register. */
if (info_ptr->calls_p
- || (DEFAULT_ABI == ABI_AIX
+ || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
&& crtl->profile
&& !TARGET_PROFILE_KERNEL)
|| (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
@@ -18279,6 +21409,7 @@ debug_stack_info (rs6000_stack_t *info)
default: abi_string = "Unknown"; break;
case ABI_NONE: abi_string = "NONE"; break;
case ABI_AIX: abi_string = "AIX"; break;
+ case ABI_ELFv2: abi_string = "ELFv2"; break;
case ABI_DARWIN: abi_string = "Darwin"; break;
case ABI_V4: abi_string = "V.4"; break;
}
@@ -18400,7 +21531,8 @@ rs6000_return_addr (int count, rtx frame)
/* Currently we don't optimize very well between prolog and body
code and for PIC code the code can be actually quite bad, so
don't try to be too clever here. */
- if (count != 0 || (DEFAULT_ABI != ABI_AIX && flag_pic))
+ if (count != 0
+ || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
{
cfun->machine->ra_needs_full_frame = 1;
@@ -18459,13 +21591,13 @@ rs6000_function_ok_for_sibcall (tree decl, tree exp)
return false;
}
- /* Under the AIX ABI we can't allow calls to non-local functions,
- because the callee may have a different TOC pointer to the
- caller and there's no way to ensure we restore the TOC when we
- return. With the secure-plt SYSV ABI we can't make non-local
+ /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
+ functions, because the callee may have a different TOC pointer to
+ the caller and there's no way to ensure we restore the TOC when
+ we return. With the secure-plt SYSV ABI we can't make non-local
calls when -fpic/PIC because the plt call stubs use r30. */
if (DEFAULT_ABI == ABI_DARWIN
- || (DEFAULT_ABI == ABI_AIX
+ || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
&& decl
&& !DECL_EXTERNAL (decl)
&& (*targetm.binds_local_p) (decl))
@@ -18566,7 +21698,7 @@ rs6000_emit_load_toc_table (int fromprolog)
rtx dest;
dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
- if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI != ABI_AIX && flag_pic)
+ if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
{
char buf[30];
rtx lab, tmp1, tmp2, got;
@@ -18594,7 +21726,7 @@ rs6000_emit_load_toc_table (int fromprolog)
emit_insn (gen_load_toc_v4_pic_si ());
emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
}
- else if (TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2)
+ else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
{
char buf[30];
rtx temp0 = (fromprolog
@@ -18642,7 +21774,7 @@ rs6000_emit_load_toc_table (int fromprolog)
}
else
{
- gcc_assert (DEFAULT_ABI == ABI_AIX);
+ gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
if (TARGET_32BIT)
emit_insn (gen_load_toc_aix_si (dest));
@@ -19047,7 +22179,7 @@ output_probe_stack_range (rtx reg1, rtx reg2)
static rtx
rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
- rtx reg2, rtx rreg)
+ rtx reg2, rtx rreg, rtx split_reg)
{
rtx real, temp;
@@ -19138,6 +22270,11 @@ rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
}
}
+ /* If a store insn has been split into multiple insns, the
+ true source register is given by split_reg. */
+ if (split_reg != NULL_RTX)
+ real = gen_rtx_SET (VOIDmode, SET_DEST (real), split_reg);
+
RTX_FRAME_RELATED_P (insn) = 1;
add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
@@ -19245,7 +22382,7 @@ emit_frame_save (rtx frame_reg, enum machine_mode mode,
reg = gen_rtx_REG (mode, regno);
insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
- NULL_RTX, NULL_RTX);
+ NULL_RTX, NULL_RTX, NULL_RTX);
}
/* Emit an offset memory reference suitable for a frame store, while
@@ -19361,7 +22498,7 @@ rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
if ((sel & SAVRES_LR))
suffix = "_x";
}
- else if (DEFAULT_ABI == ABI_AIX)
+ else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
{
#if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
/* No out-of-line save/restore routines for GPRs on AIX. */
@@ -19502,7 +22639,7 @@ rs6000_emit_stack_reset (rs6000_stack_t *info,
static inline unsigned
ptr_regno_for_savres (int sel)
{
- if (DEFAULT_ABI == ABI_AIX)
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
}
@@ -19587,6 +22724,43 @@ rs6000_emit_savres_rtx (rs6000_stack_t *info,
return insn;
}
+/* Emit code to store CR fields that need to be saved into REG. */
+
+static void
+rs6000_emit_move_from_cr (rtx reg)
+{
+ /* Only the ELFv2 ABI allows storing only selected fields. */
+ if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
+ {
+ int i, cr_reg[8], count = 0;
+
+ /* Collect CR fields that must be saved. */
+ for (i = 0; i < 8; i++)
+ if (save_reg_p (CR0_REGNO + i))
+ cr_reg[count++] = i;
+
+ /* If it's just a single one, use mfcrf. */
+ if (count == 1)
+ {
+ rtvec p = rtvec_alloc (1);
+ rtvec r = rtvec_alloc (2);
+ RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
+ RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
+ RTVEC_ELT (p, 0)
+ = gen_rtx_SET (VOIDmode, reg,
+ gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
+
+ emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
+ return;
+ }
+
+ /* ??? It might be better to handle count == 2 / 3 cases here
+ as well, using logical operations to combine the values. */
+ }
+
+ emit_insn (gen_movesi_from_cr (reg));
+}
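The mask built for the single-field mfcrf case follows the FXM convention in which CR0 maps to the most significant of the eight mask bits, hence the 1 << (7 - field) computation above; a tiny sketch (illustrative only):

/* Illustrative: FXM mask bit for CR field N (0..7); CR0 is bit 7, CR7 is
   bit 0, so e.g. CR2 yields 0x20.  */
static unsigned int
fxm_mask_for_cr_field (int n)
{
  return 1u << (7 - n);
}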
+
/* Determine whether the gp REG is really used. */
static bool
@@ -19652,6 +22826,17 @@ rs6000_emit_prologue (void)
#define NOT_INUSE(R) do {} while (0)
#endif
+ if (DEFAULT_ABI == ABI_ELFv2)
+ {
+ cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
+
+ /* With -mminimal-toc we may generate an extra use of r2 below. */
+ if (!TARGET_SINGLE_PIC_BASE
+ && TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
+ cfun->machine->r2_setup_needed = true;
+ }
+
if (flag_stack_usage_info)
current_function_static_stack_size = info->total_size;
@@ -19766,7 +22951,7 @@ rs6000_emit_prologue (void)
insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
- treg, GEN_INT (-info->total_size));
+ treg, GEN_INT (-info->total_size), NULL_RTX);
sp_off = frame_off = info->total_size;
}
@@ -19851,14 +23036,14 @@ rs6000_emit_prologue (void)
insn = emit_move_insn (mem, reg);
rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
- NULL_RTX, NULL_RTX);
+ NULL_RTX, NULL_RTX, NULL_RTX);
END_USE (0);
}
}
/* If we need to save CR, put it into r12 or r11. Choose r12 except when
r12 will be needed by out-of-line gpr restore. */
- cr_save_regno = (DEFAULT_ABI == ABI_AIX
+ cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
&& !(strategy & (SAVE_INLINE_GPRS
| SAVE_NOINLINE_GPRS_SAVES_LR))
? 11 : 12);
@@ -19867,21 +23052,9 @@ rs6000_emit_prologue (void)
&& REGNO (frame_reg_rtx) != cr_save_regno
&& !(using_static_chain_p && cr_save_regno == 11))
{
- rtx set;
-
cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
START_USE (cr_save_regno);
- insn = emit_insn (gen_movesi_from_cr (cr_save_rtx));
- RTX_FRAME_RELATED_P (insn) = 1;
- /* Now, there's no way that dwarf2out_frame_debug_expr is going
- to understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)'.
- But that's OK. All we have to do is specify that _one_ condition
- code register is saved in this stack slot. The thrower's epilogue
- will then restore all the call-saved registers.
- We use CR2_REGNO (70) to be compatible with gcc-2.95 on Linux. */
- set = gen_rtx_SET (VOIDmode, cr_save_rtx,
- gen_rtx_REG (SImode, CR2_REGNO));
- add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
+ rs6000_emit_move_from_cr (cr_save_rtx);
}
/* Do any required saving of fpr's. If only one or two to save, do
@@ -19919,7 +23092,7 @@ rs6000_emit_prologue (void)
info->lr_save_offset,
DFmode, sel);
rs6000_frame_related (insn, ptr_reg, sp_off,
- NULL_RTX, NULL_RTX);
+ NULL_RTX, NULL_RTX, NULL_RTX);
if (lr)
END_USE (0);
}
@@ -19998,7 +23171,7 @@ rs6000_emit_prologue (void)
SAVRES_SAVE | SAVRES_GPR);
rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
- NULL_RTX, NULL_RTX);
+ NULL_RTX, NULL_RTX, NULL_RTX);
}
/* Move the static chain pointer back. */
@@ -20048,7 +23221,7 @@ rs6000_emit_prologue (void)
info->lr_save_offset + ptr_off,
reg_mode, sel);
rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
- NULL_RTX, NULL_RTX);
+ NULL_RTX, NULL_RTX, NULL_RTX);
if (lr)
END_USE (0);
}
@@ -20064,7 +23237,7 @@ rs6000_emit_prologue (void)
info->gp_save_offset + frame_off + reg_size * i);
insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
- NULL_RTX, NULL_RTX);
+ NULL_RTX, NULL_RTX, NULL_RTX);
}
else if (!WORLD_SAVE_P (info))
{
@@ -20133,7 +23306,8 @@ rs6000_emit_prologue (void)
be updated if we arrived at this function via a plt call or
toc adjusting stub. */
emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
- toc_restore_insn = TARGET_32BIT ? 0x80410014 : 0xE8410028;
+ toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
+ + RS6000_TOC_SAVE_SLOT);
hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
@@ -20152,7 +23326,7 @@ rs6000_emit_prologue (void)
LABEL_NUSES (toc_save_done) += 1;
save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
- TOC_REGNUM, frame_off + 5 * reg_size,
+ TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
sp_off - frame_off);
emit_label (toc_save_done);
@@ -20192,26 +23366,121 @@ rs6000_emit_prologue (void)
rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
GEN_INT (info->cr_save_offset + frame_off));
rtx mem = gen_frame_mem (SImode, addr);
- /* See the large comment above about why CR2_REGNO is used. */
- rtx magic_eh_cr_reg = gen_rtx_REG (SImode, CR2_REGNO);
/* If we didn't copy cr before, do so now using r0. */
if (cr_save_rtx == NULL_RTX)
{
- rtx set;
-
START_USE (0);
cr_save_rtx = gen_rtx_REG (SImode, 0);
- insn = emit_insn (gen_movesi_from_cr (cr_save_rtx));
+ rs6000_emit_move_from_cr (cr_save_rtx);
+ }
+
+ /* Saving CR requires a two-instruction sequence: one instruction
+ to move the CR to a general-purpose register, and a second
+ instruction that stores the GPR to memory.
+
+ We do not emit any DWARF CFI records for the first of these,
+ because we cannot properly represent the fact that CR is saved in
+ a register. One reason is that we cannot express that multiple
+ CR fields are saved; another reason is that on 64-bit, the size
+ of the CR register in DWARF (4 bytes) differs from the size of
+ a general-purpose register.
+
+ This means if any intervening instruction were to clobber one of
+ the call-saved CR fields, we'd have incorrect CFI. To prevent
+ this from happening, we mark the store to memory as a use of
+ those CR fields, which prevents any such instruction from being
+ scheduled in between the two instructions. */
+ rtx crsave_v[9];
+ int n_crsave = 0;
+ int i;
+
+ crsave_v[n_crsave++] = gen_rtx_SET (VOIDmode, mem, cr_save_rtx);
+ for (i = 0; i < 8; i++)
+ if (save_reg_p (CR0_REGNO + i))
+ crsave_v[n_crsave++]
+ = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
+
+ insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec_v (n_crsave, crsave_v)));
+ END_USE (REGNO (cr_save_rtx));
+
+ /* Now, there's no way that dwarf2out_frame_debug_expr is going to
+ understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
+ so we need to construct a frame expression manually. */
RTX_FRAME_RELATED_P (insn) = 1;
- set = gen_rtx_SET (VOIDmode, cr_save_rtx, magic_eh_cr_reg);
+
+ /* Update address to be stack-pointer relative, like
+ rs6000_frame_related would do. */
+ addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
+ GEN_INT (info->cr_save_offset + sp_off));
+ mem = gen_frame_mem (SImode, addr);
+
+ if (DEFAULT_ABI == ABI_ELFv2)
+ {
+ /* In the ELFv2 ABI we generate separate CFI records for each
+ CR field that was actually saved. They all point to the
+ same 32-bit stack slot. */
+ rtx crframe[8];
+ int n_crframe = 0;
+
+ for (i = 0; i < 8; i++)
+ if (save_reg_p (CR0_REGNO + i))
+ {
+ crframe[n_crframe]
+ = gen_rtx_SET (VOIDmode, mem,
+ gen_rtx_REG (SImode, CR0_REGNO + i));
+
+ RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
+ n_crframe++;
+ }
+
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec_v (n_crframe, crframe)));
+ }
+ else
+ {
+ /* In other ABIs, by convention, we use a single CR regnum to
+ represent the fact that all call-saved CR fields are saved.
+ We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
+ rtx set = gen_rtx_SET (VOIDmode, mem,
+ gen_rtx_REG (SImode, CR2_REGNO));
add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
}
- insn = emit_move_insn (mem, cr_save_rtx);
- END_USE (REGNO (cr_save_rtx));
+ }
- rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
- NULL_RTX, NULL_RTX);
+ /* In the ELFv2 ABI we need to save all call-saved CR fields into
+ *separate* slots if the routine calls __builtin_eh_return, so
+ that they can be independently restored by the unwinder. */
+ if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
+ {
+ int i, cr_off = info->ehcr_offset;
+ rtx crsave;
+
+ /* ??? We might get better performance by using multiple mfocrf
+ instructions. */
+ crsave = gen_rtx_REG (SImode, 0);
+ emit_insn (gen_movesi_from_cr (crsave));
+
+ for (i = 0; i < 8; i++)
+ if (!call_used_regs[CR0_REGNO + i])
+ {
+ rtvec p = rtvec_alloc (2);
+ RTVEC_ELT (p, 0)
+ = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
+ RTVEC_ELT (p, 1)
+ = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
+
+ insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_FRAME_RELATED_EXPR,
+ gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
+ sp_reg_rtx, cr_off + sp_off));
+
+ cr_off += reg_size;
+ }
}
/* Update stack and set back pointer unless this is V.4,
@@ -20291,7 +23560,7 @@ rs6000_emit_prologue (void)
info->altivec_save_offset + ptr_off,
0, V4SImode, SAVRES_SAVE | SAVRES_VR);
rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
- NULL_RTX, NULL_RTX);
+ NULL_RTX, NULL_RTX, NULL_RTX);
if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
{
/* The oddity mentioned above clobbered our frame reg. */
@@ -20307,7 +23576,7 @@ rs6000_emit_prologue (void)
for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
{
- rtx areg, savereg, mem;
+ rtx areg, savereg, mem, split_reg;
int offset;
offset = (info->altivec_save_offset + frame_off
@@ -20325,8 +23594,18 @@ rs6000_emit_prologue (void)
insn = emit_move_insn (mem, savereg);
+ /* When we split a VSX store into two insns, we need to make
+ sure the DWARF info knows which register we are storing.
+ Pass it in to be used on the appropriate note. */
+ if (!BYTES_BIG_ENDIAN
+ && GET_CODE (PATTERN (insn)) == SET
+ && GET_CODE (SET_SRC (PATTERN (insn))) == VEC_SELECT)
+ split_reg = savereg;
+ else
+ split_reg = NULL_RTX;
+
rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
- areg, GEN_INT (offset));
+ areg, GEN_INT (offset), split_reg);
}
}
@@ -20350,7 +23629,8 @@ rs6000_emit_prologue (void)
be using r12 as frame_reg_rtx and r11 as the static chain
pointer for nested functions. */
save_regno = 12;
- if (DEFAULT_ABI == ABI_AIX && !using_static_chain_p)
+ if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
+ && !using_static_chain_p)
save_regno = 11;
else if (REGNO (frame_reg_rtx) == 12)
{
@@ -20389,7 +23669,7 @@ rs6000_emit_prologue (void)
can use register 0. This allows us to use a plain 'blr' to return
from the procedure more often. */
int save_LR_around_toc_setup = (TARGET_ELF
- && DEFAULT_ABI != ABI_AIX
+ && DEFAULT_ABI == ABI_V4
&& flag_pic
&& ! info->lr_save_p
&& EDGE_COUNT (EXIT_BLOCK_PTR->preds) > 0);
@@ -20451,7 +23731,7 @@ rs6000_emit_prologue (void)
if (rs6000_save_toc_in_prologue_p ())
{
rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
- emit_insn (gen_frame_store (reg, sp_reg_rtx, 5 * reg_size));
+ emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
}
}
@@ -20492,6 +23772,49 @@ rs6000_output_function_prologue (FILE *file,
}
}
+ /* ELFv2 ABI r2 setup code and local entry point. This must follow
+ immediately after the global entry point label. */
+ if (DEFAULT_ABI == ABI_ELFv2 && cfun->machine->r2_setup_needed)
+ {
+ const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
+
+ fprintf (file, "0:\taddis 2,12,.TOC.-0b@ha\n");
+ fprintf (file, "\taddi 2,2,.TOC.-0b@l\n");
+
+ fputs ("\t.localentry\t", file);
+ assemble_name (file, name);
+ fputs (",.-", file);
+ assemble_name (file, name);
+ fputs ("\n", file);
+ }
+
+ /* Output -mprofile-kernel code. This needs to be done here instead of
+ in output_function_profiler since it must go after the ELFv2 ABI
+ local entry point. */
+ if (TARGET_PROFILE_KERNEL)
+ {
+ gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
+ gcc_assert (!TARGET_32BIT);
+
+ asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
+ asm_fprintf (file, "\tstd %s,16(%s)\n", reg_names[0], reg_names[1]);
+
+ /* In the ELFv2 ABI we have no compiler stack word. It must be
+ the responsibility of _mcount to preserve the static chain
+ register if required. */
+ if (DEFAULT_ABI != ABI_ELFv2
+ && cfun->static_chain_decl != NULL)
+ {
+ asm_fprintf (file, "\tstd %s,24(%s)\n",
+ reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
+ fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
+ asm_fprintf (file, "\tld %s,24(%s)\n",
+ reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
+ }
+ else
+ fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
+ }
+
rs6000_pic_labelno++;
}
@@ -20544,6 +23867,7 @@ restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
if (using_mfcr_multiple && count > 1)
{
+ rtx insn;
rtvec p;
int ndx;
@@ -20561,16 +23885,43 @@ restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
ndx++;
}
- emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
+ insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
gcc_assert (ndx == count);
+
+ /* For the ELFv2 ABI we generate a CFA_RESTORE for each
+ CR field separately. */
+ if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
+ {
+ for (i = 0; i < 8; i++)
+ if (save_reg_p (CR0_REGNO + i))
+ add_reg_note (insn, REG_CFA_RESTORE,
+ gen_rtx_REG (SImode, CR0_REGNO + i));
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
}
else
for (i = 0; i < 8; i++)
if (save_reg_p (CR0_REGNO + i))
- emit_insn (gen_movsi_to_cr_one (gen_rtx_REG (CCmode, CR0_REGNO + i),
- reg));
+ {
+ rtx insn = emit_insn (gen_movsi_to_cr_one
+ (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
- if (!exit_func && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
+ /* For the ELFv2 ABI we generate a CFA_RESTORE for each
+ CR field separately, attached to the insn that in fact
+ restores this particular CR field. */
+ if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
+ {
+ add_reg_note (insn, REG_CFA_RESTORE,
+ gen_rtx_REG (SImode, CR0_REGNO + i));
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+ }
+
+ /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
+ if (!exit_func && DEFAULT_ABI != ABI_ELFv2
+ && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
{
rtx insn = get_last_insn ();
rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
@@ -20611,10 +23962,22 @@ restore_saved_lr (int regno, bool exit_func)
static rtx
add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
{
- if (info->cr_save_p)
+ if (DEFAULT_ABI == ABI_ELFv2)
+ {
+ int i;
+ for (i = 0; i < 8; i++)
+ if (save_reg_p (CR0_REGNO + i))
+ {
+ rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
+ cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
+ cfa_restores);
+ }
+ }
+ else if (info->cr_save_p)
cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
gen_rtx_REG (SImode, CR2_REGNO),
cfa_restores);
+
if (info->lr_save_p)
cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
gen_rtx_REG (Pmode, LR_REGNO),
@@ -21112,6 +24475,35 @@ rs6000_emit_epilogue (int sibcall)
|| (!restoring_GPRs_inline
&& info->first_fp_reg_save == 64));
+ /* In the ELFv2 ABI we need to restore all call-saved CR fields from
+ *separate* slots if the routine calls __builtin_eh_return, so
+ that they can be independently restored by the unwinder. */
+ if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
+ {
+ int i, cr_off = info->ehcr_offset;
+
+ for (i = 0; i < 8; i++)
+ if (!call_used_regs[CR0_REGNO + i])
+ {
+ rtx reg = gen_rtx_REG (SImode, 0);
+ emit_insn (gen_frame_load (reg, frame_reg_rtx,
+ cr_off + frame_off));
+
+ insn = emit_insn (gen_movsi_to_cr_one
+ (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
+
+ if (!exit_func && flag_shrink_wrap)
+ {
+ add_reg_note (insn, REG_CFA_RESTORE,
+ gen_rtx_REG (SImode, CR0_REGNO + i));
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ }
+
+ cr_off += reg_size;
+ }
+ }
+
/* Get the old lr if we saved it. If we are restoring registers
out-of-line, then the out-of-line routines can do this for us. */
if (restore_lr && restoring_GPRs_inline)
@@ -21155,7 +24547,7 @@ rs6000_emit_epilogue (int sibcall)
{
rtx reg = gen_rtx_REG (reg_mode, 2);
emit_insn (gen_frame_load (reg, frame_reg_rtx,
- frame_off + 5 * reg_size));
+ frame_off + RS6000_TOC_SAVE_SLOT));
}
for (i = 0; ; ++i)
@@ -21441,6 +24833,7 @@ rs6000_emit_epilogue (int sibcall)
if (! restoring_FPRs_inline)
{
int i;
+ int reg;
rtx sym;
if (flag_shrink_wrap)
@@ -21449,10 +24842,9 @@ rs6000_emit_epilogue (int sibcall)
sym = rs6000_savres_routine_sym (info,
SAVRES_FPR | (lr ? SAVRES_LR : 0));
RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
- RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode,
- gen_rtx_REG (Pmode,
- DEFAULT_ABI == ABI_AIX
- ? 1 : 11));
+ reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
+ RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
+
for (i = 0; i < 64 - info->first_fp_reg_save; i++)
{
rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
@@ -21530,7 +24922,8 @@ rs6000_output_function_epilogue (FILE *file,
System V.4 Powerpc's (and the embedded ABI derived from it) use a
different traceback table. */
- if (DEFAULT_ABI == ABI_AIX && ! flag_inhibit_size_directive
+ if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
+ && ! flag_inhibit_size_directive
&& rs6000_traceback != traceback_none && !cfun->is_thunk)
{
const char *fname = NULL;
@@ -21858,6 +25251,12 @@ rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
SIBLING_CALL_P (insn) = 1;
emit_barrier ();
+ /* Ensure we have a global entry point for the thunk. ??? We could
+ avoid that if the target routine doesn't need a global entry point,
+ but we do not know whether this is the case at this point. */
+ if (DEFAULT_ABI == ABI_ELFv2)
+ cfun->machine->r2_setup_needed = true;
+
/* Run just enough of rest_of_compilation to get the insns emitted.
There's not really enough bulk here to make other passes such as
instruction scheduling worth while. Note that use_thunk calls
@@ -22554,7 +25953,7 @@ output_profile_hook (int labelno ATTRIBUTE_UNUSED)
if (TARGET_PROFILE_KERNEL)
return;
- if (DEFAULT_ABI == ABI_AIX)
+ if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
{
#ifndef NO_PROFILE_COUNTERS
# define NO_PROFILE_COUNTERS 0
@@ -22698,29 +26097,9 @@ output_function_profiler (FILE *file, int labelno)
break;
case ABI_AIX:
+ case ABI_ELFv2:
case ABI_DARWIN:
- if (!TARGET_PROFILE_KERNEL)
- {
/* Don't do anything, done in output_profile_hook (). */
- }
- else
- {
- gcc_assert (!TARGET_32BIT);
-
- asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
- asm_fprintf (file, "\tstd %s,16(%s)\n", reg_names[0], reg_names[1]);
-
- if (cfun->static_chain_decl != NULL)
- {
- asm_fprintf (file, "\tstd %s,24(%s)\n",
- reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
- fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
- asm_fprintf (file, "\tld %s,24(%s)\n",
- reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
- }
- else
- fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
- }
break;
}
}
@@ -22846,6 +26225,7 @@ rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
|| rs6000_cpu_attr == CPU_POWER4
|| rs6000_cpu_attr == CPU_POWER5
|| rs6000_cpu_attr == CPU_POWER7
+ || rs6000_cpu_attr == CPU_POWER8
|| rs6000_cpu_attr == CPU_CELL)
&& recog_memoized (dep_insn)
&& (INSN_CODE (dep_insn) >= 0))
@@ -23128,7 +26508,8 @@ is_microcoded_insn (rtx insn)
if (rs6000_cpu_attr == CPU_CELL)
return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
- if (rs6000_sched_groups)
+ if (rs6000_sched_groups
+ && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
{
enum attr_type type = get_attr_type (insn);
if (type == TYPE_LOAD_EXT_U
@@ -23153,7 +26534,8 @@ is_cracked_insn (rtx insn)
|| GET_CODE (PATTERN (insn)) == CLOBBER)
return false;
- if (rs6000_sched_groups)
+ if (rs6000_sched_groups
+ && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
{
enum attr_type type = get_attr_type (insn);
if (type == TYPE_LOAD_U || type == TYPE_STORE_U
@@ -23432,6 +26814,8 @@ rs6000_issue_rate (void)
case CPU_POWER6:
case CPU_POWER7:
return 5;
+ case CPU_POWER8:
+ return 7;
default:
return 1;
}
@@ -24059,6 +27443,39 @@ insn_must_be_first_in_group (rtx insn)
break;
}
break;
+ case PROCESSOR_POWER8:
+ type = get_attr_type (insn);
+
+ switch (type)
+ {
+ case TYPE_CR_LOGICAL:
+ case TYPE_DELAYED_CR:
+ case TYPE_MFCR:
+ case TYPE_MFCRF:
+ case TYPE_MTCR:
+ case TYPE_COMPARE:
+ case TYPE_DELAYED_COMPARE:
+ case TYPE_VAR_DELAYED_COMPARE:
+ case TYPE_IMUL_COMPARE:
+ case TYPE_LMUL_COMPARE:
+ case TYPE_SYNC:
+ case TYPE_ISYNC:
+ case TYPE_LOAD_L:
+ case TYPE_STORE_C:
+ case TYPE_LOAD_U:
+ case TYPE_LOAD_UX:
+ case TYPE_LOAD_EXT:
+ case TYPE_LOAD_EXT_U:
+ case TYPE_LOAD_EXT_UX:
+ case TYPE_STORE_UX:
+ case TYPE_VECSTORE:
+ case TYPE_MFJMPR:
+ case TYPE_MTJMPR:
+ return true;
+ default:
+ break;
+ }
+ break;
default:
break;
}
@@ -24137,6 +27554,25 @@ insn_must_be_last_in_group (rtx insn)
break;
}
break;
+ case PROCESSOR_POWER8:
+ type = get_attr_type (insn);
+
+ switch (type)
+ {
+ case TYPE_MFCR:
+ case TYPE_MTCR:
+ case TYPE_ISYNC:
+ case TYPE_SYNC:
+ case TYPE_LOAD_L:
+ case TYPE_STORE_C:
+ case TYPE_LOAD_EXT_U:
+ case TYPE_LOAD_EXT_UX:
+ case TYPE_STORE_UX:
+ return true;
+ default:
+ break;
+ }
+ break;
default:
break;
}
@@ -24226,8 +27662,9 @@ force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
if (can_issue_more && !is_branch_slot_insn (next_insn))
can_issue_more--;
- /* Power6 and Power7 have special group ending nop. */
- if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7)
+ /* Do we have a special group ending nop? */
+ if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
+ || rs6000_cpu_attr == CPU_POWER8)
{
nop = gen_group_ending_nop ();
emit_insn_before (nop, next_insn);
@@ -24598,6 +28035,11 @@ rs6000_trampoline_size (void)
ret = (TARGET_32BIT) ? 12 : 24;
break;
+ case ABI_ELFv2:
+ gcc_assert (!TARGET_32BIT);
+ ret = 32;
+ break;
+
case ABI_DARWIN:
case ABI_V4:
ret = (TARGET_32BIT) ? 40 : 48;
@@ -24653,6 +28095,7 @@ rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
break;
/* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
+ case ABI_ELFv2:
case ABI_DARWIN:
case ABI_V4:
emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
@@ -24743,6 +28186,9 @@ rs6000_handle_altivec_attribute (tree *node,
unsigned_p = TYPE_UNSIGNED (type);
switch (mode)
{
+ case TImode:
+ result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
+ break;
case DImode:
result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
break;
@@ -24947,7 +28393,7 @@ rs6000_ms_bitfield_layout_p (const_tree record_type)
static void
rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
{
- if (DEFAULT_ABI == ABI_AIX
+ if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
&& TARGET_MINIMAL_TOC
&& !TARGET_RELOCATABLE)
{
@@ -24968,7 +28414,8 @@ rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
else
fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
}
- else if (DEFAULT_ABI == ABI_AIX && !TARGET_RELOCATABLE)
+ else if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
+ && !TARGET_RELOCATABLE)
fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
else
{
@@ -25518,7 +28965,7 @@ rs6000_elf_reloc_rw_mask (void)
{
if (flag_pic)
return 3;
- else if (DEFAULT_ABI == ABI_AIX)
+ else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
return 2;
else
return 0;
@@ -25594,7 +29041,7 @@ rs6000_elf_asm_out_destructor (rtx symbol, int priority)
void
rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
{
- if (TARGET_64BIT)
+ if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
{
fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
ASM_OUTPUT_LABEL (file, name);
@@ -25660,7 +29107,6 @@ rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
fprintf (file, "%s:\n", desc_name);
fprintf (file, "\t.long %s\n", orig_name);
fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
- if (DEFAULT_ABI == ABI_AIX)
fputs ("\t.long 0\n", file);
fprintf (file, "\t.previous\n");
}
@@ -25690,7 +29136,7 @@ rs6000_elf_file_end (void)
}
#endif
#if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
- if (TARGET_32BIT)
+ if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
file_end_indicate_exec_stack ();
#endif
}
@@ -25829,10 +29275,23 @@ rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
name, suffix[smclass], flags & SECTION_ENTSIZE);
}
+#define IN_NAMED_SECTION(DECL) \
+ ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
+ && DECL_SECTION_NAME (DECL) != NULL_TREE)
+
static section *
rs6000_xcoff_select_section (tree decl, int reloc,
- unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
+ unsigned HOST_WIDE_INT align)
{
+ /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
+ a named section. */
+ if (align > BIGGEST_ALIGNMENT)
+ {
+ resolve_unique_section (decl, reloc, true);
+ if (IN_NAMED_SECTION (decl))
+ return get_named_section (decl, NULL, reloc);
+ }
+
if (decl_readonly_section (decl, reloc))
{
if (TREE_PUBLIC (decl))
@@ -25870,10 +29329,12 @@ rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
{
const char *name;
- /* Use select_section for private and uninitialized data. */
+ /* Use select_section for private data and uninitialized data with
+ alignment <= BIGGEST_ALIGNMENT. */
if (!TREE_PUBLIC (decl)
|| DECL_COMMON (decl)
- || DECL_INITIAL (decl) == NULL_TREE
+ || (DECL_INITIAL (decl) == NULL_TREE
+ && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
|| DECL_INITIAL (decl) == error_mark_node
|| (flag_zero_initialized_in_bss
&& initializer_zerop (DECL_INITIAL (decl))))
@@ -26430,7 +29891,8 @@ rs6000_register_move_cost (enum machine_mode mode,
/* For those processors that have slow LR/CTR moves, make them more
expensive than memory in order to bias spills to memory .*/
else if ((rs6000_cpu == PROCESSOR_POWER6
- || rs6000_cpu == PROCESSOR_POWER7)
+ || rs6000_cpu == PROCESSOR_POWER7
+ || rs6000_cpu == PROCESSOR_POWER8)
&& reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
ret = 6 * hard_regno_nregs[0][mode];
@@ -26440,7 +29902,7 @@ rs6000_register_move_cost (enum machine_mode mode,
}
/* If we have VSX, we can easily move between FPR or Altivec registers. */
- else if (VECTOR_UNIT_VSX_P (mode)
+ else if (VECTOR_MEM_VSX_P (mode)
&& reg_classes_intersect_p (to, VSX_REGS)
&& reg_classes_intersect_p (from, VSX_REGS))
ret = 2 * hard_regno_nregs[32][mode];
@@ -26481,7 +29943,8 @@ rs6000_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
if (reg_classes_intersect_p (rclass, GENERAL_REGS))
ret = 4 * hard_regno_nregs[0][mode];
- else if (reg_classes_intersect_p (rclass, FLOAT_REGS))
+ else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
+ || reg_classes_intersect_p (rclass, VSX_REGS)))
ret = 4 * hard_regno_nregs[32][mode];
else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
@@ -26643,54 +30106,26 @@ rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
emit_insn (gen_rtx_SET (VOIDmode, dst, r));
}
-/* Newton-Raphson approximation of floating point divide with just 2 passes
- (either single precision floating point, or newer machines with higher
- accuracy estimates). Support both scalar and vector divide. Assumes no
- trapping math and finite arguments. */
+/* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
+ add a reg_note saying that this was a division. Support both scalar and
+ vector divide. Assumes no trapping math and finite arguments. */
-static void
-rs6000_emit_swdiv_high_precision (rtx dst, rtx n, rtx d)
+void
+rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
{
enum machine_mode mode = GET_MODE (dst);
- rtx x0, e0, e1, y1, u0, v0;
- enum insn_code code = optab_handler (smul_optab, mode);
- insn_gen_fn gen_mul = GEN_FCN (code);
- rtx one = rs6000_load_constant_and_splat (mode, dconst1);
-
- gcc_assert (code != CODE_FOR_nothing);
-
- /* x0 = 1./d estimate */
- x0 = gen_reg_rtx (mode);
- emit_insn (gen_rtx_SET (VOIDmode, x0,
- gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
- UNSPEC_FRES)));
-
- e0 = gen_reg_rtx (mode);
- rs6000_emit_nmsub (e0, d, x0, one); /* e0 = 1. - (d * x0) */
-
- e1 = gen_reg_rtx (mode);
- rs6000_emit_madd (e1, e0, e0, e0); /* e1 = (e0 * e0) + e0 */
-
- y1 = gen_reg_rtx (mode);
- rs6000_emit_madd (y1, e1, x0, x0); /* y1 = (e1 * x0) + x0 */
-
- u0 = gen_reg_rtx (mode);
- emit_insn (gen_mul (u0, n, y1)); /* u0 = n * y1 */
-
- v0 = gen_reg_rtx (mode);
- rs6000_emit_nmsub (v0, d, u0, n); /* v0 = n - (d * u0) */
-
- rs6000_emit_madd (dst, v0, y1, u0); /* dst = (v0 * y1) + u0 */
-}
+ rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
+ int i;
-/* Newton-Raphson approximation of floating point divide that has a low
- precision estimate. Assumes no trapping math and finite arguments. */
+ /* Low precision estimates guarantee 5 bits of accuracy. High
+ precision estimates guarantee 14 bits of accuracy. SFmode
+ requires 23 bits of accuracy. DFmode requires 52 bits of
+ accuracy. Each pass at least doubles the accuracy, leading
+ to the following. */
+ int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
+ if (mode == DFmode || mode == V2DFmode)
+ passes++;
-static void
-rs6000_emit_swdiv_low_precision (rtx dst, rtx n, rtx d)
-{
- enum machine_mode mode = GET_MODE (dst);
- rtx x0, e0, e1, e2, y1, y2, y3, u0, v0, one;
enum insn_code code = optab_handler (smul_optab, mode);
insn_gen_fn gen_mul = GEN_FCN (code);
@@ -26704,46 +30139,44 @@ rs6000_emit_swdiv_low_precision (rtx dst, rtx n, rtx d)
gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
UNSPEC_FRES)));
- e0 = gen_reg_rtx (mode);
- rs6000_emit_nmsub (e0, d, x0, one); /* e0 = 1. - d * x0 */
-
- y1 = gen_reg_rtx (mode);
- rs6000_emit_madd (y1, e0, x0, x0); /* y1 = x0 + e0 * x0 */
+ /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
+ if (passes > 1) {
- e1 = gen_reg_rtx (mode);
- emit_insn (gen_mul (e1, e0, e0)); /* e1 = e0 * e0 */
+ /* e0 = 1. - d * x0 */
+ e0 = gen_reg_rtx (mode);
+ rs6000_emit_nmsub (e0, d, x0, one);
- y2 = gen_reg_rtx (mode);
- rs6000_emit_madd (y2, e1, y1, y1); /* y2 = y1 + e1 * y1 */
+ /* x1 = x0 + e0 * x0 */
+ x1 = gen_reg_rtx (mode);
+ rs6000_emit_madd (x1, e0, x0, x0);
- e2 = gen_reg_rtx (mode);
- emit_insn (gen_mul (e2, e1, e1)); /* e2 = e1 * e1 */
+ for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
+ ++i, xprev = xnext, eprev = enext) {
- y3 = gen_reg_rtx (mode);
- rs6000_emit_madd (y3, e2, y2, y2); /* y3 = y2 + e2 * y2 */
+ /* enext = eprev * eprev */
+ enext = gen_reg_rtx (mode);
+ emit_insn (gen_mul (enext, eprev, eprev));
- u0 = gen_reg_rtx (mode);
- emit_insn (gen_mul (u0, n, y3)); /* u0 = n * y3 */
+ /* xnext = xprev + enext * xprev */
+ xnext = gen_reg_rtx (mode);
+ rs6000_emit_madd (xnext, enext, xprev, xprev);
+ }
- v0 = gen_reg_rtx (mode);
- rs6000_emit_nmsub (v0, d, u0, n); /* v0 = n - d * u0 */
+ } else
+ xprev = x0;
- rs6000_emit_madd (dst, v0, y3, u0); /* dst = u0 + v0 * y3 */
-}
+ /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
-/* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
- add a reg_note saying that this was a division. Support both scalar and
- vector divide. Assumes no trapping math and finite arguments. */
+ /* u = n * xprev */
+ u = gen_reg_rtx (mode);
+ emit_insn (gen_mul (u, n, xprev));
-void
-rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
-{
- enum machine_mode mode = GET_MODE (dst);
+ /* v = n - (d * u) */
+ v = gen_reg_rtx (mode);
+ rs6000_emit_nmsub (v, d, u, n);
- if (RS6000_RECIP_HIGH_PRECISION_P (mode))
- rs6000_emit_swdiv_high_precision (dst, n, d);
- else
- rs6000_emit_swdiv_low_precision (dst, n, d);
+ /* dst = (v * xprev) + u */
+ rs6000_emit_madd (dst, v, xprev, u);
if (note_p)
add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
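
The pass-count logic above relies on each Newton-Raphson refinement roughly doubling the number of accurate bits in the reciprocal estimate. The following standalone sketch (plain C, not GCC code; the crude estimate function merely stands in for the hardware fres/fre reciprocal-estimate instruction) mirrors the structure of the rewritten rs6000_emit_swdiv: every pass but the last refines x, and the last pass folds in the numerator.

/* Illustrative sketch only -- plain C, not GCC internals.  */
#include <stdio.h>
#include <math.h>

static double recip_estimate (double d)
{
  /* Roughly 5 accurate bits, like the low-precision hardware estimate.  */
  return 1.0 / d * (1.0 + 0.02 * sin (d));
}

static double swdiv (double n, double d, int passes)
{
  double x = recip_estimate (d);   /* x0 ~= 1/d     */
  double e = 1.0 - d * x;          /* e0 = 1 - d*x0 */
  double u, v;
  int i;

  /* Every pass but the last: x_(i+1) = x_i + e_i*x_i, e_(i+1) = e_i*e_i,
     i.e. x_(i+1) = x_i * (2 - d*x_i); accuracy roughly doubles per pass.  */
  for (i = 0; i < passes - 1; i++)
    {
      x = x + e * x;
      e = e * e;
    }

  /* Last pass folds in the numerator: u = n*x, v = n - d*u, result = u + v*x.  */
  u = n * x;
  v = n - d * u;
  return u + v * x;
}

int main (void)
{
  double n = 355.0, d = 113.0;
  printf ("1 pass:   %.17g\n", swdiv (n, d, 1));
  printf ("4 passes: %.17g\n", swdiv (n, d, 4));
  printf ("exact:    %.17g\n", n / d);
  return 0;
}
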
@@ -26758,7 +30191,16 @@ rs6000_emit_swrsqrt (rtx dst, rtx src)
enum machine_mode mode = GET_MODE (src);
rtx x0 = gen_reg_rtx (mode);
rtx y = gen_reg_rtx (mode);
- int passes = (TARGET_RECIP_PRECISION) ? 2 : 3;
+
+ /* Low precision estimates guarantee 5 bits of accuracy. High
+ precision estimates guarantee 14 bits of accuracy. SFmode
+ requires 23 bits of accuracy. DFmode requires 52 bits of
+ accuracy. Each pass at least doubles the accuracy, leading
+ to the following. */
+ int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
+ if (mode == DFmode || mode == V2DFmode)
+ passes++;
+
REAL_VALUE_TYPE dconst3_2;
int i;
rtx halfthree;
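
Only the revised pass count is visible in this hunk, but the surrounding rs6000_emit_swrsqrt expansion performs the standard Newton-Raphson refinement for 1/sqrt(d), y' = y * (1.5 - 0.5*d*y*y), with the halfthree constant supplying the 3/2 factor. A standalone sketch, plain C and not GCC code, showing why one extra pass is enough to move from SFmode to DFmode accuracy:

#include <stdio.h>
#include <math.h>

static double rsqrt_estimate (double d)
{
  return 1.03 / sqrt (d);   /* ~5 accurate bits, standing in for frsqrte */
}

static double swrsqrt (double d, int passes)
{
  double y = rsqrt_estimate (d);
  for (int i = 0; i < passes; i++)
    y = y * (1.5 - 0.5 * d * y * y);   /* accuracy roughly doubles */
  return y;
}

int main (void)
{
  for (int p = 1; p <= 4; p++)
    printf ("%d passes: error = %.3e\n", p,
            fabs (swrsqrt (2.0, p) - 1.0 / sqrt (2.0)));
  return 0;
}
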
@@ -26920,6 +30362,136 @@ rs6000_emit_parity (rtx dst, rtx src)
}
}
+/* Expand an Altivec constant permutation for little endian mode.
+ There are two issues: First, the two input operands must be
+ swapped so that together they form a double-wide array in LE
+ order. Second, the vperm instruction has surprising behavior
+ in LE mode: it interprets the elements of the source vectors
+ in BE mode ("left to right") and interprets the elements of
+ the destination vector in LE mode ("right to left"). To
+ correct for this, we must subtract each element of the permute
+ control vector from 31.
+
+ For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
+ with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
+ We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
+ serve as the permute control vector. Then, in BE mode,
+
+ vperm 9,10,11,12
+
+ places the desired result in vr9. However, in LE mode the
+ vector contents will be
+
+ vr10 = 00000003 00000002 00000001 00000000
+ vr11 = 00000007 00000006 00000005 00000004
+
+ The result of the vperm using the same permute control vector is
+
+ vr9 = 05000000 07000000 01000000 03000000
+
+ That is, the leftmost 4 bytes of vr10 are interpreted as the
+ source for the rightmost 4 bytes of vr9, and so on.
+
+ If we change the permute control vector to
+
+ vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
+
+ and issue
+
+ vperm 9,11,10,12
+
+ we get the desired
+
+ vr9 = 00000006 00000004 00000002 00000000. */
+
+void
+altivec_expand_vec_perm_const_le (rtx operands[4])
+{
+ unsigned int i;
+ rtx perm[16];
+ rtx constv, unspec;
+ rtx target = operands[0];
+ rtx op0 = operands[1];
+ rtx op1 = operands[2];
+ rtx sel = operands[3];
+
+ /* Unpack and adjust the constant selector. */
+ for (i = 0; i < 16; ++i)
+ {
+ rtx e = XVECEXP (sel, 0, i);
+ unsigned int elt = 31 - (INTVAL (e) & 31);
+ perm[i] = GEN_INT (elt);
+ }
+
+ /* Expand to a permute, swapping the inputs and using the
+ adjusted selector. */
+ if (!REG_P (op0))
+ op0 = force_reg (V16QImode, op0);
+ if (!REG_P (op1))
+ op1 = force_reg (V16QImode, op1);
+
+ constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
+ constv = force_reg (V16QImode, constv);
+ unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
+ UNSPEC_VPERM);
+ if (!REG_P (target))
+ {
+ rtx tmp = gen_reg_rtx (V16QImode);
+ emit_move_insn (tmp, unspec);
+ unspec = tmp;
+ }
+
+ emit_move_insn (target, unspec);
+}
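
The worked example in the comment above can be checked mechanically. The standalone sketch below (plain C, not GCC code) models the hardware's big-endian vperm semantics on byte arrays and verifies that swapping the two inputs and replacing each selector element with 31 minus that element, exactly as this function does, reproduces the permutation the little-endian program intended.

#include <stdio.h>
#include <string.h>
#include <assert.h>

/* Hardware vperm, BE semantics: result lane b = (A || B)[C lane b & 31].
   Arrays are indexed by little-endian element number; the hardware's
   BE lane b holds element 15 - b.  */
static void vperm_hw (unsigned char *r, const unsigned char *a,
                      const unsigned char *b, const unsigned char *c)
{
  unsigned char lanes[32];
  for (int lane = 0; lane < 16; lane++)
    {
      lanes[lane] = a[15 - lane];
      lanes[16 + lane] = b[15 - lane];
    }
  for (int lane = 0; lane < 16; lane++)
    r[15 - lane] = lanes[c[15 - lane] & 31];
}

int main (void)
{
  unsigned char op0[16], op1[16], sel[16], adj[16], want[16], got[16];

  for (int i = 0; i < 16; i++)
    {
      op0[i] = i;                 /* element i of the first input    */
      op1[i] = 16 + i;            /* element i of the second input   */
      sel[i] = (2 * i) & 31;      /* extract the even-numbered bytes */
      want[i] = sel[i] < 16 ? op0[sel[i]] : op1[sel[i] - 16];
      adj[i] = 31 - sel[i];       /* adjusted permute control vector */
    }

  /* Swapped operands plus adjusted selector, as in the expansion above.  */
  vperm_hw (got, op1, op0, adj);
  assert (memcmp (got, want, 16) == 0);
  printf ("adjusted little-endian vperm matches the intended permute\n");
  return 0;
}
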
+
+/* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
+ permute control vector. But here it's not a constant, so we must
+ generate a vector NAND or NOR to do the adjustment. */
+
+void
+altivec_expand_vec_perm_le (rtx operands[4])
+{
+ rtx notx, iorx, unspec;
+ rtx target = operands[0];
+ rtx op0 = operands[1];
+ rtx op1 = operands[2];
+ rtx sel = operands[3];
+ rtx tmp = target;
+ rtx norreg = gen_reg_rtx (V16QImode);
+ enum machine_mode mode = GET_MODE (target);
+
+ /* Get everything in regs so the pattern matches. */
+ if (!REG_P (op0))
+ op0 = force_reg (mode, op0);
+ if (!REG_P (op1))
+ op1 = force_reg (mode, op1);
+ if (!REG_P (sel))
+ sel = force_reg (V16QImode, sel);
+ if (!REG_P (target))
+ tmp = gen_reg_rtx (mode);
+
+ /* Invert the selector with a VNAND if available, else a VNOR.
+ The VNAND is preferred for future fusion opportunities. */
+ notx = gen_rtx_NOT (V16QImode, sel);
+ iorx = (TARGET_P8_VECTOR
+ ? gen_rtx_IOR (V16QImode, notx, notx)
+ : gen_rtx_AND (V16QImode, notx, notx));
+ emit_insn (gen_rtx_SET (VOIDmode, norreg, iorx));
+
+ /* Permute with operands reversed and adjusted selector. */
+ unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
+ UNSPEC_VPERM);
+
+ /* Copy into target, possibly by way of a register. */
+ if (!REG_P (target))
+ {
+ emit_move_insn (tmp, unspec);
+ unspec = tmp;
+ }
+
+ emit_move_insn (target, unspec);
+}
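
Both the VNAND and the VNOR form chosen above simply compute the bitwise complement of the selector. That is sufficient because vperm consults only the low five bits of each selector byte, and within those bits ~x equals 31 - x, the same adjustment the constant-selector case applies. A quick standalone check (plain C, not GCC code):

#include <stdio.h>
#include <assert.h>

int main (void)
{
  for (unsigned x = 0; x < 256; x++)
    assert ((~x & 31u) == ((31 - x) & 31u));
  printf ("~sel and 31 - sel agree in the low 5 bits\n");
  return 0;
}
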
+
/* Expand an Altivec constant permutation. Return true if we match
an efficient implementation; false to fall back to VPERM. */
@@ -26927,26 +30499,43 @@ bool
altivec_expand_vec_perm_const (rtx operands[4])
{
struct altivec_perm_insn {
+ HOST_WIDE_INT mask;
enum insn_code impl;
unsigned char perm[16];
};
static const struct altivec_perm_insn patterns[] = {
- { CODE_FOR_altivec_vpkuhum,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
{ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
- { CODE_FOR_altivec_vpkuwum,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
{ 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
- { CODE_FOR_altivec_vmrghb,
+ { OPTION_MASK_ALTIVEC,
+ (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
+ : CODE_FOR_altivec_vmrglb_direct),
{ 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
- { CODE_FOR_altivec_vmrghh,
+ { OPTION_MASK_ALTIVEC,
+ (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
+ : CODE_FOR_altivec_vmrglh_direct),
{ 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
- { CODE_FOR_altivec_vmrghw,
+ { OPTION_MASK_ALTIVEC,
+ (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
+ : CODE_FOR_altivec_vmrglw_direct),
{ 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
- { CODE_FOR_altivec_vmrglb,
+ { OPTION_MASK_ALTIVEC,
+ (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
+ : CODE_FOR_altivec_vmrghb_direct),
{ 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
- { CODE_FOR_altivec_vmrglh,
+ { OPTION_MASK_ALTIVEC,
+ (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
+ : CODE_FOR_altivec_vmrghh_direct),
{ 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
- { CODE_FOR_altivec_vmrglw,
- { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }
+ { OPTION_MASK_ALTIVEC,
+ (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
+ : CODE_FOR_altivec_vmrghw_direct),
+ { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
+ { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
+ { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
+ { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
+ { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
};
unsigned int i, j, elt, which;
@@ -27003,7 +30592,9 @@ altivec_expand_vec_perm_const (rtx operands[4])
break;
if (i == 16)
{
- emit_insn (gen_altivec_vspltb (target, op0, GEN_INT (elt)));
+ if (!BYTES_BIG_ENDIAN)
+ elt = 15 - elt;
+ emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
return true;
}
@@ -27014,9 +30605,10 @@ altivec_expand_vec_perm_const (rtx operands[4])
break;
if (i == 16)
{
+ int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
x = gen_reg_rtx (V8HImode);
- emit_insn (gen_altivec_vsplth (x, gen_lowpart (V8HImode, op0),
- GEN_INT (elt / 2)));
+ emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
+ GEN_INT (field)));
emit_move_insn (target, gen_lowpart (V16QImode, x));
return true;
}
@@ -27032,9 +30624,10 @@ altivec_expand_vec_perm_const (rtx operands[4])
break;
if (i == 16)
{
+ int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
x = gen_reg_rtx (V4SImode);
- emit_insn (gen_altivec_vspltw (x, gen_lowpart (V4SImode, op0),
- GEN_INT (elt / 4)));
+ emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
+ GEN_INT (field)));
emit_move_insn (target, gen_lowpart (V16QImode, x));
return true;
}
@@ -27046,6 +30639,9 @@ altivec_expand_vec_perm_const (rtx operands[4])
{
bool swapped;
+ if ((patterns[j].mask & rs6000_isa_flags) == 0)
+ continue;
+
elt = patterns[j].perm[0];
if (perm[0] == elt)
swapped = false;
@@ -27069,7 +30665,30 @@ altivec_expand_vec_perm_const (rtx operands[4])
enum machine_mode omode = insn_data[icode].operand[0].mode;
enum machine_mode imode = insn_data[icode].operand[1].mode;
- if (swapped)
+ /* For little-endian, don't use vpkuwum and vpkuhum if the
+ underlying vector type is not V4SI and V8HI, respectively.
+ For example, using vpkuwum with a V8HI picks up the even
+ halfwords (BE numbering) when the even halfwords (LE
+ numbering) are what we need. */
+ if (!BYTES_BIG_ENDIAN
+ && icode == CODE_FOR_altivec_vpkuwum_direct
+ && ((GET_CODE (op0) == REG
+ && GET_MODE (op0) != V4SImode)
+ || (GET_CODE (op0) == SUBREG
+ && GET_MODE (XEXP (op0, 0)) != V4SImode)))
+ continue;
+ if (!BYTES_BIG_ENDIAN
+ && icode == CODE_FOR_altivec_vpkuhum_direct
+ && ((GET_CODE (op0) == REG
+ && GET_MODE (op0) != V8HImode)
+ || (GET_CODE (op0) == SUBREG
+ && GET_MODE (XEXP (op0, 0)) != V8HImode)))
+ continue;
+
+ /* For little-endian, the two input operands must be swapped
+ (or swapped back) to ensure proper right-to-left numbering
+ from 0 to 2N-1. */
+ if (swapped ^ !BYTES_BIG_ENDIAN)
x = op0, op0 = op1, op1 = x;
if (imode != V16QImode)
{
@@ -27087,6 +30706,12 @@ altivec_expand_vec_perm_const (rtx operands[4])
}
}
+ if (!BYTES_BIG_ENDIAN)
+ {
+ altivec_expand_vec_perm_const_le (operands);
+ return true;
+ }
+
return false;
}
@@ -27135,7 +30760,6 @@ rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
vmode = GET_MODE (target);
gcc_assert (GET_MODE_NUNITS (vmode) == 2);
dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
-
x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
@@ -27231,7 +30855,7 @@ rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
rtx perm[16];
- high = (highp == BYTES_BIG_ENDIAN ? 0 : nelt / 2);
+ high = (highp ? 0 : nelt / 2);
for (i = 0; i < nelt / 2; i++)
{
perm[i * 2] = GEN_INT (i + high);
@@ -27286,6 +30910,8 @@ rs6000_function_value (const_tree valtype,
{
enum machine_mode mode;
unsigned int regno;
+ enum machine_mode elt_mode;
+ int n_elts;
/* Special handling for structs in darwin64. */
if (TARGET_MACHO
@@ -27305,6 +30931,36 @@ rs6000_function_value (const_tree valtype,
/* Otherwise fall through to standard ABI rules. */
}
+ /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
+ if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (valtype), valtype,
+ &elt_mode, &n_elts))
+ {
+ int first_reg, n_regs, i;
+ rtx par;
+
+ if (SCALAR_FLOAT_MODE_P (elt_mode))
+ {
+ /* _Decimal128 must use even/odd register pairs. */
+ first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
+ n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
+ }
+ else
+ {
+ first_reg = ALTIVEC_ARG_RETURN;
+ n_regs = 1;
+ }
+
+ par = gen_rtx_PARALLEL (TYPE_MODE (valtype), rtvec_alloc (n_elts));
+ for (i = 0; i < n_elts; i++)
+ {
+ rtx r = gen_rtx_REG (elt_mode, first_reg + i * n_regs);
+ rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
+ XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
+ }
+
+ return par;
+ }
+
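
As a concrete illustration of the PARALLEL built above, the standalone sketch below (plain C, not GCC code) prints where each element of a homogeneous aggregate of four doubles would be returned under ELFv2, assuming the first return FPR is f1 (FP_ARG_RETURN) and one FPR per double-precision element.

#include <stdio.h>

struct hfa { double a, b, c, d; };   /* homogeneous aggregate of 4 doubles */

int main (void)
{
  const int elt_size = sizeof (double);          /* GET_MODE_SIZE (DFmode) */
  const int n_elts = sizeof (struct hfa) / elt_size;
  const int first_reg = 1;                       /* f1, i.e. FP_ARG_RETURN */
  const int n_regs = (elt_size + 7) >> 3;        /* FPRs per element       */

  for (int i = 0; i < n_elts; i++)
    printf ("element %d -> f%d, byte offset %d\n",
            i, first_reg + i * n_regs, i * elt_size);
  return 0;
}
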
if (TARGET_32BIT && TARGET_POWERPC64 && TYPE_MODE (valtype) == DImode)
{
/* Long long return value need be split in -mpowerpc64, 32bit ABI. */
@@ -27417,6 +31073,13 @@ rs6000_libcall_value (enum machine_mode mode)
}
+/* Return true if we use LRA instead of reload pass. */
+static bool
+rs6000_lra_p (void)
+{
+ return rs6000_lra_flag;
+}
+
/* Given FROM and TO register numbers, say whether this elimination is allowed.
Frame pointer elimination is automatically handled.
@@ -27679,22 +31342,33 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
{
{ "altivec", OPTION_MASK_ALTIVEC, false, true },
{ "cmpb", OPTION_MASK_CMPB, false, true },
+ { "crypto", OPTION_MASK_CRYPTO, false, true },
+ { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
{ "dlmzb", OPTION_MASK_DLMZB, false, true },
{ "fprnd", OPTION_MASK_FPRND, false, true },
{ "hard-dfp", OPTION_MASK_DFP, false, true },
+ { "htm", OPTION_MASK_HTM, false, true },
{ "isel", OPTION_MASK_ISEL, false, true },
{ "mfcrf", OPTION_MASK_MFCRF, false, true },
{ "mfpgpr", OPTION_MASK_MFPGPR, false, true },
{ "mulhw", OPTION_MASK_MULHW, false, true },
{ "multiple", OPTION_MASK_MULTIPLE, false, true },
- { "update", OPTION_MASK_NO_UPDATE, true , true },
{ "popcntb", OPTION_MASK_POPCNTB, false, true },
{ "popcntd", OPTION_MASK_POPCNTD, false, true },
+ { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
+ { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
+ { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
{ "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
{ "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
+ { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
+ { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
{ "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
{ "string", OPTION_MASK_STRING, false, true },
+ { "update", OPTION_MASK_NO_UPDATE, true , true },
+ { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, false },
+ { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, false },
{ "vsx", OPTION_MASK_VSX, false, true },
+ { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
#ifdef OPTION_MASK_64BIT
#if TARGET_AIX_OS
{ "aix64", OPTION_MASK_64BIT, false, false },
@@ -27734,6 +31408,11 @@ static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
{ "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
{ "popcntd", RS6000_BTM_POPCNTD, false, false },
{ "cell", RS6000_BTM_CELL, false, false },
+ { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
+ { "crypto", RS6000_BTM_CRYPTO, false, false },
+ { "htm", RS6000_BTM_HTM, false, false },
+ { "hard-dfp", RS6000_BTM_DFP, false, false },
+ { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
};
/* Option variables that we want to support inside attribute((target)) and
@@ -28250,7 +31929,6 @@ rs6000_print_options_internal (FILE *file,
size_t cur_column;
size_t max_column = 76;
const char *comma = "";
- const char *nl = "\n";
if (indent)
start_column += fprintf (file, "%*s", indent, "");
@@ -28281,7 +31959,6 @@ rs6000_print_options_internal (FILE *file,
fprintf (stderr, ", \\\n%*s", (int)start_column, "");
cur_column = start_column + len;
comma = "";
- nl = "\n\n";
}
fprintf (file, "%s%s%s%s", comma, prefix, no_str,
@@ -28291,7 +31968,7 @@ rs6000_print_options_internal (FILE *file,
}
}
- fputs (nl, file);
+ fputs ("\n", file);
}
/* Helper function to print the current isa options on a line. */
@@ -28467,118 +32144,149 @@ rs6000_legitimate_constant_p (enum machine_mode mode, rtx x)
}
-/* A function pointer under AIX is a pointer to a data area whose first word
- contains the actual address of the function, whose second word contains a
- pointer to its TOC, and whose third word contains a value to place in the
- static chain register (r11). Note that if we load the static chain, our
- "trampoline" need not have any executable code. */
+
+/* Expand code to perform a call under the AIX or ELFv2 ABI. */
void
-rs6000_call_indirect_aix (rtx value, rtx func_desc, rtx flag)
+rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
{
+ rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
+ rtx toc_load = NULL_RTX;
+ rtx toc_restore = NULL_RTX;
rtx func_addr;
- rtx toc_reg;
- rtx sc_reg;
- rtx stack_ptr;
- rtx stack_toc_offset;
- rtx stack_toc_mem;
- rtx func_toc_offset;
- rtx func_toc_mem;
- rtx func_sc_offset;
- rtx func_sc_mem;
+ rtx abi_reg = NULL_RTX;
+ rtx call[4];
+ int n_call;
rtx insn;
- rtx (*call_func) (rtx, rtx, rtx, rtx);
- rtx (*call_value_func) (rtx, rtx, rtx, rtx, rtx);
- stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
- toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
+ /* Handle longcall attributes. */
+ if (INTVAL (cookie) & CALL_LONG)
+ func_desc = rs6000_longcall_ref (func_desc);
+
+ /* Handle indirect calls. */
+ if (GET_CODE (func_desc) != SYMBOL_REF
+ || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
+ {
+ /* Save the TOC into its reserved slot before the call,
+ and prepare to restore it after the call. */
+ rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
+ rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
+ rtx stack_toc_mem = gen_frame_mem (Pmode,
+ gen_rtx_PLUS (Pmode, stack_ptr,
+ stack_toc_offset));
+ toc_restore = gen_rtx_SET (VOIDmode, toc_reg, stack_toc_mem);
+
+ /* Can we optimize saving the TOC in the prologue or
+ do we need to do it at every call? */
+ if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
+ cfun->machine->save_toc_in_prologue = true;
+ else
+ {
+ MEM_VOLATILE_P (stack_toc_mem) = 1;
+ emit_move_insn (stack_toc_mem, toc_reg);
+ }
+
+ if (DEFAULT_ABI == ABI_ELFv2)
+ {
+ /* A function pointer in the ELFv2 ABI is just a plain address, but
+ the ABI requires it to be loaded into r12 before the call. */
+ func_addr = gen_rtx_REG (Pmode, 12);
+ emit_move_insn (func_addr, func_desc);
+ abi_reg = func_addr;
+ }
+ else
+ {
+ /* A function pointer under AIX is a pointer to a data area whose
+ first word contains the actual address of the function, whose
+ second word contains a pointer to its TOC, and whose third word
+ contains a value to place in the static chain register (r11).
+ Note that if we load the static chain, our "trampoline" need
+ not have any executable code. */
/* Load up address of the actual function. */
func_desc = force_reg (Pmode, func_desc);
func_addr = gen_reg_rtx (Pmode);
emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
- if (TARGET_32BIT)
- {
-
- stack_toc_offset = GEN_INT (TOC_SAVE_OFFSET_32BIT);
- func_toc_offset = GEN_INT (AIX_FUNC_DESC_TOC_32BIT);
- func_sc_offset = GEN_INT (AIX_FUNC_DESC_SC_32BIT);
+ /* Prepare to load the TOC of the called function. Note that the
+ TOC load must happen immediately before the actual call so
+ that unwinding the TOC registers works correctly. See the
+ comment in frob_update_context. */
+ rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
+ rtx func_toc_mem = gen_rtx_MEM (Pmode,
+ gen_rtx_PLUS (Pmode, func_desc,
+ func_toc_offset));
+ toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
+
+ /* If we have a static chain, load it up. */
if (TARGET_POINTERS_TO_NESTED_FUNCTIONS)
{
- call_func = gen_call_indirect_aix32bit;
- call_value_func = gen_call_value_indirect_aix32bit;
- }
- else
- {
- call_func = gen_call_indirect_aix32bit_nor11;
- call_value_func = gen_call_value_indirect_aix32bit_nor11;
+ rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
+ rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
+ rtx func_sc_mem = gen_rtx_MEM (Pmode,
+ gen_rtx_PLUS (Pmode, func_desc,
+ func_sc_offset));
+ emit_move_insn (sc_reg, func_sc_mem);
+ abi_reg = sc_reg;
}
}
- else
- {
- stack_toc_offset = GEN_INT (TOC_SAVE_OFFSET_64BIT);
- func_toc_offset = GEN_INT (AIX_FUNC_DESC_TOC_64BIT);
- func_sc_offset = GEN_INT (AIX_FUNC_DESC_SC_64BIT);
- if (TARGET_POINTERS_TO_NESTED_FUNCTIONS)
- {
- call_func = gen_call_indirect_aix64bit;
- call_value_func = gen_call_value_indirect_aix64bit;
}
else
{
- call_func = gen_call_indirect_aix64bit_nor11;
- call_value_func = gen_call_value_indirect_aix64bit_nor11;
- }
+ /* Direct calls use the TOC: for local calls, the callee will
+ assume the TOC register is set; for non-local calls, the
+ PLT stub needs the TOC register. */
+ abi_reg = toc_reg;
+ func_addr = func_desc;
}
- /* Reserved spot to store the TOC. */
- stack_toc_mem = gen_frame_mem (Pmode,
- gen_rtx_PLUS (Pmode,
- stack_ptr,
- stack_toc_offset));
+ /* Create the call. */
+ call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
+ if (value != NULL_RTX)
+ call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
+ n_call = 1;
- gcc_assert (cfun);
- gcc_assert (cfun->machine);
+ if (toc_load)
+ call[n_call++] = toc_load;
+ if (toc_restore)
+ call[n_call++] = toc_restore;
- /* Can we optimize saving the TOC in the prologue or do we need to do it at
- every call? */
- if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
- cfun->machine->save_toc_in_prologue = true;
+ call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
- else
- {
- MEM_VOLATILE_P (stack_toc_mem) = 1;
- emit_move_insn (stack_toc_mem, toc_reg);
- }
+ insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
+ insn = emit_call_insn (insn);
- /* Calculate the address to load the TOC of the called function. We don't
- actually load this until the split after reload. */
- func_toc_mem = gen_rtx_MEM (Pmode,
- gen_rtx_PLUS (Pmode,
- func_desc,
- func_toc_offset));
+ /* Mention all registers defined by the ABI to hold information
+ as uses in CALL_INSN_FUNCTION_USAGE. */
+ if (abi_reg)
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
+}
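
A standalone sketch (plain C, not GCC code) of the data the two indirect-call paths above consume: under AIX/ELFv1 a function pointer addresses a three-word descriptor whose words are read at offsets 0, GET_MODE_SIZE (Pmode) and 2 * GET_MODE_SIZE (Pmode), exactly the offsets formed above, while under ELFv2 the pointer is the entry address itself and is simply copied into r12.

#include <stdio.h>
#include <stdint.h>

struct aix_func_desc
{
  uintptr_t entry;         /* word 0: actual code address          */
  uintptr_t toc;           /* word 1: callee's TOC pointer (r2)    */
  uintptr_t static_chain;  /* word 2: value for r11, may be unused */
};

static void show_indirect_call (const struct aix_func_desc *fd)
{
  printf ("branch to    %#lx (offset 0)\n", (unsigned long) fd->entry);
  printf ("load r2  <-  %#lx (offset %zu)\n", (unsigned long) fd->toc,
          sizeof (uintptr_t));
  printf ("load r11 <-  %#lx (offset %zu)\n",
          (unsigned long) fd->static_chain, 2 * sizeof (uintptr_t));
}

int main (void)
{
  struct aix_func_desc fd = { 0x10001234, 0x10080000, 0 };
  show_indirect_call (&fd);
  return 0;
}
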
- /* If we have a static chain, load it up. */
- if (TARGET_POINTERS_TO_NESTED_FUNCTIONS)
- {
- func_sc_mem = gen_rtx_MEM (Pmode,
- gen_rtx_PLUS (Pmode,
- func_desc,
- func_sc_offset));
+/* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
- sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
- emit_move_insn (sc_reg, func_sc_mem);
- }
+void
+rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
+{
+ rtx call[2];
+ rtx insn;
+
+ gcc_assert (INTVAL (cookie) == 0);
/* Create the call. */
- if (value)
- insn = call_value_func (value, func_addr, flag, func_toc_mem,
- stack_toc_mem);
- else
- insn = call_func (func_addr, flag, func_toc_mem, stack_toc_mem);
+ call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
+ if (value != NULL_RTX)
+ call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
- emit_call_insn (insn);
+ call[1] = simple_return_rtx;
+
+ insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
+ insn = emit_call_insn (insn);
+
+ /* Note use of the TOC register. */
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
+ /* We need to also mark a use of the link register since the function we
+ sibling-call to will use it to return to our caller. */
+ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, LR_REGNO));
}
/* Return whether we need to always update the saved TOC pointer when we update
@@ -28679,6 +32387,661 @@ rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
}
+
+/* Helper function for rs6000_split_logical to emit a logical instruction after
+ spliting the operation to single GPR registers.
+
+ DEST is the destination register.
+ OP1 and OP2 are the input source registers.
+ CODE is the base operation (AND, IOR, XOR, NOT).
+ MODE is the machine mode.
+ If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
+ If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
+ If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
+ CLOBBER_REG is either NULL or a scratch register of type CC to allow
+ formation of the AND instructions. */
+
+static void
+rs6000_split_logical_inner (rtx dest,
+ rtx op1,
+ rtx op2,
+ enum rtx_code code,
+ enum machine_mode mode,
+ bool complement_final_p,
+ bool complement_op1_p,
+ bool complement_op2_p,
+ rtx clobber_reg)
+{
+ rtx bool_rtx;
+ rtx set_rtx;
+
+ /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
+ if (op2 && GET_CODE (op2) == CONST_INT
+ && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
+ && !complement_final_p && !complement_op1_p && !complement_op2_p)
+ {
+ HOST_WIDE_INT mask = GET_MODE_MASK (mode);
+ HOST_WIDE_INT value = INTVAL (op2) & mask;
+
+ /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
+ if (code == AND)
+ {
+ if (value == 0)
+ {
+ emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
+ return;
+ }
+
+ else if (value == mask)
+ {
+ if (!rtx_equal_p (dest, op1))
+ emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
+ return;
+ }
+ }
+
+ /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
+ into separate ORI/ORIS or XORI/XORIS instructions. */
+ else if (code == IOR || code == XOR)
+ {
+ if (value == 0)
+ {
+ if (!rtx_equal_p (dest, op1))
+ emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
+ return;
+ }
+ }
+ }
+
+ if (complement_op1_p)
+ op1 = gen_rtx_NOT (mode, op1);
+
+ if (complement_op2_p)
+ op2 = gen_rtx_NOT (mode, op2);
+
+ bool_rtx = ((code == NOT)
+ ? gen_rtx_NOT (mode, op1)
+ : gen_rtx_fmt_ee (code, mode, op1, op2));
+
+ if (complement_final_p)
+ bool_rtx = gen_rtx_NOT (mode, bool_rtx);
+
+ set_rtx = gen_rtx_SET (VOIDmode, dest, bool_rtx);
+
+ /* Is this AND with an explicit clobber? */
+ if (clobber_reg)
+ {
+ rtx clobber = gen_rtx_CLOBBER (VOIDmode, clobber_reg);
+ set_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set_rtx, clobber));
+ }
+
+ emit_insn (set_rtx);
+ return;
+}
+
+/* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
+ operations are split immediately during RTL generation to allow for more
+ optimizations of the AND/IOR/XOR.
+
+ OPERANDS is an array containing the destination and two input operands.
+ CODE is the base operation (AND, IOR, XOR, NOT).
+ MODE is the machine mode.
+ If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
+ If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
+ If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
+ CLOBBER_REG is either NULL or a scratch register of type CC to allow
+ formation of the AND instructions. */
+
+static void
+rs6000_split_logical_di (rtx operands[3],
+ enum rtx_code code,
+ bool complement_final_p,
+ bool complement_op1_p,
+ bool complement_op2_p,
+ rtx clobber_reg)
+{
+ const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
+ const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
+ const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
+ enum hi_lo { hi = 0, lo = 1 };
+ rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
+ size_t i;
+
+ op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
+ op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
+ op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
+ op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
+
+ if (code == NOT)
+ op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
+ else
+ {
+ if (GET_CODE (operands[2]) != CONST_INT)
+ {
+ op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
+ op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
+ }
+ else
+ {
+ HOST_WIDE_INT value = INTVAL (operands[2]);
+ HOST_WIDE_INT value_hi_lo[2];
+
+ gcc_assert (!complement_final_p);
+ gcc_assert (!complement_op1_p);
+ gcc_assert (!complement_op2_p);
+
+ value_hi_lo[hi] = value >> 32;
+ value_hi_lo[lo] = value & lower_32bits;
+
+ for (i = 0; i < 2; i++)
+ {
+ HOST_WIDE_INT sub_value = value_hi_lo[i];
+
+ if (sub_value & sign_bit)
+ sub_value |= upper_32bits;
+
+ op2_hi_lo[i] = GEN_INT (sub_value);
+
+ /* If this is an AND instruction, check to see if we need to load
+ the value in a register. */
+ if (code == AND && sub_value != -1 && sub_value != 0
+ && !and_operand (op2_hi_lo[i], SImode))
+ op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
+ }
+ }
+ }
+
+ for (i = 0; i < 2; i++)
+ {
+ /* Split large IOR/XOR operations. */
+ if ((code == IOR || code == XOR)
+ && GET_CODE (op2_hi_lo[i]) == CONST_INT
+ && !complement_final_p
+ && !complement_op1_p
+ && !complement_op2_p
+ && clobber_reg == NULL_RTX
+ && !logical_const_operand (op2_hi_lo[i], SImode))
+ {
+ HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
+ HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
+ HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
+ rtx tmp = gen_reg_rtx (SImode);
+
+ /* Make sure the constant is sign extended. */
+ if ((hi_16bits & sign_bit) != 0)
+ hi_16bits |= upper_32bits;
+
+ rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
+ code, SImode, false, false, false,
+ NULL_RTX);
+
+ rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
+ code, SImode, false, false, false,
+ NULL_RTX);
+ }
+ else
+ rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
+ code, SImode, complement_final_p,
+ complement_op1_p, complement_op2_p,
+ clobber_reg);
+ }
+
+ return;
+}
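
Both splits performed here are easy to verify in isolation: a 64-bit logical operation can be done independently on its two 32-bit halves, and a 32-bit IOR/XOR with a constant that is not a 16-bit logical immediate can be done as a high-halfword step (ORIS/XORIS) followed by a low-halfword step (ORI/XORI). A standalone check, plain C and not GCC code:

#include <stdio.h>
#include <stdint.h>
#include <assert.h>

int main (void)
{
  uint64_t x = 0x123456789abcdef0ULL;
  uint64_t c = 0x0f0f00000000f0f0ULL;

  /* Split the DImode IOR into two independent SImode IORs.  */
  uint32_t hi = (uint32_t) (x >> 32) | (uint32_t) (c >> 32);
  uint32_t lo = (uint32_t) x | (uint32_t) c;
  assert ((((uint64_t) hi << 32) | lo) == (x | c));

  /* Split one SImode IOR with a wide constant into ORIS + ORI.  */
  uint32_t value = 0x0f0f00f0;                      /* needs both pieces  */
  uint32_t step1 = lo | (value & 0xffff0000u);      /* oris: high 16 bits */
  uint32_t step2 = step1 | (value & 0x0000ffffu);   /* ori:  low 16 bits  */
  assert (step2 == (lo | value));

  printf ("half-word and half-register splits match the direct result\n");
  return 0;
}
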
+
+/* Split the insns that make up boolean operations operating on multiple GPR
+ registers. The boolean MD patterns ensure that the inputs either are
+ exactly the same as the output registers, or there is no overlap.
+
+ OPERANDS is an array containing the destination and two input operands.
+ CODE is the base operation (AND, IOR, XOR, NOT).
+ MODE is the machine mode.
+ If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
+ If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
+ If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
+ CLOBBER_REG is either NULL or a scratch register of type CC to allow
+ formation of the AND instructions. */
+
+void
+rs6000_split_logical (rtx operands[3],
+ enum rtx_code code,
+ bool complement_final_p,
+ bool complement_op1_p,
+ bool complement_op2_p,
+ rtx clobber_reg)
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+ enum machine_mode sub_mode;
+ rtx op0, op1, op2;
+ int sub_size, regno0, regno1, nregs, i;
+
+ /* If this is DImode, use the specialized version that can run before
+ register allocation. */
+ if (mode == DImode && !TARGET_POWERPC64)
+ {
+ rs6000_split_logical_di (operands, code, complement_final_p,
+ complement_op1_p, complement_op2_p,
+ clobber_reg);
+ return;
+ }
+
+ op0 = operands[0];
+ op1 = operands[1];
+ op2 = (code == NOT) ? NULL_RTX : operands[2];
+ sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
+ sub_size = GET_MODE_SIZE (sub_mode);
+ regno0 = REGNO (op0);
+ regno1 = REGNO (op1);
+
+ gcc_assert (reload_completed);
+ gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
+ gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
+
+ nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
+ gcc_assert (nregs > 1);
+
+ if (op2 && REG_P (op2))
+ gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
+
+ for (i = 0; i < nregs; i++)
+ {
+ int offset = i * sub_size;
+ rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
+ rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
+ rtx sub_op2 = ((code == NOT)
+ ? NULL_RTX
+ : simplify_subreg (sub_mode, op2, mode, offset));
+
+ rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
+ complement_final_p, complement_op1_p,
+ complement_op2_p, clobber_reg);
+ }
+
+ return;
+}
+
+
+/* Return true if the peephole2 can combine a load involving a combination of
+ an addis instruction and a load with an offset that can be fused together on
+ a power8.
+
+ The operands are:
+ operands[0] register set with addis
+ operands[1] value set via addis
+ operands[2] target register being loaded
+ operands[3] D-form memory reference using operands[0].
+
+ In addition, we are passed a boolean that is true if this is a peephole2,
+ and we can use it to see if the addis_reg is dead after the insn and can be
+ replaced by the target register. */
+
+bool
+fusion_gpr_load_p (rtx *operands, bool peep2_p)
+{
+ rtx addis_reg = operands[0];
+ rtx addis_value = operands[1];
+ rtx target = operands[2];
+ rtx mem = operands[3];
+ rtx addr;
+ rtx base_reg;
+
+ /* Validate arguments. */
+ if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
+ return false;
+
+ if (!base_reg_operand (target, GET_MODE (target)))
+ return false;
+
+ if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
+ return false;
+
+ if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
+ return false;
+
+ /* Allow sign/zero extension. */
+ if (GET_CODE (mem) == ZERO_EXTEND
+ || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
+ mem = XEXP (mem, 0);
+
+ if (!MEM_P (mem))
+ return false;
+
+ addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
+ if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
+ return false;
+
+ /* Validate that the register used to load the high value is either the
+ register being loaded, or we can safely replace its use in a peephole2.
+
+ If this is a peephole2, we assume that there are 2 instructions in the
+ peephole (addis and load), so we want to check if the target register was
+ not used in the memory address and the register to hold the addis result
+ is dead after the peephole. */
+ if (REGNO (addis_reg) != REGNO (target))
+ {
+ if (!peep2_p)
+ return false;
+
+ if (reg_mentioned_p (target, mem))
+ return false;
+
+ if (!peep2_reg_dead_p (2, addis_reg))
+ return false;
+
+ /* If the target register being loaded is the stack pointer, we must
+ avoid loading any other value into it, even temporarily. */
+ if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
+ return false;
+ }
+
+ base_reg = XEXP (addr, 0);
+ return REGNO (addis_reg) == REGNO (base_reg);
+}
+
+/* During the peephole2 pass, adjust and expand the insns for a load fusion
+ sequence. We adjust the addis register to use the target register. If the
+ load sign extends, we adjust the code to do the zero extending load, and an
+ explicit sign extension later since the fusion only covers zero extending
+ loads.
+
+ The operands are:
+ operands[0] register set with addis (to be replaced with target)
+ operands[1] value set via addis
+ operands[2] target register being loaded
+ operands[3] D-form memory reference using operands[0]. */
+
+void
+expand_fusion_gpr_load (rtx *operands)
+{
+ rtx addis_value = operands[1];
+ rtx target = operands[2];
+ rtx orig_mem = operands[3];
+ rtx new_addr, new_mem, orig_addr, offset;
+ enum rtx_code plus_or_lo_sum;
+ enum machine_mode target_mode = GET_MODE (target);
+ enum machine_mode extend_mode = target_mode;
+ enum machine_mode ptr_mode = Pmode;
+ enum rtx_code extend = UNKNOWN;
+ rtx addis_reg = ((ptr_mode == target_mode)
+ ? target
+ : simplify_subreg (ptr_mode, target, target_mode, 0));
+
+ if (GET_CODE (orig_mem) == ZERO_EXTEND
+ || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
+ {
+ extend = GET_CODE (orig_mem);
+ orig_mem = XEXP (orig_mem, 0);
+ target_mode = GET_MODE (orig_mem);
+ }
+
+ gcc_assert (MEM_P (orig_mem));
+
+ orig_addr = XEXP (orig_mem, 0);
+ plus_or_lo_sum = GET_CODE (orig_addr);
+ gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
+
+ offset = XEXP (orig_addr, 1);
+ new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_reg, offset);
+ new_mem = change_address (orig_mem, target_mode, new_addr);
+
+ if (extend != UNKNOWN)
+ new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
+
+ emit_insn (gen_rtx_SET (VOIDmode, addis_reg, addis_value));
+ emit_insn (gen_rtx_SET (VOIDmode, target, new_mem));
+
+ if (extend == SIGN_EXTEND)
+ {
+ int sub_off = ((BYTES_BIG_ENDIAN)
+ ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
+ : 0);
+ rtx sign_reg
+ = simplify_subreg (target_mode, target, extend_mode, sub_off);
+
+ emit_insn (gen_rtx_SET (VOIDmode, target,
+ gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
+ }
+
+ return;
+}
+
+/* Return a string to fuse an addis instruction with a gpr load to the same
+ register that we loaded up the addis instruction. The code is complicated,
+ so we call output_asm_insn directly, and just return "".
+
+ The operands are:
+ operands[0] register set with addis (must be same reg as target).
+ operands[1] value set via addis
+ operands[2] target register being loaded
+ operands[3] D-form memory reference using operands[0]. */
+
+const char *
+emit_fusion_gpr_load (rtx *operands)
+{
+ rtx addis_reg = operands[0];
+ rtx addis_value = operands[1];
+ rtx target = operands[2];
+ rtx mem = operands[3];
+ rtx fuse_ops[10];
+ rtx addr;
+ rtx load_offset;
+ const char *addis_str = NULL;
+ const char *load_str = NULL;
+ const char *extend_insn = NULL;
+ const char *mode_name = NULL;
+ char insn_template[80];
+ enum machine_mode mode;
+ const char *comment_str = ASM_COMMENT_START;
+ bool sign_p = false;
+
+ gcc_assert (REG_P (addis_reg) && REG_P (target));
+ gcc_assert (REGNO (addis_reg) == REGNO (target));
+
+ if (*comment_str == ' ')
+ comment_str++;
+
+ /* Allow sign/zero extension. */
+ if (GET_CODE (mem) == ZERO_EXTEND)
+ mem = XEXP (mem, 0);
+
+ else if (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN)
+ {
+ sign_p = true;
+ mem = XEXP (mem, 0);
+ }
+
+ gcc_assert (MEM_P (mem));
+ addr = XEXP (mem, 0);
+ if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
+ gcc_unreachable ();
+
+ load_offset = XEXP (addr, 1);
+
+ /* Now emit the load instruction to the same register. */
+ mode = GET_MODE (mem);
+ switch (mode)
+ {
+ case QImode:
+ mode_name = "char";
+ load_str = "lbz";
+ extend_insn = "extsb %0,%0";
+ break;
+
+ case HImode:
+ mode_name = "short";
+ load_str = "lhz";
+ extend_insn = "extsh %0,%0";
+ break;
+
+ case SImode:
+ mode_name = "int";
+ load_str = "lwz";
+ extend_insn = "extsw %0,%0";
+ break;
+
+ case DImode:
+ if (TARGET_POWERPC64)
+ {
+ mode_name = "long";
+ load_str = "ld";
+ }
+ else
+ gcc_unreachable ();
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Emit the addis instruction. */
+ fuse_ops[0] = target;
+ if (satisfies_constraint_L (addis_value))
+ {
+ fuse_ops[1] = addis_value;
+ addis_str = "lis %0,%v1";
+ }
+
+ else if (GET_CODE (addis_value) == PLUS)
+ {
+ rtx op0 = XEXP (addis_value, 0);
+ rtx op1 = XEXP (addis_value, 1);
+
+ if (REG_P (op0) && CONST_INT_P (op1)
+ && satisfies_constraint_L (op1))
+ {
+ fuse_ops[1] = op0;
+ fuse_ops[2] = op1;
+ addis_str = "addis %0,%1,%v2";
+ }
+ }
+
+ else if (GET_CODE (addis_value) == HIGH)
+ {
+ rtx value = XEXP (addis_value, 0);
+ if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
+ {
+ fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
+ fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
+ if (TARGET_ELF)
+ addis_str = "addis %0,%2,%1@toc@ha";
+
+ else if (TARGET_XCOFF)
+ addis_str = "addis %0,%1@u(%2)";
+
+ else
+ gcc_unreachable ();
+ }
+
+ else if (GET_CODE (value) == PLUS)
+ {
+ rtx op0 = XEXP (value, 0);
+ rtx op1 = XEXP (value, 1);
+
+ if (GET_CODE (op0) == UNSPEC
+ && XINT (op0, 1) == UNSPEC_TOCREL
+ && CONST_INT_P (op1))
+ {
+ fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
+ fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
+ fuse_ops[3] = op1;
+ if (TARGET_ELF)
+ addis_str = "addis %0,%2,%1+%3@toc@ha";
+
+ else if (TARGET_XCOFF)
+ addis_str = "addis %0,%1+%3@u(%2)";
+
+ else
+ gcc_unreachable ();
+ }
+ }
+
+ else if (satisfies_constraint_L (value))
+ {
+ fuse_ops[1] = value;
+ addis_str = "lis %0,%v1";
+ }
+
+ else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
+ {
+ fuse_ops[1] = value;
+ addis_str = "lis %0,%1@ha";
+ }
+ }
+
+ if (!addis_str)
+ fatal_insn ("Could not generate addis value for fusion", addis_value);
+
+ sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s", addis_str,
+ comment_str, mode_name);
+ output_asm_insn (insn_template, fuse_ops);
+
+ /* Emit the D-form load instruction. */
+ if (CONST_INT_P (load_offset) && satisfies_constraint_I (load_offset))
+ {
+ sprintf (insn_template, "%s %%0,%%1(%%0)", load_str);
+ fuse_ops[1] = load_offset;
+ output_asm_insn (insn_template, fuse_ops);
+ }
+
+ else if (GET_CODE (load_offset) == UNSPEC
+ && XINT (load_offset, 1) == UNSPEC_TOCREL)
+ {
+ if (TARGET_ELF)
+ sprintf (insn_template, "%s %%0,%%1@toc@l(%%0)", load_str);
+
+ else if (TARGET_XCOFF)
+ sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
+
+ else
+ gcc_unreachable ();
+
+ fuse_ops[1] = XVECEXP (load_offset, 0, 0);
+ output_asm_insn (insn_template, fuse_ops);
+ }
+
+ else if (GET_CODE (load_offset) == PLUS
+ && GET_CODE (XEXP (load_offset, 0)) == UNSPEC
+ && XINT (XEXP (load_offset, 0), 1) == UNSPEC_TOCREL
+ && CONST_INT_P (XEXP (load_offset, 1)))
+ {
+ rtx tocrel_unspec = XEXP (load_offset, 0);
+ if (TARGET_ELF)
+ sprintf (insn_template, "%s %%0,%%1+%%2@toc@l(%%0)", load_str);
+
+ else if (TARGET_XCOFF)
+ sprintf (insn_template, "%s %%0,%%1+%%2@l(%%0)", load_str);
+
+ else
+ gcc_unreachable ();
+
+ fuse_ops[1] = XVECEXP (tocrel_unspec, 0, 0);
+ fuse_ops[2] = XEXP (load_offset, 1);
+ output_asm_insn (insn_template, fuse_ops);
+ }
+
+ else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (load_offset))
+ {
+ sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
+
+ fuse_ops[1] = load_offset;
+ output_asm_insn (insn_template, fuse_ops);
+ }
+
+ else
+ fatal_insn ("Unable to generate load offset for fusion", load_offset);
+
+ /* Handle sign extension. The peephole2 pass generates this as a separate
+ insn, but we handle it just in case it got reattached. */
+ if (sign_p)
+ {
+ gcc_assert (extend_insn != NULL);
+ output_asm_insn (extend_insn, fuse_ops);
+ }
+
+ return "";
+}
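
The fused sequence emitted above relies on the usual PowerPC split of an offset into a "high adjusted" part and a low part: addis adds ((offset + 0x8000) >> 16) scaled by 65536, and the D-form load supplies the low 16 bits as a sign-extended displacement, so together they reconstruct the full offset (this is what the @ha and @l relocation operators encode). A standalone check, plain C and not GCC code:

#include <stdio.h>
#include <stdint.h>
#include <assert.h>

static int32_t ha (int32_t off) { return (off + 0x8000) >> 16; }   /* @ha */
static int32_t lo (int32_t off) { return (int16_t) off; }          /* @l  */

int main (void)
{
  int32_t offsets[] = { 0, 0x7fff, 0x8000, 0x12345678, -4, -0x8000 };

  for (unsigned i = 0; i < sizeof offsets / sizeof offsets[0]; i++)
    {
      int32_t off = offsets[i];
      /* addis rT,rA,ha(off)  followed by  lwz/ld rT,lo(off)(rT)  */
      assert ((ha (off) << 16) + lo (off) == off);
    }
  printf ("@ha/@l reconstruction holds for all test offsets\n");
  return 0;
}
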
+
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-rs6000.h"