aboutsummaryrefslogtreecommitdiffstats
path: root/gcc-4.9/gcc
diff options
context:
space:
mode:
authorjgreenhalgh <jgreenhalgh@138bc75d-0d04-0410-961f-82ee72b054a4>2014-05-16 08:41:46 (GMT)
committerEvandro Menezes <e.menezes@samsung.com>2015-08-28 22:41:17 (GMT)
commit220882082b32a048e224bc678f5a3b6fe87259b7 (patch)
treeffd12ae94328132d05afb37f29ed5e256af74315 /gcc-4.9/gcc
parent6cf9a82e4165911e0df28baf66d71b33253108a6 (diff)
downloadtoolchain_gcc-220882082b32a048e224bc678f5a3b6fe87259b7.zip
toolchain_gcc-220882082b32a048e224bc678f5a3b6fe87259b7.tar.gz
toolchain_gcc-220882082b32a048e224bc678f5a3b6fe87259b7.tar.bz2
[4.9] Adjust several costs for AArch64.
Backport from trunk 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> Refactor aarch64_address_costs. gcc/ * config/aarch64/aarch64-protos.h (scale_addr_mode_cost): New. (cpu_addrcost_table): Use it. * config/aarch64/aarch64.c (generic_addrcost_table): Initialize it. (aarch64_address_cost): Rewrite using aarch64_classify_address, move it. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@210493 138bc75d-0d04-0410-961f-82ee72b054a4 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> Add cost tables for Cortex-A57 gcc/ * config/aarch64/aarch64.c (cortexa57_addrcost_table): New. (cortexa57_vector_cost): Likewise. (cortexa57_tunings): Use them. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@210494 138bc75d-0d04-0410-961f-82ee72b054a4 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> Better estimate cost of building a constant gcc/ * config/aarch64/aarch64.c (aarch64_build_constant): Conditionally emit instructions, return number of instructions which would be emitted. (aarch64_add_constant): Update call to aarch64_build_constant. (aarch64_output_mi_thunk): Likewise. (aarch64_rtx_costs): Estimate cost of a CONST_INT, cost a CONST_DOUBLE. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@210496 138bc75d-0d04-0410-961f-82ee72b054a4 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> Add cost tables for Cortex-A57 gcc/ * config/aarch64/aarch64.c (cortexa57_addrcost_table): New. (cortexa57_vector_cost): Likewise. (cortexa57_tunings): Use them. 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> Wrap aarch64_rtx_costs to dump verbose output gcc/ * config/aarch64/aarch64.c (aarch64_rtx_costs_wrapper): New. (TARGET_RTX_COSTS): Call it. 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> Philipp Tomsich <philipp.tomsich@theobroma-systems.com> Better estimate cost of building a constant gcc/ * config/aarch64/aarch64.c (aarch64_build_constant): Conditionally emit instructions, return number of instructions which would be emitted. 
(aarch64_add_constant): Update call to aarch64_build_constant. (aarch64_output_mi_thunk): Likewise. (aarch64_rtx_costs): Estimate cost of a CONST_INT, cost a CONST_DOUBLE. 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> Philipp Tomsich <philipp.tomsich@theobroma-systems.com> Factor out common MULT cases gcc/ * config/aarch64/aarch64.c (aarch64_strip_shift_or_extend): Rename to... (aarch64_strip_extend): ...this, don't strip shifts, check RTX is well formed. (aarch64_rtx_mult_cost): New. (aarch64_rtx_costs): Use it, refactor as appropriate. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@210497 138bc75d-0d04-0410-961f-82ee72b054a4 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> Set default costs and handle vector modes. gcc/ * config/aarch64/aarch64.c (aarch64_rtx_costs): Set default costs. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@210498 138bc75d-0d04-0410-961f-82ee72b054a4 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> Philipp Tomsich <philipp.tomsich@theobroma-systems.com> Improve SET cost. gcc/ * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costing for SET RTX. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@210499 138bc75d-0d04-0410-961f-82ee72b054a4 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> Philipp Tomsich <philipp.tomsich@theobroma-systems.com> Cost memory accesses using address costs gcc/ * config/aarch64/aarch64.c (aarch64_rtx_costs): Use address costs when costing loads and stores to memory. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@210500 138bc75d-0d04-0410-961f-82ee72b054a4 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> Philipp Tomsich <philipp.tomsich@theobroma-systems.com> Better cost logical operations gcc/ * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve cost for logical operations. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@210501 138bc75d-0d04-0410-961f-82ee72b054a4 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> Philipp Tomsich <philipp.tomsich@theobroma-systems.com> Improve costs for sign/zero extend operations gcc/ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost ZERO_EXTEND and SIGN_EXTEND better. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@210502 138bc75d-0d04-0410-961f-82ee72b054a4 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> Philipp Tomsich <philipp.tomsich@theobroma-systems.com> Improve costs for rotate and shift operations. * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costs for rotates and shifts. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@210503 138bc75d-0d04-0410-961f-82ee72b054a4 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> Philipp Tomsich <philipp.tomsich@theobroma-systems.com> Improve costs for sign/zero extracts gcc/ * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costs for SIGN/ZERO_EXTRACT. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@210504 138bc75d-0d04-0410-961f-82ee72b054a4 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> Improve costs for div/mod gcc/ * config/aarch64/aarch64.c (aarch64_rtx_costs): Improve costs for DIV/MOD. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@210505 138bc75d-0d04-0410-961f-82ee72b054a4 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> Philipp Tomsich <philipp.tomsich@theobroma-systems.com> Cost comparisons, flag setting operators and IF_THEN_ELSE gcc/ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost comparison operators. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@210506 138bc75d-0d04-0410-961f-82ee72b054a4 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> Philipp Tomsich <philipp.tomsich@theobroma-systems.com> Cost more Floating point RTX. gcc/ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost FMA, FLOAT_EXTEND, FLOAT_TRUNCATE, ABS, SMAX, and SMIN. 
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@210507 138bc75d-0d04-0410-961f-82ee72b054a4 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> Philipp Tomsich <philipp.tomsich@theobroma-systems.com> Cost TRUNCATE gcc/ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost TRUNCATE. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@210508 138bc75d-0d04-0410-961f-82ee72b054a4 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> Cost for SYMBOL_REF, HIGH and LO_SUM gcc/ * config/aarch64/aarch64.c (aarch64_rtx_costs): Cost SYMBOL_REF, HIGH, LO_SUM. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@210509 138bc75d-0d04-0410-961f-82ee72b054a4 2014-05-16 James Greenhalgh <james.greenhalgh@arm.com> Dump a message if we are unable to cost an insn. gcc/ * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle the case where we were unable to cost an RTX. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@210510 138bc75d-0d04-0410-961f-82ee72b054a4 2014-08-26 Evandro Menezes <e.menezes@samsung.com> Fix typos in cost data structure. * config/aarch64/aarch64.c (generic_addrcost_table): Delete qi cost; add di cost. (cortexa57_addrcost_table): Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@214503 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc-4.9/gcc')
-rw-r--r--gcc-4.9/gcc/config/aarch64/aarch64-protos.h13
-rw-r--r--gcc-4.9/gcc/config/aarch64/aarch64.c1285
2 files changed, 1092 insertions, 206 deletions
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-protos.h b/gcc-4.9/gcc/config/aarch64/aarch64-protos.h
index 8b0a705..e78348e 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc-4.9/gcc/config/aarch64/aarch64-protos.h
@@ -108,9 +108,22 @@ enum aarch64_symbol_type
cost models and vectors for address cost calculations, register
move costs and memory move costs. */
+/* Scaled addressing modes can vary cost depending on the mode of the
+ value to be loaded/stored. QImode values cannot use scaled
+ addressing modes. */
+
+struct scale_addr_mode_cost
+{
+ const int hi;
+ const int si;
+ const int di;
+ const int ti;
+};
+
/* Additional cost for addresses. */
struct cpu_addrcost_table
{
+ const struct scale_addr_mode_cost addr_scale_costs;
const int pre_modify;
const int post_modify;
const int register_offset;
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.c b/gcc-4.9/gcc/config/aarch64/aarch64.c
index 6d45984..d8895ee 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64.c
+++ b/gcc-4.9/gcc/config/aarch64/aarch64.c
@@ -141,6 +141,7 @@ static bool aarch64_const_vec_all_same_int_p (rtx,
static bool aarch64_vectorize_vec_perm_const_ok (enum machine_mode vmode,
const unsigned char *sel);
+static int aarch64_address_cost (rtx, enum machine_mode, addr_space_t, bool);
/* The processor for which instructions should be scheduled. */
enum aarch64_processor aarch64_tune = cortexa53;
@@ -171,6 +172,15 @@ __extension__
#endif
static const struct cpu_addrcost_table generic_addrcost_table =
{
+#if HAVE_DESIGNATED_INITIALIZERS
+ .addr_scale_costs =
+#endif
+ {
+ NAMED_PARAM (hi, 0),
+ NAMED_PARAM (si, 0),
+ NAMED_PARAM (di, 0),
+ NAMED_PARAM (ti, 0),
+ },
NAMED_PARAM (pre_modify, 0),
NAMED_PARAM (post_modify, 0),
NAMED_PARAM (register_offset, 0),
@@ -181,6 +191,27 @@ static const struct cpu_addrcost_table generic_addrcost_table =
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
+static const struct cpu_addrcost_table cortexa57_addrcost_table =
+{
+#if HAVE_DESIGNATED_INITIALIZERS
+ .addr_scale_costs =
+#endif
+ {
+ NAMED_PARAM (hi, 1),
+ NAMED_PARAM (si, 0),
+ NAMED_PARAM (di, 0),
+ NAMED_PARAM (ti, 1),
+ },
+ NAMED_PARAM (pre_modify, 0),
+ NAMED_PARAM (post_modify, 0),
+ NAMED_PARAM (register_offset, 0),
+ NAMED_PARAM (register_extend, 0),
+ NAMED_PARAM (imm_offset, 0),
+};
+
+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
+__extension__
+#endif
static const struct cpu_regmove_cost generic_regmove_cost =
{
NAMED_PARAM (GP2GP, 1),
@@ -214,6 +245,26 @@ static const struct cpu_vector_cost generic_vector_cost =
NAMED_PARAM (cond_not_taken_branch_cost, 1)
};
+/* Cortex-A57 costs for vector insn classes. */
+#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
+__extension__
+#endif
+static const struct cpu_vector_cost cortexa57_vector_cost =
+{
+ NAMED_PARAM (scalar_stmt_cost, 1),
+ NAMED_PARAM (scalar_load_cost, 4),
+ NAMED_PARAM (scalar_store_cost, 1),
+ NAMED_PARAM (vec_stmt_cost, 3),
+ NAMED_PARAM (vec_to_scalar_cost, 8),
+ NAMED_PARAM (scalar_to_vec_cost, 8),
+ NAMED_PARAM (vec_align_load_cost, 5),
+ NAMED_PARAM (vec_unalign_load_cost, 5),
+ NAMED_PARAM (vec_unalign_store_cost, 1),
+ NAMED_PARAM (vec_store_cost, 1),
+ NAMED_PARAM (cond_taken_branch_cost, 1),
+ NAMED_PARAM (cond_not_taken_branch_cost, 1)
+};
+
#if HAVE_DESIGNATED_INITIALIZERS && GCC_VERSION >= 2007
__extension__
#endif
@@ -240,9 +291,9 @@ static const struct tune_params cortexa53_tunings =
static const struct tune_params cortexa57_tunings =
{
&cortexa57_extra_costs,
- &generic_addrcost_table,
+ &cortexa57_addrcost_table,
&generic_regmove_cost,
- &generic_vector_cost,
+ &cortexa57_vector_cost,
NAMED_PARAM (memmov_cost, 4),
NAMED_PARAM (issue_rate, 3)
};
@@ -446,7 +497,7 @@ aarch64_is_long_call_p (rtx sym)
represent an expression that matches an extend operation. The
operands represent the paramters from
- (extract (mult (reg) (mult_imm)) (extract_imm) (const_int 0)). */
+ (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
bool
aarch64_is_extend_from_extract (enum machine_mode mode, rtx mult_imm,
rtx extract_imm)
@@ -2463,12 +2514,22 @@ aarch64_final_eh_return_addr (void)
- 2 * UNITS_PER_WORD));
}
-/* Output code to build up a constant in a register. */
-static void
-aarch64_build_constant (int regnum, HOST_WIDE_INT val)
+/* Possibly output code to build up a constant in a register. For
+ the benefit of the costs infrastructure, returns the number of
+ instructions which would be emitted. GENERATE inhibits or
+ enables code generation. */
+
+static int
+aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
{
+ int insns = 0;
+
if (aarch64_bitmask_imm (val, DImode))
- emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
+ {
+ if (generate)
+ emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
+ insns = 1;
+ }
else
{
int i;
@@ -2499,15 +2560,19 @@ aarch64_build_constant (int regnum, HOST_WIDE_INT val)
the same. */
if (ncount < zcount)
{
- emit_move_insn (gen_rtx_REG (Pmode, regnum),
- GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
+ if (generate)
+ emit_move_insn (gen_rtx_REG (Pmode, regnum),
+ GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
tval = 0xffff;
+ insns++;
}
else
{
- emit_move_insn (gen_rtx_REG (Pmode, regnum),
- GEN_INT (val & 0xffff));
+ if (generate)
+ emit_move_insn (gen_rtx_REG (Pmode, regnum),
+ GEN_INT (val & 0xffff));
tval = 0;
+ insns++;
}
val >>= 16;
@@ -2515,11 +2580,17 @@ aarch64_build_constant (int regnum, HOST_WIDE_INT val)
for (i = 16; i < 64; i += 16)
{
if ((val & 0xffff) != tval)
- emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
- GEN_INT (i), GEN_INT (val & 0xffff)));
+ {
+ if (generate)
+ emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
+ GEN_INT (i),
+ GEN_INT (val & 0xffff)));
+ insns++;
+ }
val >>= 16;
}
}
+ return insns;
}
static void
@@ -2534,7 +2605,7 @@ aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
if (mdelta >= 4096 * 4096)
{
- aarch64_build_constant (scratchreg, delta);
+ (void) aarch64_build_constant (scratchreg, delta, true);
emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
}
else if (mdelta > 0)
@@ -2608,7 +2679,7 @@ aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
addr = plus_constant (Pmode, temp0, vcall_offset);
else
{
- aarch64_build_constant (IP1_REGNUM, vcall_offset);
+ (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
addr = gen_rtx_PLUS (Pmode, temp0, temp1);
}
@@ -4471,12 +4542,12 @@ aarch64_strip_shift (rtx x)
return x;
}
-/* Helper function for rtx cost calculation. Strip a shift or extend
+/* Helper function for rtx cost calculation. Strip an extend
expression from X. Returns the inner operand if successful, or the
original expression on failure. We deal with a number of possible
canonicalization variations here. */
static rtx
-aarch64_strip_shift_or_extend (rtx x)
+aarch64_strip_extend (rtx x)
{
rtx op = x;
@@ -4512,7 +4583,246 @@ aarch64_strip_shift_or_extend (rtx x)
if (op != x)
return op;
- return aarch64_strip_shift (x);
+ return x;
+}
+
+/* Helper function for rtx cost calculation. Calculate the cost of
+ a MULT, which may be part of a multiply-accumulate rtx. Return
+ the calculated cost of the expression, recursing manually in to
+ operands where needed. */
+
+static int
+aarch64_rtx_mult_cost (rtx x, int code, int outer, bool speed)
+{
+ rtx op0, op1;
+ const struct cpu_cost_table *extra_cost
+ = aarch64_tune_params->insn_extra_cost;
+ int cost = 0;
+ bool maybe_fma = (outer == PLUS || outer == MINUS);
+ enum machine_mode mode = GET_MODE (x);
+
+ gcc_checking_assert (code == MULT);
+
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+
+ if (VECTOR_MODE_P (mode))
+ mode = GET_MODE_INNER (mode);
+
+ /* Integer multiply/fma. */
+ if (GET_MODE_CLASS (mode) == MODE_INT)
+ {
+ /* The multiply will be canonicalized as a shift, cost it as such. */
+ if (CONST_INT_P (op1)
+ && exact_log2 (INTVAL (op1)) > 0)
+ {
+ if (speed)
+ {
+ if (maybe_fma)
+ /* ADD (shifted register). */
+ cost += extra_cost->alu.arith_shift;
+ else
+ /* LSL (immediate). */
+ cost += extra_cost->alu.shift;
+ }
+
+ cost += rtx_cost (op0, GET_CODE (op0), 0, speed);
+
+ return cost;
+ }
+
+ /* Integer multiplies or FMAs have zero/sign extending variants. */
+ if ((GET_CODE (op0) == ZERO_EXTEND
+ && GET_CODE (op1) == ZERO_EXTEND)
+ || (GET_CODE (op0) == SIGN_EXTEND
+ && GET_CODE (op1) == SIGN_EXTEND))
+ {
+ cost += rtx_cost (XEXP (op0, 0), MULT, 0, speed)
+ + rtx_cost (XEXP (op1, 0), MULT, 1, speed);
+
+ if (speed)
+ {
+ if (maybe_fma)
+ /* MADD/SMADDL/UMADDL. */
+ cost += extra_cost->mult[0].extend_add;
+ else
+ /* MUL/SMULL/UMULL. */
+ cost += extra_cost->mult[0].extend;
+ }
+
+ return cost;
+ }
+
+ /* This is either an integer multiply or an FMA. In both cases
+ we want to recurse and cost the operands. */
+ cost += rtx_cost (op0, MULT, 0, speed)
+ + rtx_cost (op1, MULT, 1, speed);
+
+ if (speed)
+ {
+ if (maybe_fma)
+ /* MADD. */
+ cost += extra_cost->mult[mode == DImode].add;
+ else
+ /* MUL. */
+ cost += extra_cost->mult[mode == DImode].simple;
+ }
+
+ return cost;
+ }
+ else
+ {
+ if (speed)
+ {
+ /* Floating-point FMA can also support negations of the
+ operands. */
+ if (GET_CODE (op0) == NEG)
+ {
+ maybe_fma = true;
+ op0 = XEXP (op0, 0);
+ }
+ if (GET_CODE (op1) == NEG)
+ {
+ maybe_fma = true;
+ op1 = XEXP (op1, 0);
+ }
+
+ if (maybe_fma)
+ /* FMADD/FNMADD/FNMSUB/FMSUB. */
+ cost += extra_cost->fp[mode == DFmode].fma;
+ else
+ /* FMUL. */
+ cost += extra_cost->fp[mode == DFmode].mult;
+ }
+
+ cost += rtx_cost (op0, MULT, 0, speed)
+ + rtx_cost (op1, MULT, 1, speed);
+ return cost;
+ }
+}
+
+static int
+aarch64_address_cost (rtx x,
+ enum machine_mode mode,
+ addr_space_t as ATTRIBUTE_UNUSED,
+ bool speed)
+{
+ enum rtx_code c = GET_CODE (x);
+ const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
+ struct aarch64_address_info info;
+ int cost = 0;
+ info.shift = 0;
+
+ if (!aarch64_classify_address (&info, x, mode, c, false))
+ {
+ if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF)
+ {
+ /* This is a CONST or SYMBOL ref which will be split
+ in a different way depending on the code model in use.
+ Cost it through the generic infrastructure. */
+ int cost_symbol_ref = rtx_cost (x, MEM, 1, speed);
+ /* Divide through by the cost of one instruction to
+ bring it to the same units as the address costs. */
+ cost_symbol_ref /= COSTS_N_INSNS (1);
+ /* The cost is then the cost of preparing the address,
+ followed by an immediate (possibly 0) offset. */
+ return cost_symbol_ref + addr_cost->imm_offset;
+ }
+ else
+ {
+ /* This is most likely a jump table from a case
+ statement. */
+ return addr_cost->register_offset;
+ }
+ }
+
+ switch (info.type)
+ {
+ case ADDRESS_LO_SUM:
+ case ADDRESS_SYMBOLIC:
+ case ADDRESS_REG_IMM:
+ cost += addr_cost->imm_offset;
+ break;
+
+ case ADDRESS_REG_WB:
+ if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
+ cost += addr_cost->pre_modify;
+ else if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
+ cost += addr_cost->post_modify;
+ else
+ gcc_unreachable ();
+
+ break;
+
+ case ADDRESS_REG_REG:
+ cost += addr_cost->register_offset;
+ break;
+
+ case ADDRESS_REG_UXTW:
+ case ADDRESS_REG_SXTW:
+ cost += addr_cost->register_extend;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+
+ if (info.shift > 0)
+ {
+ /* For the sake of calculating the cost of the shifted register
+ component, we can treat same sized modes in the same way. */
+ switch (GET_MODE_BITSIZE (mode))
+ {
+ case 16:
+ cost += addr_cost->addr_scale_costs.hi;
+ break;
+
+ case 32:
+ cost += addr_cost->addr_scale_costs.si;
+ break;
+
+ case 64:
+ cost += addr_cost->addr_scale_costs.di;
+ break;
+
+ /* We can't tell, or this is a 128-bit vector. */
+ default:
+ cost += addr_cost->addr_scale_costs.ti;
+ break;
+ }
+ }
+
+ return cost;
+}
+
+/* Return true if the RTX X in mode MODE is a zero or sign extract
+ usable in an ADD or SUB (extended register) instruction. */
+static bool
+aarch64_rtx_arith_op_extract_p (rtx x, enum machine_mode mode)
+{
+ /* Catch add with a sign extract.
+ This is add_<optab><mode>_multp2. */
+ if (GET_CODE (x) == SIGN_EXTRACT
+ || GET_CODE (x) == ZERO_EXTRACT)
+ {
+ rtx op0 = XEXP (x, 0);
+ rtx op1 = XEXP (x, 1);
+ rtx op2 = XEXP (x, 2);
+
+ if (GET_CODE (op0) == MULT
+ && CONST_INT_P (op1)
+ && op2 == const0_rtx
+ && CONST_INT_P (XEXP (op0, 1))
+ && aarch64_is_extend_from_extract (mode,
+ XEXP (op0, 1),
+ op1))
+ {
+ return true;
+ }
+ }
+
+ return false;
}
/* Calculate the cost of calculating X, storing it in *COST. Result
@@ -4521,13 +4831,31 @@ static bool
aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
int param ATTRIBUTE_UNUSED, int *cost, bool speed)
{
- rtx op0, op1;
+ rtx op0, op1, op2;
const struct cpu_cost_table *extra_cost
= aarch64_tune_params->insn_extra_cost;
+ enum machine_mode mode = GET_MODE (x);
+
+ /* By default, assume that everything has equivalent cost to the
+ cheapest instruction. Any additional costs are applied as a delta
+ above this default. */
+ *cost = COSTS_N_INSNS (1);
+
+ /* TODO: The cost infrastructure currently does not handle
+ vector operations. Assume that all vector operations
+ are equally expensive. */
+ if (VECTOR_MODE_P (mode))
+ {
+ if (speed)
+ *cost += extra_cost->vect.alu;
+ return true;
+ }
switch (code)
{
case SET:
+ /* The cost depends entirely on the operands to SET. */
+ *cost = 0;
op0 = SET_DEST (x);
op1 = SET_SRC (x);
@@ -4535,25 +4863,47 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
{
case MEM:
if (speed)
- *cost += extra_cost->ldst.store;
+ {
+ rtx address = XEXP (op0, 0);
+ if (GET_MODE_CLASS (mode) == MODE_INT)
+ *cost += extra_cost->ldst.store;
+ else if (mode == SFmode)
+ *cost += extra_cost->ldst.storef;
+ else if (mode == DFmode)
+ *cost += extra_cost->ldst.stored;
+
+ *cost +=
+ COSTS_N_INSNS (aarch64_address_cost (address, mode,
+ 0, speed));
+ }
- if (op1 != const0_rtx)
- *cost += rtx_cost (op1, SET, 1, speed);
+ *cost += rtx_cost (op1, SET, 1, speed);
return true;
case SUBREG:
if (! REG_P (SUBREG_REG (op0)))
*cost += rtx_cost (SUBREG_REG (op0), SET, 0, speed);
+
/* Fall through. */
case REG:
- /* Cost is just the cost of the RHS of the set. */
- *cost += rtx_cost (op1, SET, 1, true);
+ /* const0_rtx is in general free, but we will use an
+ instruction to set a register to 0. */
+ if (REG_P (op1) || op1 == const0_rtx)
+ {
+ /* The cost is 1 per register copied. */
+ int n_minus_1 = (GET_MODE_SIZE (GET_MODE (op0)) - 1)
+ / UNITS_PER_WORD;
+ *cost = COSTS_N_INSNS (n_minus_1 + 1);
+ }
+ else
+ /* Cost is just the cost of the RHS of the set. */
+ *cost += rtx_cost (op1, SET, 1, speed);
return true;
- case ZERO_EXTRACT: /* Bit-field insertion. */
+ case ZERO_EXTRACT:
case SIGN_EXTRACT:
- /* Strip any redundant widening of the RHS to meet the width of
- the target. */
+ /* Bit-field insertion. Strip any redundant widening of
+ the RHS to meet the width of the target. */
if (GET_CODE (op1) == SUBREG)
op1 = SUBREG_REG (op1);
if ((GET_CODE (op1) == ZERO_EXTEND
@@ -4562,24 +4912,138 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
&& (GET_MODE_BITSIZE (GET_MODE (XEXP (op1, 0)))
>= INTVAL (XEXP (op0, 1))))
op1 = XEXP (op1, 0);
- *cost += rtx_cost (op1, SET, 1, speed);
+
+ if (CONST_INT_P (op1))
+ {
+ /* MOV immediate is assumed to always be cheap. */
+ *cost = COSTS_N_INSNS (1);
+ }
+ else
+ {
+ /* BFM. */
+ if (speed)
+ *cost += extra_cost->alu.bfi;
+ *cost += rtx_cost (op1, (enum rtx_code) code, 1, speed);
+ }
+
return true;
default:
+ /* We can't make sense of this, assume default cost. */
+ *cost = COSTS_N_INSNS (1);
break;
}
return false;
+ case CONST_INT:
+ /* If an instruction can incorporate a constant within the
+ instruction, the instruction's expression avoids calling
+ rtx_cost() on the constant. If rtx_cost() is called on a
+ constant, then it is usually because the constant must be
+ moved into a register by one or more instructions.
+
+ The exception is constant 0, which can be expressed
+ as XZR/WZR and is therefore free. The exception to this is
+ if we have (set (reg) (const0_rtx)) in which case we must cost
+ the move. However, we can catch that when we cost the SET, so
+ we don't need to consider that here. */
+ if (x == const0_rtx)
+ *cost = 0;
+ else
+ {
+ /* To an approximation, building any other constant is
+ proportionally expensive to the number of instructions
+ required to build that constant. This is true whether we
+ are compiling for SPEED or otherwise. */
+ *cost = COSTS_N_INSNS (aarch64_build_constant (0,
+ INTVAL (x),
+ false));
+ }
+ return true;
+
+ case CONST_DOUBLE:
+ if (speed)
+ {
+ /* mov[df,sf]_aarch64. */
+ if (aarch64_float_const_representable_p (x))
+ /* FMOV (scalar immediate). */
+ *cost += extra_cost->fp[mode == DFmode].fpconst;
+ else if (!aarch64_float_const_zero_rtx_p (x))
+ {
+ /* This will be a load from memory. */
+ if (mode == DFmode)
+ *cost += extra_cost->ldst.loadd;
+ else
+ *cost += extra_cost->ldst.loadf;
+ }
+ else
+ /* Otherwise this is +0.0. We get this using MOVI d0, #0
+ or MOV v0.s[0], wzr - neither of which are modeled by the
+ cost tables. Just use the default cost. */
+ {
+ }
+ }
+
+ return true;
+
case MEM:
if (speed)
- *cost += extra_cost->ldst.load;
+ {
+ /* For loads we want the base cost of a load, plus an
+ approximation for the additional cost of the addressing
+ mode. */
+ rtx address = XEXP (x, 0);
+ if (GET_MODE_CLASS (mode) == MODE_INT)
+ *cost += extra_cost->ldst.load;
+ else if (mode == SFmode)
+ *cost += extra_cost->ldst.loadf;
+ else if (mode == DFmode)
+ *cost += extra_cost->ldst.loadd;
+
+ *cost +=
+ COSTS_N_INSNS (aarch64_address_cost (address, mode,
+ 0, speed));
+ }
return true;
case NEG:
- op0 = CONST0_RTX (GET_MODE (x));
- op1 = XEXP (x, 0);
- goto cost_minus;
+ op0 = XEXP (x, 0);
+
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
+ {
+ if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
+ || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
+ {
+ /* CSETM. */
+ *cost += rtx_cost (XEXP (op0, 0), NEG, 0, speed);
+ return true;
+ }
+
+ /* Cost this as SUB wzr, X. */
+ op0 = CONST0_RTX (GET_MODE (x));
+ op1 = XEXP (x, 0);
+ goto cost_minus;
+ }
+
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ {
+ /* Support (neg(fma...)) as a single instruction only if
+ sign of zeros is unimportant. This matches the decision
+ making in aarch64.md. */
+ if (GET_CODE (op0) == FMA && !HONOR_SIGNED_ZEROS (GET_MODE (op0)))
+ {
+ /* FNMADD. */
+ *cost = rtx_cost (op0, NEG, 0, speed);
+ return true;
+ }
+ if (speed)
+ /* FNEG. */
+ *cost += extra_cost->fp[mode == DFmode].neg;
+ return false;
+ }
+
+ return false;
case COMPARE:
op0 = XEXP (x, 0);
@@ -4592,94 +5056,208 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
goto cost_logic;
}
- /* Comparisons can work if the order is swapped.
- Canonicalization puts the more complex operation first, but
- we want it in op1. */
- if (! (REG_P (op0)
- || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
- {
- op0 = XEXP (x, 1);
- op1 = XEXP (x, 0);
- }
- goto cost_minus;
+ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
+ {
+ /* TODO: A write to the CC flags possibly costs extra, this
+ needs encoding in the cost tables. */
+
+ /* CC_ZESWPmode supports zero extend for free. */
+ if (GET_MODE (x) == CC_ZESWPmode && GET_CODE (op0) == ZERO_EXTEND)
+ op0 = XEXP (op0, 0);
+
+ /* ANDS. */
+ if (GET_CODE (op0) == AND)
+ {
+ x = op0;
+ goto cost_logic;
+ }
+
+ if (GET_CODE (op0) == PLUS)
+ {
+ /* ADDS (and CMN alias). */
+ x = op0;
+ goto cost_plus;
+ }
+
+ if (GET_CODE (op0) == MINUS)
+ {
+ /* SUBS. */
+ x = op0;
+ goto cost_minus;
+ }
+
+ if (GET_CODE (op1) == NEG)
+ {
+ /* CMN. */
+ if (speed)
+ *cost += extra_cost->alu.arith;
+
+ *cost += rtx_cost (op0, COMPARE, 0, speed);
+ *cost += rtx_cost (XEXP (op1, 0), NEG, 1, speed);
+ return true;
+ }
+
+ /* CMP.
+
+ Compare can freely swap the order of operands, and
+ canonicalization puts the more complex operation first.
+ But the integer MINUS logic expects the shift/extend
+ operation in op1. */
+ if (! (REG_P (op0)
+ || (GET_CODE (op0) == SUBREG && REG_P (SUBREG_REG (op0)))))
+ {
+ op0 = XEXP (x, 1);
+ op1 = XEXP (x, 0);
+ }
+ goto cost_minus;
+ }
+
+ if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
+ {
+ /* FCMP. */
+ if (speed)
+ *cost += extra_cost->fp[mode == DFmode].compare;
+
+ if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
+ {
+ /* FCMP supports constant 0.0 for no extra cost. */
+ return true;
+ }
+ return false;
+ }
+
+ return false;
case MINUS:
- op0 = XEXP (x, 0);
- op1 = XEXP (x, 1);
+ {
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+
+cost_minus:
+ /* Detect valid immediates. */
+ if ((GET_MODE_CLASS (mode) == MODE_INT
+ || (GET_MODE_CLASS (mode) == MODE_CC
+ && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
+ && CONST_INT_P (op1)
+ && aarch64_uimm12_shift (INTVAL (op1)))
+ {
+ *cost += rtx_cost (op0, MINUS, 0, speed);
- cost_minus:
- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
- || (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC
- && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT))
- {
- if (op0 != const0_rtx)
+ if (speed)
+ /* SUB(S) (immediate). */
+ *cost += extra_cost->alu.arith;
+ return true;
+
+ }
+
+ /* Look for SUB (extended register). */
+ if (aarch64_rtx_arith_op_extract_p (op1, mode))
+ {
+ if (speed)
+ *cost += extra_cost->alu.arith_shift;
+
+ *cost += rtx_cost (XEXP (XEXP (op1, 0), 0),
+ (enum rtx_code) GET_CODE (op1),
+ 0, speed);
+ return true;
+ }
+
+ rtx new_op1 = aarch64_strip_extend (op1);
+
+ /* Cost this as an FMA-alike operation. */
+ if ((GET_CODE (new_op1) == MULT
+ || GET_CODE (new_op1) == ASHIFT)
+ && code != COMPARE)
+ {
+ *cost += aarch64_rtx_mult_cost (new_op1, MULT,
+ (enum rtx_code) code,
+ speed);
*cost += rtx_cost (op0, MINUS, 0, speed);
+ return true;
+ }
- if (CONST_INT_P (op1))
- {
- if (!aarch64_uimm12_shift (INTVAL (op1)))
- *cost += rtx_cost (op1, MINUS, 1, speed);
- }
- else
- {
- op1 = aarch64_strip_shift_or_extend (op1);
- *cost += rtx_cost (op1, MINUS, 1, speed);
- }
- return true;
- }
+ *cost += rtx_cost (new_op1, MINUS, 1, speed);
- return false;
+ if (speed)
+ {
+ if (GET_MODE_CLASS (mode) == MODE_INT)
+ /* SUB(S). */
+ *cost += extra_cost->alu.arith;
+ else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ /* FSUB. */
+ *cost += extra_cost->fp[mode == DFmode].addsub;
+ }
+ return true;
+ }
case PLUS:
- op0 = XEXP (x, 0);
- op1 = XEXP (x, 1);
+ {
+ rtx new_op0;
- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
- {
- if (CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1)))
- {
- *cost += rtx_cost (op0, PLUS, 0, speed);
- }
- else
- {
- rtx new_op0 = aarch64_strip_shift_or_extend (op0);
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
- if (new_op0 == op0
- && GET_CODE (op0) == MULT)
- {
- if ((GET_CODE (XEXP (op0, 0)) == ZERO_EXTEND
- && GET_CODE (XEXP (op0, 1)) == ZERO_EXTEND)
- || (GET_CODE (XEXP (op0, 0)) == SIGN_EXTEND
- && GET_CODE (XEXP (op0, 1)) == SIGN_EXTEND))
- {
- *cost += (rtx_cost (XEXP (XEXP (op0, 0), 0), MULT, 0,
- speed)
- + rtx_cost (XEXP (XEXP (op0, 1), 0), MULT, 1,
- speed)
- + rtx_cost (op1, PLUS, 1, speed));
- if (speed)
- *cost +=
- extra_cost->mult[GET_MODE (x) == DImode].extend_add;
- return true;
- }
+cost_plus:
+ if (GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMPARE
+ || GET_RTX_CLASS (GET_CODE (op0)) == RTX_COMM_COMPARE)
+ {
+ /* CSINC. */
+ *cost += rtx_cost (XEXP (op0, 0), PLUS, 0, speed);
+ *cost += rtx_cost (op1, PLUS, 1, speed);
+ return true;
+ }
- *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
- + rtx_cost (XEXP (op0, 1), MULT, 1, speed)
- + rtx_cost (op1, PLUS, 1, speed));
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && CONST_INT_P (op1)
+ && aarch64_uimm12_shift (INTVAL (op1)))
+ {
+ *cost += rtx_cost (op0, PLUS, 0, speed);
- if (speed)
- *cost += extra_cost->mult[GET_MODE (x) == DImode].add;
+ if (speed)
+ /* ADD (immediate). */
+ *cost += extra_cost->alu.arith;
+ return true;
+ }
- return true;
- }
+ /* Look for ADD (extended register). */
+ if (aarch64_rtx_arith_op_extract_p (op0, mode))
+ {
+ if (speed)
+ *cost += extra_cost->alu.arith_shift;
- *cost += (rtx_cost (new_op0, PLUS, 0, speed)
- + rtx_cost (op1, PLUS, 1, speed));
- }
- return true;
- }
+ *cost += rtx_cost (XEXP (XEXP (op0, 0), 0),
+ (enum rtx_code) GET_CODE (op0),
+ 0, speed);
+ return true;
+ }
- return false;
+ /* Strip any extend, leave shifts behind as we will
+ cost them through mult_cost. */
+ new_op0 = aarch64_strip_extend (op0);
+
+ if (GET_CODE (new_op0) == MULT
+ || GET_CODE (new_op0) == ASHIFT)
+ {
+ *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
+ speed);
+ *cost += rtx_cost (op1, PLUS, 1, speed);
+ return true;
+ }
+
+ *cost += (rtx_cost (new_op0, PLUS, 0, speed)
+ + rtx_cost (op1, PLUS, 1, speed));
+
+ if (speed)
+ {
+ if (GET_MODE_CLASS (mode) == MODE_INT)
+ /* ADD. */
+ *cost += extra_cost->alu.arith;
+ else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ /* FADD. */
+ *cost += extra_cost->fp[mode == DFmode].addsub;
+ }
+ return true;
+ }
case IOR:
case XOR:
@@ -4688,117 +5266,252 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
op0 = XEXP (x, 0);
op1 = XEXP (x, 1);
+ if (code == AND
+ && GET_CODE (op0) == MULT
+ && CONST_INT_P (XEXP (op0, 1))
+ && CONST_INT_P (op1)
+ && aarch64_uxt_size (exact_log2 (INTVAL (XEXP (op0, 1))),
+ INTVAL (op1)) != 0)
+ {
+ /* This is a UBFM/SBFM. */
+ *cost += rtx_cost (XEXP (op0, 0), ZERO_EXTRACT, 0, speed);
+ if (speed)
+ *cost += extra_cost->alu.bfx;
+ return true;
+ }
+
if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
{
+ /* We possibly get the immediate for free, this is not
+ modelled. */
if (CONST_INT_P (op1)
&& aarch64_bitmask_imm (INTVAL (op1), GET_MODE (x)))
{
- *cost += rtx_cost (op0, AND, 0, speed);
+ *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
+
+ if (speed)
+ *cost += extra_cost->alu.logical;
+
+ return true;
}
else
{
+ rtx new_op0 = op0;
+
+ /* Handle ORN, EON, or BIC. */
if (GET_CODE (op0) == NOT)
op0 = XEXP (op0, 0);
- op0 = aarch64_strip_shift (op0);
- *cost += (rtx_cost (op0, AND, 0, speed)
- + rtx_cost (op1, AND, 1, speed));
+
+ new_op0 = aarch64_strip_shift (op0);
+
+ /* If we had a shift on op0 then this is a logical-shift-
+ by-register/immediate operation. Otherwise, this is just
+ a logical operation. */
+ if (speed)
+ {
+ if (new_op0 != op0)
+ {
+ /* Shift by immediate. */
+ if (CONST_INT_P (XEXP (op0, 1)))
+ *cost += extra_cost->alu.log_shift;
+ else
+ *cost += extra_cost->alu.log_shift_reg;
+ }
+ else
+ *cost += extra_cost->alu.logical;
+ }
+
+ /* In both cases we want to cost both operands. */
+ *cost += rtx_cost (new_op0, (enum rtx_code) code, 0, speed)
+ + rtx_cost (op1, (enum rtx_code) code, 1, speed);
+
+ return true;
}
- return true;
}
return false;
+ case NOT:
+ /* MVN. */
+ if (speed)
+ *cost += extra_cost->alu.logical;
+
+ /* The logical instruction could have the shifted register form,
+ but the cost is the same if the shift is processed as a separate
+ instruction, so we don't bother with it here. */
+ return false;
+
case ZERO_EXTEND:
- if ((GET_MODE (x) == DImode
- && GET_MODE (XEXP (x, 0)) == SImode)
- || GET_CODE (XEXP (x, 0)) == MEM)
+
+ op0 = XEXP (x, 0);
+ /* If a value is written in SI mode, then zero extended to DI
+ mode, the operation will in general be free as a write to
+ a 'w' register implicitly zeroes the upper bits of an 'x'
+ register. However, if this is
+
+ (set (reg) (zero_extend (reg)))
+
+ we must cost the explicit register move. */
+ if (mode == DImode
+ && GET_MODE (op0) == SImode
+ && outer == SET)
{
- *cost += rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
+ int op_cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, 0, speed);
+
+ if (!op_cost && speed)
+ /* MOV. */
+ *cost += extra_cost->alu.extend;
+ else
+ /* Free, the cost is that of the SI mode operation. */
+ *cost = op_cost;
+
return true;
}
+ else if (MEM_P (XEXP (x, 0)))
+ {
+ /* All loads can zero extend to any size for free. */
+ *cost = rtx_cost (XEXP (x, 0), ZERO_EXTEND, param, speed);
+ return true;
+ }
+
+ /* UXTB/UXTH. */
+ if (speed)
+ *cost += extra_cost->alu.extend;
+
return false;
case SIGN_EXTEND:
- if (GET_CODE (XEXP (x, 0)) == MEM)
+ if (MEM_P (XEXP (x, 0)))
{
- *cost += rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed);
+ /* LDRSH. */
+ if (speed)
+ {
+ rtx address = XEXP (XEXP (x, 0), 0);
+ *cost += extra_cost->ldst.load_sign_extend;
+
+ *cost +=
+ COSTS_N_INSNS (aarch64_address_cost (address, mode,
+ 0, speed));
+ }
return true;
}
+
+ if (speed)
+ *cost += extra_cost->alu.extend;
return false;
- case ROTATE:
- if (!CONST_INT_P (XEXP (x, 1)))
- *cost += COSTS_N_INSNS (2);
- /* Fall through. */
- case ROTATERT:
- case LSHIFTRT:
case ASHIFT:
- case ASHIFTRT:
-
- /* Shifting by a register often takes an extra cycle. */
- if (speed && !CONST_INT_P (XEXP (x, 1)))
- *cost += extra_cost->alu.arith_shift_reg;
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
- *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed);
- return true;
+ if (CONST_INT_P (op1))
+ {
+ /* LSL (immediate), UBFM, UBFIZ and friends. These are all
+ aliases. */
+ if (speed)
+ *cost += extra_cost->alu.shift;
- case HIGH:
- if (!CONSTANT_P (XEXP (x, 0)))
- *cost += rtx_cost (XEXP (x, 0), HIGH, 0, speed);
- return true;
+ /* We can incorporate zero/sign extend for free. */
+ if (GET_CODE (op0) == ZERO_EXTEND
+ || GET_CODE (op0) == SIGN_EXTEND)
+ op0 = XEXP (op0, 0);
- case LO_SUM:
- if (!CONSTANT_P (XEXP (x, 1)))
- *cost += rtx_cost (XEXP (x, 1), LO_SUM, 1, speed);
- *cost += rtx_cost (XEXP (x, 0), LO_SUM, 0, speed);
- return true;
+ *cost += rtx_cost (op0, ASHIFT, 0, speed);
+ return true;
+ }
+ else
+ {
+ /* LSLV. */
+ if (speed)
+ *cost += extra_cost->alu.shift_reg;
- case ZERO_EXTRACT:
- case SIGN_EXTRACT:
- *cost += rtx_cost (XEXP (x, 0), ZERO_EXTRACT, 0, speed);
- return true;
+ return false; /* All arguments need to be in registers. */
+ }
- case MULT:
+ case ROTATE:
+ case ROTATERT:
+ case LSHIFTRT:
+ case ASHIFTRT:
op0 = XEXP (x, 0);
op1 = XEXP (x, 1);
- *cost = COSTS_N_INSNS (1);
- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
+ if (CONST_INT_P (op1))
{
- if (CONST_INT_P (op1)
- && exact_log2 (INTVAL (op1)) > 0)
- {
- *cost += rtx_cost (op0, ASHIFT, 0, speed);
- return true;
- }
+ /* ASR (immediate) and friends. */
+ if (speed)
+ *cost += extra_cost->alu.shift;
- if ((GET_CODE (op0) == ZERO_EXTEND
- && GET_CODE (op1) == ZERO_EXTEND)
- || (GET_CODE (op0) == SIGN_EXTEND
- && GET_CODE (op1) == SIGN_EXTEND))
- {
- *cost += (rtx_cost (XEXP (op0, 0), MULT, 0, speed)
- + rtx_cost (XEXP (op1, 0), MULT, 1, speed));
- if (speed)
- *cost += extra_cost->mult[GET_MODE (x) == DImode].extend;
- return true;
- }
+ *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
+ return true;
+ }
+ else
+ {
+ /* ASR (register) and friends. */
if (speed)
- *cost += extra_cost->mult[GET_MODE (x) == DImode].simple;
+ *cost += extra_cost->alu.shift_reg;
+
+ return false; /* All arguments need to be in registers. */
}
- else if (speed)
+
+ case SYMBOL_REF:
+
+ if (aarch64_cmodel == AARCH64_CMODEL_LARGE)
{
- if (GET_MODE (x) == DFmode)
- *cost += extra_cost->fp[1].mult;
- else if (GET_MODE (x) == SFmode)
- *cost += extra_cost->fp[0].mult;
+ /* LDR. */
+ if (speed)
+ *cost += extra_cost->ldst.load;
+ }
+ else if (aarch64_cmodel == AARCH64_CMODEL_SMALL
+ || aarch64_cmodel == AARCH64_CMODEL_SMALL_PIC)
+ {
+ /* ADRP, followed by ADD. */
+ *cost += COSTS_N_INSNS (1);
+ if (speed)
+ *cost += 2 * extra_cost->alu.arith;
+ }
+ else if (aarch64_cmodel == AARCH64_CMODEL_TINY
+ || aarch64_cmodel == AARCH64_CMODEL_TINY_PIC)
+ {
+ /* ADR. */
+ if (speed)
+ *cost += extra_cost->alu.arith;
}
- return false; /* All arguments need to be in registers. */
+ if (flag_pic)
+ {
+ /* One extra load instruction, after accessing the GOT. */
+ *cost += COSTS_N_INSNS (1);
+ if (speed)
+ *cost += extra_cost->ldst.load;
+ }
+ return true;
+
+ case HIGH:
+ case LO_SUM:
+ /* ADRP/ADD (immediate). */
+ if (speed)
+ *cost += extra_cost->alu.arith;
+ return true;
+
+ case ZERO_EXTRACT:
+ case SIGN_EXTRACT:
+ /* UBFX/SBFX. */
+ if (speed)
+ *cost += extra_cost->alu.bfx;
+
+ /* We can trust that the immediates used will be correct (there
+ are no by-register forms), so we need only cost op0. */
+ *cost += rtx_cost (XEXP (x, 0), (enum rtx_code) code, 0, speed);
+ return true;
+
+ case MULT:
+ *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
+ /* aarch64_rtx_mult_cost always handles recursion to its
+ operands. */
+ return true;
case MOD:
case UMOD:
- *cost = COSTS_N_INSNS (2);
if (speed)
{
if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
@@ -4815,53 +5528,213 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
case DIV:
case UDIV:
- *cost = COSTS_N_INSNS (1);
+ case SQRT:
if (speed)
{
- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
- *cost += extra_cost->mult[GET_MODE (x) == DImode].idiv;
- else if (GET_MODE (x) == DFmode)
- *cost += extra_cost->fp[1].div;
- else if (GET_MODE (x) == SFmode)
- *cost += extra_cost->fp[0].div;
+ if (GET_MODE_CLASS (mode) == MODE_INT)
+ /* There is no integer SQRT, so only DIV and UDIV can get
+ here. */
+ *cost += extra_cost->mult[mode == DImode].idiv;
+ else
+ *cost += extra_cost->fp[mode == DFmode].div;
}
return false; /* All arguments need to be in registers. */
- default:
- break;
- }
- return false;
-}
+ case IF_THEN_ELSE:
+ op2 = XEXP (x, 2);
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
-static int
-aarch64_address_cost (rtx x ATTRIBUTE_UNUSED,
- enum machine_mode mode ATTRIBUTE_UNUSED,
- addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
-{
- enum rtx_code c = GET_CODE (x);
- const struct cpu_addrcost_table *addr_cost = aarch64_tune_params->addr_cost;
+ if (GET_CODE (op1) == PC || GET_CODE (op2) == PC)
+ {
+ /* Conditional branch. */
+ if (GET_MODE_CLASS (GET_MODE (XEXP (op0, 0))) == MODE_CC)
+ return true;
+ else
+ {
+ if (GET_CODE (op0) == NE
+ || GET_CODE (op0) == EQ)
+ {
+ rtx inner = XEXP (op0, 0);
+ rtx comparator = XEXP (op0, 1);
- if (c == PRE_INC || c == PRE_DEC || c == PRE_MODIFY)
- return addr_cost->pre_modify;
+ if (comparator == const0_rtx)
+ {
+ /* TBZ/TBNZ/CBZ/CBNZ. */
+ if (GET_CODE (inner) == ZERO_EXTRACT)
+ /* TBZ/TBNZ. */
+ *cost += rtx_cost (XEXP (inner, 0), ZERO_EXTRACT,
+ 0, speed);
+ else
+ /* CBZ/CBNZ. */
+ *cost += rtx_cost (inner, GET_CODE (op0), 0, speed);
- if (c == POST_INC || c == POST_DEC || c == POST_MODIFY)
- return addr_cost->post_modify;
+ return true;
+ }
+ }
+ else if (GET_CODE (op0) == LT
+ || GET_CODE (op0) == GE)
+ {
+ rtx comparator = XEXP (op0, 1);
- if (c == PLUS)
- {
- if (GET_CODE (XEXP (x, 1)) == CONST_INT)
- return addr_cost->imm_offset;
- else if (GET_CODE (XEXP (x, 0)) == MULT
- || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
- || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
- return addr_cost->register_extend;
+ /* TBZ/TBNZ. */
+ if (comparator == const0_rtx)
+ return true;
+ }
+ }
+ }
+ else if (GET_MODE_CLASS (GET_MODE (XEXP (op0, 0))) == MODE_CC)
+ {
+ /* It's a conditional operation based on the status flags,
+ so it must be some flavor of CSEL. */
+
+ /* CSNEG, CSINV, and CSINC are handled for free as part of CSEL. */
+ if (GET_CODE (op1) == NEG
+ || GET_CODE (op1) == NOT
+ || (GET_CODE (op1) == PLUS && XEXP (op1, 1) == const1_rtx))
+ op1 = XEXP (op1, 0);
+
+ *cost += rtx_cost (op1, IF_THEN_ELSE, 1, speed);
+ *cost += rtx_cost (op2, IF_THEN_ELSE, 2, speed);
+ return true;
+ }
- return addr_cost->register_offset;
- }
- else if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
- return addr_cost->imm_offset;
+ /* We don't know what this is, cost all operands. */
+ return false;
- return 0;
+ case EQ:
+ case NE:
+ case GT:
+ case GTU:
+ case LT:
+ case LTU:
+ case GE:
+ case GEU:
+ case LE:
+ case LEU:
+
+ return false; /* All arguments must be in registers. */
+
+ case FMA:
+ op0 = XEXP (x, 0);
+ op1 = XEXP (x, 1);
+ op2 = XEXP (x, 2);
+
+ if (speed)
+ *cost += extra_cost->fp[mode == DFmode].fma;
+
+ /* FMSUB, FNMADD, and FNMSUB are free. */
+ if (GET_CODE (op0) == NEG)
+ op0 = XEXP (op0, 0);
+
+ if (GET_CODE (op2) == NEG)
+ op2 = XEXP (op2, 0);
+
+ /* aarch64_fnma4_elt_to_64v2df has the NEG as operand 1,
+ and the by-element operand as operand 0. */
+ if (GET_CODE (op1) == NEG)
+ op1 = XEXP (op1, 0);
+
+ /* Catch vector-by-element operations. The by-element operand can
+ either be (vec_duplicate (vec_select (x))) or just
+ (vec_select (x)), depending on whether we are multiplying by
+ a vector or a scalar.
+
+ Canonicalization is not very good in these cases, FMA4 will put the
+ by-element operand as operand 0, FNMA4 will have it as operand 1. */
+ if (GET_CODE (op0) == VEC_DUPLICATE)
+ op0 = XEXP (op0, 0);
+ else if (GET_CODE (op1) == VEC_DUPLICATE)
+ op1 = XEXP (op1, 0);
+
+ if (GET_CODE (op0) == VEC_SELECT)
+ op0 = XEXP (op0, 0);
+ else if (GET_CODE (op1) == VEC_SELECT)
+ op1 = XEXP (op1, 0);
+
+ /* If the remaining parameters are not registers,
+ get the cost to put them into registers. */
+ *cost += rtx_cost (op0, FMA, 0, speed);
+ *cost += rtx_cost (op1, FMA, 1, speed);
+ *cost += rtx_cost (op2, FMA, 2, speed);
+ return true;
+
+ case FLOAT_EXTEND:
+ if (speed)
+ *cost += extra_cost->fp[mode == DFmode].widen;
+ return false;
+
+ case FLOAT_TRUNCATE:
+ if (speed)
+ *cost += extra_cost->fp[mode == DFmode].narrow;
+ return false;
+
+ case ABS:
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ /* FABS and FNEG are analogous. */
+ if (speed)
+ *cost += extra_cost->fp[mode == DFmode].neg;
+ }
+ else
+ {
+ /* Integer ABS will either be split to
+ two arithmetic instructions, or will be an ABS
+ (scalar), which we don't model. */
+ *cost = COSTS_N_INSNS (2);
+ if (speed)
+ *cost += 2 * extra_cost->alu.arith;
+ }
+ return false;
+
+ case SMAX:
+ case SMIN:
+ if (speed)
+ {
+ /* FMAXNM/FMINNM/FMAX/FMIN.
+ TODO: This may not be accurate for all implementations, but
+ we do not model this in the cost tables. */
+ *cost += extra_cost->fp[mode == DFmode].addsub;
+ }
+ return false;
+
+ case TRUNCATE:
+
+ /* Decompose <su>muldi3_highpart. */
+ if (/* (truncate:DI */
+ mode == DImode
+ /* (lshiftrt:TI */
+ && GET_MODE (XEXP (x, 0)) == TImode
+ && GET_CODE (XEXP (x, 0)) == LSHIFTRT
+ /* (mult:TI */
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
+ /* (ANY_EXTEND:TI (reg:DI))
+ (ANY_EXTEND:TI (reg:DI))) */
+ && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
+ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)
+ || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
+ && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND))
+ && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0)) == DImode
+ && GET_MODE (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0)) == DImode
+ /* (const_int 64) */
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+ && UINTVAL (XEXP (XEXP (x, 0), 1)) == 64)
+ {
+ /* UMULH/SMULH. */
+ if (speed)
+ *cost += extra_cost->mult[mode == DImode].extend;
+ *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 0), 0),
+ MULT, 0, speed);
+ *cost += rtx_cost (XEXP (XEXP (XEXP (XEXP (x, 0), 0), 1), 0),
+ MULT, 1, speed);
+ return true;
+ }
+
+ /* Fall through. */
+ default:
+ return true;
+ }
+ return false;
}
static int