aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author ktkachov <ktkachov@138bc75d-0d04-0410-961f-82ee72b054a4> 2015-04-30 16:59:50 (GMT)
committer Dan Albert <danalbert@google.com> 2015-09-09 21:23:35 (GMT)
commit a981e4500a6364a7af778544cbcbc22bffc4cb58 (patch)
tree 61eb6d9470c2d229b91337a7659e90ef21f505b8
parent 3a0cddad3cd2639a22002ae5f3102441414f8012 (diff)
download toolchain_gcc-a981e4500a6364a7af778544cbcbc22bffc4cb58.zip
toolchain_gcc-a981e4500a6364a7af778544cbcbc22bffc4cb58.tar.gz
toolchain_gcc-a981e4500a6364a7af778544cbcbc22bffc4cb58.tar.bz2
[4.9] Add several improvements for AArch64
Backported from GCC 5.

2015-04-30 Kyrylo Tkachov <kyrylo.tkachov@arm.com>

    Properly handle mvn-register and add EON+shift pattern and cost appropriately

    * config/aarch64/aarch64.md (*eor_one_cmpl_<SHIFT:optab><mode>3_alt): New pattern.
    (*eor_one_cmpl_<SHIFT:optab>sidi3_alt_ze): Likewise.
    * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle MVN-shift appropriately.
    Handle alternative EON form.

    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@222637 138bc75d-0d04-0410-961f-82ee72b054a4

2015-04-30 Kyrylo Tkachov <kyrylo.tkachov@arm.com>

    Properly cost FABD pattern

    * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle pattern for fabd in ABS case.

    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@222638 138bc75d-0d04-0410-961f-82ee72b054a4

2015-05-01 Kyrylo Tkachov <kyrylo.tkachov@arm.com>

    Remember to cost operand 0 in FP compare-with-0.0 case

    * config/aarch64/aarch64.c (aarch64_rtx_costs, COMPARE case): Add cost of op0 in the compare-with-fpzero case.

    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@222673 138bc75d-0d04-0410-961f-82ee72b054a4

2015-05-01 Wilco Dijkstra <wdijkstr@arm.com>

    Fix aarch64_rtx_costs of PLUS/MINUS

    * gcc/config/aarch64/aarch64.c (aarch64_rtx_costs): Calculate cost of op0 and op1 in PLUS and MINUS cases.

    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@222676 138bc75d-0d04-0410-961f-82ee72b054a4

2015-07-27 Wilco Dijkstra <wdijkstr@arm.com>

    Improve spill code - swap order in shl pattern

    * config/aarch64/aarch64.md (aarch64_ashl_sisd_or_int_<mode>3): Place integer variant first.

    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@226247 138bc75d-0d04-0410-961f-82ee72b054a4

2015-07-27 Wilco Dijkstra <wdijkstr@arm.com>

    Improve spill code - swap order in shr patterns

    * gcc/config/aarch64/aarch64.md (aarch64_lshr_sisd_or_int_<mode>3): Place integer variant first.
    (aarch64_ashr_sisd_or_int_<mode>3): Likewise.

    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@226253 138bc75d-0d04-0410-961f-82ee72b054a4

2015-08-04 Pawel Kupidura <pawel.kupidura@arm.com>

    * config/aarch64/aarch64.c: Change inner loop statement cost to be consistent with other targets.

    Change-Id: If5b10466302d733fcae6eacc9d128fdb8f95c0de

    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@226575 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- gcc-4.9/gcc/config/aarch64/aarch64.c 75
-rw-r--r-- gcc-4.9/gcc/config/aarch64/aarch64.md 67
2 files changed, 103 insertions, 39 deletions
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.c b/gcc-4.9/gcc/config/aarch64/aarch64.c
index 8006784..09f2d2c 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64.c
+++ b/gcc-4.9/gcc/config/aarch64/aarch64.c
@@ -5120,6 +5120,7 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1))
{
+ *cost += rtx_cost (op0, COMPARE, 0, speed);
/* FCMP supports constant 0.0 for no extra cost. */
return true;
}
@@ -5134,6 +5135,8 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED,
op1 = XEXP (x, 1);
cost_minus:
+ *cost += rtx_cost (op0, MINUS, 0, speed);
+
/* Detect valid immediates. */
if ((GET_MODE_CLASS (mode) == MODE_INT
|| (GET_MODE_CLASS (mode) == MODE_CC
@@ -5141,13 +5144,10 @@ cost_minus:
&& CONST_INT_P (op1)
&& aarch64_uimm12_shift (INTVAL (op1)))
{
- *cost += rtx_cost (op0, MINUS, 0, speed);
-
if (speed)
/* SUB(S) (immediate). */
*cost += extra_cost->alu.arith;
return true;
-
}
/* Look for SUB (extended register). */
@@ -5172,7 +5172,6 @@ cost_minus:
*cost += aarch64_rtx_mult_cost (new_op1, MULT,
(enum rtx_code) code,
speed);
- *cost += rtx_cost (op0, MINUS, 0, speed);
return true;
}
@@ -5219,6 +5218,8 @@ cost_plus:
return true;
}
+ *cost += rtx_cost (op1, PLUS, 1, speed);
+
/* Look for ADD (extended register). */
if (aarch64_rtx_arith_op_extract_p (op0, mode))
{
@@ -5240,12 +5241,10 @@ cost_plus:
{
*cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS,
speed);
- *cost += rtx_cost (op1, PLUS, 1, speed);
return true;
}
- *cost += (rtx_cost (new_op0, PLUS, 0, speed)
- + rtx_cost (op1, PLUS, 1, speed));
+ *cost += rtx_cost (new_op0, PLUS, 0, speed);
if (speed)
{
@@ -5331,13 +5330,45 @@ cost_plus:
return false;
case NOT:
+ x = XEXP (x, 0);
+ op0 = aarch64_strip_shift (x);
+
+ /* MVN-shifted-reg. */
+ if (op0 != x)
+ {
+ *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed);
+
+ if (speed)
+ *cost += extra_cost->alu.log_shift;
+
+ return true;
+ }
+ /* EON can have two forms: (xor (not a) b) but also (not (xor a b)).
+ Handle the second form here taking care that 'a' in the above can
+ be a shift. */
+ else if (GET_CODE (op0) == XOR)
+ {
+ rtx newop0 = XEXP (op0, 0);
+ rtx newop1 = XEXP (op0, 1);
+ rtx op0_stripped = aarch64_strip_shift (newop0);
+
+ *cost += rtx_cost (newop1, (enum rtx_code) code, 1, speed)
+ + rtx_cost (op0_stripped, XOR, 0, speed);
+
+ if (speed)
+ {
+ if (op0_stripped != newop0)
+ *cost += extra_cost->alu.log_shift;
+ else
+ *cost += extra_cost->alu.logical;
+ }
+
+ return true;
+ }
/* MVN. */
if (speed)
*cost += extra_cost->alu.logical;
- /* The logical instruction could have the shifted register form,
- but the cost is the same if the shift is processed as a separate
- instruction, so we don't bother with it here. */
return false;
case ZERO_EXTEND:
@@ -5672,7 +5703,19 @@ cost_plus:
case ABS:
if (GET_MODE_CLASS (mode) == MODE_FLOAT)
{
- /* FABS and FNEG are analogous. */
+ op0 = XEXP (x, 0);
+
+ /* FABD, which is analogous to FADD: cost both MINUS operands. */
+ if (GET_CODE (op0) == MINUS)
+ {
+ *cost += (rtx_cost (XEXP (op0, 0), MINUS, 0, speed)
+ + rtx_cost (XEXP (op0, 1), MINUS, 1, speed));
+ if (speed)
+ *cost += extra_cost->fp[mode == DFmode].addsub;
+
+ return true;
+ }
+ /* Simple FABS is analogous to FNEG. */
if (speed)
*cost += extra_cost->fp[mode == DFmode].neg;
}
@@ -5867,15 +5910,9 @@ aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
/* Statements in an inner loop relative to the loop being
vectorized are weighted more heavily. The value here is
- a function (linear for now) of the loop nest level. */
+ arbitrary and could potentially be improved with analysis. */
if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
- {
- loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
- struct loop *loop = LOOP_VINFO_LOOP (loop_info);
- unsigned nest_level = loop_depth (loop);
-
- count *= nest_level;
- }
+ count *= 50; /* FIXME */
retval = (unsigned) (count * stmt_cost);
cost[where] += retval;
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.md b/gcc-4.9/gcc/config/aarch64/aarch64.md
index dc88f8b..fe68bfe 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64.md
+++ b/gcc-4.9/gcc/config/aarch64/aarch64.md
@@ -2571,6 +2571,32 @@
[(set_attr "type" "logics_shift_imm")]
)
+(define_insn "*eor_one_cmpl_<SHIFT:optab><mode>3_alt"
+ [(set (match_operand:GPI 0 "register_operand" "=r")
+ (not:GPI (xor:GPI
+ (SHIFT:GPI
+ (match_operand:GPI 1 "register_operand" "r")
+ (match_operand:QI 2 "aarch64_shift_imm_<mode>" "n"))
+ (match_operand:GPI 3 "register_operand" "r"))))]
+ ""
+ "eon\\t%<w>0, %<w>3, %<w>1, <SHIFT:shift> %2"
+ [(set_attr "type" "logic_shift_imm")]
+)
+
+;; Zero-extend version of the above.
+(define_insn "*eor_one_cmpl_<SHIFT:optab>sidi3_alt_ze"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (not:SI (xor:SI
+ (SHIFT:SI
+ (match_operand:SI 1 "register_operand" "r")
+ (match_operand:QI 2 "aarch64_shift_imm_si" "n"))
+ (match_operand:SI 3 "register_operand" "r")))))]
+ ""
+ "eon\\t%w0, %w3, %w1, <SHIFT:shift> %2"
+ [(set_attr "type" "logic_shift_imm")]
+)
+
(define_insn "*and_one_cmpl_<SHIFT:optab><mode>3_compare0"
[(set (reg:CC_NZ CC_REGNUM)
(compare:CC_NZ
@@ -2771,32 +2797,33 @@
;; Logical left shift using SISD or Integer instruction
(define_insn "*aarch64_ashl_sisd_or_int_<mode>3"
- [(set (match_operand:GPI 0 "register_operand" "=w,w,r")
+ [(set (match_operand:GPI 0 "register_operand" "=r,w,w")
(ashift:GPI
- (match_operand:GPI 1 "register_operand" "w,w,r")
- (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "Us<cmode>,w,rUs<cmode>")))]
+ (match_operand:GPI 1 "register_operand" "r,w,w")
+ (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "rUs<cmode>,Us<cmode>,w")))]
""
"@
+ lsl\t%<w>0, %<w>1, %<w>2
shl\t%<rtn>0<vas>, %<rtn>1<vas>, %2
- ushl\t%<rtn>0<vas>, %<rtn>1<vas>, %<rtn>2<vas>
- lsl\t%<w>0, %<w>1, %<w>2"
- [(set_attr "simd" "yes,yes,no")
- (set_attr "type" "neon_shift_imm<q>, neon_shift_reg<q>,shift_reg")]
+ ushl\t%<rtn>0<vas>, %<rtn>1<vas>, %<rtn>2<vas>"
+ [(set_attr "simd" "no,yes,yes")
+ (set_attr "type" "shift_reg,neon_shift_imm<q>, neon_shift_reg<q>")]
)
;; Logical right shift using SISD or Integer instruction
(define_insn "*aarch64_lshr_sisd_or_int_<mode>3"
- [(set (match_operand:GPI 0 "register_operand" "=w,&w,r")
+ [(set (match_operand:GPI 0 "register_operand" "=r,w,&w,&w")
(lshiftrt:GPI
- (match_operand:GPI 1 "register_operand" "w,w,r")
- (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "Us<cmode>,w,rUs<cmode>")))]
+ (match_operand:GPI 1 "register_operand" "r,w,w,w")
+ (match_operand:QI 2 "aarch64_reg_or_shift_imm_<mode>" "rUs<cmode>,Us<cmode>,w,0")))]
""
"@
+ lsr\t%<w>0, %<w>1, %<w>2
ushr\t%<rtn>0<vas>, %<rtn>1<vas>, %2
#
- lsr\t%<w>0, %<w>1, %<w>2"
- [(set_attr "simd" "yes,yes,no")
- (set_attr "type" "neon_shift_imm<q>,neon_shift_reg<q>,shift_reg")]
+ #"
+ [(set_attr "simd" "no,yes,yes,yes")
+ (set_attr "type" "shift_reg,neon_shift_imm<q>,neon_shift_reg<q>,neon_shift_reg<q>")]
)
(define_split
@@ -2831,18 +2858,18 @@
;; Arithmetic right shift using SISD or Integer instruction
(define_insn "*aarch64_ashr_sisd_or_int_<mode>3"
- [(set (match_operand:GPI 0 "register_operand" "=w,&w,&w,r")
+ [(set (match_operand:GPI 0 "register_operand" "=r,w,&w,&w")
(ashiftrt:GPI
- (match_operand:GPI 1 "register_operand" "w,w,w,r")
- (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us<cmode>,w,0,rUs<cmode>")))]
+ (match_operand:GPI 1 "register_operand" "r,w,w,w")
+ (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "rUs<cmode>,Us<cmode>,w,0")))]
""
"@
+ asr\t%<w>0, %<w>1, %<w>2
sshr\t%<rtn>0<vas>, %<rtn>1<vas>, %2
#
- #
- asr\t%<w>0, %<w>1, %<w>2"
- [(set_attr "simd" "yes,yes,yes,no")
- (set_attr "type" "neon_shift_imm<q>,neon_shift_reg<q>,neon_shift_reg<q>,shift_reg")]
+ #"
+ [(set_attr "simd" "no,yes,yes,yes")
+ (set_attr "type" "shift_reg,neon_shift_imm<q>,neon_shift_reg<q>,neon_shift_reg<q>")]
)
(define_split