From a981e4500a6364a7af778544cbcbc22bffc4cb58 Mon Sep 17 00:00:00 2001 From: ktkachov Date: Thu, 30 Apr 2015 16:59:50 +0000 Subject: [4.9] Add several improvements for AArch64 Backported from GCC 5. 2015-04-30 Kyrylo Tkachov Properly handle mvn-register and add EON+shift pattern and cost appropriately * config/aarch64/aarch64.md (*eor_one_cmpl_3_alt): New pattern. (*eor_one_cmpl_sidi3_alt_ze): Likewise. * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle MVN-shift appropriately. Handle alternative EON form. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@222637 138bc75d-0d04-0410-961f-82ee72b054a4 2015-04-30 Kyrylo Tkachov Properly cost FABD pattern * config/aarch64/aarch64.c (aarch64_rtx_costs): Handle pattern for fabd in ABS case. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@222638 138bc75d-0d04-0410-961f-82ee72b054a4 2015-05-01 Kyrylo Tkachov Remember to cost operand 0 in FP compare-with-0.0 case * config/aarch64/aarch64.c (aarch64_rtx_costs, COMPARE case): Add cost of op0 in the compare-with-fpzero case. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@222673 138bc75d-0d04-0410-961f-82ee72b054a4 2015-05-01 Wilco Dijkstra Fix aarch64_rtx_costs of PLUS/MINUS * gcc/config/aarch64/aarch64.c (aarch64_rtx_costs): Calculate cost of op0 and op1 in PLUS and MINUS cases. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@222676 138bc75d-0d04-0410-961f-82ee72b054a4 2015-07-27 Wilco Dijkstra Improve spill code - swap order in shl pattern * config/aarch64/aarch64.md (aarch64_ashl_sisd_or_int_3): Place integer variant first. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@226247 138bc75d-0d04-0410-961f-82ee72b054a4 2015-07-27 Wilco Dijkstra Improve spill code - swap order in shr patterns * gcc/config/aarch64/aarch64.md (aarch64_lshr_sisd_or_int_3): Place integer variant first. (aarch64_ashr_sisd_or_int_3): Likewise. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@226253 138bc75d-0d04-0410-961f-82ee72b054a4 2015-08-04 Pawel Kupidura * config/aarch64/aarch64.c: Change inner loop statement cost to be consistent with other targets. Change-Id: If5b10466302d733fcae6eacc9d128fdb8f95c0de git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@226575 138bc75d-0d04-0410-961f-82ee72b054a4 --- gcc-4.9/gcc/config/aarch64/aarch64.c | 75 ++++++++++++++++++++++++++--------- gcc-4.9/gcc/config/aarch64/aarch64.md | 67 +++++++++++++++++++++---------- 2 files changed, 103 insertions(+), 39 deletions(-) diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.c b/gcc-4.9/gcc/config/aarch64/aarch64.c index 8006784f6..09f2d2c5d 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64.c +++ b/gcc-4.9/gcc/config/aarch64/aarch64.c @@ -5120,6 +5120,7 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, if (CONST_DOUBLE_P (op1) && aarch64_float_const_zero_rtx_p (op1)) { + *cost += rtx_cost (op0, COMPARE, 0, speed); /* FCMP supports constant 0.0 for no extra cost. */ return true; } @@ -5134,6 +5135,8 @@ aarch64_rtx_costs (rtx x, int code, int outer ATTRIBUTE_UNUSED, op1 = XEXP (x, 1); cost_minus: + *cost += rtx_cost (op0, MINUS, 0, speed); + /* Detect valid immediates. */ if ((GET_MODE_CLASS (mode) == MODE_INT || (GET_MODE_CLASS (mode) == MODE_CC @@ -5141,13 +5144,10 @@ cost_minus: && CONST_INT_P (op1) && aarch64_uimm12_shift (INTVAL (op1))) { - *cost += rtx_cost (op0, MINUS, 0, speed); - if (speed) /* SUB(S) (immediate). */ *cost += extra_cost->alu.arith; return true; - } /* Look for SUB (extended register). */ @@ -5172,7 +5172,6 @@ cost_minus: *cost += aarch64_rtx_mult_cost (new_op1, MULT, (enum rtx_code) code, speed); - *cost += rtx_cost (op0, MINUS, 0, speed); return true; } @@ -5219,6 +5218,8 @@ cost_plus: return true; } + *cost += rtx_cost (op1, PLUS, 1, speed); + /* Look for ADD (extended register). */ if (aarch64_rtx_arith_op_extract_p (op0, mode)) { @@ -5240,12 +5241,10 @@ cost_plus: { *cost += aarch64_rtx_mult_cost (new_op0, MULT, PLUS, speed); - *cost += rtx_cost (op1, PLUS, 1, speed); return true; } - *cost += (rtx_cost (new_op0, PLUS, 0, speed) - + rtx_cost (op1, PLUS, 1, speed)); + *cost += rtx_cost (new_op0, PLUS, 0, speed); if (speed) { @@ -5331,13 +5330,45 @@ cost_plus: return false; case NOT: + x = XEXP (x, 0); + op0 = aarch64_strip_shift (x); + + /* MVN-shifted-reg. */ + if (op0 != x) + { + *cost += rtx_cost (op0, (enum rtx_code) code, 0, speed); + + if (speed) + *cost += extra_cost->alu.log_shift; + + return true; + } + /* EON can have two forms: (xor (not a) b) but also (not (xor a b)). + Handle the second form here taking care that 'a' in the above can + be a shift. */ + else if (GET_CODE (op0) == XOR) + { + rtx newop0 = XEXP (op0, 0); + rtx newop1 = XEXP (op0, 1); + rtx op0_stripped = aarch64_strip_shift (newop0); + + *cost += rtx_cost (newop1, (enum rtx_code) code, 1, speed) + + rtx_cost (op0_stripped, XOR, 0, speed); + + if (speed) + { + if (op0_stripped != newop0) + *cost += extra_cost->alu.log_shift; + else + *cost += extra_cost->alu.logical; + } + + return true; + } /* MVN. */ if (speed) *cost += extra_cost->alu.logical; - /* The logical instruction could have the shifted register form, - but the cost is the same if the shift is processed as a separate - instruction, so we don't bother with it here. */ return false; case ZERO_EXTEND: @@ -5672,7 +5703,19 @@ cost_plus: case ABS: if (GET_MODE_CLASS (mode) == MODE_FLOAT) { - /* FABS and FNEG are analogous. */ + op0 = XEXP (x, 0); + + /* FABD, which is analogous to FADD. */ + if (GET_CODE (op0) == MINUS) + { + *cost += rtx_cost (XEXP (op0, 0), MINUS, 0, speed); + + rtx_cost (XEXP (op0, 1), MINUS, 1, speed); + if (speed) + *cost += extra_cost->fp[mode == DFmode].addsub; + + return true; + } + /* Simple FABS is analogous to FNEG. */ if (speed) *cost += extra_cost->fp[mode == DFmode].neg; } @@ -5867,15 +5910,9 @@ aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, /* Statements in an inner loop relative to the loop being vectorized are weighted more heavily. The value here is - a function (linear for now) of the loop nest level. */ + arbitrary and could potentially be improved with analysis. */ if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) - { - loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info); - struct loop *loop = LOOP_VINFO_LOOP (loop_info); - unsigned nest_level = loop_depth (loop); - - count *= nest_level; - } + count *= 50; /* FIXME */ retval = (unsigned) (count * stmt_cost); cost[where] += retval; diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.md b/gcc-4.9/gcc/config/aarch64/aarch64.md index dc88f8b10..fe68bfea1 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64.md +++ b/gcc-4.9/gcc/config/aarch64/aarch64.md @@ -2571,6 +2571,32 @@ [(set_attr "type" "logics_shift_imm")] ) +(define_insn "*eor_one_cmpl_3_alt" + [(set (match_operand:GPI 0 "register_operand" "=r") + (not:GPI (xor:GPI + (SHIFT:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_" "n")) + (match_operand:GPI 3 "register_operand" "r"))))] + "" + "eon\\t%0, %3, %1, %2" + [(set_attr "type" "logic_shift_imm")] +) + +;; Zero-extend version of the above. +(define_insn "*eor_one_cmpl_sidi3_alt_ze" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI + (not:SI (xor:SI + (SHIFT:SI + (match_operand:SI 1 "register_operand" "r") + (match_operand:QI 2 "aarch64_shift_imm_si" "n")) + (match_operand:SI 3 "register_operand" "r")))))] + "" + "eon\\t%w0, %w3, %w1, %2" + [(set_attr "type" "logic_shift_imm")] +) + (define_insn "*and_one_cmpl_3_compare0" [(set (reg:CC_NZ CC_REGNUM) (compare:CC_NZ @@ -2771,32 +2797,33 @@ ;; Logical left shift using SISD or Integer instruction (define_insn "*aarch64_ashl_sisd_or_int_3" - [(set (match_operand:GPI 0 "register_operand" "=w,w,r") + [(set (match_operand:GPI 0 "register_operand" "=r,w,w") (ashift:GPI - (match_operand:GPI 1 "register_operand" "w,w,r") - (match_operand:QI 2 "aarch64_reg_or_shift_imm_" "Us,w,rUs")))] + (match_operand:GPI 1 "register_operand" "r,w,w") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_" "rUs,Us,w")))] "" "@ + lsl\t%0, %1, %2 shl\t%0, %1, %2 - ushl\t%0, %1, %2 - lsl\t%0, %1, %2" - [(set_attr "simd" "yes,yes,no") - (set_attr "type" "neon_shift_imm, neon_shift_reg,shift_reg")] + ushl\t%0, %1, %2" + [(set_attr "simd" "no,yes,yes") + (set_attr "type" "shift_reg,neon_shift_imm, neon_shift_reg")] ) ;; Logical right shift using SISD or Integer instruction (define_insn "*aarch64_lshr_sisd_or_int_3" - [(set (match_operand:GPI 0 "register_operand" "=w,&w,r") + [(set (match_operand:GPI 0 "register_operand" "=r,w,&w,&w") (lshiftrt:GPI - (match_operand:GPI 1 "register_operand" "w,w,r") - (match_operand:QI 2 "aarch64_reg_or_shift_imm_" "Us,w,rUs")))] + (match_operand:GPI 1 "register_operand" "r,w,w,w") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_" "rUs,Us,w,0")))] "" "@ + lsr\t%0, %1, %2 ushr\t%0, %1, %2 # - lsr\t%0, %1, %2" - [(set_attr "simd" "yes,yes,no") - (set_attr "type" "neon_shift_imm,neon_shift_reg,shift_reg")] + #" + [(set_attr "simd" "no,yes,yes,yes") + (set_attr "type" "shift_reg,neon_shift_imm,neon_shift_reg,neon_shift_reg")] ) (define_split @@ -2831,18 +2858,18 @@ ;; Arithmetic right shift using SISD or Integer instruction (define_insn "*aarch64_ashr_sisd_or_int_3" - [(set (match_operand:GPI 0 "register_operand" "=w,&w,&w,r") + [(set (match_operand:GPI 0 "register_operand" "=r,w,&w,&w") (ashiftrt:GPI - (match_operand:GPI 1 "register_operand" "w,w,w,r") - (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "Us,w,0,rUs")))] + (match_operand:GPI 1 "register_operand" "r,w,w,w") + (match_operand:QI 2 "aarch64_reg_or_shift_imm_di" "rUs,Us,w,0")))] "" "@ + asr\t%0, %1, %2 sshr\t%0, %1, %2 # - # - asr\t%0, %1, %2" - [(set_attr "simd" "yes,yes,yes,no") - (set_attr "type" "neon_shift_imm,neon_shift_reg,neon_shift_reg,shift_reg")] + #" + [(set_attr "simd" "no,yes,yes,yes") + (set_attr "type" "shift_reg,neon_shift_imm,neon_shift_reg,neon_shift_reg")] ) (define_split -- cgit v1.2.3