From c16bf8984b9f89eeb79abd9a3789151f5c98d02f Mon Sep 17 00:00:00 2001 From: Rong Xu Date: Mon, 13 Oct 2014 15:06:37 -0700 Subject: [gcc-4.9] fix for cortex-a53 erratum 835769 This patch cherry picks r216116 from google/gcc-4_9 branch (which is r216077 and r216079 from upstream gcc-4_9 branch). Tested the built compiler with aarch64 device. Change-Id: I75745fe35aae83bc3b64135a2efc7f8ffd9cfb1f --- gcc-4.9/gcc/config/aarch64/aarch64-protos.h | 2 + gcc-4.9/gcc/config/aarch64/aarch64.c | 131 ++++++++++++++++++++++++++++ gcc-4.9/gcc/config/aarch64/aarch64.h | 9 ++ gcc-4.9/gcc/config/aarch64/aarch64.opt | 4 + 4 files changed, 146 insertions(+) (limited to 'gcc-4.9/gcc/config/aarch64') diff --git a/gcc-4.9/gcc/config/aarch64/aarch64-protos.h b/gcc-4.9/gcc/config/aarch64/aarch64-protos.h index 5542f023b..bef58bf71 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64-protos.h +++ b/gcc-4.9/gcc/config/aarch64/aarch64-protos.h @@ -287,6 +287,8 @@ aarch64_builtin_vectorized_function (tree fndecl, extern void aarch64_split_combinev16qi (rtx operands[3]); extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel); +extern bool aarch64_madd_needs_nop (rtx); +extern void aarch64_final_prescan_insn (rtx); extern bool aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel); #endif /* GCC_AARCH64_PROTOS_H */ diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.c b/gcc-4.9/gcc/config/aarch64/aarch64.c index 07430a48d..34986d7c6 100644 --- a/gcc-4.9/gcc/config/aarch64/aarch64.c +++ b/gcc-4.9/gcc/config/aarch64/aarch64.c @@ -5256,6 +5256,15 @@ aarch64_override_options (void) aarch64_tune = selected_tune->core; aarch64_tune_params = selected_tune->tune; + if (aarch64_fix_a53_err835769 == 2) + { +#ifdef TARGET_FIX_ERR_A53_835769_DEFAULT + aarch64_fix_a53_err835769 = 1; +#else + aarch64_fix_a53_err835769 = 0; +#endif + } + aarch64_override_options_after_change (); if (TARGET_ANDROID) @@ -6466,6 +6475,128 @@ aarch64_mangle_type (const_tree type) return NULL; }
+/* Callback for for_each_rtx: return nonzero when *X is a MEM.  DATA is
+   unused.  */
+
+static int
+is_mem_p (rtx *x, void *data ATTRIBUTE_UNUSED)
+{
+  return MEM_P (*x);
+}
+
+/* Return true if walking the pattern of MEM_INSN with for_each_rtx finds
+   a MEM.  */
+
+static bool
+is_memory_op (rtx mem_insn)
+{
+  rtx pattern = PATTERN (mem_insn);
+  return for_each_rtx (&pattern, is_mem_p, NULL);
+}
+
+/* Find the first rtx before insn that will generate an assembly
+   instruction.  Returns NULL when INSN is NULL or no such insn exists.
+   Note that this runs recog_memoized on insns other than the one passed
+   in.  */
+
+static rtx
+aarch64_prev_real_insn (rtx insn)
+{
+  if (!insn)
+    return NULL;
+
+  do
+    {
+      insn = prev_real_insn (insn);
+    }
+  while (insn && recog_memoized (insn) < 0);
+
+  return insn;
+}
+
+/* Return true if T1 is one of the multiply-accumulate insn types listed
+   in the table below.  */
+
+static bool
+is_madd_op (enum attr_type t1)
+{
+  unsigned int i;
+  /* A number of these may be AArch32 only.  The table is static const so
+     it is not rebuilt on the stack for every insn that is queried.  */
+  static const enum attr_type mlatypes[] = {
+    TYPE_MLA, TYPE_MLAS, TYPE_SMLAD, TYPE_SMLADX, TYPE_SMLAL, TYPE_SMLALD,
+    TYPE_SMLALS, TYPE_SMLALXY, TYPE_SMLAWX, TYPE_SMLAWY, TYPE_SMLAXY,
+    TYPE_SMMLA, TYPE_UMLAL, TYPE_UMLALS,TYPE_SMLSD, TYPE_SMLSDX, TYPE_SMLSLD
+  };
+
+  for (i = 0; i < sizeof (mlatypes) / sizeof (mlatypes[0]); i++)
+    {
+      if (t1 == mlatypes[i])
+        return true;
+    }
+
+  return false;
+}
+
+/* Check if there is a register dependency between a load and the insn
+   for which we hold recog_data.
*/
+
+static bool
+dep_between_memop_and_curr (rtx memop)
+{
+  rtx load_reg;
+  int opno;
+
+  /* MEMOP may be NULL (the caller obtains it from single_set); with no
+     single set there is nothing to compare, so report no dependency.  */
+  if (!memop)
+    return false;
+
+  /* Only a register destination can overlap a register operand of the
+     current insn.  */
+  if (!REG_P (SET_DEST (memop)))
+    return false;
+
+  load_reg = SET_DEST (memop);
+  for (opno = 0; opno < recog_data.n_operands; opno++)
+    {
+      rtx operand = recog_data.operand[opno];
+      if (REG_P (operand)
+          && reg_overlap_mentioned_p (load_reg, operand))
+        return true;
+    }
+  return false;
+}
+
+/* Return true if a NOP must be emitted in front of INSN: the erratum
+   835769 workaround is enabled, INSN is a multiply-accumulate whose
+   operand 0 is DImode, and the preceding real insn is a memory operation
+   that INSN has no register dependency on.  */
+
+bool
+aarch64_madd_needs_nop (rtx insn)
+{
+  enum attr_type attr_type;
+  rtx prev;
+  rtx body;
+
+  if (!aarch64_fix_a53_err835769)
+    return false;
+
+  /* Only real insns carry an insn code for recog_memoized to consult.  */
+  if (!INSN_P (insn) || recog_memoized (insn) < 0)
+    return false;
+
+  attr_type = get_attr_type (insn);
+  if (!is_madd_op (attr_type))
+    return false;
+
+  prev = aarch64_prev_real_insn (insn);
+  /* aarch64_prev_real_insn can call recog_memoized on insns other than INSN.
+     Restore recog state to INSN so that the recog_data reads below, and
+     those in dep_between_memop_and_curr, describe INSN's operands.  */
+  extract_constrain_insn_cached (insn);
+
+  if (!prev || !is_memory_op (prev))
+    return false;
+
+  body = single_set (prev);
+
+  /* If the previous insn is a memory op and there is no dependency between
+     it and the DImode madd, emit a nop between them.  If we know the
+     previous insn is a memory op but body is NULL, emit the nop to be safe,
+     it's probably a load/store pair insn.  */
+  if (GET_MODE (recog_data.operand[0]) == DImode
+      && (!body || !dep_between_memop_and_curr (body)))
+    return true;
+
+  return false;
+}
+
+/* Target hook behind FINAL_PRESCAN_INSN: write a NOP to the assembly
+   output ahead of INSN when aarch64_madd_needs_nop reports that INSN
+   needs one.  */
+
+void
+aarch64_final_prescan_insn (rtx insn)
+{
+  if (aarch64_madd_needs_nop (insn))
+    fprintf (asm_out_file, "\tnop // between mem op and mult-accumulate\n");
+}
+
+
 /* Return the equivalent letter for size.  */
 static char
 sizetochar (int size)
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.h b/gcc-4.9/gcc/config/aarch64/aarch64.h
index 2fd6df4af..77b2bb9b4 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64.h
+++ b/gcc-4.9/gcc/config/aarch64/aarch64.h
@@ -481,6 +481,15 @@ enum target_cpus
   (TARGET_CPU_generic | (AARCH64_CPU_DEFAULT_FLAGS << 6))
 #endif
 
+/* If inserting NOP before a mult-accumulate insn remember to adjust the
+   length so that conditional branching code is updated appropriately.
*/
+/* Wrapped in do/while (0) so the expansion is a single statement and
+   cannot capture a following `else'; LENGTH is parenthesised because it
+   is an arbitrary lvalue expression supplied by the caller.  The extra 4
+   accounts for the NOP written by aarch64_final_prescan_insn.  */
+#define ADJUST_INSN_LENGTH(insn, length)        \
+  do                                            \
+    {                                           \
+      if (aarch64_madd_needs_nop (insn))        \
+        (length) += 4;                          \
+    }                                           \
+  while (0)
+
+/* Let the back end inspect INSN before it is output.  OPVEC and NOPERANDS
+   are not used.  No trailing semicolon: the use site supplies it.  */
+#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS)      \
+  aarch64_final_prescan_insn (INSN)
+
 /* The processor for which instructions should be scheduled.  */
 extern enum aarch64_processor aarch64_tune;
 
diff --git a/gcc-4.9/gcc/config/aarch64/aarch64.opt b/gcc-4.9/gcc/config/aarch64/aarch64.opt
index f5a15b729..fc0307e28 100644
--- a/gcc-4.9/gcc/config/aarch64/aarch64.opt
+++ b/gcc-4.9/gcc/config/aarch64/aarch64.opt
@@ -67,6 +67,10 @@ mgeneral-regs-only
 Target Report RejectNegative Mask(GENERAL_REGS_ONLY)
 Generate code which uses only the general registers
 
+mfix-cortex-a53-835769
+Target Report Var(aarch64_fix_a53_err835769) Init(2)
+Workaround for ARM Cortex-A53 Erratum number 835769
+
 mlittle-endian
 Target Report RejectNegative InverseMask(BIG_END)
 Assume target CPU is configured as little endian
-- 
cgit v1.2.3