From 331e362574142e4c1d9d509533d1c96b6dc54d13 Mon Sep 17 00:00:00 2001 From: Carrot Wei Date: Thu, 15 May 2014 10:06:33 +0800 Subject: [4.8, 4.9] Add simplify-got This pass optimizes GOT_PREL (it already exists in toolchain/gcc/gcc-4.6). Backport from svn://gcc.gnu.org/svn/gcc/branches/google/gcc-4_6-mobile. UNSPEC_GOT_PREL_SYM is now in the new file arm/unspecs.md. The 4.9 port is slightly different due to changes in gcc passes. See Google b/14811006. r173209 | carrot | 2011-04-30 16:07:46 +0800 (Sat, 30 Apr 2011) | 21 lines * hooks.c (hook_rtx_void_null): New function. * hooks.h (hook_rtx_void_null): New prototype. * target.def (got_access): New hook vector declaration. * tree-pass.h (pass_simplify_got): New pass. * timevar.def (TV_SIMPLIFY_GOT): New TV id. * simplify-got.c: New source file. * Makefile.in (simplify-got.o): Add a new file. * passes.c (init_optimization_passes): Add a new pass. * config/arm/arm.c (arm_output_addr_const_extra): Output GOT_PREL relocation. (arm_get_pic_reg): New function. (arm_clear_pic_reg): New function. (arm_can_simplify_got_access): New function. (arm_loaded_global_var): New function. (arm_load_global_address): New function. * config/arm/arm.md (UNSPEC_GOT_PREL_SYM): New UNSPEC symbol. * testsuite/gcc.target/arm/got1.c: New testcase. * testsuite/gcc.target/arm/got2.c: New testcase. 
Change-Id: I91e881df19bb6937a5fbcc8e6b83d158717c7773 --- gcc-4.9/gcc/config/arm/arm.c | 222 ++++++++++++++++++++++++++++++++++++++ gcc-4.9/gcc/config/arm/unspecs.md | 1 + 2 files changed, 223 insertions(+) (limited to 'gcc-4.9/gcc/config') diff --git a/gcc-4.9/gcc/config/arm/arm.c b/gcc-4.9/gcc/config/arm/arm.c index 17d030490..83763555c 100644 --- a/gcc-4.9/gcc/config/arm/arm.c +++ b/gcc-4.9/gcc/config/arm/arm.c @@ -286,7 +286,15 @@ static unsigned arm_add_stmt_cost (void *data, int count, static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1, bool op0_preserve_value); + static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void); + +static rtx arm_get_pic_reg (void); +static void arm_clear_pic_reg (void); +static bool arm_can_simplify_got_access (int, int); +static rtx arm_loaded_global_var (rtx, rtx *, rtx *); +static void arm_load_global_address (rtx, rtx, rtx, rtx, rtx); + /* Table of machine attributes. */ static const struct attribute_spec arm_attribute_table[] = @@ -662,6 +670,21 @@ static const struct attribute_spec arm_attribute_table[] = #undef TARGET_VECTORIZE_ADD_STMT_COST #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost +#undef TARGET_GET_PIC_REG +#define TARGET_GET_PIC_REG arm_get_pic_reg + +#undef TARGET_CLEAR_PIC_REG +#define TARGET_CLEAR_PIC_REG arm_clear_pic_reg + +#undef TARGET_LOADED_GLOBAL_VAR +#define TARGET_LOADED_GLOBAL_VAR arm_loaded_global_var + +#undef TARGET_CAN_SIMPLIFY_GOT_ACCESS +#define TARGET_CAN_SIMPLIFY_GOT_ACCESS arm_can_simplify_got_access + +#undef TARGET_LOAD_GLOBAL_ADDRESS +#define TARGET_LOAD_GLOBAL_ADDRESS arm_load_global_address + #undef TARGET_CANONICALIZE_COMPARISON #define TARGET_CANONICALIZE_COMPARISON \ arm_canonicalize_comparison @@ -29208,6 +29231,14 @@ arm_output_addr_const_extra (FILE *fp, rtx x) fputc (')', fp); return TRUE; } + else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOT_PREL_SYM) + { + output_addr_const (fp, XVECEXP (x, 0, 0)); + fputs ("(GOT_PREL)+(", fp); + 
output_addr_const (fp, XVECEXP (x, 0, 1)); + fputc (')', fp); + return TRUE; + } else if (GET_CODE (x) == CONST_VECTOR) return arm_emit_vector_const (fp, x); @@ -31166,4 +31197,195 @@ arm_const_not_ok_for_debug_p (rtx p) return false; } +/* Return the rtx stored in cfun->machine->pic_reg for the current function. */ rtx +arm_get_pic_reg (void) +{ + return cfun->machine->pic_reg; +} + +/* Clear the pic_reg to NULL. */ +void +arm_clear_pic_reg (void) +{ + cfun->machine->pic_reg = NULL_RTX; +} + +/* Determine if it is profitable to simplify GOT accesses. + + The default global address loading instructions are: + + ldr r3, .L2 # A + ldr r2, .L2+4 # B +.LPIC0: + add r3, pc # A + ldr r4, [r3, r2] # B + ... +.L2: + .word _GLOBAL_OFFSET_TABLE_-(.LPIC0+4) # A + .word i(GOT) # S + + The new instruction sequence is: + + ldr r3, .L2 # C +.LPIC0: + add r3, pc # C + ldr r3, [r3] # C + ... +.L2: + .word i(GOT_PREL)+(.-(.LPIC0+4)) # C + + Suppose the number of global address loads is n and the number of + accessed global symbols is s; this function should return + + cost(A) + cost(B) * n + cost(S) * s >= cost(C) * n + + From the above code snippets, we can see that + + cost(A) = INSN_LENGTH * 2 + WORD_LENGTH + cost(B) = INSN_LENGTH * 2 + cost(S) = WORD_LENGTH + cost(C) = INSN_LENGTH * 3 + WORD_LENGTH + + The length of instruction depends on the target instruction set. N_SYMBOL is s and N_ACCESS is n in the formula above. */ + +#define N_INSNS_A 2 +#define N_INSNS_B 2 +#define N_INSNS_C 3 + +bool +arm_can_simplify_got_access (int n_symbol, int n_access) +{ + /* The cost model counts bytes: 2 per instruction when TARGET_THUMB, 4 otherwise, plus UNITS_PER_WORD for each literal word. */ int insn_len = TARGET_THUMB ? 2 : 4; + int cost_A = insn_len * N_INSNS_A + UNITS_PER_WORD; + int cost_B = insn_len * N_INSNS_B; + int cost_S = UNITS_PER_WORD; + int cost_C = insn_len * N_INSNS_C + UNITS_PER_WORD; + + /* True when the default sequence costs at least as much as the new one. */ return cost_A + cost_B * n_access + cost_S * n_symbol >= cost_C * n_access; +} + +/* Detect if INSN loads a global address. If so returns the symbol. + If the GOT offset is loaded in a separate instruction, sets the + corresponding OFFSET_REG and OFFSET_INSN. Otherwise fills with NULL. 
*/
+rtx
+arm_loaded_global_var (rtx insn, rtx *offset_reg, rtx *offset_insn)
+{
+  rtx set = single_set (insn);
+  rtx pic_reg = cfun->machine->pic_reg;
+  gcc_assert (pic_reg);
+
+  /* Start from the "no separate offset insn" state so that every
+     NULL_RTX return leaves the outputs NULL instead of untouched or
+     half-matched.  */
+  *offset_reg = NULL;
+  *offset_insn = NULL;
+
+  /* Global address loading instruction has the pattern:
+       (SET address_reg (MEM (PLUS pic_reg offset_reg)))  */
+  if (set && MEM_P (SET_SRC (set))
+      && (GET_CODE (XEXP (SET_SRC (set), 0)) == PLUS))
+    {
+      unsigned int regno;
+      df_ref def;
+      rtx def_insn = NULL;
+      rtx src;
+      rtx plus = XEXP (SET_SRC (set), 0);
+      rtx op0 = XEXP (plus, 0);
+      rtx op1 = XEXP (plus, 1);
+
+      /* Accept the PIC register as either operand of the PLUS; after
+         this swap OP0 is the candidate PIC register.  */
+      if (op1 == pic_reg)
+        {
+          rtx tmp = op0;
+          op0 = op1;
+          op1 = tmp;
+        }
+
+      if (op0 != pic_reg)
+        return NULL_RTX;
+
+      if (REG_P (op1))
+        {
+          /* The offset register must have exactly one use and one
+             definition.  */
+          regno = REGNO (op1);
+          if ((DF_REG_USE_COUNT (regno) != 1)
+              || (DF_REG_DEF_COUNT (regno) != 1))
+            return NULL_RTX;
+
+          /* The offset loading insn has the pattern:
+             (SET offset_reg (UNSPEC [symbol] UNSPEC_PIC_SYM))  */
+          def = DF_REG_DEF_CHAIN (regno);
+          def_insn = DF_REF_INSN (def);
+          set = single_set (def_insn);
+          /* single_set yields NULL when the defining insn is not a
+             single SET, so test it before reading the destination.  */
+          if (!set || SET_DEST (set) != op1)
+            return NULL_RTX;
+
+          src = SET_SRC (set);
+        }
+      else
+        src = op1;
+
+      if ((GET_CODE (src) != UNSPEC) || (XINT (src, 1) != UNSPEC_PIC_SYM))
+        return NULL_RTX;
+
+      /* Full match: only now report the separate offset insn, if any.  */
+      if (def_insn)
+        {
+          *offset_reg = op1;
+          *offset_insn = def_insn;
+        }
+
+      return RTVEC_ELT (XVEC (src, 0), 0);
+    }
+
+  return NULL_RTX;
+}
+
+/* Rewrite the global address loading instructions.
+   SYMBOL is the global variable.  OFFSET_REG contains the offset of the
+   GOT entry.  ADDRESS_REG will receive the final global address.
+   LOAD_INSN is the original insn which loads the address from GOT.
+   OFFSET_INSN is the original insn which sets OFFSET_REG.
+   If the GOT offset is not loaded in a separate instruction, OFFSET_REG
+   and OFFSET_INSN should be NULL.
*/ +void +arm_load_global_address (rtx symbol, rtx offset_reg, + rtx address_reg, rtx load_insn, rtx offset_insn) +{ + rtx offset, got_prel, new_insn; + rtx labelno = GEN_INT (pic_labelno++); + rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); + rtx set = single_set (load_insn); + + /* By default reuse OFFSET_REG as the scratch register and emit the new insns after OFFSET_INSN. */ rtx tmp_reg = offset_reg; + rtx insert_pos = offset_insn; + if (offset_reg == NULL) + { + /* No separate offset insn: build the GOT entry address in ADDRESS_REG itself and emit the new insns just before LOAD_INSN. */ tmp_reg = address_reg; + insert_pos = PREV_INSN (load_insn); + } + + /* The first insn: + (SET tmp_reg (address_of_GOT_entry(symbol) - pc)) + The expression (address_of_GOT_entry(symbol) - pc) is expressed by + got_prel, which is actually represented by R_ARM_GOT_PREL relocation. */ + l1 = gen_rtx_CONST (VOIDmode, l1); + /* The 8 or 4 here pairs with the pic_add_dot_plus_eight / pic_add_dot_plus_four insn selected by TARGET_ARM below. */ l1 = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4); + offset = gen_rtx_MINUS (VOIDmode, pc_rtx, l1); + got_prel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, offset), + UNSPEC_GOT_PREL_SYM); + got_prel = gen_rtx_CONST (Pmode, got_prel); + if (TARGET_32BIT) + new_insn = emit_insn_after (gen_pic_load_addr_32bit (tmp_reg, got_prel), + insert_pos); + else + new_insn = emit_insn_after (gen_pic_load_addr_thumb1 (tmp_reg, got_prel), + insert_pos); + + /* The second insn: + (SET tmp_reg (PLUS tmp_reg pc_rtx)) */ + if (TARGET_ARM) + emit_insn_after (gen_pic_add_dot_plus_eight (tmp_reg, tmp_reg, labelno), + new_insn); + else + emit_insn_after (gen_pic_add_dot_plus_four (tmp_reg, tmp_reg, labelno), + new_insn); + + /* The last insn to access the GOT entry: + (SET address_reg (MEM tmp_reg)) + We reuse the existing load instruction: its MEM address is replaced in place and the insn is rescanned for dataflow. */ + XEXP (SET_SRC (set), 0) = tmp_reg; + df_insn_rescan (load_insn); +} + #include "gt-arm.h" diff --git a/gcc-4.9/gcc/config/arm/unspecs.md b/gcc-4.9/gcc/config/arm/unspecs.md index 8caa953bc..87edf18e2 100644 --- a/gcc-4.9/gcc/config/arm/unspecs.md +++ b/gcc-4.9/gcc/config/arm/unspecs.md @@ -83,6 +83,7 @@ ; FPSCR rounding mode and signal inexactness. 
UNSPEC_VRINTA ; Represent a float to integral float rounding ; towards nearest, ties away from zero. + UNSPEC_GOT_PREL_SYM ; Specify an R_ARM_GOT_PREL relocation of a symbol ]) (define_c_enum "unspec" [ -- cgit v1.2.3