From 331e362574142e4c1d9d509533d1c96b6dc54d13 Mon Sep 17 00:00:00 2001 From: Carrot Wei Date: Thu, 15 May 2014 10:06:33 +0800 Subject: [4.8, 4.9] Add simplify-got This pass optimize GOT_PREL (already exists in toolchain/gcc/gcc-4.6) Backport from svn://gcc.gnu.org/svn/gcc/branches/google/gcc-4_6-mobile UNSPEC_GOT_PREL_SYM is now in new file arm/unspecs.md 4.9 port is slightly different due to changes in gcc passes See Google b/14811006 r173209 | carrot | 2011-04-30 16:07:46 +0800 (Sat, 30 Apr 2011) | 21 lines * hooks.c (hook_rtx_void_null): New function. * hooks.h (hook_rtx_void_null): New prototype. * target.def (got_access): New hook vector declaration. * tree-pass.h (pass_simplify_got): New pass. * timevar.def (TV_SIMPLIFY_GOT): New TV id. * simplify-got.c: New source file. * Makefile.in (simplify-got.o): Add a new file. * passes.c (init_optimization_passes): Add a new pass. * config/arm/arm.c (arm_output_addr_const_extra): Output GOT_PREL relocation. (arm_get_pic_reg): New function. (arm_clear_pic_reg): New function. (arm_can_simplify_got_access): New function. (arm_loaded_global_var): New function. (arm_load_global_address): New function. * config/arm/arm.md (UNSPEC_GOT_PREL_SYM): New UNSPEC symbol. * testsuite/gcc.target/arm/got1.c: New testcase. * testsuite/gcc.target/arm/got2.c: New testcase. Change-Id: I91e881df19bb6937a5fbcc8e6b83d158717c7773 --- gcc-4.8/gcc/Makefile.in | 3 + gcc-4.8/gcc/config/arm/arm.c | 220 +++++++++++++++++++++++++++ gcc-4.8/gcc/config/arm/unspecs.md | 1 + gcc-4.8/gcc/doc/tm.texi | 37 +++++ gcc-4.8/gcc/doc/tm.texi.in | 37 +++++ gcc-4.8/gcc/hooks.c | 7 + gcc-4.8/gcc/hooks.h | 1 + gcc-4.8/gcc/passes.c | 1 + gcc-4.8/gcc/simplify-got.c | 197 ++++++++++++++++++++++++ gcc-4.8/gcc/target.def | 45 ++++++ gcc-4.8/gcc/testsuite/gcc.target/arm/got1.c | 10 ++ gcc-4.8/gcc/testsuite/gcc.target/arm/got2.c | 11 ++ gcc-4.8/gcc/timevar.def | 1 + gcc-4.8/gcc/tree-pass.h | 1 + gcc-4.9/gcc/Makefile.in | 1 + gcc-4.9/gcc/config/arm/arm.c | 222 ++++++++++++++++++++++++++++ gcc-4.9/gcc/config/arm/unspecs.md | 1 + gcc-4.9/gcc/doc/tm.texi | 37 +++++ gcc-4.9/gcc/doc/tm.texi.in | 37 +++++ gcc-4.9/gcc/hooks.c | 7 + gcc-4.9/gcc/hooks.h | 1 + gcc-4.9/gcc/passes.def | 1 + gcc-4.9/gcc/simplify-got.c | 216 +++++++++++++++++++++++++++ gcc-4.9/gcc/target.def | 45 ++++++ gcc-4.9/gcc/testsuite/gcc.target/arm/got1.c | 10 ++ gcc-4.9/gcc/testsuite/gcc.target/arm/got2.c | 11 ++ gcc-4.9/gcc/timevar.def | 1 + gcc-4.9/gcc/tree-pass.h | 1 + 28 files changed, 1163 insertions(+) create mode 100644 gcc-4.8/gcc/simplify-got.c create mode 100644 gcc-4.8/gcc/testsuite/gcc.target/arm/got1.c create mode 100644 gcc-4.8/gcc/testsuite/gcc.target/arm/got2.c create mode 100644 gcc-4.9/gcc/simplify-got.c create mode 100644 gcc-4.9/gcc/testsuite/gcc.target/arm/got1.c create mode 100644 gcc-4.9/gcc/testsuite/gcc.target/arm/got2.c diff --git a/gcc-4.8/gcc/Makefile.in b/gcc-4.8/gcc/Makefile.in index 045de7594..25532a1a9 100644 --- a/gcc-4.8/gcc/Makefile.in +++ b/gcc-4.8/gcc/Makefile.in @@ -1345,6 +1345,7 @@ OBJS = \ sel-sched-dump.o \ sel-sched.o \ sese.o \ + simplify-got.o \ simplify-rtx.o \ sparseset.o \ sreal.o \ @@ -2854,6 +2855,8 @@ jump.o : jump.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(RTL_H) \ $(EXCEPT_H) $(FUNCTION_H) $(BASIC_BLOCK_H) $(TREE_PASS_H) \ $(DIAGNOSTIC_CORE_H) $(DIAGNOSTIC_CORE_H) $(INSN_ATTR_H) $(TM_P_H) reload.h \ $(PREDICT_H) $(TARGET_H) +simplify-got.o : simplify-got.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ + $(RTL_H) $(FLAGS_H) $(TREE_PASS_H) $(TIMEVAR_H) $(TARGET_H) $(HASHTAB_H) simplify-rtx.o : simplify-rtx.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \ $(RTL_H) $(REGS_H) hard-reg-set.h $(FLAGS_H) insn-config.h \ $(RECOG_H) $(EXPR_H) $(DIAGNOSTIC_CORE_H) $(FUNCTION_H) $(GGC_H) $(TM_P_H) \ diff --git a/gcc-4.8/gcc/config/arm/arm.c b/gcc-4.8/gcc/config/arm/arm.c index 8dd5107ed..e8d83e0f3 100644 --- a/gcc-4.8/gcc/config/arm/arm.c +++ b/gcc-4.8/gcc/config/arm/arm.c @@ -280,6 +280,12 @@ static unsigned arm_add_stmt_cost (void *data, int count, static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1, bool op0_preserve_value); +static rtx arm_get_pic_reg (void); +static void arm_clear_pic_reg (void); +static bool arm_can_simplify_got_access (int, int); +static rtx arm_loaded_global_var (rtx, rtx *, rtx *); +static void arm_load_global_address (rtx, rtx, rtx, rtx, rtx); + /* Table of machine attributes. */ static const struct attribute_spec arm_attribute_table[] = @@ -645,6 +651,21 @@ static const struct attribute_spec arm_attribute_table[] = #undef TARGET_VECTORIZE_ADD_STMT_COST #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost +#undef TARGET_GET_PIC_REG +#define TARGET_GET_PIC_REG arm_get_pic_reg + +#undef TARGET_CLEAR_PIC_REG +#define TARGET_CLEAR_PIC_REG arm_clear_pic_reg + +#undef TARGET_LOADED_GLOBAL_VAR +#define TARGET_LOADED_GLOBAL_VAR arm_loaded_global_var + +#undef TARGET_CAN_SIMPLIFY_GOT_ACCESS +#define TARGET_CAN_SIMPLIFY_GOT_ACCESS arm_can_simplify_got_access + +#undef TARGET_LOAD_GLOBAL_ADDRESS +#define TARGET_LOAD_GLOBAL_ADDRESS arm_load_global_address + #undef TARGET_CANONICALIZE_COMPARISON #define TARGET_CANONICALIZE_COMPARISON \ arm_canonicalize_comparison @@ -25682,6 +25703,14 @@ arm_output_addr_const_extra (FILE *fp, rtx x) fputc (')', fp); return TRUE; } + else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOT_PREL_SYM) + { + output_addr_const (fp, XVECEXP (x, 0, 0)); + fputs ("(GOT_PREL)+(", fp); + output_addr_const (fp, XVECEXP (x, 0, 1)); + fputc (')', fp); + return TRUE; + } else if (GET_CODE (x) == CONST_VECTOR) return arm_emit_vector_const (fp, x); @@ -27463,4 +27492,195 @@ arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2) } +rtx +arm_get_pic_reg (void) +{ + return cfun->machine->pic_reg; +} + +/* Clear the pic_reg to NULL. */ +void +arm_clear_pic_reg (void) +{ + cfun->machine->pic_reg = NULL_RTX; +} + +/* Determine if it is profitable to simplify GOT accesses. + + The default global address loading instructions are: + + ldr r3, .L2 # A + ldr r2, .L2+4 # B +.LPIC0: + add r3, pc # A + ldr r4, [r3, r2] # B + ... +.L2: + .word _GLOBAL_OFFSET_TABLE_-(.LPIC0+4) # A + .word i(GOT) # S + + The new instruction sequence is: + + ldr r3, .L2 # C +.LPIC0: + add r3, pc # C + ldr r3, [r3] # C + ... +.L2: + i(GOT_PREL)+(.-(.LPIC0+4)) # C + + Suppose the number of global address loading is n, the number of + accessed global symbol is s, this function should return + + cost(A) + cost(B) * n + cost(S) * s >= cost(C) * n + + From the above code snippets, we can see that + + cost(A) = INSN_LENGTH * 2 + WORD_LENGTH + cost(B) = INSN_LENGTH * 2 + cost(S) = WORD_LENGTH + cost(C) = INSN_LENGTH * 3 + WORD_LENGTH + + The length of instruction depends on the target instruction set. */ + +#define N_INSNS_A 2 +#define N_INSNS_B 2 +#define N_INSNS_C 3 + +bool +arm_can_simplify_got_access (int n_symbol, int n_access) +{ + int insn_len = TARGET_THUMB ? 2 : 4; + int cost_A = insn_len * N_INSNS_A + UNITS_PER_WORD; + int cost_B = insn_len * N_INSNS_B; + int cost_S = UNITS_PER_WORD; + int cost_C = insn_len * N_INSNS_C + UNITS_PER_WORD; + + return cost_A + cost_B * n_access + cost_S * n_symbol >= cost_C * n_access; +} + +/* Detect if INSN loads a global address. If so returns the symbol. + If the GOT offset is loaded in a separate instruction, sets the + corresponding OFFSET_REG and OFFSET_INSN. Otherwise fills with NULL. */ +rtx +arm_loaded_global_var (rtx insn, rtx *offset_reg, rtx *offset_insn) +{ + rtx set = single_set (insn); + rtx pic_reg = cfun->machine->pic_reg; + gcc_assert (pic_reg); + + /* Global address loading instruction has the pattern: + (SET address_reg (MEM (PLUS pic_reg offset_reg))) */ + if (set && MEM_P (SET_SRC (set)) + && (GET_CODE (XEXP (SET_SRC (set),0)) == PLUS)) + { + unsigned int regno; + df_ref def; + rtx def_insn; + rtx src; + rtx plus = XEXP (SET_SRC (set),0); + rtx op0 = XEXP (plus, 0); + rtx op1 = XEXP (plus, 1); + if (op1 == pic_reg) + { + rtx tmp = op0; + op0 = op1; + op1 = tmp; + } + + if (op0 != pic_reg) + return NULL_RTX; + + if (REG_P (op1)) + { + regno = REGNO (op1); + if ((DF_REG_USE_COUNT (regno) != 1) + || (DF_REG_DEF_COUNT (regno) != 1)) + return NULL_RTX; + + /* The offset loading insn has the pattern: + (SET offset_reg (UNSPEC [symbol] UNSPEC_PIC_SYM)) */ + def = DF_REG_DEF_CHAIN (regno); + def_insn = DF_REF_INSN (def); + set = single_set (def_insn); + if (SET_DEST (set) != op1) + return NULL_RTX; + + src = SET_SRC (set); + *offset_reg = op1; + *offset_insn = def_insn; + } + else + { + src = op1; + *offset_reg = NULL; + *offset_insn = NULL; + } + + if ((GET_CODE (src) != UNSPEC) || (XINT (src, 1) != UNSPEC_PIC_SYM)) + return NULL_RTX; + + return RTVEC_ELT (XVEC (src, 0), 0); + } + + return NULL_RTX; +} + +/* Rewrite the global address loading instructions. + SYMBOL is the global variable. OFFSET_REG contains the offset of the + GOT entry. ADDRESS_REG will receive the final global address. + LOAD_INSN is the original insn which loads the address from GOT. + OFFSET_INSN is the original insn which sets OFFSET_REG. + If the GOT offset is not loaded in a separate instruction, OFFSET_REG + and OFFSET_INSN should be NULL. */ +void +arm_load_global_address (rtx symbol, rtx offset_reg, + rtx address_reg, rtx load_insn, rtx offset_insn) +{ + rtx offset, got_prel, new_insn; + rtx labelno = GEN_INT (pic_labelno++); + rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); + rtx set = single_set (load_insn); + + rtx tmp_reg = offset_reg; + rtx insert_pos = offset_insn; + if (offset_reg == NULL) + { + tmp_reg = address_reg; + insert_pos = PREV_INSN (load_insn); + } + + /* The first insn: + (SET tmp_reg (address_of_GOT_entry(symbol) - pc)) + The expression (address_of_GOT_entry(symbol) - pc) is expressed by + got_prel, which is actually represented by R_ARM_GOT_PREL relocation. */ + l1 = gen_rtx_CONST (VOIDmode, l1); + l1 = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4); + offset = gen_rtx_MINUS (VOIDmode, pc_rtx, l1); + got_prel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, offset), + UNSPEC_GOT_PREL_SYM); + got_prel = gen_rtx_CONST (Pmode, got_prel); + if (TARGET_32BIT) + new_insn = emit_insn_after (gen_pic_load_addr_32bit (tmp_reg, got_prel), + insert_pos); + else + new_insn = emit_insn_after (gen_pic_load_addr_thumb1 (tmp_reg, got_prel), + insert_pos); + + /* The second insn: + (SET tmp_reg (PLUS tmp_reg pc_rtx)) */ + if (TARGET_ARM) + emit_insn_after (gen_pic_add_dot_plus_eight (tmp_reg, tmp_reg, labelno), + new_insn); + else + emit_insn_after (gen_pic_add_dot_plus_four (tmp_reg, tmp_reg, labelno), + new_insn); + + /* The last insn to access the GOT entry: + (SET address_reg (MEM tmp_reg)) + We reuse the existed load instruction. */ + XEXP (SET_SRC (set), 0) = tmp_reg; + df_insn_rescan (load_insn); +} + #include "gt-arm.h" diff --git a/gcc-4.8/gcc/config/arm/unspecs.md b/gcc-4.8/gcc/config/arm/unspecs.md index 398591277..b4547911b 100644 --- a/gcc-4.8/gcc/config/arm/unspecs.md +++ b/gcc-4.8/gcc/config/arm/unspecs.md @@ -83,6 +83,7 @@ ; FPSCR rounding mode and signal inexactness. UNSPEC_VRINTA ; Represent a float to integral float rounding ; towards nearest, ties away from zero. + UNSPEC_GOT_PREL_SYM ; Specify an R_ARM_GOT_PREL relocation of a symbol ]) (define_c_enum "unspec" [ diff --git a/gcc-4.8/gcc/doc/tm.texi b/gcc-4.8/gcc/doc/tm.texi index cbbc82dfe..48df7ce24 100644 --- a/gcc-4.8/gcc/doc/tm.texi +++ b/gcc-4.8/gcc/doc/tm.texi @@ -9914,6 +9914,43 @@ default, inlining is not allowed if the callee function has function specific target options and the caller does not use the same options. @end deftypefn +@deftypefn {Target Hook} rtx TARGET_GET_PIC_REG (void) +Return the pic_reg pseudo register which holds the base address of GOT. + It is only required by the simplify-got optimization. +@end deftypefn + +@deftypefn {Target Hook} void TARGET_CLEAR_PIC_REG (void) +After successful simplify-got optimization, the pic_reg is useless. So a + target can use this hook to clear pic_reg. +@end deftypefn + +@deftypefn {Target Hook} rtx TARGET_LOADED_GLOBAL_VAR (rtx @var{insn}, rtx *@var{offset_reg}, rtx *@var{offset_insn}) +This hook is used to detect if the given @var{insn} loads a global + variable's address from GOT with the form of + @smallexample + (set @var{address_reg} (mem (plus pic_reg @var{offset_reg}))) + @end smallexample + If so return the global variable whose address will be loaded and fill in + @var{offset_insn} and @var{offset_reg}. @var{offset_reg} is set at + @var{offset_insn} to hold the offset from GOT base to the GOT entry of the + global variable. Otherwise return @code{NULL_RTX}. +@end deftypefn + +@deftypefn {Target Hook} bool TARGET_CAN_SIMPLIFY_GOT_ACCESS (int @var{n_symbol}, int @var{n_access}) +This hook determines if it satisfy the target dependent conditions to do + simplify-got when given the number of global variable accessing and the + number of accessed symbols. If the returned value is false the GOT access + insns will not be rewritten. Otherwise we will rewrite these insns. +@end deftypefn + +@deftypefn {Target Hook} void TARGET_LOAD_GLOBAL_ADDRESS (rtx @var{symbol}, rtx @var{offset_reg}, rtx @var{address_reg}, rtx @var{load_insn}, rtx @var{offset_insn}) +This hook does the actual rewriting of GOT access insn @var{load_insn}. + The global variable is @var{symbol}. The global address should be loaded + into @var{address_reg}. The register @var{offset_reg} was previously set + in insn @var{offset_insn} to hold the offset from GOT base to the GOT + entry of the global variable. Now it can be used as a scratch register. +@end deftypefn + @node Emulated TLS @section Emulating TLS @cindex Emulated TLS diff --git a/gcc-4.8/gcc/doc/tm.texi.in b/gcc-4.8/gcc/doc/tm.texi.in index dfba947f5..6b5c45b2b 100644 --- a/gcc-4.8/gcc/doc/tm.texi.in +++ b/gcc-4.8/gcc/doc/tm.texi.in @@ -9775,6 +9775,43 @@ default, inlining is not allowed if the callee function has function specific target options and the caller does not use the same options. @end deftypefn +@hook TARGET_GET_PIC_REG +Return the pic_reg pseudo register which holds the base address of GOT. + It is only required by the simplify-got optimization. +@end deftypefn + +@hook TARGET_CLEAR_PIC_REG +After successful simplify-got optimization, the pic_reg is useless. So a + target can use this hook to clear pic_reg. +@end deftypefn + +@hook TARGET_LOADED_GLOBAL_VAR +This hook is used to detect if the given @var{insn} loads a global + variable's address from GOT with the form of + @smallexample + (set @var{address_reg} (mem (plus pic_reg @var{offset_reg}))) + @end smallexample + If so return the global variable whose address will be loaded and fill in + @var{offset_insn} and @var{offset_reg}. @var{offset_reg} is set at + @var{offset_insn} to hold the offset from GOT base to the GOT entry of the + global variable. Otherwise return @code{NULL_RTX}. +@end deftypefn + +@hook TARGET_CAN_SIMPLIFY_GOT_ACCESS +This hook determines if it satisfy the target dependent conditions to do + simplify-got when given the number of global variable accessing and the + number of accessed symbols. If the returned value is false the GOT access + insns will not be rewritten. Otherwise we will rewrite these insns. +@end deftypefn + +@hook TARGET_LOAD_GLOBAL_ADDRESS +This hook does the actual rewriting of GOT access insn @var{load_insn}. + The global variable is @var{symbol}. The global address should be loaded + into @var{address_reg}. The register @var{offset_reg} was previously set + in insn @var{offset_insn} to hold the offset from GOT base to the GOT + entry of the global variable. Now it can be used as a scratch register. +@end deftypefn + @node Emulated TLS @section Emulating TLS @cindex Emulated TLS diff --git a/gcc-4.8/gcc/hooks.c b/gcc-4.8/gcc/hooks.c index 3b54dfa54..81948071e 100644 --- a/gcc-4.8/gcc/hooks.c +++ b/gcc-4.8/gcc/hooks.c @@ -344,6 +344,13 @@ hook_rtx_tree_int_null (tree a ATTRIBUTE_UNUSED, int b ATTRIBUTE_UNUSED) return NULL; } +/* Generic hook that returns NULL_RTX. */ +rtx +hook_rtx_void_null (void) +{ + return NULL; +} + /* Generic hook that takes three trees and returns the last one as is. */ tree hook_tree_tree_tree_tree_3rd_identity (tree a ATTRIBUTE_UNUSED, diff --git a/gcc-4.8/gcc/hooks.h b/gcc-4.8/gcc/hooks.h index 50bcc6afc..558d04990 100644 --- a/gcc-4.8/gcc/hooks.h +++ b/gcc-4.8/gcc/hooks.h @@ -95,6 +95,7 @@ extern bool default_can_output_mi_thunk_no_vcall (const_tree, HOST_WIDE_INT, extern rtx hook_rtx_rtx_identity (rtx); extern rtx hook_rtx_rtx_null (rtx); extern rtx hook_rtx_tree_int_null (tree, int); +extern rtx hook_rtx_void_null (void); extern const char *hook_constcharptr_void_null (void); extern const char *hook_constcharptr_const_tree_null (const_tree); diff --git a/gcc-4.8/gcc/passes.c b/gcc-4.8/gcc/passes.c index 61f8a118a..4a7f8d2ae 100644 --- a/gcc-4.8/gcc/passes.c +++ b/gcc-4.8/gcc/passes.c @@ -1601,6 +1601,7 @@ init_optimization_passes (void) NEXT_PASS (pass_rtl_loop_done); *p = NULL; } + NEXT_PASS (pass_simplify_got); NEXT_PASS (pass_web); NEXT_PASS (pass_rtl_cprop); NEXT_PASS (pass_cse2); diff --git a/gcc-4.8/gcc/simplify-got.c b/gcc-4.8/gcc/simplify-got.c new file mode 100644 index 000000000..e822fa736 --- /dev/null +++ b/gcc-4.8/gcc/simplify-got.c @@ -0,0 +1,197 @@ +/* Simplify the code to load global variable's address from GOT. + Copyright (C) 2011 + Free Software Foundation, Inc. + Contributed by Wei Guozhi . + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* This file contains optimization for global variable's address loading + from GOT. + + When generating PIC code, we need to load global variable's address from + GOT. Many targets do this as following: + + (set pic_reg ...) # load the base address of GOT into pic_reg. + ... + (set off_set ...) # load the offset from the base of GOT to + # a global variable's GOT entry. + (set address # load the address from GOT. + (mem (plus pic_reg off_set))) + ... + + If the target has an alternative method (usually uses a different + relocation) to load the global address and in some cases it has less + cost and avoid the pic_reg, we can use this pass to improve it. + + In order to employ this optimization the target must satisfy the + following constraints: + + 1. There should be at least 2 methods to load a global variable's + address from GOT. + + 2. By default all global variables accesses use the method described + above. + + 3. There is a target dependent situation that the alternative method is + better when considering the number of global variable accesses and + the number of accessed variables. + + 4. The alternative method doesn't use the base of GOT (pic_reg). +*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "flags.h" +#include "target.h" +#include "tree-pass.h" +#include "df.h" +#include "timevar.h" + +#define VAR_TABLE_SIZE 10 + +/* Information needed when rewrite the GOT access insns. */ +struct got_access_info +{ + rtx symbol; /* The global variable. */ + rtx offset_reg; /* Register contains the GOT entry offset. */ + rtx address_reg; /* Register contains the final global address. */ + rtx offset_insn; /* The insn loads the offset. */ + rtx load_insn; /* The insn which loads the address from GOT. */ +}; + +/* This optimization is enabled only when the pic_reg is actually used. */ +static bool +gate_handle_simplify_got (void) +{ + return optimize && targetm.got_access.get_pic_reg (); +} + +static unsigned int +rest_of_handle_simplify_got (void) +{ + df_ref ref; + rtx use = NULL_RTX; + int i, n_symbol, n_access = 0; + struct got_access_info* got_accesses; + htab_t var_table = htab_create (VAR_TABLE_SIZE, + htab_hash_pointer, + htab_eq_pointer, + NULL); + rtx pic_reg = targetm.got_access.get_pic_reg (); + gcc_assert (pic_reg); + + ref = DF_REG_USE_CHAIN (REGNO (pic_reg)); + got_accesses = XNEWVEC(struct got_access_info, + DF_REG_USE_COUNT (REGNO (pic_reg))); + + /* Check if all uses of pic_reg are loading global address through the + default method. */ + while (ref) + { + rtx insn = DF_REF_INSN (ref); + + /* Check for the special USE insn, it is not a real usage of pic_reg. */ + if (GET_CODE (PATTERN (insn)) == USE) + use = insn; + else + { + /* If an insn both set and use pic_reg, it is in the process of + constructing the value of pic_reg. We should also ignore it. */ + rtx set = single_set (insn); + if (!(set && SET_DEST (set) == pic_reg)) + { + rtx offset_reg; + rtx offset_insn; + rtx symbol = targetm.got_access.loaded_global_var (insn, + &offset_reg, + &offset_insn); + if (symbol) + { + rtx* slot = (rtx*) htab_find_slot (var_table, symbol, INSERT); + if (*slot == HTAB_EMPTY_ENTRY) + *slot = symbol; + + gcc_assert (set); + got_accesses[n_access].symbol = symbol; + got_accesses[n_access].offset_reg = offset_reg; + got_accesses[n_access].address_reg = SET_DEST (set); + got_accesses[n_access].load_insn = insn; + got_accesses[n_access].offset_insn = offset_insn; + n_access++; + } + else + { + /* This insn doesn't load a global address, but it has + other unexpected usage of pic_reg, give up. */ + free (got_accesses); + htab_delete (var_table); + return 0; + } + } + } + ref = DF_REF_NEXT_REG(ref); + } + + /* Check if we can simplify it. */ + n_symbol = htab_elements (var_table); + gcc_assert (n_symbol <= n_access); + if (!targetm.got_access.can_simplify_got_access (n_symbol, n_access)) + { + free (got_accesses); + htab_delete (var_table); + return 0; + } + + /* Rewrite the global address loading insns. */ + for (i=0; imachine->pic_reg; +} + +/* Clear the pic_reg to NULL. */ +void +arm_clear_pic_reg (void) +{ + cfun->machine->pic_reg = NULL_RTX; +} + +/* Determine if it is profitable to simplify GOT accesses. + + The default global address loading instructions are: + + ldr r3, .L2 # A + ldr r2, .L2+4 # B +.LPIC0: + add r3, pc # A + ldr r4, [r3, r2] # B + ... +.L2: + .word _GLOBAL_OFFSET_TABLE_-(.LPIC0+4) # A + .word i(GOT) # S + + The new instruction sequence is: + + ldr r3, .L2 # C +.LPIC0: + add r3, pc # C + ldr r3, [r3] # C + ... +.L2: + i(GOT_PREL)+(.-(.LPIC0+4)) # C + + Suppose the number of global address loading is n, the number of + accessed global symbol is s, this function should return + + cost(A) + cost(B) * n + cost(S) * s >= cost(C) * n + + From the above code snippets, we can see that + + cost(A) = INSN_LENGTH * 2 + WORD_LENGTH + cost(B) = INSN_LENGTH * 2 + cost(S) = WORD_LENGTH + cost(C) = INSN_LENGTH * 3 + WORD_LENGTH + + The length of instruction depends on the target instruction set. */ + +#define N_INSNS_A 2 +#define N_INSNS_B 2 +#define N_INSNS_C 3 + +bool +arm_can_simplify_got_access (int n_symbol, int n_access) +{ + int insn_len = TARGET_THUMB ? 2 : 4; + int cost_A = insn_len * N_INSNS_A + UNITS_PER_WORD; + int cost_B = insn_len * N_INSNS_B; + int cost_S = UNITS_PER_WORD; + int cost_C = insn_len * N_INSNS_C + UNITS_PER_WORD; + + return cost_A + cost_B * n_access + cost_S * n_symbol >= cost_C * n_access; +} + +/* Detect if INSN loads a global address. If so returns the symbol. + If the GOT offset is loaded in a separate instruction, sets the + corresponding OFFSET_REG and OFFSET_INSN. Otherwise fills with NULL. */ +rtx +arm_loaded_global_var (rtx insn, rtx *offset_reg, rtx *offset_insn) +{ + rtx set = single_set (insn); + rtx pic_reg = cfun->machine->pic_reg; + gcc_assert (pic_reg); + + /* Global address loading instruction has the pattern: + (SET address_reg (MEM (PLUS pic_reg offset_reg))) */ + if (set && MEM_P (SET_SRC (set)) + && (GET_CODE (XEXP (SET_SRC (set),0)) == PLUS)) + { + unsigned int regno; + df_ref def; + rtx def_insn; + rtx src; + rtx plus = XEXP (SET_SRC (set),0); + rtx op0 = XEXP (plus, 0); + rtx op1 = XEXP (plus, 1); + if (op1 == pic_reg) + { + rtx tmp = op0; + op0 = op1; + op1 = tmp; + } + + if (op0 != pic_reg) + return NULL_RTX; + + if (REG_P (op1)) + { + regno = REGNO (op1); + if ((DF_REG_USE_COUNT (regno) != 1) + || (DF_REG_DEF_COUNT (regno) != 1)) + return NULL_RTX; + + /* The offset loading insn has the pattern: + (SET offset_reg (UNSPEC [symbol] UNSPEC_PIC_SYM)) */ + def = DF_REG_DEF_CHAIN (regno); + def_insn = DF_REF_INSN (def); + set = single_set (def_insn); + if (SET_DEST (set) != op1) + return NULL_RTX; + + src = SET_SRC (set); + *offset_reg = op1; + *offset_insn = def_insn; + } + else + { + src = op1; + *offset_reg = NULL; + *offset_insn = NULL; + } + + if ((GET_CODE (src) != UNSPEC) || (XINT (src, 1) != UNSPEC_PIC_SYM)) + return NULL_RTX; + + return RTVEC_ELT (XVEC (src, 0), 0); + } + + return NULL_RTX; +} + +/* Rewrite the global address loading instructions. + SYMBOL is the global variable. OFFSET_REG contains the offset of the + GOT entry. ADDRESS_REG will receive the final global address. + LOAD_INSN is the original insn which loads the address from GOT. + OFFSET_INSN is the original insn which sets OFFSET_REG. + If the GOT offset is not loaded in a separate instruction, OFFSET_REG + and OFFSET_INSN should be NULL. */ +void +arm_load_global_address (rtx symbol, rtx offset_reg, + rtx address_reg, rtx load_insn, rtx offset_insn) +{ + rtx offset, got_prel, new_insn; + rtx labelno = GEN_INT (pic_labelno++); + rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); + rtx set = single_set (load_insn); + + rtx tmp_reg = offset_reg; + rtx insert_pos = offset_insn; + if (offset_reg == NULL) + { + tmp_reg = address_reg; + insert_pos = PREV_INSN (load_insn); + } + + /* The first insn: + (SET tmp_reg (address_of_GOT_entry(symbol) - pc)) + The expression (address_of_GOT_entry(symbol) - pc) is expressed by + got_prel, which is actually represented by R_ARM_GOT_PREL relocation. */ + l1 = gen_rtx_CONST (VOIDmode, l1); + l1 = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4); + offset = gen_rtx_MINUS (VOIDmode, pc_rtx, l1); + got_prel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, offset), + UNSPEC_GOT_PREL_SYM); + got_prel = gen_rtx_CONST (Pmode, got_prel); + if (TARGET_32BIT) + new_insn = emit_insn_after (gen_pic_load_addr_32bit (tmp_reg, got_prel), + insert_pos); + else + new_insn = emit_insn_after (gen_pic_load_addr_thumb1 (tmp_reg, got_prel), + insert_pos); + + /* The second insn: + (SET tmp_reg (PLUS tmp_reg pc_rtx)) */ + if (TARGET_ARM) + emit_insn_after (gen_pic_add_dot_plus_eight (tmp_reg, tmp_reg, labelno), + new_insn); + else + emit_insn_after (gen_pic_add_dot_plus_four (tmp_reg, tmp_reg, labelno), + new_insn); + + /* The last insn to access the GOT entry: + (SET address_reg (MEM tmp_reg)) + We reuse the existed load instruction. */ + XEXP (SET_SRC (set), 0) = tmp_reg; + df_insn_rescan (load_insn); +} + #include "gt-arm.h" diff --git a/gcc-4.9/gcc/config/arm/unspecs.md b/gcc-4.9/gcc/config/arm/unspecs.md index 8caa953bc..87edf18e2 100644 --- a/gcc-4.9/gcc/config/arm/unspecs.md +++ b/gcc-4.9/gcc/config/arm/unspecs.md @@ -83,6 +83,7 @@ ; FPSCR rounding mode and signal inexactness. UNSPEC_VRINTA ; Represent a float to integral float rounding ; towards nearest, ties away from zero. + UNSPEC_GOT_PREL_SYM ; Specify an R_ARM_GOT_PREL relocation of a symbol ]) (define_c_enum "unspec" [ diff --git a/gcc-4.9/gcc/doc/tm.texi b/gcc-4.9/gcc/doc/tm.texi index f7024a745..f3c0c14a9 100644 --- a/gcc-4.9/gcc/doc/tm.texi +++ b/gcc-4.9/gcc/doc/tm.texi @@ -9951,6 +9951,43 @@ default, inlining is not allowed if the callee function has function specific target options and the caller does not use the same options. @end deftypefn +@deftypefn {Target Hook} rtx TARGET_GET_PIC_REG (void) +Return the pic_reg pseudo register which holds the base address of GOT. + It is only required by the simplify-got optimization. +@end deftypefn + +@deftypefn {Target Hook} void TARGET_CLEAR_PIC_REG (void) +After successful simplify-got optimization, the pic_reg is useless. So a + target can use this hook to clear pic_reg. +@end deftypefn + +@deftypefn {Target Hook} rtx TARGET_LOADED_GLOBAL_VAR (rtx @var{insn}, rtx *@var{offset_reg}, rtx *@var{offset_insn}) +This hook is used to detect if the given @var{insn} loads a global + variable's address from GOT with the form of + @smallexample + (set @var{address_reg} (mem (plus pic_reg @var{offset_reg}))) + @end smallexample + If so return the global variable whose address will be loaded and fill in + @var{offset_insn} and @var{offset_reg}. @var{offset_reg} is set at + @var{offset_insn} to hold the offset from GOT base to the GOT entry of the + global variable. Otherwise return @code{NULL_RTX}. +@end deftypefn + +@deftypefn {Target Hook} bool TARGET_CAN_SIMPLIFY_GOT_ACCESS (int @var{n_symbol}, int @var{n_access}) +This hook determines if it satisfy the target dependent conditions to do + simplify-got when given the number of global variable accessing and the + number of accessed symbols. If the returned value is false the GOT access + insns will not be rewritten. Otherwise we will rewrite these insns. +@end deftypefn + +@deftypefn {Target Hook} void TARGET_LOAD_GLOBAL_ADDRESS (rtx @var{symbol}, rtx @var{offset_reg}, rtx @var{address_reg}, rtx @var{load_insn}, rtx @var{offset_insn}) +This hook does the actual rewriting of GOT access insn @var{load_insn}. + The global variable is @var{symbol}. The global address should be loaded + into @var{address_reg}. The register @var{offset_reg} was previously set + in insn @var{offset_insn} to hold the offset from GOT base to the GOT + entry of the global variable. Now it can be used as a scratch register. +@end deftypefn + @node Emulated TLS @section Emulating TLS @cindex Emulated TLS diff --git a/gcc-4.9/gcc/doc/tm.texi.in b/gcc-4.9/gcc/doc/tm.texi.in index 6dcbde45c..fb26c1f41 100644 --- a/gcc-4.9/gcc/doc/tm.texi.in +++ b/gcc-4.9/gcc/doc/tm.texi.in @@ -7502,6 +7502,43 @@ on this implementation detail. @hook TARGET_CAN_INLINE_P +@hook TARGET_GET_PIC_REG +Return the pic_reg pseudo register which holds the base address of GOT. + It is only required by the simplify-got optimization. +@end deftypefn + +@hook TARGET_CLEAR_PIC_REG +After successful simplify-got optimization, the pic_reg is useless. So a + target can use this hook to clear pic_reg. +@end deftypefn + +@hook TARGET_LOADED_GLOBAL_VAR +This hook is used to detect if the given @var{insn} loads a global + variable's address from GOT with the form of + @smallexample + (set @var{address_reg} (mem (plus pic_reg @var{offset_reg}))) + @end smallexample + If so return the global variable whose address will be loaded and fill in + @var{offset_insn} and @var{offset_reg}. @var{offset_reg} is set at + @var{offset_insn} to hold the offset from GOT base to the GOT entry of the + global variable. Otherwise return @code{NULL_RTX}. +@end deftypefn + +@hook TARGET_CAN_SIMPLIFY_GOT_ACCESS +This hook determines if it satisfy the target dependent conditions to do + simplify-got when given the number of global variable accessing and the + number of accessed symbols. If the returned value is false the GOT access + insns will not be rewritten. Otherwise we will rewrite these insns. +@end deftypefn + +@hook TARGET_LOAD_GLOBAL_ADDRESS +This hook does the actual rewriting of GOT access insn @var{load_insn}. + The global variable is @var{symbol}. The global address should be loaded + into @var{address_reg}. The register @var{offset_reg} was previously set + in insn @var{offset_insn} to hold the offset from GOT base to the GOT + entry of the global variable. Now it can be used as a scratch register. +@end deftypefn + @node Emulated TLS @section Emulating TLS @cindex Emulated TLS diff --git a/gcc-4.9/gcc/hooks.c b/gcc-4.9/gcc/hooks.c index 1c67bdfec..10384542d 100644 --- a/gcc-4.9/gcc/hooks.c +++ b/gcc-4.9/gcc/hooks.c @@ -365,6 +365,13 @@ hook_uint_mode_0 (enum machine_mode m ATTRIBUTE_UNUSED) return 0; } +/* Generic hook that returns NULL_RTX. */ +rtx +hook_rtx_void_null (void) +{ + return NULL; +} + /* Generic hook that takes three trees and returns the last one as is. */ tree hook_tree_tree_tree_tree_3rd_identity (tree a ATTRIBUTE_UNUSED, diff --git a/gcc-4.9/gcc/hooks.h b/gcc-4.9/gcc/hooks.h index 896b41d8c..42825a413 100644 --- a/gcc-4.9/gcc/hooks.h +++ b/gcc-4.9/gcc/hooks.h @@ -100,6 +100,7 @@ extern bool default_can_output_mi_thunk_no_vcall (const_tree, HOST_WIDE_INT, extern rtx hook_rtx_rtx_identity (rtx); extern rtx hook_rtx_rtx_null (rtx); extern rtx hook_rtx_tree_int_null (tree, int); +extern rtx hook_rtx_void_null (void); extern const char *hook_constcharptr_void_null (void); extern const char *hook_constcharptr_const_tree_null (const_tree); diff --git a/gcc-4.9/gcc/passes.def b/gcc-4.9/gcc/passes.def index c98b048ea..dbe0d3498 100644 --- a/gcc-4.9/gcc/passes.def +++ b/gcc-4.9/gcc/passes.def @@ -347,6 +347,7 @@ along with GCC; see the file COPYING3. If not see NEXT_PASS (pass_rtl_loop_done); TERMINATE_PASS_LIST () POP_INSERT_PASSES () + NEXT_PASS (pass_simplify_got); NEXT_PASS (pass_web); NEXT_PASS (pass_rtl_cprop); NEXT_PASS (pass_cse2); diff --git a/gcc-4.9/gcc/simplify-got.c b/gcc-4.9/gcc/simplify-got.c new file mode 100644 index 000000000..61a8fe893 --- /dev/null +++ b/gcc-4.9/gcc/simplify-got.c @@ -0,0 +1,216 @@ +/* Simplify the code to load global variable's address from GOT. + Copyright (C) 2011 + Free Software Foundation, Inc. + Contributed by Wei Guozhi . + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* This file contains optimization for global variable's address loading + from GOT. + + When generating PIC code, we need to load global variable's address from + GOT. Many targets do this as following: + + (set pic_reg ...) # load the base address of GOT into pic_reg. + ... + (set off_set ...) # load the offset from the base of GOT to + # a global variable's GOT entry. + (set address # load the address from GOT. + (mem (plus pic_reg off_set))) + ... + + If the target has an alternative method (usually uses a different + relocation) to load the global address and in some cases it has less + cost and avoid the pic_reg, we can use this pass to improve it. + + In order to employ this optimization the target must satisfy the + following constraints: + + 1. There should be at least 2 methods to load a global variable's + address from GOT. + + 2. By default all global variables accesses use the method described + above. + + 3. There is a target dependent situation that the alternative method is + better when considering the number of global variable accesses and + the number of accessed variables. + + 4. The alternative method doesn't use the base of GOT (pic_reg). +*/ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" +#include "rtl.h" +#include "flags.h" +#include "target.h" +#include "tree-pass.h" +#include "df.h" +#include "timevar.h" + +#define VAR_TABLE_SIZE 10 + +/* Information needed when rewrite the GOT access insns. */ +struct got_access_info +{ + rtx symbol; /* The global variable. */ + rtx offset_reg; /* Register contains the GOT entry offset. */ + rtx address_reg; /* Register contains the final global address. */ + rtx offset_insn; /* The insn loads the offset. */ + rtx load_insn; /* The insn which loads the address from GOT. */ +}; + +/* This optimization is enabled only when the pic_reg is actually used. */ +static bool +gate_handle_simplify_got (void) +{ + return optimize && targetm.got_access.get_pic_reg (); +} + +static unsigned int +rest_of_handle_simplify_got (void) +{ + df_ref ref; + rtx use = NULL_RTX; + int i, n_symbol, n_access = 0; + struct got_access_info* got_accesses; + htab_t var_table = htab_create (VAR_TABLE_SIZE, + htab_hash_pointer, + htab_eq_pointer, + NULL); + rtx pic_reg = targetm.got_access.get_pic_reg (); + gcc_assert (pic_reg); + + ref = DF_REG_USE_CHAIN (REGNO (pic_reg)); + got_accesses = XNEWVEC(struct got_access_info, + DF_REG_USE_COUNT (REGNO (pic_reg))); + + /* Check if all uses of pic_reg are loading global address through the + default method. */ + while (ref) + { + rtx insn = DF_REF_INSN (ref); + + /* Check for the special USE insn, it is not a real usage of pic_reg. */ + if (GET_CODE (PATTERN (insn)) == USE) + use = insn; + else + { + /* If an insn both set and use pic_reg, it is in the process of + constructing the value of pic_reg. We should also ignore it. */ + rtx set = single_set (insn); + if (!(set && SET_DEST (set) == pic_reg)) + { + rtx offset_reg; + rtx offset_insn; + rtx symbol = targetm.got_access.loaded_global_var (insn, + &offset_reg, + &offset_insn); + if (symbol) + { + rtx* slot = (rtx*) htab_find_slot (var_table, symbol, INSERT); + if (*slot == HTAB_EMPTY_ENTRY) + *slot = symbol; + + gcc_assert (set); + got_accesses[n_access].symbol = symbol; + got_accesses[n_access].offset_reg = offset_reg; + got_accesses[n_access].address_reg = SET_DEST (set); + got_accesses[n_access].load_insn = insn; + got_accesses[n_access].offset_insn = offset_insn; + n_access++; + } + else + { + /* This insn doesn't load a global address, but it has + other unexpected usage of pic_reg, give up. */ + free (got_accesses); + htab_delete (var_table); + return 0; + } + } + } + ref = DF_REF_NEXT_REG(ref); + } + + /* Check if we can simplify it. */ + n_symbol = htab_elements (var_table); + gcc_assert (n_symbol <= n_access); + if (!targetm.got_access.can_simplify_got_access (n_symbol, n_access)) + { + free (got_accesses); + htab_delete (var_table); + return 0; + } + + /* Rewrite the global address loading insns. */ + for (i=0; i