diff options
Diffstat (limited to 'gcc-4.4.3/gcc/config')
-rw-r--r-- | gcc-4.4.3/gcc/config/arm/thumb2.md | 10 | ||||
-rw-r--r-- | gcc-4.4.3/gcc/config/i386/i386.c | 137 | ||||
-rw-r--r-- | gcc-4.4.3/gcc/config/i386/i386.h | 3 | ||||
-rw-r--r-- | gcc-4.4.3/gcc/config/i386/i386.md | 34 |
4 files changed, 174 insertions, 10 deletions
diff --git a/gcc-4.4.3/gcc/config/arm/thumb2.md b/gcc-4.4.3/gcc/config/arm/thumb2.md index 6e03e8b21..c982ea7cd 100644 --- a/gcc-4.4.3/gcc/config/arm/thumb2.md +++ b/gcc-4.4.3/gcc/config/arm/thumb2.md @@ -1190,7 +1190,7 @@ (clobber (reg:CC CC_REGNUM))] "TARGET_THUMB2" "* - if (get_attr_length (insn) == 2 && which_alternative == 0) + if (get_attr_length (insn) == 2) return \"cbz\\t%0, %l1\"; else return \"cmp\\t%0, #0\;beq\\t%l1\"; @@ -1198,7 +1198,8 @@ [(set (attr "length") (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int 2)) - (le (minus (match_dup 1) (pc)) (const_int 128))) + (le (minus (match_dup 1) (pc)) (const_int 128)) + (eq (symbol_ref ("which_alternative")) (const_int 0))) (const_int 2) (const_int 8)))] ) @@ -1212,7 +1213,7 @@ (clobber (reg:CC CC_REGNUM))] "TARGET_THUMB2" "* - if (get_attr_length (insn) == 2 && which_alternative == 0) + if (get_attr_length (insn) == 2) return \"cbnz\\t%0, %l1\"; else return \"cmp\\t%0, #0\;bne\\t%l1\"; @@ -1220,7 +1221,8 @@ [(set (attr "length") (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int 2)) - (le (minus (match_dup 1) (pc)) (const_int 128))) + (le (minus (match_dup 1) (pc)) (const_int 128)) + (eq (symbol_ref ("which_alternative")) (const_int 0))) (const_int 2) (const_int 8)))] ) diff --git a/gcc-4.4.3/gcc/config/i386/i386.c b/gcc-4.4.3/gcc/config/i386/i386.c index fa148be65..b237ed869 100644 --- a/gcc-4.4.3/gcc/config/i386/i386.c +++ b/gcc-4.4.3/gcc/config/i386/i386.c @@ -1481,6 +1481,9 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_PAD_RETURNS */ m_AMD_MULTIPLE | m_CORE2 | m_GENERIC, + /* X86_TUNE_PAD_SHORT_FUNCTION: Pad short function. */ + m_ATOM, + /* X86_TUNE_EXT_80387_CONSTANTS */ m_K6_GEODE | m_ATHLON_K8 | m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC, @@ -7640,6 +7643,11 @@ ix86_file_end (void) xops[0] = gen_rtx_REG (Pmode, regno); xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx); + /* Pad stack IP move with 4 instructions. 
2 NOPs count as 1 + instruction. */ + if (TARGET_PAD_SHORT_FUNCTION) + output_asm_insn ("nop; nop; nop; nop; nop; nop; nop; nop", + xops); output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops); output_asm_insn ("ret", xops); } @@ -27911,17 +27919,134 @@ ix86_pad_returns (void) } } +/* Count the minimum number of instructions in BB. Return 4 if the + number of instructions >= 4. */ + +static int +ix86_count_insn_bb (basic_block bb) +{ + rtx insn; + int insn_count = 0; + + /* Count number of instructions in this block. Return 4 if the number + of instructions >= 4. */ + FOR_BB_INSNS (bb, insn) + { + /* Only happen in exit blocks. */ + if (JUMP_P (insn) + && GET_CODE (PATTERN (insn)) == RETURN) + break; + + if (NONDEBUG_INSN_P (insn) + && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER) + { + insn_count++; + if (insn_count >= 4) + return insn_count; + } + } + + return insn_count; +} + + +/* Count the minimum number of instructions in code path in BB. + Return 4 if the number of instructions >= 4. */ + +static int +ix86_count_insn (basic_block bb) +{ + edge e; + edge_iterator ei; + int min_prev_count; + + /* Only bother counting instructions along paths with no + more than 2 basic blocks between entry and exit. Given + that BB has an edge to exit, determine if a predecessor + of BB has an edge from entry. If so, compute the number + of instructions in the predecessor block. If there + happen to be multiple such blocks, compute the minimum. 
*/ + min_prev_count = 4; + FOR_EACH_EDGE (e, ei, bb->preds) + { + edge prev_e; + edge_iterator prev_ei; + + if (e->src == ENTRY_BLOCK_PTR) + { + min_prev_count = 0; + break; + } + FOR_EACH_EDGE (prev_e, prev_ei, e->src->preds) + { + if (prev_e->src == ENTRY_BLOCK_PTR) + { + int count = ix86_count_insn_bb (e->src); + if (count < min_prev_count) + min_prev_count = count; + break; + } + } + } + + if (min_prev_count < 4) + min_prev_count += ix86_count_insn_bb (bb); + + return min_prev_count; +} + +/* Pad short function to 4 instructions. */ + +static void +ix86_pad_short_function (void) +{ + edge e; + edge_iterator ei; + + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) + { + rtx ret = BB_END (e->src); + if (JUMP_P (ret) && GET_CODE (PATTERN (ret)) == RETURN) + { + int insn_count = ix86_count_insn (e->src); + + /* Pad short function. */ + if (insn_count < 4) + { + rtx insn = ret; + + /* Find epilogue. */ + while (insn + && (!NOTE_P (insn) + || NOTE_KIND (insn) != NOTE_INSN_EPILOGUE_BEG)) + insn = PREV_INSN (insn); + + if (!insn) + insn = ret; + + /* Two NOPs are counted as one instruction. */ + insn_count = 2 * (4 - insn_count); + emit_insn_before (gen_nops (GEN_INT (insn_count)), insn); + } + } + } +} + /* Implement machine specific optimizations. We implement padding of returns for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. 
*/ static void ix86_reorg (void) { - if (TARGET_PAD_RETURNS && optimize - && optimize_function_for_speed_p (cfun)) - ix86_pad_returns (); - if (TARGET_FOUR_JUMP_LIMIT && optimize - && optimize_function_for_speed_p (cfun)) - ix86_avoid_jump_misspredicts (); + if (optimize && optimize_function_for_speed_p (cfun)) + { + if (TARGET_PAD_SHORT_FUNCTION) + ix86_pad_short_function (); + else if (TARGET_PAD_RETURNS) + ix86_pad_returns (); + if (TARGET_FOUR_JUMP_LIMIT) + ix86_avoid_jump_misspredicts (); + } } /* Return nonzero when QImode register that must be represented via REX prefix diff --git a/gcc-4.4.3/gcc/config/i386/i386.h b/gcc-4.4.3/gcc/config/i386/i386.h index e9014b79a..d7cb8ac3c 100644 --- a/gcc-4.4.3/gcc/config/i386/i386.h +++ b/gcc-4.4.3/gcc/config/i386/i386.h @@ -291,6 +291,7 @@ enum ix86_tune_indices { X86_TUNE_USE_BT, X86_TUNE_USE_INCDEC, X86_TUNE_PAD_RETURNS, + X86_TUNE_PAD_SHORT_FUNCTION, X86_TUNE_EXT_80387_CONSTANTS, X86_TUNE_SHORTEN_X87_SSE, X86_TUNE_AVOID_VECTOR_DECODE, @@ -372,6 +373,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; #define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT] #define TARGET_USE_INCDEC ix86_tune_features[X86_TUNE_USE_INCDEC] #define TARGET_PAD_RETURNS ix86_tune_features[X86_TUNE_PAD_RETURNS] +#define TARGET_PAD_SHORT_FUNCTION \ + ix86_tune_features[X86_TUNE_PAD_SHORT_FUNCTION] #define TARGET_EXT_80387_CONSTANTS \ ix86_tune_features[X86_TUNE_EXT_80387_CONSTANTS] #define TARGET_SHORTEN_X87_SSE ix86_tune_features[X86_TUNE_SHORTEN_X87_SSE] diff --git a/gcc-4.4.3/gcc/config/i386/i386.md b/gcc-4.4.3/gcc/config/i386/i386.md index bbe915112..7989c31db 100644 --- a/gcc-4.4.3/gcc/config/i386/i386.md +++ b/gcc-4.4.3/gcc/config/i386/i386.md @@ -77,6 +77,7 @@ (UNSPEC_TLSDESC 23) ; Other random patterns + (UNSPEC_NOPS 29) (UNSPEC_SCAS 30) (UNSPEC_FNSTSW 31) (UNSPEC_SAHF 32) @@ -15323,6 +15324,39 @@ (set_attr "length_immediate" "0") (set_attr "modrm" "0")]) +;; Generate nops. Operand 0 is the number of nops, up to 8. 
+(define_insn "nops" + [(unspec [(match_operand 0 "const_int_operand" "")] + UNSPEC_NOPS)] + "reload_completed" +{ + switch (INTVAL (operands[0])) + { + case 1: + return "nop"; + case 2: + return "nop; nop"; + case 3: + return "nop; nop; nop"; + case 4: + return "nop; nop; nop; nop"; + case 5: + return "nop; nop; nop; nop; nop"; + case 6: + return "nop; nop; nop; nop; nop; nop"; + case 7: + return "nop; nop; nop; nop; nop; nop; nop"; + case 8: + return "nop; nop; nop; nop; nop; nop; nop; nop"; + default: + gcc_unreachable (); + break; + } +} + [(set (attr "length") (symbol_ref "INTVAL (operands[0])")) + (set_attr "length_immediate" "0") + (set_attr "modrm" "0")]) + ;; Align to 16-byte boundary, max skip in op0. Used to avoid ;; branch prediction penalty for the third jump in a 16-byte ;; block on K8. |