-rw-r--r--  compiler/dex/quick/arm/call_arm.cc      | 10
-rw-r--r--  compiler/dex/quick/arm64/call_arm64.cc  | 10
-rw-r--r--  compiler/dex/quick/mips/call_mips.cc    |  3
-rw-r--r--  compiler/dex/quick/mir_to_lir.h         | 31
-rw-r--r--  compiler/dex/quick/x86/call_x86.cc      |  4
-rw-r--r--  runtime/arch/arm/fault_handler_arm.cc   |  5
-rw-r--r--  runtime/instruction_set.h               | 55
-rw-r--r--  runtime/thread.cc                       |  6
-rw-r--r--  runtime/thread.h                        | 25
9 files changed, 95 insertions, 54 deletions
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 5466abd11d..2bdf3e40cb 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -356,11 +356,11 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
-      (static_cast<size_t>(frame_size_) <
-      Thread::kStackOverflowReservedBytes));
+  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kArm);
   NewLIR0(kPseudoMethodEntry);
-  bool large_frame = (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes);
+  constexpr size_t kStackOverflowReservedUsableBytes = kArmStackOverflowReservedBytes -
+      Thread::kStackOverflowSignalReservedBytes;
+  bool large_frame = (static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes);
   if (!skip_overflow_check) {
     if (Runtime::Current()->ExplicitStackOverflowChecks()) {
       if (!large_frame) {
@@ -381,7 +381,7 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
       // This is done before the callee save instructions to avoid any possibility
       // of these overflowing. This uses r12 and that's never saved in a callee
       // save.
-      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
+      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, kArmStackOverflowReservedBytes);
       Load32Disp(rs_r12, 0, rs_r12);
       MarkPossibleStackOverflowException();
     }
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index f1748effb2..35263eab8a 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -337,13 +337,13 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method)
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
-      (static_cast<size_t>(frame_size_) <
-      Thread::kStackOverflowReservedBytes));
+  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kArm64);
   NewLIR0(kPseudoMethodEntry);
-  const bool large_frame = (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes);
+  constexpr size_t kStackOverflowReservedUsableBytes = kArm64StackOverflowReservedBytes -
+      Thread::kStackOverflowSignalReservedBytes;
+  const bool large_frame = static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes;
   const int spill_count = num_core_spills_ + num_fp_spills_;
   const int spill_size = (spill_count * kArm64PointerSize + 15) & ~0xf;  // SP 16 byte alignment.
   const int frame_size_without_spills = frame_size_ - spill_size;
@@ -412,7 +412,7 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method)
       // Branch to throw target if there is not enough room.
       OpRegRegImm(kOpSub, rs_x9, rs_rA64_SP, frame_size_without_spills);
       LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x8);
-      LIR* branch = OpCmpBranch(kCondUlt, rs_rA64_SP, rs_x8, nullptr);
+      LIR* branch = OpCmpBranch(kCondUlt, rs_x9, rs_x8, nullptr);
       AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_size));
       OpRegCopy(rs_rA64_SP, rs_x9);  // Establish stack after checks.
     } else {
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index c73420291c..e53105fc84 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -305,8 +305,7 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method)
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
-      (static_cast<size_t>(frame_size_) < Thread::kStackOverflowReservedBytes));
+  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kMips);
   NewLIR0(kPseudoMethodEntry);
   RegStorage check_reg = AllocTemp();
   RegStorage new_sp = AllocTemp();
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index ca65432c58..f70087d451 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -25,6 +25,7 @@
 #include "dex/backend.h"
 #include "dex/quick/resource_mask.h"
 #include "driver/compiler_driver.h"
+#include "instruction_set.h"
 #include "leb128.h"
 #include "safe_map.h"
 #include "utils/array_ref.h"
@@ -206,6 +207,36 @@ Mir2Lir* X86_64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_grap
 #define SLOW_TYPE_PATH (cu_->enable_debug & (1 << kDebugSlowTypePath))
 #define EXERCISE_SLOWEST_STRING_PATH (cu_->enable_debug & (1 << kDebugSlowestStringPath))
 
+// Size of a frame that we definitely consider large. Anything larger than this should
+// definitely get a stack overflow check.
+static constexpr size_t kLargeFrameSize = 2 * KB;
+
+// Size of a frame that should be small. Anything leaf method smaller than this should run
+// without a stack overflow check.
+// The constant is from experience with frameworks code.
+static constexpr size_t kSmallFrameSize = 1 * KB;
+
+// Determine whether a frame is small or large, used in the decision on whether to elide a
+// stack overflow check on method entry.
+//
+// A frame is considered large when it's either above kLargeFrameSize, or a quarter of the
+// overflow-usable stack space.
+static constexpr bool IsLargeFrame(size_t size, InstructionSet isa) {
+  return size >= kLargeFrameSize || size >= GetStackOverflowReservedBytes(isa) / 4;
+}
+
+// We want to ensure that on all systems kSmallFrameSize will lead to false in IsLargeFrame.
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kArm),
+               kSmallFrameSize_is_not_a_small_frame_arm);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kArm64),
+               kSmallFrameSize_is_not_a_small_frame_arm64);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kMips),
+               kSmallFrameSize_is_not_a_small_frame_mips);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kX86),
+               kSmallFrameSize_is_not_a_small_frame_x86);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kX86_64),
+               kSmallFrameSize_is_not_a_small_frame_x64_64);
+
 class Mir2Lir : public Backend {
  public:
   /*
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index dd5dab290d..28195aba36 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -235,8 +235,8 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  const bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
-      (static_cast<size_t>(frame_size_) < Thread::kStackOverflowReservedBytes));
+  const bool skip_overflow_check = mir_graph_->MethodIsLeaf() &&
+      !IsLargeFrame(frame_size_, Gen64Bit() ? kX86_64 : kX86);
   NewLIR0(kPseudoMethodEntry);
   /* Spill core callee saves */
   SpillCoreRegs();
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index f81e2f9797..2a82129511 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -22,6 +22,7 @@
 #include "globals.h"
 #include "base/logging.h"
 #include "base/hex_dump.h"
+#include "instruction_set.h"
 #include "mirror/art_method.h"
 #include "mirror/art_method-inl.h"
 #include "thread.h"
@@ -59,7 +60,7 @@ void FaultManager::GetMethodAndReturnPCAndSP(void* context, mirror::ArtMethod**
   // get the method from the top of the stack. However it's in r0.
   uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address);
   uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
-      reinterpret_cast<uint8_t*>(*out_sp) - Thread::kStackOverflowReservedBytes);
+      reinterpret_cast<uint8_t*>(*out_sp) - kArmStackOverflowReservedBytes);
   if (overflow_addr == fault_addr) {
     *out_method = reinterpret_cast<mirror::ArtMethod*>(sc->arm_r0);
   } else {
@@ -190,7 +191,7 @@ bool StackOverflowHandler::Action(int sig, siginfo_t* info, void* context) {
   VLOG(signals) << "checking for stack overflow, sp: " << std::hex << sp <<
     ", fault_addr: " << fault_addr;
-  uintptr_t overflow_addr = sp - Thread::kStackOverflowReservedBytes;
+  uintptr_t overflow_addr = sp - kArmStackOverflowReservedBytes;
 
   Thread* self = reinterpret_cast<Thread*>(sc->arm_r9);
   CHECK_EQ(self, Thread::Current());
diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h
index 67e7100873..96eeb8dd37 100644
--- a/runtime/instruction_set.h
+++ b/runtime/instruction_set.h
@@ -22,6 +22,7 @@
 
 #include "base/logging.h"  // Logging is required for FATAL in the helper functions.
 #include "base/macros.h"
+#include "globals.h"  // For KB.
 
 namespace art {
 
@@ -36,6 +37,20 @@ enum InstructionSet {
 };
 std::ostream& operator<<(std::ostream& os, const InstructionSet& rhs);
 
+#if defined(__arm__)
+static constexpr InstructionSet kRuntimeISA = kArm;
+#elif defined(__aarch64__)
+static constexpr InstructionSet kRuntimeISA = kArm64;
+#elif defined(__mips__)
+static constexpr InstructionSet kRuntimeISA = kMips;
+#elif defined(__i386__)
+static constexpr InstructionSet kRuntimeISA = kX86;
+#elif defined(__x86_64__)
+static constexpr InstructionSet kRuntimeISA = kX86_64;
+#else
+static constexpr InstructionSet kRuntimeISA = kNone;
+#endif
+
 // Architecture-specific pointer sizes
 static constexpr size_t kArmPointerSize = 4;
 static constexpr size_t kArm64PointerSize = 8;
@@ -153,19 +168,33 @@ static inline size_t GetBytesPerFprSpillLocation(InstructionSet isa) {
   }
 }
 
-#if defined(__arm__)
-static constexpr InstructionSet kRuntimeISA = kArm;
-#elif defined(__aarch64__)
-static constexpr InstructionSet kRuntimeISA = kArm64;
-#elif defined(__mips__)
-static constexpr InstructionSet kRuntimeISA = kMips;
-#elif defined(__i386__)
-static constexpr InstructionSet kRuntimeISA = kX86;
-#elif defined(__x86_64__)
-static constexpr InstructionSet kRuntimeISA = kX86_64;
-#else
-static constexpr InstructionSet kRuntimeISA = kNone;
-#endif
+static constexpr size_t kDefaultStackOverflowReservedBytes = 16 * KB;
+static constexpr size_t kArmStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes;
+static constexpr size_t kMipsStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes;
+
+// TODO: shrink reserved space, in particular for 64bit.
+
+// Worst-case, we would need about 2.6x the amount of x86_64 for many more registers.
+// But this one works rather well.
+static constexpr size_t kArm64StackOverflowReservedBytes = 32 * KB;
+// TODO: Bumped to workaround regression (http://b/14982147) Specifically to fix:
+// test-art-host-run-test-interpreter-018-stack-overflow
+// test-art-host-run-test-interpreter-107-int-math2
+static constexpr size_t kX86StackOverflowReservedBytes = 24 * KB;
+static constexpr size_t kX86_64StackOverflowReservedBytes = 32 * KB;
+
+static constexpr size_t GetStackOverflowReservedBytes(InstructionSet isa) {
+  return (isa == kArm || isa == kThumb2) ? kArmStackOverflowReservedBytes :
+         isa == kArm64 ? kArm64StackOverflowReservedBytes :
+         isa == kMips ? kMipsStackOverflowReservedBytes :
+         isa == kX86 ? kX86StackOverflowReservedBytes :
+         isa == kX86_64 ? kX86_64StackOverflowReservedBytes :
+         isa == kNone ? (LOG(FATAL) << "kNone has no stack overflow size", 0) :
+                        (LOG(FATAL) << "Unknown instruction set" << isa, 0);
+}
+
+static constexpr size_t kRuntimeStackOverflowReservedBytes =
+    GetStackOverflowReservedBytes(kRuntimeISA);
 
 enum InstructionFeatures {
   kHwDiv = 0x1,  // Supports hardware divide.
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 6980530623..3f8f4a3dc0 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -220,7 +220,7 @@ static size_t FixStackSize(size_t stack_size) {
     // It's likely that callers are trying to ensure they have at least a certain amount of
     // stack space, so we should add our reserved space on top of what they requested, rather
     // than implicitly take it away from them.
-    stack_size += Thread::kStackOverflowReservedBytes;
+    stack_size += kRuntimeStackOverflowReservedBytes;
   } else {
     // If we are going to use implicit stack checks, allocate space for the protected
     // region at the bottom of the stack.
@@ -489,7 +489,7 @@ void Thread::InitStackHwm() {
   tlsPtr_.stack_begin = reinterpret_cast<byte*>(read_stack_base);
   tlsPtr_.stack_size = read_stack_size;
 
-  if (read_stack_size <= kStackOverflowReservedBytes) {
+  if (read_stack_size <= kRuntimeStackOverflowReservedBytes) {
     LOG(FATAL) << "Attempt to attach a thread with a too-small stack (" << read_stack_size
                << " bytes)";
   }
@@ -2200,7 +2200,7 @@ void Thread::SetStackEndForStackOverflow() {
   if (tlsPtr_.stack_end == tlsPtr_.stack_begin) {
     // However, we seem to have already extended to use the full stack.
     LOG(ERROR) << "Need to increase kStackOverflowReservedBytes (currently "
-               << kStackOverflowReservedBytes << ")?";
+               << kRuntimeStackOverflowReservedBytes << ")?";
     DumpStack(LOG(ERROR));
     LOG(FATAL) << "Recursive stack overflow.";
   }
diff --git a/runtime/thread.h b/runtime/thread.h
index bff9b5221c..7cd86deead 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -33,6 +33,7 @@
 #include "gc/allocator/rosalloc.h"
 #include "globals.h"
 #include "handle_scope.h"
+#include "instruction_set.h"
 #include "jvalue.h"
 #include "object_callbacks.h"
 #include "offsets.h"
@@ -94,28 +95,8 @@ enum ThreadFlag {
 
 class Thread {
  public:
-  // Space to throw a StackOverflowError in.
-  // TODO: shrink reserved space, in particular for 64bit.
-#if defined(__x86_64__)
-  static constexpr size_t kStackOverflowReservedBytes = 32 * KB;
-#elif defined(__aarch64__)
-  // Worst-case, we would need about 2.6x the amount of x86_64 for many more registers.
-  // But this one works rather well.
-  static constexpr size_t kStackOverflowReservedBytes = 32 * KB;
-#elif defined(__i386__)
-  // TODO: Bumped to workaround regression (http://b/14982147) Specifically to fix:
-  // test-art-host-run-test-interpreter-018-stack-overflow
-  // test-art-host-run-test-interpreter-107-int-math2
-  static constexpr size_t kStackOverflowReservedBytes = 24 * KB;
-#else
-  static constexpr size_t kStackOverflowReservedBytes = 16 * KB;
-#endif
   // How much of the reserved bytes is reserved for incoming signals.
   static constexpr size_t kStackOverflowSignalReservedBytes = 2 * KB;
-  // How much of the reserved bytes we may temporarily use during stack overflow checks as an
-  // optimization.
-  static constexpr size_t kStackOverflowReservedUsableBytes =
-      kStackOverflowReservedBytes - kStackOverflowSignalReservedBytes;
 
   // For implicit overflow checks we reserve an extra piece of memory at the bottom
   // of the stack (lowest memory). The higher portion of the memory
@@ -123,7 +104,7 @@ class Thread {
   // throwing the StackOverflow exception.
   static constexpr size_t kStackOverflowProtectedSize = 16 * KB;
   static constexpr size_t kStackOverflowImplicitCheckSize = kStackOverflowProtectedSize +
-      kStackOverflowReservedBytes;
+      kRuntimeStackOverflowReservedBytes;
 
   // Creates a new native thread corresponding to the given managed peer.
   // Used to implement Thread.start.
@@ -585,7 +566,7 @@ class Thread {
       // overflow region.
       tlsPtr_.stack_end = tlsPtr_.stack_begin + kStackOverflowImplicitCheckSize;
     } else {
-      tlsPtr_.stack_end = tlsPtr_.stack_begin + kStackOverflowReservedBytes;
+      tlsPtr_.stack_end = tlsPtr_.stack_begin + kRuntimeStackOverflowReservedBytes;
     }
   }
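
Note for readers skimming the diff: the heuristic introduced in mir_to_lir.h elides the method-entry stack overflow check only for leaf methods whose frames are not "large". Below is a minimal, self-contained sketch of that decision, not the ART sources: KB, the InstructionSet enum, and the per-ISA reserved sizes are re-declared locally so the snippet compiles on its own, and the LOG(FATAL) cases for kNone/unknown ISAs are simplified to a default.

    // Illustrative sketch of the frame-size heuristic added by this change.
    #include <cstddef>
    #include <cstdio>

    namespace sketch {

    static constexpr std::size_t KB = 1024;

    enum InstructionSet { kNone, kArm, kThumb2, kArm64, kX86, kX86_64, kMips };

    // Per-ISA space reserved at the low end of the stack for throwing StackOverflowError.
    // kNone/unknown are folded into the default here; the real code LOG(FATAL)s instead.
    static constexpr std::size_t kDefaultStackOverflowReservedBytes = 16 * KB;
    static constexpr std::size_t GetStackOverflowReservedBytes(InstructionSet isa) {
      return isa == kArm64  ? 32 * KB :
             isa == kX86    ? 24 * KB :
             isa == kX86_64 ? 32 * KB :
                              kDefaultStackOverflowReservedBytes;  // kArm, kThumb2, kMips
    }

    // A frame is "large" (and therefore always checked on entry) once it reaches
    // kLargeFrameSize or a quarter of the ISA's reserved overflow region, whichever
    // limit is hit first.
    static constexpr std::size_t kLargeFrameSize = 2 * KB;
    static constexpr bool IsLargeFrame(std::size_t size, InstructionSet isa) {
      return size >= kLargeFrameSize || size >= GetStackOverflowReservedBytes(isa) / 4;
    }

    }  // namespace sketch

    int main() {
      using namespace sketch;
      // A 1 KB leaf frame may skip the check on every ISA; a 6 KB frame may not.
      std::printf("1 KB frame on arm:   large=%d\n", IsLargeFrame(1 * KB, kArm));    // 0
      std::printf("6 KB frame on arm64: large=%d\n", IsLargeFrame(6 * KB, kArm64));  // 1
      return 0;
    }

In the committed code, GetStackOverflowReservedBytes() is written as a single conditional chain because a C++11 constexpr function body may only be a return statement; the kNone and unknown-ISA branches abort via LOG(FATAL) combined with the comma operator.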