-rw-r--r--  compiler/dex/quick/arm/call_arm.cc      | 10
-rw-r--r--  compiler/dex/quick/arm64/call_arm64.cc  | 10
-rw-r--r--  compiler/dex/quick/mips/call_mips.cc    |  3
-rw-r--r--  compiler/dex/quick/mir_to_lir.h         | 31
-rw-r--r--  compiler/dex/quick/x86/call_x86.cc      |  4
-rw-r--r--  runtime/arch/arm/fault_handler_arm.cc   |  5
-rw-r--r--  runtime/instruction_set.h               | 55
-rw-r--r--  runtime/thread.cc                       |  6
-rw-r--r--  runtime/thread.h                        | 25
9 files changed, 95 insertions, 54 deletions
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 5466abd11d..2bdf3e40cb 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -356,11 +356,11 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
-      (static_cast<size_t>(frame_size_) <
-      Thread::kStackOverflowReservedBytes));
+  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kArm);
   NewLIR0(kPseudoMethodEntry);
-  bool large_frame = (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes);
+  constexpr size_t kStackOverflowReservedUsableBytes = kArmStackOverflowReservedBytes -
+      Thread::kStackOverflowSignalReservedBytes;
+  bool large_frame = (static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes);
   if (!skip_overflow_check) {
     if (Runtime::Current()->ExplicitStackOverflowChecks()) {
       if (!large_frame) {
@@ -381,7 +381,7 @@ void ArmMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
       // This is done before the callee save instructions to avoid any possibility
       // of these overflowing. This uses r12 and that's never saved in a callee
       // save.
-      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
+      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, kArmStackOverflowReservedBytes);
       Load32Disp(rs_r12, 0, rs_r12);
       MarkPossibleStackOverflowException();
     }
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index f1748effb2..35263eab8a 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -337,13 +337,13 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method)
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
-      (static_cast<size_t>(frame_size_) <
-      Thread::kStackOverflowReservedBytes));
+  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kArm64);
   NewLIR0(kPseudoMethodEntry);
-  const bool large_frame = (static_cast<size_t>(frame_size_) > Thread::kStackOverflowReservedUsableBytes);
+  constexpr size_t kStackOverflowReservedUsableBytes = kArm64StackOverflowReservedBytes -
+      Thread::kStackOverflowSignalReservedBytes;
+  const bool large_frame = static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes;
   const int spill_count = num_core_spills_ + num_fp_spills_;
   const int spill_size = (spill_count * kArm64PointerSize + 15) & ~0xf;  // SP 16 byte alignment.
   const int frame_size_without_spills = frame_size_ - spill_size;
@@ -412,7 +412,7 @@ void Arm64Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method)
       // Branch to throw target if there is not enough room.
       OpRegRegImm(kOpSub, rs_x9, rs_rA64_SP, frame_size_without_spills);
       LoadWordDisp(rs_rA64_SELF, Thread::StackEndOffset<8>().Int32Value(), rs_x8);
-      LIR* branch = OpCmpBranch(kCondUlt, rs_rA64_SP, rs_x8, nullptr);
+      LIR* branch = OpCmpBranch(kCondUlt, rs_x9, rs_x8, nullptr);
       AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, spill_size));
       OpRegCopy(rs_rA64_SP, rs_x9);  // Establish stack after checks.
     } else {
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index c73420291c..e53105fc84 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -305,8 +305,7 @@ void MipsMir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method)
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
-      (static_cast<size_t>(frame_size_) < Thread::kStackOverflowReservedBytes));
+  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kMips);
   NewLIR0(kPseudoMethodEntry);
   RegStorage check_reg = AllocTemp();
   RegStorage new_sp = AllocTemp();
diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h
index ca65432c58..f70087d451 100644
--- a/compiler/dex/quick/mir_to_lir.h
+++ b/compiler/dex/quick/mir_to_lir.h
@@ -25,6 +25,7 @@
 #include "dex/backend.h"
 #include "dex/quick/resource_mask.h"
 #include "driver/compiler_driver.h"
+#include "instruction_set.h"
 #include "leb128.h"
 #include "safe_map.h"
 #include "utils/array_ref.h"
@@ -206,6 +207,36 @@ Mir2Lir* X86_64CodeGenerator(CompilationUnit* const cu, MIRGraph* const mir_grap
 #define SLOW_TYPE_PATH (cu_->enable_debug & (1 << kDebugSlowTypePath))
 #define EXERCISE_SLOWEST_STRING_PATH (cu_->enable_debug & (1 << kDebugSlowestStringPath))
 
+// Size of a frame that we definitely consider large. Anything larger than this should
+// definitely get a stack overflow check.
+static constexpr size_t kLargeFrameSize = 2 * KB;
+
+// Size of a frame that should be small. Anything leaf method smaller than this should run
+// without a stack overflow check.
+// The constant is from experience with frameworks code.
+static constexpr size_t kSmallFrameSize = 1 * KB;
+
+// Determine whether a frame is small or large, used in the decision on whether to elide a
+// stack overflow check on method entry.
+//
+// A frame is considered large when it's either above kLargeFrameSize, or a quarter of the
+// overflow-usable stack space.
+static constexpr bool IsLargeFrame(size_t size, InstructionSet isa) {
+  return size >= kLargeFrameSize || size >= GetStackOverflowReservedBytes(isa) / 4;
+}
+
+// We want to ensure that on all systems kSmallFrameSize will lead to false in IsLargeFrame.
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kArm),
+               kSmallFrameSize_is_not_a_small_frame_arm);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kArm64),
+               kSmallFrameSize_is_not_a_small_frame_arm64);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kMips),
+               kSmallFrameSize_is_not_a_small_frame_mips);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kX86),
+               kSmallFrameSize_is_not_a_small_frame_x86);
+COMPILE_ASSERT(!IsLargeFrame(kSmallFrameSize, kX86_64),
+               kSmallFrameSize_is_not_a_small_frame_x64_64);
+
 class Mir2Lir : public Backend {
  public:
   /*
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index dd5dab290d..28195aba36 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -235,8 +235,8 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) {
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  const bool skip_overflow_check = (mir_graph_->MethodIsLeaf() &&
-      (static_cast<size_t>(frame_size_) < Thread::kStackOverflowReservedBytes));
+  const bool skip_overflow_check = mir_graph_->MethodIsLeaf() &&
+      !IsLargeFrame(frame_size_, Gen64Bit() ? kX86_64 : kX86);
   NewLIR0(kPseudoMethodEntry);
   /* Spill core callee saves */
   SpillCoreRegs();
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index f81e2f9797..2a82129511 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -22,6 +22,7 @@
 #include "globals.h"
 #include "base/logging.h"
 #include "base/hex_dump.h"
+#include "instruction_set.h"
 #include "mirror/art_method.h"
 #include "mirror/art_method-inl.h"
 #include "thread.h"
@@ -59,7 +60,7 @@ void FaultManager::GetMethodAndReturnPCAndSP(void* context, mirror::ArtMethod**
   // get the method from the top of the stack. However it's in r0.
   uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address);
   uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(
-      reinterpret_cast<uint8_t*>(*out_sp) - Thread::kStackOverflowReservedBytes);
+      reinterpret_cast<uint8_t*>(*out_sp) - kArmStackOverflowReservedBytes);
   if (overflow_addr == fault_addr) {
     *out_method = reinterpret_cast<mirror::ArtMethod*>(sc->arm_r0);
   } else {
@@ -190,7 +191,7 @@ bool StackOverflowHandler::Action(int sig, siginfo_t* info, void* context) {
   VLOG(signals) << "checking for stack overflow, sp: " << std::hex << sp <<
     ", fault_addr: " << fault_addr;
-  uintptr_t overflow_addr = sp - Thread::kStackOverflowReservedBytes;
+  uintptr_t overflow_addr = sp - kArmStackOverflowReservedBytes;
 
   Thread* self = reinterpret_cast<Thread*>(sc->arm_r9);
   CHECK_EQ(self, Thread::Current());
diff --git a/runtime/instruction_set.h b/runtime/instruction_set.h
index 67e7100873..96eeb8dd37 100644
--- a/runtime/instruction_set.h
+++ b/runtime/instruction_set.h
@@ -22,6 +22,7 @@
 
 #include "base/logging.h"  // Logging is required for FATAL in the helper functions.
 #include "base/macros.h"
+#include "globals.h"  // For KB.
 
 namespace art {
 
@@ -36,6 +37,20 @@ enum InstructionSet {
 };
 std::ostream& operator<<(std::ostream& os, const InstructionSet& rhs);
 
+#if defined(__arm__)
+static constexpr InstructionSet kRuntimeISA = kArm;
+#elif defined(__aarch64__)
+static constexpr InstructionSet kRuntimeISA = kArm64;
+#elif defined(__mips__)
+static constexpr InstructionSet kRuntimeISA = kMips;
+#elif defined(__i386__)
+static constexpr InstructionSet kRuntimeISA = kX86;
+#elif defined(__x86_64__)
+static constexpr InstructionSet kRuntimeISA = kX86_64;
+#else
+static constexpr InstructionSet kRuntimeISA = kNone;
+#endif
+
 // Architecture-specific pointer sizes
 static constexpr size_t kArmPointerSize = 4;
 static constexpr size_t kArm64PointerSize = 8;
@@ -153,19 +168,33 @@ static inline size_t GetBytesPerFprSpillLocation(InstructionSet isa) {
   }
 }
 
-#if defined(__arm__)
-static constexpr InstructionSet kRuntimeISA = kArm;
-#elif defined(__aarch64__)
-static constexpr InstructionSet kRuntimeISA = kArm64;
-#elif defined(__mips__)
-static constexpr InstructionSet kRuntimeISA = kMips;
-#elif defined(__i386__)
-static constexpr InstructionSet kRuntimeISA = kX86;
-#elif defined(__x86_64__)
-static constexpr InstructionSet kRuntimeISA = kX86_64;
-#else
-static constexpr InstructionSet kRuntimeISA = kNone;
-#endif
+static constexpr size_t kDefaultStackOverflowReservedBytes = 16 * KB;
+static constexpr size_t kArmStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes;
+static constexpr size_t kMipsStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes;
+
+// TODO: shrink reserved space, in particular for 64bit.
+
+// Worst-case, we would need about 2.6x the amount of x86_64 for many more registers.
+// But this one works rather well.
+static constexpr size_t kArm64StackOverflowReservedBytes = 32 * KB;
+// TODO: Bumped to workaround regression (http://b/14982147) Specifically to fix:
+// test-art-host-run-test-interpreter-018-stack-overflow
+// test-art-host-run-test-interpreter-107-int-math2
+static constexpr size_t kX86StackOverflowReservedBytes = 24 * KB;
+static constexpr size_t kX86_64StackOverflowReservedBytes = 32 * KB;
+
+static constexpr size_t GetStackOverflowReservedBytes(InstructionSet isa) {
+  return (isa == kArm || isa == kThumb2) ? kArmStackOverflowReservedBytes :
+         isa == kArm64 ? kArm64StackOverflowReservedBytes :
+         isa == kMips ? kMipsStackOverflowReservedBytes :
+         isa == kX86 ? kX86StackOverflowReservedBytes :
+         isa == kX86_64 ? kX86_64StackOverflowReservedBytes :
+         isa == kNone ? (LOG(FATAL) << "kNone has no stack overflow size", 0) :
+                        (LOG(FATAL) << "Unknown instruction set" << isa, 0);
+}
+
+static constexpr size_t kRuntimeStackOverflowReservedBytes =
+    GetStackOverflowReservedBytes(kRuntimeISA);
 
 enum InstructionFeatures {
   kHwDiv = 0x1,  // Supports hardware divide.
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 6980530623..3f8f4a3dc0 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -220,7 +220,7 @@ static size_t FixStackSize(size_t stack_size) {
     // It's likely that callers are trying to ensure they have at least a certain amount of
     // stack space, so we should add our reserved space on top of what they requested, rather
     // than implicitly take it away from them.
-    stack_size += Thread::kStackOverflowReservedBytes;
+    stack_size += kRuntimeStackOverflowReservedBytes;
   } else {
     // If we are going to use implicit stack checks, allocate space for the protected
     // region at the bottom of the stack.
@@ -489,7 +489,7 @@ void Thread::InitStackHwm() {
   tlsPtr_.stack_begin = reinterpret_cast<byte*>(read_stack_base);
   tlsPtr_.stack_size = read_stack_size;
 
-  if (read_stack_size <= kStackOverflowReservedBytes) {
+  if (read_stack_size <= kRuntimeStackOverflowReservedBytes) {
     LOG(FATAL) << "Attempt to attach a thread with a too-small stack (" << read_stack_size
                << " bytes)";
   }
@@ -2200,7 +2200,7 @@ void Thread::SetStackEndForStackOverflow() {
   if (tlsPtr_.stack_end == tlsPtr_.stack_begin) {
     // However, we seem to have already extended to use the full stack.
     LOG(ERROR) << "Need to increase kStackOverflowReservedBytes (currently "
-               << kStackOverflowReservedBytes << ")?";
+               << kRuntimeStackOverflowReservedBytes << ")?";
     DumpStack(LOG(ERROR));
     LOG(FATAL) << "Recursive stack overflow.";
   }
diff --git a/runtime/thread.h b/runtime/thread.h
index bff9b5221c..7cd86deead 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -33,6 +33,7 @@
 #include "gc/allocator/rosalloc.h"
 #include "globals.h"
 #include "handle_scope.h"
+#include "instruction_set.h"
 #include "jvalue.h"
 #include "object_callbacks.h"
 #include "offsets.h"
@@ -94,28 +95,8 @@ enum ThreadFlag {
 
 class Thread {
  public:
-  // Space to throw a StackOverflowError in.
-  // TODO: shrink reserved space, in particular for 64bit.
-#if defined(__x86_64__)
-  static constexpr size_t kStackOverflowReservedBytes = 32 * KB;
-#elif defined(__aarch64__)
-  // Worst-case, we would need about 2.6x the amount of x86_64 for many more registers.
-  // But this one works rather well.
-  static constexpr size_t kStackOverflowReservedBytes = 32 * KB;
-#elif defined(__i386__)
-  // TODO: Bumped to workaround regression (http://b/14982147) Specifically to fix:
-  // test-art-host-run-test-interpreter-018-stack-overflow
-  // test-art-host-run-test-interpreter-107-int-math2
-  static constexpr size_t kStackOverflowReservedBytes = 24 * KB;
-#else
-  static constexpr size_t kStackOverflowReservedBytes = 16 * KB;
-#endif
   // How much of the reserved bytes is reserved for incoming signals.
   static constexpr size_t kStackOverflowSignalReservedBytes = 2 * KB;
-  // How much of the reserved bytes we may temporarily use during stack overflow checks as an
-  // optimization.
-  static constexpr size_t kStackOverflowReservedUsableBytes =
-      kStackOverflowReservedBytes - kStackOverflowSignalReservedBytes;
 
   // For implicit overflow checks we reserve an extra piece of memory at the bottom
   // of the stack (lowest memory). The higher portion of the memory
@@ -123,7 +104,7 @@ class Thread {
   // throwing the StackOverflow exception.
   static constexpr size_t kStackOverflowProtectedSize = 16 * KB;
   static constexpr size_t kStackOverflowImplicitCheckSize = kStackOverflowProtectedSize +
-      kStackOverflowReservedBytes;
+      kRuntimeStackOverflowReservedBytes;
 
   // Creates a new native thread corresponding to the given managed peer.
   // Used to implement Thread.start.
@@ -585,7 +566,7 @@ class Thread {
       // overflow region.
       tlsPtr_.stack_end = tlsPtr_.stack_begin + kStackOverflowImplicitCheckSize;
     } else {
-      tlsPtr_.stack_end = tlsPtr_.stack_begin + kStackOverflowReservedBytes;
+      tlsPtr_.stack_end = tlsPtr_.stack_begin + kRuntimeStackOverflowReservedBytes;
     }
   }
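
Note for readers skimming the diff: the heuristic introduced in mir_to_lir.h elides the method-entry stack overflow check only for leaf methods whose frames are not "large". Below is a minimal, self-contained sketch of that decision, not the ART sources: KB, the InstructionSet enum, and the per-ISA reserved sizes are re-declared locally so the snippet compiles on its own, and the LOG(FATAL) cases for kNone/unknown ISAs are simplified to a default.

    // Illustrative sketch of the frame-size heuristic added by this change.
    #include <cstddef>
    #include <cstdio>

    namespace sketch {

    static constexpr std::size_t KB = 1024;

    enum InstructionSet { kNone, kArm, kThumb2, kArm64, kX86, kX86_64, kMips };

    // Per-ISA space reserved at the low end of the stack for throwing StackOverflowError.
    // kNone/unknown are folded into the default here; the real code LOG(FATAL)s instead.
    static constexpr std::size_t kDefaultStackOverflowReservedBytes = 16 * KB;
    static constexpr std::size_t GetStackOverflowReservedBytes(InstructionSet isa) {
      return isa == kArm64  ? 32 * KB :
             isa == kX86    ? 24 * KB :
             isa == kX86_64 ? 32 * KB :
                              kDefaultStackOverflowReservedBytes;  // kArm, kThumb2, kMips
    }

    // A frame is "large" (and therefore always checked on entry) once it reaches
    // kLargeFrameSize or a quarter of the ISA's reserved overflow region, whichever
    // limit is hit first.
    static constexpr std::size_t kLargeFrameSize = 2 * KB;
    static constexpr bool IsLargeFrame(std::size_t size, InstructionSet isa) {
      return size >= kLargeFrameSize || size >= GetStackOverflowReservedBytes(isa) / 4;
    }

    }  // namespace sketch

    int main() {
      using namespace sketch;
      // A 1 KB leaf frame may skip the check on every ISA; a 6 KB frame may not.
      std::printf("1 KB frame on arm:   large=%d\n", IsLargeFrame(1 * KB, kArm));    // 0
      std::printf("6 KB frame on arm64: large=%d\n", IsLargeFrame(6 * KB, kArm64));  // 1
      return 0;
    }

In the committed code, GetStackOverflowReservedBytes() is written as a single conditional chain because a C++11 constexpr function body may only be a return statement; the kNone and unknown-ISA branches abort via LOG(FATAL) combined with the comma operator.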