Each primitive kind now spills to different locations.

Having different slots depending on the type greatly simplifies the parallel move resolver. It also avoids doing FPU <-> Core register swaps, and avoids forcing backends to implement such a swap.

Change-Id: Ide9f0452e7ccf9efb8adddbcc246d44b937b253c
author: Nicolas Geoffray <ngeoffray@google.com> 2015-02-23 14:14:57 +0000
committer: Nicolas Geoffray <ngeoffray@google.com> 2015-02-23 14:56:07 +0000
commit: 776b3184ee04092b11edc781cdb81e8ed60601e3 (patch)
tree: 98458c7087866b988468f5d356550ff14f2ee3af
parent: 1382e569b31f4fab61fcfca5aa93275a2a3cb757 (diff)
download: art-776b3184ee04092b11edc781cdb81e8ed60601e3.tar.gz
art-776b3184ee04092b11edc781cdb81e8ed60601e3.tar.bz2
art-776b3184ee04092b11edc781cdb81e8ed60601e3.zip
5 files changed, 195 insertions, 25 deletions
diff --git a/compiler/optimizing/register_allocator.cc b/compiler/optimizing/register_allocator.cc
index bfbe63f6ce..54e62a5b2c 100644
--- a/compiler/optimizing/register_allocator.cc
+++ b/compiler/optimizing/register_allocator.cc
@@ -48,7 +48,10 @@ RegisterAllocator::RegisterAllocator(ArenaAllocator* allocator,
         physical_core_register_intervals_(allocator, codegen->GetNumberOfCoreRegisters()),
         physical_fp_register_intervals_(allocator, codegen->GetNumberOfFloatingPointRegisters()),
         temp_intervals_(allocator, 4),
-        spill_slots_(allocator, kDefaultNumberOfSpillSlots),
+        int_spill_slots_(allocator, kDefaultNumberOfSpillSlots),
+        long_spill_slots_(allocator, kDefaultNumberOfSpillSlots),
+        float_spill_slots_(allocator, kDefaultNumberOfSpillSlots),
+        double_spill_slots_(allocator, kDefaultNumberOfSpillSlots),
         safepoints_(allocator, 0),
         processing_core_registers_(false),
         number_of_registers_(-1),
@@ -438,7 +441,7 @@ bool RegisterAllocator::ValidateInternal(bool log_fatal_on_failure) const {
     }
   }
 
-  return ValidateIntervals(intervals, spill_slots_.Size(), reserved_out_slots_, *codegen_,
+  return ValidateIntervals(intervals, GetNumberOfSpillSlots(), reserved_out_slots_, *codegen_,
                            allocator_, processing_core_registers_, log_fatal_on_failure);
 }
 
@@ -1133,41 +1136,62 @@ void RegisterAllocator::AllocateSpillSlotFor(LiveInterval* interval) {
   }
   size_t end = last_sibling->GetEnd();
 
+  GrowableArray<size_t>* spill_slots = nullptr;
+  switch (interval->GetType()) {
+    case Primitive::kPrimDouble:
+      spill_slots = &double_spill_slots_;
+      break;
+    case Primitive::kPrimLong:
+      spill_slots = &long_spill_slots_;
+      break;
+    case Primitive::kPrimFloat:
+      spill_slots = &float_spill_slots_;
+      break;
+    case Primitive::kPrimNot:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimShort:
+      spill_slots = &int_spill_slots_;
+      break;
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
+  }
+
   // Find an available spill slot.
   size_t slot = 0;
-  for (size_t e = spill_slots_.Size(); slot < e; ++slot) {
-    // We check if it is less rather than less or equal because the parallel move
-    // resolver does not work when a single spill slot needs to be exchanged with
-    // a double spill slot. The strict comparison avoids needing to exchange these
-    // locations at the same lifetime position.
-    if (spill_slots_.Get(slot) < parent->GetStart()
-        && (slot == (e - 1) || spill_slots_.Get(slot + 1) < parent->GetStart())) {
+  for (size_t e = spill_slots->Size(); slot < e; ++slot) {
+    if (spill_slots->Get(slot) <= parent->GetStart()
+        && (slot == (e - 1) || spill_slots->Get(slot + 1) <= parent->GetStart())) {
       break;
     }
   }
 
   if (parent->NeedsTwoSpillSlots()) {
-    if (slot == spill_slots_.Size()) {
+    if (slot == spill_slots->Size()) {
       // We need a new spill slot.
-      spill_slots_.Add(end);
-      spill_slots_.Add(end);
-    } else if (slot == spill_slots_.Size() - 1) {
-      spill_slots_.Put(slot, end);
-      spill_slots_.Add(end);
+      spill_slots->Add(end);
+      spill_slots->Add(end);
+    } else if (slot == spill_slots->Size() - 1) {
+      spill_slots->Put(slot, end);
+      spill_slots->Add(end);
     } else {
-      spill_slots_.Put(slot, end);
-      spill_slots_.Put(slot + 1, end);
+      spill_slots->Put(slot, end);
+      spill_slots->Put(slot + 1, end);
     }
   } else {
-    if (slot == spill_slots_.Size()) {
+    if (slot == spill_slots->Size()) {
       // We need a new spill slot.
-      spill_slots_.Add(end);
+      spill_slots->Add(end);
     } else {
-      spill_slots_.Put(slot, end);
+      spill_slots->Put(slot, end);
     }
   }
 
-  parent->SetSpillSlot((slot + reserved_out_slots_) * kVRegSize);
+  // Note that the exact spill slot location will be computed when we resolve,
+  // that is when we know the number of spill slots for each type.
+  parent->SetSpillSlot(slot);
 }
 
 static bool IsValidDestination(Location destination) {
@@ -1516,7 +1540,7 @@ void RegisterAllocator::ConnectSplitSiblings(LiveInterval* interval,
 }
 
 void RegisterAllocator::Resolve() {
-  codegen_->InitializeCodeGeneration(spill_slots_.Size(),
+  codegen_->InitializeCodeGeneration(GetNumberOfSpillSlots(),
                                      maximum_number_of_live_core_registers_,
                                      maximum_number_of_live_fp_registers_,
                                      reserved_out_slots_,
@@ -1542,6 +1566,39 @@ void RegisterAllocator::Resolve() {
       } else if (current->HasSpillSlot()) {
         current->SetSpillSlot(current->GetSpillSlot() + codegen_->GetFrameSize());
       }
+    } else if (current->HasSpillSlot()) {
+      // Adjust the stack slot, now that we know the number of them for each type.
+      // The way this implementation lays out the stack is the following:
+      // [parameter slots     ]
+      // [double spill slots  ]
+      // [long spill slots    ]
+      // [float spill slots   ]
+      // [int/ref values      ]
+      // [maximum out values  ] (number of arguments for calls)
+      // [art method          ].
+      uint32_t slot = current->GetSpillSlot();
+      switch (current->GetType()) {
+        case Primitive::kPrimDouble:
+          slot += long_spill_slots_.Size();
+          FALLTHROUGH_INTENDED;
+        case Primitive::kPrimLong:
+          slot += float_spill_slots_.Size();
+          FALLTHROUGH_INTENDED;
+        case Primitive::kPrimFloat:
+          slot += int_spill_slots_.Size();
+          FALLTHROUGH_INTENDED;
+        case Primitive::kPrimNot:
+        case Primitive::kPrimInt:
+        case Primitive::kPrimChar:
+        case Primitive::kPrimByte:
+        case Primitive::kPrimBoolean:
+        case Primitive::kPrimShort:
+          slot += reserved_out_slots_;
+          break;
+        case Primitive::kPrimVoid:
+          LOG(FATAL) << "Unexpected type for interval " << current->GetType();
+      }
+      current->SetSpillSlot(slot * kVRegSize);
     }
 
     Location source = current->ToLocation();
diff --git a/compiler/optimizing/register_allocator.h b/compiler/optimizing/register_allocator.h
index b8f70bdc18..ff2f106b74 100644
--- a/compiler/optimizing/register_allocator.h
+++ b/compiler/optimizing/register_allocator.h
@@ -75,7 +75,10 @@ class RegisterAllocator {
   }
 
   size_t GetNumberOfSpillSlots() const {
-    return spill_slots_.Size();
+    return int_spill_slots_.Size()
+        + long_spill_slots_.Size()
+        + float_spill_slots_.Size()
+        + double_spill_slots_.Size();
   }
 
  private:
@@ -171,8 +174,14 @@ class RegisterAllocator {
   // where an instruction requires a temporary.
   GrowableArray<LiveInterval*> temp_intervals_;
 
-  // The spill slots allocated for live intervals.
-  GrowableArray<size_t> spill_slots_;
+  // The spill slots allocated for live intervals. We ensure spill slots
+  // are typed to avoid (1) doing moves and swaps between two different kinds
+  // of registers, and (2) swapping between a single stack slot and a double
+  // stack slot. This simplifies the parallel move resolver.
+  GrowableArray<size_t> int_spill_slots_;
+  GrowableArray<size_t> long_spill_slots_;
+  GrowableArray<size_t> float_spill_slots_;
+  GrowableArray<size_t> double_spill_slots_;
 
   // Instructions that need a safepoint.
   GrowableArray<HInstruction*> safepoints_;
diff --git a/test/451-spill-splot/expected.txt b/test/451-spill-splot/expected.txt
new file mode 100644
index 0000000000..efc3f2e7bd
--- /dev/null
+++ b/test/451-spill-splot/expected.txt
@@ -0,0 +1,6 @@
+85.0
+45.0
+20.0
+56.0
+20.0
+20.0
diff --git a/test/451-spill-splot/info.txt b/test/451-spill-splot/info.txt
new file mode 100644
index 0000000000..1772ce7c00
--- /dev/null
+++ b/test/451-spill-splot/info.txt
@@ -0,0 +1,2 @@
+Regression test for the optimizing compiler and the
+way it spills intervals of different types.
diff --git a/test/451-spill-splot/src/Main.java b/test/451-spill-splot/src/Main.java
new file mode 100644
index 0000000000..f631ebdd6d
--- /dev/null
+++ b/test/451-spill-splot/src/Main.java
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    // Create a few local variables to make sure some get spilled, and we get
+    // a conflict of swapping a single entry stack slot (float) with a double entry
+    // stack slot (double).
+    double a = 0.0;
+    double b = 1.0;
+    double c = 2.0;
+    double d = 3.0;
+    double e = 4.0;
+    double f = 5.0;
+    double g = 6.0;
+    double h = 7.0;
+    double i = 8.0;
+    double j = 9.0;
+
+    float aa = 0;
+    float bb = 1;
+    float cc = 2;
+    float dd = 3;
+    float ee = 4;
+    float ff = 5;
+    float gg = 6;
+    float hh = 7;
+    float ii = 8;
+    float jj = 9;
+    float kk = 10;
+    float ll = 10;
+    float mm = 10;
+    float nn = 10;
+
+    for (int count = 0; count < 2; count++) {
+      System.out.println(aa + bb + cc + dd + ee + ff + gg + hh + ii + jj + kk + ll + mm + nn);
+      System.out.println(a + b + c + d + e + f + g + h + i + j);
+      a = computeDouble();
+      b = computeDouble();
+      c = computeDouble();
+      d = computeDouble();
+      e = computeDouble();
+      f = computeDouble();
+      g = computeDouble();
+      h = computeDouble();
+      i = computeDouble();
+      j = computeDouble();
+      System.out.println(a + b + c + d + e + f + g + h + i + j);
+      aa = computeFloat();
+      bb = computeFloat();
+      cc = computeFloat();
+      dd = computeFloat();
+      ee = computeFloat();
+      ff = computeFloat();
+      gg = computeFloat();
+      hh = computeFloat();
+      ii = computeFloat();
+      jj = computeFloat();
+      kk = computeFloat();
+      ll = computeFloat();
+      mm = computeFloat();
+      nn = computeFloat();
+    }
+  }
+
+  static boolean doThrow = false;
+
+  public static double computeDouble() {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    return 2.0;
+  }
+
+  public static float computeFloat() {
+    if (doThrow) {
+      // Try defeating inlining.
+      throw new Error();
+    }
+    return 4.0f;
+  }
+}
author	Nicolas Geoffray <ngeoffray@google.com>	2015-02-23 14:14:57 +0000
committer	Nicolas Geoffray <ngeoffray@google.com>	2015-02-23 14:56:07 +0000
commit	776b3184ee04092b11edc781cdb81e8ed60601e3 (patch)
tree	98458c7087866b988468f5d356550ff14f2ee3af
parent	1382e569b31f4fab61fcfca5aa93275a2a3cb757 (diff)
download	art-776b3184ee04092b11edc781cdb81e8ed60601e3.tar.gz art-776b3184ee04092b11edc781cdb81e8ed60601e3.tar.bz2 art-776b3184ee04092b11edc781cdb81e8ed60601e3.zip