Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/dex/compiler_enums.h | 6
-rw-r--r-- | compiler/dex/frontend.cc | 4
-rw-r--r-- | compiler/dex/quick/arm/target_arm.cc | 1
-rw-r--r-- | compiler/dex/quick/arm64/target_arm64.cc | 1
-rw-r--r-- | compiler/dex/quick/gen_common.cc | 6
-rw-r--r-- | compiler/dex/quick/gen_invoke.cc | 37
-rw-r--r-- | compiler/dex/quick/gen_loadstore.cc | 36
-rw-r--r-- | compiler/dex/quick/mips/target_mips.cc | 1
-rw-r--r-- | compiler/dex/quick/mir_to_lir.cc | 64
-rw-r--r-- | compiler/dex/quick/mir_to_lir.h | 4
-rw-r--r-- | compiler/dex/quick/x86/assemble_x86.cc | 28
-rw-r--r-- | compiler/dex/quick/x86/call_x86.cc | 39
-rw-r--r-- | compiler/dex/quick/x86/codegen_x86.h | 63
-rw-r--r-- | compiler/dex/quick/x86/fp_x86.cc | 65
-rw-r--r-- | compiler/dex/quick/x86/int_x86.cc | 656
-rw-r--r-- | compiler/dex/quick/x86/target_x86.cc | 587
-rw-r--r-- | compiler/dex/quick/x86/utility_x86.cc | 171
-rw-r--r-- | compiler/dex/quick/x86/x86_lir.h | 26
18 files changed, 1478 insertions, 317 deletions
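
Orientation note (not part of the patch): the per-file changes below extend the Quick backend's x86-64 support with extra argument-register slots (kArg4/kArg5, kFArg4-kFArg7), REX.W-prefixed 64-bit encodings in assemble_x86.cc, and 64-bit paths through the long and floating-point code generators. The InToRegStorageX86_64Mapper declared in codegen_x86.h hands out argument registers in the SysV x86-64 style, where up to six integer/pointer arguments and up to eight floating-point arguments travel in registers before spilling to the stack. The following is a minimal stand-alone C++ sketch of that idea, not the ART implementation; the class name, register names, and the simplified GetNextReg signature are illustrative assumptions.

#include <string>

// Illustrative sketch only (not ART code): SysV x86-64 passes the first six
// integer/pointer arguments in RDI, RSI, RDX, RCX, R8, R9 and the first eight
// float/double arguments in XMM0..XMM7; later arguments go on the stack.
class ArgRegMapperSketch {
 public:
  // Returns the register for the next argument, or "" to mean "stack slot".
  std::string GetNextReg(bool is_double_or_float) {
    static const char* const kCoreRegs[] = {"rdi", "rsi", "rdx", "rcx", "r8", "r9"};
    static const char* const kFpRegs[] = {"xmm0", "xmm1", "xmm2", "xmm3",
                                          "xmm4", "xmm5", "xmm6", "xmm7"};
    if (is_double_or_float) {
      return cur_fp_reg_ < 8 ? kFpRegs[cur_fp_reg_++] : "";
    }
    return cur_core_reg_ < 6 ? kCoreRegs[cur_core_reg_++] : "";
  }

 private:
  int cur_core_reg_ = 0;  // Next unused core argument register.
  int cur_fp_reg_ = 0;    // Next unused XMM argument register.
};

Because a wide (64-bit) value occupies a single register on x86-64, the patch also replaces the 32-bit register pairs used on x86 with Solo64/FloatSolo64 storage in gen_invoke.cc.
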
diff --git a/compiler/dex/compiler_enums.h b/compiler/dex/compiler_enums.h index eb48cc3783..f0b47878e6 100644 --- a/compiler/dex/compiler_enums.h +++ b/compiler/dex/compiler_enums.h @@ -48,10 +48,16 @@ enum SpecialTargetRegister { kArg1, kArg2, kArg3, + kArg4, + kArg5, kFArg0, kFArg1, kFArg2, kFArg3, + kFArg4, + kFArg5, + kFArg6, + kFArg7, kRet0, kRet1, kInvokeTgt, diff --git a/compiler/dex/frontend.cc b/compiler/dex/frontend.cc index 5b9c763af6..547c0f6b30 100644 --- a/compiler/dex/frontend.cc +++ b/compiler/dex/frontend.cc @@ -707,7 +707,7 @@ int x86_64_support_list[] = { // which has problems with long, float, double constexpr char arm64_supported_types[] = "ZBSCILVJFD"; // (x84_64) We still have troubles with compiling longs/doubles/floats -constexpr char x86_64_supported_types[] = "ZBSCILV"; +constexpr char x86_64_supported_types[] = "ZBSCILVJFD"; // TODO: Remove this when we are able to compile everything. static bool CanCompileShorty(const char* shorty, InstructionSet instruction_set) { @@ -718,7 +718,7 @@ static bool CanCompileShorty(const char* shorty, InstructionSet instruction_set) // 1 is for the return type. Currently, we only accept 2 parameters at the most. // (x86_64): For now we have the same limitation. But we might want to split this // check in future into two separate cases for arm64 and x86_64. - if (shorty_size > (1 + 2)) { + if ((shorty_size > (1 + 2)) && (instruction_set != kX86_64)) { return false; } diff --git a/compiler/dex/quick/arm/target_arm.cc b/compiler/dex/quick/arm/target_arm.cc index bd9c8b4b75..3b30cde0d4 100644 --- a/compiler/dex/quick/arm/target_arm.cc +++ b/compiler/dex/quick/arm/target_arm.cc @@ -113,6 +113,7 @@ RegStorage ArmMir2Lir::TargetReg(SpecialTargetRegister reg) { case kHiddenArg: res_reg = rs_r12; break; case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break; case kCount: res_reg = RegStorage::InvalidReg(); break; + default: res_reg = RegStorage::InvalidReg(); } return res_reg; } diff --git a/compiler/dex/quick/arm64/target_arm64.cc b/compiler/dex/quick/arm64/target_arm64.cc index b287399900..ce9528632e 100644 --- a/compiler/dex/quick/arm64/target_arm64.cc +++ b/compiler/dex/quick/arm64/target_arm64.cc @@ -127,6 +127,7 @@ RegStorage Arm64Mir2Lir::TargetReg(SpecialTargetRegister reg) { case kHiddenArg: res_reg = rs_x12; break; case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break; case kCount: res_reg = RegStorage::InvalidReg(); break; + default: res_reg = RegStorage::InvalidReg(); } return res_reg; } diff --git a/compiler/dex/quick/gen_common.cc b/compiler/dex/quick/gen_common.cc index 62c81d05bb..69ca7154e4 100644 --- a/compiler/dex/quick/gen_common.cc +++ b/compiler/dex/quick/gen_common.cc @@ -1959,7 +1959,7 @@ static void GenArithOpLongImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, Instruc switch (opcode) { case Instruction::NOT_LONG: - if (cu->instruction_set == kArm64) { + if (cu->instruction_set == kArm64 || cu->instruction_set == kX86_64) { mir_to_lir->GenNotLong(rl_dest, rl_src2); return; } @@ -2009,7 +2009,7 @@ static void GenArithOpLongImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, Instruc break; case Instruction::DIV_LONG: case Instruction::DIV_LONG_2ADDR: - if (cu->instruction_set == kArm64) { + if (cu->instruction_set == kArm64 || cu->instruction_set == kX86_64) { mir_to_lir->GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ true); return; } @@ -2020,7 +2020,7 @@ static void GenArithOpLongImpl(Mir2Lir* mir_to_lir, CompilationUnit* cu, Instruc break; case Instruction::REM_LONG: case 
Instruction::REM_LONG_2ADDR: - if (cu->instruction_set == kArm64) { + if (cu->instruction_set == kArm64 || cu->instruction_set == kX86_64) { mir_to_lir->GenDivRemLong(opcode, rl_dest, rl_src1, rl_src2, /*is_div*/ false); return; } diff --git a/compiler/dex/quick/gen_invoke.cc b/compiler/dex/quick/gen_invoke.cc index 842533b66b..ee68fe2561 100644 --- a/compiler/dex/quick/gen_invoke.cc +++ b/compiler/dex/quick/gen_invoke.cc @@ -290,26 +290,51 @@ void Mir2Lir::CallRuntimeHelperRegLocationRegLocation(ThreadOffset<pointer_size> } LoadValueDirectWideFixed(arg1, r_tmp); } else { - RegStorage r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2)); + RegStorage r_tmp; + if (cu_->instruction_set == kX86_64) { + r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg()); + } else { + r_tmp = RegStorage::MakeRegPair(TargetReg(kArg1), TargetReg(kArg2)); + } LoadValueDirectWideFixed(arg1, r_tmp); } } } else { RegStorage r_tmp; if (arg0.fp) { - r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg0), TargetReg(kFArg1)); + if (cu_->instruction_set == kX86_64) { + r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg0).GetReg()); + } else { + r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg0), TargetReg(kFArg1)); + } } else { - r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1)); + if (cu_->instruction_set == kX86_64) { + r_tmp = RegStorage::Solo64(TargetReg(kArg0).GetReg()); + } else { + r_tmp = RegStorage::MakeRegPair(TargetReg(kArg0), TargetReg(kArg1)); + } } LoadValueDirectWideFixed(arg0, r_tmp); if (arg1.wide == 0) { - LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg2)); + if (cu_->instruction_set == kX86_64) { + LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg1) : TargetReg(kArg1)); + } else { + LoadValueDirectFixed(arg1, arg1.fp ? TargetReg(kFArg2) : TargetReg(kArg2)); + } } else { RegStorage r_tmp; if (arg1.fp) { - r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg2), TargetReg(kFArg3)); + if (cu_->instruction_set == kX86_64) { + r_tmp = RegStorage::FloatSolo64(TargetReg(kFArg1).GetReg()); + } else { + r_tmp = RegStorage::MakeRegPair(TargetReg(kFArg2), TargetReg(kFArg3)); + } } else { - r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3)); + if (cu_->instruction_set == kX86_64) { + r_tmp = RegStorage::Solo64(TargetReg(kArg1).GetReg()); + } else { + r_tmp = RegStorage::MakeRegPair(TargetReg(kArg2), TargetReg(kArg3)); + } } LoadValueDirectWideFixed(arg1, r_tmp); } diff --git a/compiler/dex/quick/gen_loadstore.cc b/compiler/dex/quick/gen_loadstore.cc index 2c8b9b9adf..6ef793427c 100644 --- a/compiler/dex/quick/gen_loadstore.cc +++ b/compiler/dex/quick/gen_loadstore.cc @@ -391,24 +391,34 @@ RegLocation Mir2Lir::ForceTemp(RegLocation loc) { return loc; } -// FIXME: will need an update for 64-bit core regs. 
RegLocation Mir2Lir::ForceTempWide(RegLocation loc) { DCHECK(loc.wide); DCHECK(loc.location == kLocPhysReg); DCHECK(!loc.reg.IsFloat()); - if (IsTemp(loc.reg.GetLow())) { - Clobber(loc.reg.GetLow()); - } else { - RegStorage temp_low = AllocTemp(); - OpRegCopy(temp_low, loc.reg.GetLow()); - loc.reg.SetLowReg(temp_low.GetReg()); - } - if (IsTemp(loc.reg.GetHigh())) { - Clobber(loc.reg.GetHigh()); + + if (!loc.reg.IsPair()) { + if (IsTemp(loc.reg)) { + Clobber(loc.reg); + } else { + RegStorage temp = AllocTempWide(); + OpRegCopy(temp, loc.reg); + loc.reg = temp; + } } else { - RegStorage temp_high = AllocTemp(); - OpRegCopy(temp_high, loc.reg.GetHigh()); - loc.reg.SetHighReg(temp_high.GetReg()); + if (IsTemp(loc.reg.GetLow())) { + Clobber(loc.reg.GetLow()); + } else { + RegStorage temp_low = AllocTemp(); + OpRegCopy(temp_low, loc.reg.GetLow()); + loc.reg.SetLowReg(temp_low.GetReg()); + } + if (IsTemp(loc.reg.GetHigh())) { + Clobber(loc.reg.GetHigh()); + } else { + RegStorage temp_high = AllocTemp(); + OpRegCopy(temp_high, loc.reg.GetHigh()); + loc.reg.SetHighReg(temp_high.GetReg()); + } } // Ensure that this doesn't represent the original SR any more. diff --git a/compiler/dex/quick/mips/target_mips.cc b/compiler/dex/quick/mips/target_mips.cc index c1a7c990f0..381c7ce0aa 100644 --- a/compiler/dex/quick/mips/target_mips.cc +++ b/compiler/dex/quick/mips/target_mips.cc @@ -98,6 +98,7 @@ RegStorage MipsMir2Lir::TargetReg(SpecialTargetRegister reg) { case kHiddenArg: res_reg = rs_rT0; break; case kHiddenFpArg: res_reg = RegStorage::InvalidReg(); break; case kCount: res_reg = rs_rMIPS_COUNT; break; + default: res_reg = RegStorage::InvalidReg(); } return res_reg; } diff --git a/compiler/dex/quick/mir_to_lir.cc b/compiler/dex/quick/mir_to_lir.cc index 1f12b6fe69..a85be5e90c 100644 --- a/compiler/dex/quick/mir_to_lir.cc +++ b/compiler/dex/quick/mir_to_lir.cc @@ -68,20 +68,51 @@ void Mir2Lir::LockArg(int in_position, bool wide) { // TODO: needs revisit for 64-bit. RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) { - RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position); - RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) : - RegStorage::InvalidReg(); - int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set); - if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) { + + if (cu_->instruction_set == kX86) { /* * When doing a call for x86, it moves the stack pointer in order to push return. * Thus, we add another 4 bytes to figure out the out of caller (in of callee). - * TODO: This needs revisited for 64-bit. */ offset += sizeof(uint32_t); } + if (cu_->instruction_set == kX86_64) { + /* + * When doing a call for x86, it moves the stack pointer in order to push return. + * Thus, we add another 8 bytes to figure out the out of caller (in of callee). + */ + offset += sizeof(uint64_t); + } + + if (cu_->instruction_set == kX86_64) { + RegStorage reg_arg = GetArgMappingToPhysicalReg(in_position); + if (!reg_arg.Valid()) { + RegStorage new_reg = wide ? AllocTypedTempWide(false, reg_class) : AllocTypedTemp(false, reg_class); + LoadBaseDisp(TargetReg(kSp), offset, new_reg, wide ? k64 : k32); + return new_reg; + } else { + // Check if we need to copy the arg to a different reg_class. 
+ if (!RegClassMatches(reg_class, reg_arg)) { + if (wide) { + RegStorage new_reg = AllocTypedTempWide(false, reg_class); + OpRegCopyWide(new_reg, reg_arg); + reg_arg = new_reg; + } else { + RegStorage new_reg = AllocTypedTemp(false, reg_class); + OpRegCopy(new_reg, reg_arg); + reg_arg = new_reg; + } + } + } + return reg_arg; + } + + RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position); + RegStorage reg_arg_high = wide ? GetArgMappingToPhysicalReg(in_position + 1) : + RegStorage::InvalidReg(); + // If the VR is wide and there is no register for high part, we need to load it. if (wide && !reg_arg_high.Valid()) { // If the low part is not in a reg, we allocate a pair. Otherwise, we just load to high reg. @@ -129,15 +160,22 @@ RegStorage Mir2Lir::LoadArg(int in_position, RegisterClass reg_class, bool wide) void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) { int offset = StackVisitor::GetOutVROffset(in_position, cu_->instruction_set); - if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) { + if (cu_->instruction_set == kX86) { /* * When doing a call for x86, it moves the stack pointer in order to push return. * Thus, we add another 4 bytes to figure out the out of caller (in of callee). - * TODO: This needs revisited for 64-bit. */ offset += sizeof(uint32_t); } + if (cu_->instruction_set == kX86_64) { + /* + * When doing a call for x86, it moves the stack pointer in order to push return. + * Thus, we add another 8 bytes to figure out the out of caller (in of callee). + */ + offset += sizeof(uint64_t); + } + if (!rl_dest.wide) { RegStorage reg = GetArgMappingToPhysicalReg(in_position); if (reg.Valid()) { @@ -146,6 +184,16 @@ void Mir2Lir::LoadArgDirect(int in_position, RegLocation rl_dest) { Load32Disp(TargetReg(kSp), offset, rl_dest.reg); } } else { + if (cu_->instruction_set == kX86_64) { + RegStorage reg = GetArgMappingToPhysicalReg(in_position); + if (reg.Valid()) { + OpRegCopy(rl_dest.reg, reg); + } else { + LoadBaseDisp(TargetReg(kSp), offset, rl_dest.reg, k64); + } + return; + } + RegStorage reg_arg_low = GetArgMappingToPhysicalReg(in_position); RegStorage reg_arg_high = GetArgMappingToPhysicalReg(in_position + 1); diff --git a/compiler/dex/quick/mir_to_lir.h b/compiler/dex/quick/mir_to_lir.h index ed94a8d844..9718acde6c 100644 --- a/compiler/dex/quick/mir_to_lir.h +++ b/compiler/dex/quick/mir_to_lir.h @@ -910,13 +910,13 @@ class Mir2Lir : public Backend { void GenInvoke(CallInfo* info); void GenInvokeNoInline(CallInfo* info); virtual void FlushIns(RegLocation* ArgLocs, RegLocation rl_method); - int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, + virtual int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, NextCallInsn next_call_insn, const MethodReference& target_method, uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method, InvokeType type, bool skip_this); - int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel, + virtual int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel, NextCallInsn next_call_insn, const MethodReference& target_method, uint32_t vtable_idx, diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc index 39a036560e..c3832969a4 100644 --- a/compiler/dex/quick/x86/assemble_x86.cc +++ b/compiler/dex/quick/x86/assemble_x86.cc @@ -317,6 +317,7 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, #undef UNARY_ENCODING_MAP { kx86Cdq32Da, kRegOpcode, NO_OPERAND | REG_DEFAD_USEA, { 0, 0, 0x99, 0, 0, 0, 0, 0 }, "Cdq", "" }, + 
{ kx86Cqo64Da, kRegOpcode, NO_OPERAND | REG_DEFAD_USEA, { REX_W, 0, 0x99, 0, 0, 0, 0, 0 }, "Cqo", "" }, { kX86Bswap32R, kRegOpcode, IS_UNARY_OP | REG_DEF0_USE0, { 0, 0, 0x0F, 0xC8, 0, 0, 0, 0 }, "Bswap32R", "!0r" }, { kX86Push32R, kRegOpcode, IS_UNARY_OP | REG_USE0 | REG_USE_SP | REG_DEF_SP | IS_STORE, { 0, 0, 0x50, 0, 0, 0, 0, 0 }, "Push32R", "!0r" }, { kX86Pop32R, kRegOpcode, IS_UNARY_OP | REG_DEF0 | REG_USE_SP | REG_DEF_SP | IS_LOAD, { 0, 0, 0x58, 0, 0, 0, 0, 0 }, "Pop32R", "!0r" }, @@ -326,6 +327,11 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \ { kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE12, { prefix, 0, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" } +#define EXT_0F_REX_W_ENCODING_MAP(opname, prefix, opcode, reg_def) \ +{ kX86 ## opname ## RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE1, { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \ +{ kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \ +{ kX86 ## opname ## RA, kRegArray, IS_LOAD | IS_QUIN_OP | reg_def | REG_USE12, { prefix, REX_W, 0x0F, opcode, 0, 0, 0, 0 }, #opname "RA", "!0r,[!1r+!2r<<!3d+!4d]" } + #define EXT_0F_ENCODING2_MAP(opname, prefix, opcode, opcode2, reg_def) \ { kX86 ## opname ## RR, kRegReg, IS_BINARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0 }, #opname "RR", "!0r,!1r" }, \ { kX86 ## opname ## RM, kRegMem, IS_LOAD | IS_TERTIARY_OP | reg_def | REG_USE1, { prefix, 0, 0x0F, opcode, opcode2, 0, 0, 0 }, #opname "RM", "!0r,[!1r+!2d]" }, \ @@ -341,8 +347,12 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, EXT_0F_ENCODING_MAP(Cvtsi2sd, 0xF2, 0x2A, REG_DEF0), EXT_0F_ENCODING_MAP(Cvtsi2ss, 0xF3, 0x2A, REG_DEF0), + EXT_0F_REX_W_ENCODING_MAP(Cvtsqi2sd, 0xF2, 0x2A, REG_DEF0), + EXT_0F_REX_W_ENCODING_MAP(Cvtsqi2ss, 0xF3, 0x2A, REG_DEF0), EXT_0F_ENCODING_MAP(Cvttsd2si, 0xF2, 0x2C, REG_DEF0), EXT_0F_ENCODING_MAP(Cvttss2si, 0xF3, 0x2C, REG_DEF0), + EXT_0F_REX_W_ENCODING_MAP(Cvttsd2sqi, 0xF2, 0x2C, REG_DEF0), + EXT_0F_REX_W_ENCODING_MAP(Cvttss2sqi, 0xF3, 0x2C, REG_DEF0), EXT_0F_ENCODING_MAP(Cvtsd2si, 0xF2, 0x2D, REG_DEF0), EXT_0F_ENCODING_MAP(Cvtss2si, 0xF3, 0x2D, REG_DEF0), EXT_0F_ENCODING_MAP(Ucomisd, 0x66, 0x2E, SETS_CCODES|REG_USE0), @@ -428,10 +438,19 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, { kX86MovhpsAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x0, 0, 0x0F, 0x17, 0, 0, 0, 0 }, "MovhpsAR", "[!0r+!1r<<!2d+!3d],!4r" }, EXT_0F_ENCODING_MAP(Movdxr, 0x66, 0x6E, REG_DEF0), + EXT_0F_REX_W_ENCODING_MAP(Movqxr, 0x66, 0x6E, REG_DEF0), + { kX86MovqrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE1, { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovqrxRR", "!0r,!1r" }, + { kX86MovqrxMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovqrxMR", "[!0r+!1d],!2r" }, + { kX86MovqrxAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x66, REX_W, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovqrxAR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86MovdrxRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE1, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxRR", "!0r,!1r" }, { kX86MovdrxMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, "MovdrxMR", "[!0r+!1d],!2r" }, { kX86MovdrxAR, kArrayReg, IS_STORE | IS_QUIN_OP | REG_USE014, { 0x66, 0, 0x0F, 0x7E, 0, 0, 0, 0 }, 
"MovdrxAR", "[!0r+!1r<<!2d+!3d],!4r" }, + { kX86MovsxdRR, kRegReg, IS_BINARY_OP | REG_DEF0 | REG_USE1, { REX_W, 0, 0x63, 0, 0, 0, 0, 0 }, "MovsxdRR", "!0r,!1r" }, + { kX86MovsxdRM, kRegMem, IS_LOAD | IS_TERTIARY_OP | REG_DEF0 | REG_USE1, { REX_W, 0, 0x63, 0, 0, 0, 0, 0 }, "MovsxdRM", "!0r,[!1r+!2d]" }, + { kX86MovsxdRA, kRegArray, IS_LOAD | IS_QUIN_OP | REG_DEF0 | REG_USE12, { REX_W, 0, 0x63, 0, 0, 0, 0, 0 }, "MovsxdRA", "!0r,[!1r+!2r<<!3d+!4d]" }, + { kX86Set8R, kRegCond, IS_BINARY_OP | REG_DEF0 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8R", "!1c !0r" }, { kX86Set8M, kMemCond, IS_STORE | IS_TERTIARY_OP | REG_USE0 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8M", "!2c [!0r+!1d]" }, { kX86Set8A, kArrayCond, IS_STORE | IS_QUIN_OP | REG_USE01 | USES_CCODES, { 0, 0, 0x0F, 0x90, 0, 0, 0, 0 }, "Set8A", "!4c [!0r+!1r<<!2d+!3d]" }, @@ -442,6 +461,7 @@ ENCODING_MAP(Cmp, IS_LOAD, 0, 0, EXT_0F_ENCODING_MAP(Imul16, 0x66, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES), EXT_0F_ENCODING_MAP(Imul32, 0x00, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES), + EXT_0F_ENCODING_MAP(Imul64, REX_W, 0xAF, REG_USE0 | REG_DEF0 | SETS_CCODES), { kX86CmpxchgRR, kRegRegStore, IS_BINARY_OP | REG_DEF0 | REG_USE01 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "!0r,!1r" }, { kX86CmpxchgMR, kMemReg, IS_STORE | IS_TERTIARY_OP | REG_USE02 | REG_DEFA_USEA | SETS_CCODES, { 0, 0, 0x0F, 0xB1, 0, 0, 0, 0 }, "Cmpxchg", "[!0r+!1d],!2r" }, @@ -507,7 +527,7 @@ size_t X86Mir2Lir::ComputeSize(const X86EncodingMap* entry, int base, int displa } if (displacement != 0 || LowRegisterBits(RegStorage::RegNum(base)) == rs_rBP.GetRegNum()) { // BP requires an explicit displacement, even when it's 0. - if (entry->opcode != kX86Lea32RA) { + if (entry->opcode != kX86Lea32RA && entry->opcode != kX86Lea64RA) { DCHECK_NE(entry->flags & (IS_LOAD | IS_STORE), 0ULL) << entry->name; } size += IS_SIMM8(displacement) ? 1 : 4; @@ -676,7 +696,7 @@ int X86Mir2Lir::GetInsnSize(LIR* lir) { case kMacro: // lir operands - 0: reg DCHECK_EQ(lir->opcode, static_cast<int>(kX86StartOfMethod)); return 5 /* call opcode + 4 byte displacement */ + 1 /* pop reg */ + - ComputeSize(&X86Mir2Lir::EncodingMap[kX86Sub32RI], 0, 0, + ComputeSize(&X86Mir2Lir::EncodingMap[Gen64Bit() ? kX86Sub64RI : kX86Sub32RI], 0, 0, lir->operands[0], NO_REG, false) - // shorter ax encoding (RegStorage::RegNum(lir->operands[0]) == rs_rAX.GetRegNum() ? 1 : 0); @@ -1408,8 +1428,8 @@ void X86Mir2Lir::EmitMacro(const X86EncodingMap* entry, uint8_t reg, int offset) DCHECK_LT(RegStorage::RegNum(reg), 8); code_buffer_.push_back(0x58 + RegStorage::RegNum(reg)); // pop reg - EmitRegImm(&X86Mir2Lir::EncodingMap[kX86Sub32RI], RegStorage::RegNum(reg), - offset + 5 /* size of call +0 */); + EmitRegImm(&X86Mir2Lir::EncodingMap[Gen64Bit() ? kX86Sub64RI : kX86Sub32RI], + RegStorage::RegNum(reg), offset + 5 /* size of call +0 */); } void X86Mir2Lir::EmitUnimplemented(const X86EncodingMap* entry, LIR* lir) { diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc index fc0b305fc3..f5fce34f2b 100644 --- a/compiler/dex/quick/x86/call_x86.cc +++ b/compiler/dex/quick/x86/call_x86.cc @@ -86,11 +86,19 @@ void X86Mir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset, if (base_of_code_ != nullptr) { // We can use the saved value. 
RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - rl_method = LoadValue(rl_method, kCoreReg); + if (rl_method.wide) { + rl_method = LoadValueWide(rl_method, kCoreReg); + } else { + rl_method = LoadValue(rl_method, kCoreReg); + } start_of_method_reg = rl_method.reg; store_method_addr_used_ = true; } else { - start_of_method_reg = AllocTemp(); + if (Gen64Bit()) { + start_of_method_reg = AllocTempWide(); + } else { + start_of_method_reg = AllocTemp(); + } NewLIR1(kX86StartOfMethod, start_of_method_reg.GetReg()); } int low_key = s4FromSwitchData(&table[2]); @@ -108,9 +116,14 @@ void X86Mir2Lir::GenPackedSwitch(MIR* mir, DexOffset table_offset, // Load the displacement from the switch table RegStorage disp_reg = AllocTemp(); - NewLIR5(kX86PcRelLoadRA, disp_reg.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(), 2, WrapPointer(tab_rec)); + NewLIR5(kX86PcRelLoadRA, disp_reg.GetReg(), start_of_method_reg.GetReg(), keyReg.GetReg(), + 2, WrapPointer(tab_rec)); // Add displacement to start of method - OpRegReg(kOpAdd, start_of_method_reg, disp_reg); + if (Gen64Bit()) { + NewLIR2(kX86Add64RR, start_of_method_reg.GetReg(), disp_reg.GetReg()); + } else { + OpRegReg(kOpAdd, start_of_method_reg, disp_reg); + } // ..and go! LIR* switch_branch = NewLIR1(kX86JmpR, start_of_method_reg.GetReg()); tab_rec->anchor = switch_branch; @@ -150,13 +163,18 @@ void X86Mir2Lir::GenFillArrayData(DexOffset table_offset, RegLocation rl_src) { if (base_of_code_ != nullptr) { // We can use the saved value. RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - LoadValueDirect(rl_method, rs_rX86_ARG2); + if (rl_method.wide) { + LoadValueDirectWide(rl_method, rs_rX86_ARG2); + } else { + LoadValueDirect(rl_method, rs_rX86_ARG2); + } store_method_addr_used_ = true; } else { + // TODO(64) force to be 64-bit NewLIR1(kX86StartOfMethod, rs_rX86_ARG2.GetReg()); } NewLIR2(kX86PcRelAdr, rs_rX86_ARG1.GetReg(), WrapPointer(tab_rec)); - NewLIR2(kX86Add32RR, rs_rX86_ARG1.GetReg(), rs_rX86_ARG2.GetReg()); + NewLIR2(Gen64Bit() ? kX86Add64RR : kX86Add32RR, rs_rX86_ARG1.GetReg(), rs_rX86_ARG2.GetReg()); if (Is64BitInstructionSet(cu_->instruction_set)) { CallRuntimeHelperRegReg(QUICK_ENTRYPOINT_OFFSET(8, pHandleFillArrayData), rs_rX86_ARG0, rs_rX86_ARG1, true); @@ -264,9 +282,10 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { OpRegThreadMem(kOpCmp, rs_rX86_SP, Thread::StackEndOffset<4>()); } LIR* branch = OpCondBranch(kCondUlt, nullptr); - AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, - frame_size_ - - GetInstructionSetPointerSize(cu_->instruction_set))); + AddSlowPath( + new(arena_)StackOverflowSlowPath(this, branch, + frame_size_ - + GetInstructionSetPointerSize(cu_->instruction_set))); } FlushIns(ArgLocs, rl_method); @@ -276,7 +295,7 @@ void X86Mir2Lir::GenEntrySequence(RegLocation* ArgLocs, RegLocation rl_method) { setup_method_address_[0] = NewLIR1(kX86StartOfMethod, rs_rX86_ARG0.GetReg()); int displacement = SRegOffset(base_of_code_->s_reg_low); // Native pointer - must be natural word size. - setup_method_address_[1] = StoreWordDisp(rs_rX86_SP, displacement, rs_rX86_ARG0); + setup_method_address_[1] = StoreBaseDisp(rs_rX86_SP, displacement, rs_rX86_ARG0, Gen64Bit() ? 
k64 : k32); } FreeTemp(rs_rX86_ARG0); diff --git a/compiler/dex/quick/x86/codegen_x86.h b/compiler/dex/quick/x86/codegen_x86.h index 648c148c15..38d60d2b54 100644 --- a/compiler/dex/quick/x86/codegen_x86.h +++ b/compiler/dex/quick/x86/codegen_x86.h @@ -20,9 +20,43 @@ #include "dex/compiler_internals.h" #include "x86_lir.h" +#include <map> + namespace art { class X86Mir2Lir : public Mir2Lir { + protected: + class InToRegStorageMapper { + public: + virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide) = 0; + virtual ~InToRegStorageMapper() {} + }; + + class InToRegStorageX86_64Mapper : public InToRegStorageMapper { + public: + InToRegStorageX86_64Mapper() : cur_core_reg_(0), cur_fp_reg_(0) {} + virtual ~InToRegStorageX86_64Mapper() {} + virtual RegStorage GetNextReg(bool is_double_or_float, bool is_wide); + private: + int cur_core_reg_; + int cur_fp_reg_; + }; + + class InToRegStorageMapping { + public: + InToRegStorageMapping() : initialized_(false) {} + void Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper); + int GetMaxMappedIn() { return max_mapped_in_; } + bool IsThereStackMapped() { return is_there_stack_mapped_; } + RegStorage Get(int in_position); + bool IsInitialized() { return initialized_; } + private: + std::map<int, RegStorage> mapping_; + int max_mapped_in_; + bool is_there_stack_mapped_; + bool initialized_; + }; + public: X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* arena, bool gen64bit); @@ -56,6 +90,7 @@ class X86Mir2Lir : public Mir2Lir { // Required for target - register utilities. RegStorage TargetReg(SpecialTargetRegister reg); RegStorage GetArgMappingToPhysicalReg(int arg_num); + RegStorage GetCoreArgMappingToPhysicalReg(int core_arg_num); RegLocation GetReturnAlt(); RegLocation GetReturnWideAlt(); RegLocation LocCReturn(); @@ -151,22 +186,25 @@ class X86Mir2Lir : public Mir2Lir { void GenNegFloat(RegLocation rl_dest, RegLocation rl_src); void GenPackedSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); void GenSparseSwitch(MIR* mir, DexOffset table_offset, RegLocation rl_src); + void GenIntToLong(RegLocation rl_dest, RegLocation rl_src); /* * @brief Generate a two address long operation with a constant value * @param rl_dest location of result * @param rl_src constant source operand * @param op Opcode to be generated + * @return success or not */ - void GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op); + bool GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op); /* * @brief Generate a three address long operation with a constant value * @param rl_dest location of result * @param rl_src1 source operand * @param rl_src2 constant source operand * @param op Opcode to be generated + * @return success or not */ - void GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, + bool GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, Instruction::Code op); /** @@ -222,6 +260,9 @@ class X86Mir2Lir : public Mir2Lir { bool can_assume_type_is_in_dex_cache, uint32_t type_idx, RegLocation rl_dest, RegLocation rl_src); + void GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_shift); + // Single operation generators. 
LIR* OpUnconditionalBranch(LIR* target); LIR* OpCmpBranch(ConditionCode cond, RegStorage src1, RegStorage src2, LIR* target); @@ -306,6 +347,22 @@ class X86Mir2Lir : public Mir2Lir { */ void LoadClassType(uint32_t type_idx, SpecialTargetRegister symbolic_reg); + void FlushIns(RegLocation* ArgLocs, RegLocation rl_method); + + int GenDalvikArgsNoRange(CallInfo* info, int call_state, LIR** pcrLabel, + NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, + uintptr_t direct_code, uintptr_t direct_method, InvokeType type, + bool skip_this); + + int GenDalvikArgsRange(CallInfo* info, int call_state, LIR** pcrLabel, + NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, + uintptr_t direct_code, uintptr_t direct_method, InvokeType type, + bool skip_this); + /* * @brief Generate a relative call to the method that will be patched at link time. * @param target_method The MethodReference of the method to be invoked. @@ -794,6 +851,8 @@ class X86Mir2Lir : public Mir2Lir { * @param mir A kMirOpConst128b MIR instruction to match. */ LIR *AddVectorLiteral(MIR *mir); + + InToRegStorageMapping in_to_reg_storage_mapping_; }; } // namespace art diff --git a/compiler/dex/quick/x86/fp_x86.cc b/compiler/dex/quick/x86/fp_x86.cc index 0421a5967a..c3580f76ae 100644 --- a/compiler/dex/quick/x86/fp_x86.cc +++ b/compiler/dex/quick/x86/fp_x86.cc @@ -272,21 +272,67 @@ void X86Mir2Lir::GenConversion(Instruction::Code opcode, RegLocation rl_dest, return; } case Instruction::LONG_TO_DOUBLE: + if (Gen64Bit()) { + rcSrc = kCoreReg; + op = kX86Cvtsqi2sdRR; + break; + } GenLongToFP(rl_dest, rl_src, true /* is_double */); return; case Instruction::LONG_TO_FLOAT: + if (Gen64Bit()) { + rcSrc = kCoreReg; + op = kX86Cvtsqi2ssRR; + break; + } GenLongToFP(rl_dest, rl_src, false /* is_double */); return; case Instruction::FLOAT_TO_LONG: - if (Is64BitInstructionSet(cu_->instruction_set)) { - GenConversionCall(QUICK_ENTRYPOINT_OFFSET(8, pF2l), rl_dest, rl_src); + if (Gen64Bit()) { + rl_src = LoadValue(rl_src, kFPReg); + // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc() + ClobberSReg(rl_dest.s_reg_low); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + RegStorage temp_reg = AllocTempSingle(); + + // Set 0x7fffffffffffffff to rl_result + LoadConstantWide(rl_result.reg, 0x7fffffffffffffff); + NewLIR2(kX86Cvtsqi2ssRR, temp_reg.GetReg(), rl_result.reg.GetReg()); + NewLIR2(kX86ComissRR, rl_src.reg.GetReg(), temp_reg.GetReg()); + LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA); + LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP); + NewLIR2(kX86Cvttss2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg()); + LIR* branch_normal = NewLIR1(kX86Jmp8, 0); + branch_na_n->target = NewLIR0(kPseudoTargetLabel); + NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg()); + branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel); + branch_normal->target = NewLIR0(kPseudoTargetLabel); + StoreValueWide(rl_dest, rl_result); } else { GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pF2l), rl_dest, rl_src); } return; case Instruction::DOUBLE_TO_LONG: - if (Is64BitInstructionSet(cu_->instruction_set)) { - GenConversionCall(QUICK_ENTRYPOINT_OFFSET(8, pD2l), rl_dest, rl_src); + if (Gen64Bit()) { + rl_src = LoadValueWide(rl_src, kFPReg); + // If result vreg is also src vreg, break association to avoid useless copy by EvalLoc() + ClobberSReg(rl_dest.s_reg_low); + rl_result = EvalLoc(rl_dest, kCoreReg, true); + RegStorage temp_reg = 
AllocTempDouble(); + + // Set 0x7fffffffffffffff to rl_result + LoadConstantWide(rl_result.reg, 0x7fffffffffffffff); + NewLIR2(kX86Cvtsqi2sdRR, temp_reg.GetReg(), rl_result.reg.GetReg()); + NewLIR2(kX86ComisdRR, rl_src.reg.GetReg(), temp_reg.GetReg()); + LIR* branch_pos_overflow = NewLIR2(kX86Jcc8, 0, kX86CondA); + LIR* branch_na_n = NewLIR2(kX86Jcc8, 0, kX86CondP); + NewLIR2(kX86Cvttsd2sqiRR, rl_result.reg.GetReg(), rl_src.reg.GetReg()); + LIR* branch_normal = NewLIR1(kX86Jmp8, 0); + branch_na_n->target = NewLIR0(kPseudoTargetLabel); + NewLIR2(kX86Xor64RR, rl_result.reg.GetReg(), rl_result.reg.GetReg()); + branch_pos_overflow->target = NewLIR0(kPseudoTargetLabel); + branch_normal->target = NewLIR0(kPseudoTargetLabel); + StoreValueWide(rl_dest, rl_result); } else { GenConversionCall(QUICK_ENTRYPOINT_OFFSET(4, pD2l), rl_dest, rl_src); } @@ -434,9 +480,14 @@ void X86Mir2Lir::GenNegFloat(RegLocation rl_dest, RegLocation rl_src) { void X86Mir2Lir::GenNegDouble(RegLocation rl_dest, RegLocation rl_src) { RegLocation rl_result; rl_src = LoadValueWide(rl_src, kCoreReg); - rl_result = EvalLoc(rl_dest, kCoreReg, true); - OpRegRegImm(kOpAdd, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 0x80000000); - OpRegCopy(rl_result.reg, rl_src.reg); + rl_result = EvalLocWide(rl_dest, kCoreReg, true); + if (Gen64Bit()) { + LoadConstantWide(rl_result.reg, 0x8000000000000000); + OpRegReg(kOpAdd, rl_result.reg, rl_src.reg); + } else { + OpRegRegImm(kOpAdd, rl_result.reg.GetHigh(), rl_src.reg.GetHigh(), 0x80000000); + OpRegCopy(rl_result.reg, rl_src.reg); + } StoreValueWide(rl_dest, rl_result); } diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc index 1cc16b9e12..d214b8de7b 100644 --- a/compiler/dex/quick/x86/int_x86.cc +++ b/compiler/dex/quick/x86/int_x86.cc @@ -31,6 +31,23 @@ namespace art { */ void X86Mir2Lir::GenCmpLong(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { + if (Gen64Bit()) { + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + rl_src2 = LoadValueWide(rl_src2, kCoreReg); + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + OpRegReg(kOpXor, rl_result.reg, rl_result.reg); // result = 0 + OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); + NewLIR2(kX86Set8R, rl_result.reg.GetReg(), kX86CondNe); // result = (src1 != src2) ? 1 : result + RegStorage temp_reg = AllocTemp(); + OpRegReg(kOpNeg, temp_reg, rl_result.reg); + OpRegReg(kOpCmp, rl_src1.reg, rl_src2.reg); + // result = (src1 < src2) ? -result : result + OpCondRegReg(kOpCmov, kCondLt, rl_result.reg, temp_reg); + StoreValue(rl_dest, rl_result); + FreeTemp(temp_reg); + return; + } + FlushAllRegs(); LockCallTemps(); // Prepare for explicit register usage RegStorage r_tmp1 = RegStorage::MakeRegPair(rs_r0, rs_r1); @@ -108,7 +125,7 @@ LIR* X86Mir2Lir::OpRegCopyNoInsert(RegStorage r_dest, RegStorage r_src) { } if (r_dest.IsFloat() || r_src.IsFloat()) return OpFpRegCopy(r_dest, r_src); - LIR* res = RawLIR(current_dalvik_offset_, kX86Mov32RR, + LIR* res = RawLIR(current_dalvik_offset_, r_dest.Is64Bit() ? kX86Mov64RR : kX86Mov32RR, r_dest.GetReg(), r_src.GetReg()); if (!(cu_->disable_opt & (1 << kSafeOptimizations)) && r_dest == r_src) { res->flags.is_nop = true; @@ -133,36 +150,51 @@ void X86Mir2Lir::OpRegCopyWide(RegStorage r_dest, RegStorage r_src) { } else { // TODO: Prevent this from happening in the code. The result is often // unused or could have been loaded more easily from memory. 
- NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg()); - RegStorage r_tmp = AllocTempDouble(); - NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg()); - NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg()); - FreeTemp(r_tmp); + if (!r_src.IsPair()) { + DCHECK(!r_dest.IsPair()); + NewLIR2(kX86MovqxrRR, r_dest.GetReg(), r_src.GetReg()); + } else { + NewLIR2(kX86MovdxrRR, r_dest.GetReg(), r_src.GetLowReg()); + RegStorage r_tmp = AllocTempDouble(); + NewLIR2(kX86MovdxrRR, r_tmp.GetReg(), r_src.GetHighReg()); + NewLIR2(kX86PunpckldqRR, r_dest.GetReg(), r_tmp.GetReg()); + FreeTemp(r_tmp); + } } } else { if (src_fp) { - NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg()); - RegStorage temp_reg = AllocTempDouble(); - NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg()); - NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32); - NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg()); + if (!r_dest.IsPair()) { + DCHECK(!r_src.IsPair()); + NewLIR2(kX86MovqrxRR, r_dest.GetReg(), r_src.GetReg()); + } else { + NewLIR2(kX86MovdrxRR, r_dest.GetLowReg(), r_src.GetReg()); + RegStorage temp_reg = AllocTempDouble(); + NewLIR2(kX86MovsdRR, temp_reg.GetReg(), r_src.GetReg()); + NewLIR2(kX86PsrlqRI, temp_reg.GetReg(), 32); + NewLIR2(kX86MovdrxRR, r_dest.GetHighReg(), temp_reg.GetReg()); + } } else { - DCHECK(r_dest.IsPair()); - DCHECK(r_src.IsPair()); - // Handle overlap - if (r_src.GetHighReg() == r_dest.GetLowReg() && r_src.GetLowReg() == r_dest.GetHighReg()) { - // Deal with cycles. - RegStorage temp_reg = AllocTemp(); - OpRegCopy(temp_reg, r_dest.GetHigh()); - OpRegCopy(r_dest.GetHigh(), r_dest.GetLow()); - OpRegCopy(r_dest.GetLow(), temp_reg); - FreeTemp(temp_reg); - } else if (r_src.GetHighReg() == r_dest.GetLowReg()) { - OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); - OpRegCopy(r_dest.GetLow(), r_src.GetLow()); + DCHECK_EQ(r_dest.IsPair(), r_src.IsPair()); + if (!r_src.IsPair()) { + // Just copy the register directly. + OpRegCopy(r_dest, r_src); } else { - OpRegCopy(r_dest.GetLow(), r_src.GetLow()); - OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); + // Handle overlap + if (r_src.GetHighReg() == r_dest.GetLowReg() && + r_src.GetLowReg() == r_dest.GetHighReg()) { + // Deal with cycles. + RegStorage temp_reg = AllocTemp(); + OpRegCopy(temp_reg, r_dest.GetHigh()); + OpRegCopy(r_dest.GetHigh(), r_dest.GetLow()); + OpRegCopy(r_dest.GetLow(), temp_reg); + FreeTemp(temp_reg); + } else if (r_src.GetHighReg() == r_dest.GetLowReg()) { + OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); + OpRegCopy(r_dest.GetLow(), r_src.GetLow()); + } else { + OpRegCopy(r_dest.GetLow(), r_src.GetLow()); + OpRegCopy(r_dest.GetHigh(), r_src.GetHigh()); + } } } } @@ -832,7 +864,11 @@ LIR* X86Mir2Lir::OpPcRelLoad(RegStorage reg, LIR* target) { // Address the start of the method RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - LoadValueDirectFixed(rl_method, reg); + if (rl_method.wide) { + LoadValueDirectWideFixed(rl_method, reg); + } else { + LoadValueDirectFixed(rl_method, reg); + } store_method_addr_used_ = true; // Load the proper value from the literal area. @@ -871,18 +907,23 @@ void X86Mir2Lir::GenMultiplyByTwoBitMultiplier(RegLocation rl_src, } void X86Mir2Lir::GenDivZeroCheckWide(RegStorage reg) { - DCHECK(reg.IsPair()); // TODO: allow 64BitSolo. - // We are not supposed to clobber the incoming storage, so allocate a temporary. - RegStorage t_reg = AllocTemp(); + if (Gen64Bit()) { + DCHECK(reg.Is64Bit()); - // Doing an OR is a quick way to check if both registers are zero. 
This will set the flags. - OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh()); + NewLIR2(kX86Cmp64RI8, reg.GetReg(), 0); + } else { + DCHECK(reg.IsPair()); + + // We are not supposed to clobber the incoming storage, so allocate a temporary. + RegStorage t_reg = AllocTemp(); + // Doing an OR is a quick way to check if both registers are zero. This will set the flags. + OpRegRegReg(kOpOr, t_reg, reg.GetLow(), reg.GetHigh()); + // The temp is no longer needed so free it at this time. + FreeTemp(t_reg); + } // In case of zero, throw ArithmeticException. GenDivZeroCheck(kCondEq); - - // The temp is no longer needed so free it at this time. - FreeTemp(t_reg); } void X86Mir2Lir::GenArrayBoundsCheck(RegStorage index, @@ -1221,18 +1262,22 @@ void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, if (rl_src.location == kLocPhysReg) { // Both operands are in registers. // But we must ensure that rl_src is in pair - rl_src = LoadValueWide(rl_src, kCoreReg); - if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) { - // The registers are the same, so we would clobber it before the use. - RegStorage temp_reg = AllocTemp(); - OpRegCopy(temp_reg, rl_dest.reg); - rl_src.reg.SetHighReg(temp_reg.GetReg()); - } - NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg()); + if (Gen64Bit()) { + NewLIR2(x86op, rl_dest.reg.GetReg(), rl_src.reg.GetReg()); + } else { + rl_src = LoadValueWide(rl_src, kCoreReg); + if (rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()) { + // The registers are the same, so we would clobber it before the use. + RegStorage temp_reg = AllocTemp(); + OpRegCopy(temp_reg, rl_dest.reg); + rl_src.reg.SetHighReg(temp_reg.GetReg()); + } + NewLIR2(x86op, rl_dest.reg.GetLowReg(), rl_src.reg.GetLowReg()); - x86op = GetOpcode(op, rl_dest, rl_src, true); - NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg()); - FreeTemp(rl_src.reg); + x86op = GetOpcode(op, rl_dest, rl_src, true); + NewLIR2(x86op, rl_dest.reg.GetHighReg(), rl_src.reg.GetHighReg()); + FreeTemp(rl_src.reg); // ??? + } return; } @@ -1242,11 +1287,13 @@ void X86Mir2Lir::GenLongRegOrMemOp(RegLocation rl_dest, RegLocation rl_src, int r_base = TargetReg(kSp).GetReg(); int displacement = SRegOffset(rl_src.s_reg_low); - LIR *lir = NewLIR3(x86op, rl_dest.reg.GetLowReg(), r_base, displacement + LOWORD_OFFSET); + LIR *lir = NewLIR3(x86op, Gen64Bit() ? rl_dest.reg.GetReg() : rl_dest.reg.GetLowReg(), r_base, displacement + LOWORD_OFFSET); AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, true /* is_load */, true /* is64bit */); - x86op = GetOpcode(op, rl_dest, rl_src, true); - lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET); + if (!Gen64Bit()) { + x86op = GetOpcode(op, rl_dest, rl_src, true); + lir = NewLIR3(x86op, rl_dest.reg.GetHighReg(), r_base, displacement + HIWORD_OFFSET); + } AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, true /* is_load */, true /* is64bit */); } @@ -1273,13 +1320,16 @@ void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src, Instructi int r_base = TargetReg(kSp).GetReg(); int displacement = SRegOffset(rl_dest.s_reg_low); - LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, rl_src.reg.GetLowReg()); + LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, + Gen64Bit() ? 
rl_src.reg.GetReg() : rl_src.reg.GetLowReg()); AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, true /* is_load */, true /* is64bit */); AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, false /* is_load */, true /* is64bit */); - x86op = GetOpcode(op, rl_dest, rl_src, true); - lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg()); + if (!Gen64Bit()) { + x86op = GetOpcode(op, rl_dest, rl_src, true); + lir = NewLIR3(x86op, r_base, displacement + HIWORD_OFFSET, rl_src.reg.GetHighReg()); + } AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, true /* is_load */, true /* is64bit */); AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, @@ -1330,23 +1380,44 @@ void X86Mir2Lir::GenLongArith(RegLocation rl_dest, RegLocation rl_src1, // Get one of the source operands into temporary register. rl_src1 = LoadValueWide(rl_src1, kCoreReg); - if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) { - GenLongRegOrMemOp(rl_src1, rl_src2, op); - } else if (is_commutative) { - rl_src2 = LoadValueWide(rl_src2, kCoreReg); - // We need at least one of them to be a temporary. - if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) { - rl_src1 = ForceTempWide(rl_src1); + if (Gen64Bit()) { + if (IsTemp(rl_src1.reg)) { GenLongRegOrMemOp(rl_src1, rl_src2, op); + } else if (is_commutative) { + rl_src2 = LoadValueWide(rl_src2, kCoreReg); + // We need at least one of them to be a temporary. + if (!IsTemp(rl_src2.reg)) { + rl_src1 = ForceTempWide(rl_src1); + GenLongRegOrMemOp(rl_src1, rl_src2, op); + } else { + GenLongRegOrMemOp(rl_src2, rl_src1, op); + StoreFinalValueWide(rl_dest, rl_src2); + return; + } } else { - GenLongRegOrMemOp(rl_src2, rl_src1, op); - StoreFinalValueWide(rl_dest, rl_src2); - return; + // Need LHS to be the temp. + rl_src1 = ForceTempWide(rl_src1); + GenLongRegOrMemOp(rl_src1, rl_src2, op); } } else { - // Need LHS to be the temp. - rl_src1 = ForceTempWide(rl_src1); - GenLongRegOrMemOp(rl_src1, rl_src2, op); + if (IsTemp(rl_src1.reg.GetLow()) && IsTemp(rl_src1.reg.GetHigh())) { + GenLongRegOrMemOp(rl_src1, rl_src2, op); + } else if (is_commutative) { + rl_src2 = LoadValueWide(rl_src2, kCoreReg); + // We need at least one of them to be a temporary. + if (!(IsTemp(rl_src2.reg.GetLow()) && IsTemp(rl_src2.reg.GetHigh()))) { + rl_src1 = ForceTempWide(rl_src1); + GenLongRegOrMemOp(rl_src1, rl_src2, op); + } else { + GenLongRegOrMemOp(rl_src2, rl_src1, op); + StoreFinalValueWide(rl_dest, rl_src2); + return; + } + } else { + // Need LHS to be the temp. 
+ rl_src1 = ForceTempWide(rl_src1); + GenLongRegOrMemOp(rl_src1, rl_src2, op); + } } StoreFinalValueWide(rl_dest, rl_src1); @@ -1378,27 +1449,91 @@ void X86Mir2Lir::GenXorLong(Instruction::Code opcode, RegLocation rl_dest, } void X86Mir2Lir::GenNotLong(RegLocation rl_dest, RegLocation rl_src) { - LOG(FATAL) << "Unexpected use GenNotLong()"; + if (Gen64Bit()) { + rl_src = LoadValueWide(rl_src, kCoreReg); + RegLocation rl_result; + rl_result = EvalLocWide(rl_dest, kCoreReg, true); + OpRegCopy(rl_result.reg, rl_src.reg); + OpReg(kOpNot, rl_result.reg); + StoreValueWide(rl_dest, rl_result); + } else { + LOG(FATAL) << "Unexpected use GenNotLong()"; + } } void X86Mir2Lir::GenDivRemLong(Instruction::Code, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, bool is_div) { - LOG(FATAL) << "Unexpected use GenDivRemLong()"; + if (!Gen64Bit()) { + LOG(FATAL) << "Unexpected use GenDivRemLong()"; + return; + } + + // We have to use fixed registers, so flush all the temps. + FlushAllRegs(); + LockCallTemps(); // Prepare for explicit register usage. + + // Load LHS into RAX. + LoadValueDirectWideFixed(rl_src1, rs_r0q); + + // Load RHS into RCX. + LoadValueDirectWideFixed(rl_src2, rs_r1q); + + // Copy LHS sign bit into RDX. + NewLIR0(kx86Cqo64Da); + + // Handle division by zero case. + GenDivZeroCheckWide(rs_r1q); + + // Have to catch 0x8000000000000000/-1 case, or we will get an exception! + NewLIR2(kX86Cmp64RI8, rs_r1q.GetReg(), -1); + LIR *minus_one_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); + + // RHS is -1. + LoadConstantWide(rs_r3q, 0x8000000000000000); + NewLIR2(kX86Cmp64RR, rs_r0q.GetReg(), rs_r3q.GetReg()); + LIR * minint_branch = NewLIR2(kX86Jcc8, 0, kX86CondNe); + + // In 0x8000000000000000/-1 case. + if (!is_div) { + // For DIV, RAX is already right. For REM, we need RDX 0. + NewLIR2(kX86Xor64RR, rs_r2q.GetReg(), rs_r2q.GetReg()); + } + LIR* done = NewLIR1(kX86Jmp8, 0); + + // Expected case. + minus_one_branch->target = NewLIR0(kPseudoTargetLabel); + minint_branch->target = minus_one_branch->target; + NewLIR1(kX86Idivmod64DaR, rs_r1q.GetReg()); + done->target = NewLIR0(kPseudoTargetLabel); + + // Result is in RAX for div and RDX for rem. + RegLocation rl_result = {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, rs_r0q, INVALID_SREG, INVALID_SREG}; + if (!is_div) { + rl_result.reg.SetReg(r2q); + } + + StoreValueWide(rl_dest, rl_result); } void X86Mir2Lir::GenNegLong(RegLocation rl_dest, RegLocation rl_src) { rl_src = LoadValueWide(rl_src, kCoreReg); - RegLocation rl_result = ForceTempWide(rl_src); - if (((rl_dest.location == kLocPhysReg) && (rl_src.location == kLocPhysReg)) && - ((rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()))) { - // The registers are the same, so we would clobber it before the use. - RegStorage temp_reg = AllocTemp(); - OpRegCopy(temp_reg, rl_result.reg); - rl_result.reg.SetHighReg(temp_reg.GetReg()); + RegLocation rl_result; + if (Gen64Bit()) { + rl_result = EvalLocWide(rl_dest, kCoreReg, true); + OpRegReg(kOpNeg, rl_result.reg, rl_src.reg); + } else { + rl_result = ForceTempWide(rl_src); + if (((rl_dest.location == kLocPhysReg) && (rl_src.location == kLocPhysReg)) && + ((rl_dest.reg.GetLowReg() == rl_src.reg.GetHighReg()))) { + // The registers are the same, so we would clobber it before the use. 
+ RegStorage temp_reg = AllocTemp(); + OpRegCopy(temp_reg, rl_result.reg); + rl_result.reg.SetHighReg(temp_reg.GetReg()); + } + OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow()); // rLow = -rLow + OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0); // rHigh = rHigh + CF + OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh()); // rHigh = -rHigh } - OpRegReg(kOpNeg, rl_result.reg.GetLow(), rl_result.reg.GetLow()); // rLow = -rLow - OpRegImm(kOpAdc, rl_result.reg.GetHigh(), 0); // rHigh = rHigh + CF - OpRegReg(kOpNeg, rl_result.reg.GetHigh(), rl_result.reg.GetHigh()); // rHigh = -rHigh StoreValueWide(rl_dest, rl_result); } @@ -1551,60 +1686,84 @@ void X86Mir2Lir::GenArrayPut(int opt_flags, OpSize size, RegLocation rl_array, RegLocation X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src, int shift_amount) { RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); - switch (opcode) { - case Instruction::SHL_LONG: - case Instruction::SHL_LONG_2ADDR: - DCHECK_NE(shift_amount, 1); // Prevent a double store from happening. - if (shift_amount == 32) { - OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow()); - LoadConstant(rl_result.reg.GetLow(), 0); - } else if (shift_amount > 31) { - OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow()); - NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32); - LoadConstant(rl_result.reg.GetLow(), 0); - } else { - OpRegCopy(rl_result.reg, rl_src.reg); - OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); - NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(), shift_amount); - NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount); - } - break; - case Instruction::SHR_LONG: - case Instruction::SHR_LONG_2ADDR: - if (shift_amount == 32) { - OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); - OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); - NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31); - } else if (shift_amount > 31) { - OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); - OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); - NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32); - NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31); - } else { - OpRegCopy(rl_result.reg, rl_src.reg); - OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); - NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), shift_amount); - NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount); - } - break; - case Instruction::USHR_LONG: - case Instruction::USHR_LONG_2ADDR: - if (shift_amount == 32) { - OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); - LoadConstant(rl_result.reg.GetHigh(), 0); - } else if (shift_amount > 31) { - OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); - NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32); - LoadConstant(rl_result.reg.GetHigh(), 0); - } else { - OpRegCopy(rl_result.reg, rl_src.reg); - OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); - NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), shift_amount); - NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount); - } - break; - default: - LOG(FATAL) << "Unexpected case"; + if (Gen64Bit()) { + OpKind op = static_cast<OpKind>(0); /* Make gcc happy */ + switch (opcode) { + case Instruction::SHL_LONG: + case Instruction::SHL_LONG_2ADDR: + op = kOpLsl; + break; + case Instruction::SHR_LONG: + case Instruction::SHR_LONG_2ADDR: + op = 
kOpAsr; + break; + case Instruction::USHR_LONG: + case Instruction::USHR_LONG_2ADDR: + op = kOpLsr; + break; + default: + LOG(FATAL) << "Unexpected case"; + } + OpRegRegImm(op, rl_result.reg, rl_src.reg, shift_amount); + } else { + switch (opcode) { + case Instruction::SHL_LONG: + case Instruction::SHL_LONG_2ADDR: + DCHECK_NE(shift_amount, 1); // Prevent a double store from happening. + if (shift_amount == 32) { + OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow()); + LoadConstant(rl_result.reg.GetLow(), 0); + } else if (shift_amount > 31) { + OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetLow()); + NewLIR2(kX86Sal32RI, rl_result.reg.GetHighReg(), shift_amount - 32); + LoadConstant(rl_result.reg.GetLow(), 0); + } else { + OpRegCopy(rl_result.reg, rl_src.reg); + OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); + NewLIR3(kX86Shld32RRI, rl_result.reg.GetHighReg(), rl_result.reg.GetLowReg(), + shift_amount); + NewLIR2(kX86Sal32RI, rl_result.reg.GetLowReg(), shift_amount); + } + break; + case Instruction::SHR_LONG: + case Instruction::SHR_LONG_2ADDR: + if (shift_amount == 32) { + OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); + OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); + NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31); + } else if (shift_amount > 31) { + OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); + OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); + NewLIR2(kX86Sar32RI, rl_result.reg.GetLowReg(), shift_amount - 32); + NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), 31); + } else { + OpRegCopy(rl_result.reg, rl_src.reg); + OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); + NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), + shift_amount); + NewLIR2(kX86Sar32RI, rl_result.reg.GetHighReg(), shift_amount); + } + break; + case Instruction::USHR_LONG: + case Instruction::USHR_LONG_2ADDR: + if (shift_amount == 32) { + OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); + LoadConstant(rl_result.reg.GetHigh(), 0); + } else if (shift_amount > 31) { + OpRegCopy(rl_result.reg.GetLow(), rl_src.reg.GetHigh()); + NewLIR2(kX86Shr32RI, rl_result.reg.GetLowReg(), shift_amount - 32); + LoadConstant(rl_result.reg.GetHigh(), 0); + } else { + OpRegCopy(rl_result.reg, rl_src.reg); + OpRegCopy(rl_result.reg.GetHigh(), rl_src.reg.GetHigh()); + NewLIR3(kX86Shrd32RRI, rl_result.reg.GetLowReg(), rl_result.reg.GetHighReg(), + shift_amount); + NewLIR2(kX86Shr32RI, rl_result.reg.GetHighReg(), shift_amount); + } + break; + default: + LOG(FATAL) << "Unexpected case"; + } } return rl_result; } @@ -1634,24 +1793,26 @@ void X86Mir2Lir::GenShiftImmOpLong(Instruction::Code opcode, RegLocation rl_dest void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2) { + bool isConstSuccess = false; switch (opcode) { case Instruction::ADD_LONG: case Instruction::AND_LONG: case Instruction::OR_LONG: case Instruction::XOR_LONG: if (rl_src2.is_const) { - GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); + isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); } else { DCHECK(rl_src1.is_const); - GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); + isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); } break; case Instruction::SUB_LONG: case Instruction::SUB_LONG_2ADDR: if (rl_src2.is_const) { - GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); + isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); } else { GenSubLong(opcode, 
rl_dest, rl_src1, rl_src2); + isConstSuccess = true; } break; case Instruction::ADD_LONG_2ADDR: @@ -1660,20 +1821,24 @@ void X86Mir2Lir::GenArithImmOpLong(Instruction::Code opcode, case Instruction::AND_LONG_2ADDR: if (rl_src2.is_const) { if (GenerateTwoOperandInstructions()) { - GenLongImm(rl_dest, rl_src2, opcode); + isConstSuccess = GenLongImm(rl_dest, rl_src2, opcode); } else { - GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); + isConstSuccess = GenLongLongImm(rl_dest, rl_src1, rl_src2, opcode); } } else { DCHECK(rl_src1.is_const); - GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); + isConstSuccess = GenLongLongImm(rl_dest, rl_src2, rl_src1, opcode); } break; default: - // Default - bail to non-const handler. - GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); + isConstSuccess = false; break; } + + if (!isConstSuccess) { + // Default - bail to non-const handler. + GenArithOpLong(opcode, rl_dest, rl_src1, rl_src2); + } } bool X86Mir2Lir::IsNoOp(Instruction::Code op, int32_t value) { @@ -1695,40 +1860,50 @@ X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocat bool is_high_op) { bool rhs_in_mem = rhs.location != kLocPhysReg; bool dest_in_mem = dest.location != kLocPhysReg; + bool is64Bit = Gen64Bit(); DCHECK(!rhs_in_mem || !dest_in_mem); switch (op) { case Instruction::ADD_LONG: case Instruction::ADD_LONG_2ADDR: if (dest_in_mem) { - return is_high_op ? kX86Adc32MR : kX86Add32MR; + return is64Bit ? kX86Add64MR : is_high_op ? kX86Adc32MR : kX86Add32MR; } else if (rhs_in_mem) { - return is_high_op ? kX86Adc32RM : kX86Add32RM; + return is64Bit ? kX86Add64RM : is_high_op ? kX86Adc32RM : kX86Add32RM; } - return is_high_op ? kX86Adc32RR : kX86Add32RR; + return is64Bit ? kX86Add64RR : is_high_op ? kX86Adc32RR : kX86Add32RR; case Instruction::SUB_LONG: case Instruction::SUB_LONG_2ADDR: if (dest_in_mem) { - return is_high_op ? kX86Sbb32MR : kX86Sub32MR; + return is64Bit ? kX86Sub64MR : is_high_op ? kX86Sbb32MR : kX86Sub32MR; } else if (rhs_in_mem) { - return is_high_op ? kX86Sbb32RM : kX86Sub32RM; + return is64Bit ? kX86Sub64RM : is_high_op ? kX86Sbb32RM : kX86Sub32RM; } - return is_high_op ? kX86Sbb32RR : kX86Sub32RR; + return is64Bit ? kX86Sub64RR : is_high_op ? kX86Sbb32RR : kX86Sub32RR; case Instruction::AND_LONG_2ADDR: case Instruction::AND_LONG: if (dest_in_mem) { - return kX86And32MR; + return is64Bit ? kX86And64MR : kX86And32MR; + } + if (is64Bit) { + return rhs_in_mem ? kX86And64RM : kX86And64RR; } return rhs_in_mem ? kX86And32RM : kX86And32RR; case Instruction::OR_LONG: case Instruction::OR_LONG_2ADDR: if (dest_in_mem) { - return kX86Or32MR; + return is64Bit ? kX86Or64MR : kX86Or32MR; + } + if (is64Bit) { + return rhs_in_mem ? kX86Or64RM : kX86Or64RR; } return rhs_in_mem ? kX86Or32RM : kX86Or32RR; case Instruction::XOR_LONG: case Instruction::XOR_LONG_2ADDR: if (dest_in_mem) { - return kX86Xor32MR; + return is64Bit ? kX86Xor64MR : kX86Xor32MR; + } + if (is64Bit) { + return rhs_in_mem ? kX86Xor64RM : kX86Xor64RR; } return rhs_in_mem ? 
kX86Xor32RM : kX86Xor32RR; default: @@ -1740,6 +1915,7 @@ X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation dest, RegLocat X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_high_op, int32_t value) { bool in_mem = loc.location != kLocPhysReg; + bool is64Bit = Gen64Bit(); bool byte_imm = IS_SIMM8(value); DCHECK(in_mem || !loc.reg.IsFloat()); switch (op) { @@ -1747,43 +1923,61 @@ X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_h case Instruction::ADD_LONG_2ADDR: if (byte_imm) { if (in_mem) { - return is_high_op ? kX86Adc32MI8 : kX86Add32MI8; + return is64Bit ? kX86Add64MI8 : is_high_op ? kX86Adc32MI8 : kX86Add32MI8; } - return is_high_op ? kX86Adc32RI8 : kX86Add32RI8; + return is64Bit ? kX86Add64RI8 : is_high_op ? kX86Adc32RI8 : kX86Add32RI8; } if (in_mem) { - return is_high_op ? kX86Adc32MI : kX86Add32MI; + return is64Bit ? kX86Add64MI : is_high_op ? kX86Adc32MI : kX86Add32MI; } - return is_high_op ? kX86Adc32RI : kX86Add32RI; + return is64Bit ? kX86Add64RI : is_high_op ? kX86Adc32RI : kX86Add32RI; case Instruction::SUB_LONG: case Instruction::SUB_LONG_2ADDR: if (byte_imm) { if (in_mem) { - return is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8; + return is64Bit ? kX86Sub64MI8 : is_high_op ? kX86Sbb32MI8 : kX86Sub32MI8; } - return is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8; + return is64Bit ? kX86Sub64RI8 : is_high_op ? kX86Sbb32RI8 : kX86Sub32RI8; } if (in_mem) { - return is_high_op ? kX86Sbb32MI : kX86Sub32MI; + return is64Bit ? kX86Sub64MI : is_high_op ? kX86Sbb32MI : kX86Sub32MI; } - return is_high_op ? kX86Sbb32RI : kX86Sub32RI; + return is64Bit ? kX86Sub64RI : is_high_op ? kX86Sbb32RI : kX86Sub32RI; case Instruction::AND_LONG_2ADDR: case Instruction::AND_LONG: if (byte_imm) { + if (is64Bit) { + return in_mem ? kX86And64MI8 : kX86And64RI8; + } return in_mem ? kX86And32MI8 : kX86And32RI8; } + if (is64Bit) { + return in_mem ? kX86And64MI : kX86And64RI; + } return in_mem ? kX86And32MI : kX86And32RI; case Instruction::OR_LONG: case Instruction::OR_LONG_2ADDR: if (byte_imm) { + if (is64Bit) { + return in_mem ? kX86Or64MI8 : kX86Or64RI8; + } return in_mem ? kX86Or32MI8 : kX86Or32RI8; } + if (is64Bit) { + return in_mem ? kX86Or64MI : kX86Or64RI; + } return in_mem ? kX86Or32MI : kX86Or32RI; case Instruction::XOR_LONG: case Instruction::XOR_LONG_2ADDR: if (byte_imm) { + if (is64Bit) { + return in_mem ? kX86Xor64MI8 : kX86Xor64RI8; + } return in_mem ? kX86Xor32MI8 : kX86Xor32RI8; } + if (is64Bit) { + return in_mem ? kX86Xor64MI : kX86Xor64RI; + } return in_mem ? 
kX86Xor32MI : kX86Xor32RI; default: LOG(FATAL) << "Unexpected opcode: " << op; @@ -1791,9 +1985,43 @@ X86OpCode X86Mir2Lir::GetOpcode(Instruction::Code op, RegLocation loc, bool is_h } } -void X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) { +bool X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction::Code op) { DCHECK(rl_src.is_const); int64_t val = mir_graph_->ConstantValueWide(rl_src); + + if (Gen64Bit()) { + // We can do with imm only if it fits 32 bit + if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) { + return false; + } + + rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg); + + if ((rl_dest.location == kLocDalvikFrame) || + (rl_dest.location == kLocCompilerTemp)) { + int r_base = TargetReg(kSp).GetReg(); + int displacement = SRegOffset(rl_dest.s_reg_low); + + X86OpCode x86op = GetOpcode(op, rl_dest, false, val); + LIR *lir = NewLIR3(x86op, r_base, displacement + LOWORD_OFFSET, val); + AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, + true /* is_load */, true /* is64bit */); + AnnotateDalvikRegAccess(lir, (displacement + LOWORD_OFFSET) >> 2, + false /* is_load */, true /* is64bit */); + return true; + } + + RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); + DCHECK_EQ(rl_result.location, kLocPhysReg); + DCHECK(!rl_result.reg.IsFloat()); + + X86OpCode x86op = GetOpcode(op, rl_result, false, val); + NewLIR2(x86op, rl_result.reg.GetReg(), val); + + StoreValueWide(rl_dest, rl_result); + return true; + } + int32_t val_lo = Low32Bits(val); int32_t val_hi = High32Bits(val); rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg); @@ -1820,7 +2048,7 @@ void X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction AnnotateDalvikRegAccess(lir, (displacement + HIWORD_OFFSET) >> 2, false /* is_load */, true /* is64bit */); } - return; + return true; } RegLocation rl_result = EvalLocWide(rl_dest, kCoreReg, true); @@ -1836,12 +2064,38 @@ void X86Mir2Lir::GenLongImm(RegLocation rl_dest, RegLocation rl_src, Instruction NewLIR2(x86op, rl_result.reg.GetHighReg(), val_hi); } StoreValueWide(rl_dest, rl_result); + return true; } -void X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, +bool X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, RegLocation rl_src2, Instruction::Code op) { DCHECK(rl_src2.is_const); int64_t val = mir_graph_->ConstantValueWide(rl_src2); + + if (Gen64Bit()) { + // We can do with imm only if it fits 32 bit + if (val != (static_cast<int64_t>(static_cast<int32_t>(val)))) { + return false; + } + if (rl_dest.location == kLocPhysReg && + rl_src1.location == kLocPhysReg && !rl_dest.reg.IsFloat()) { + X86OpCode x86op = GetOpcode(op, rl_dest, false, val); + NewLIR2(x86op, rl_dest.reg.GetReg(), val); + StoreFinalValueWide(rl_dest, rl_dest); + return true; + } + + rl_src1 = LoadValueWide(rl_src1, kCoreReg); + // We need the values to be in a temporary + RegLocation rl_result = ForceTempWide(rl_src1); + + X86OpCode x86op = GetOpcode(op, rl_result, false, val); + NewLIR2(x86op, rl_result.reg.GetReg(), val); + + StoreFinalValueWide(rl_dest, rl_result); + return true; + } + int32_t val_lo = Low32Bits(val); int32_t val_hi = High32Bits(val); rl_dest = UpdateLocWideTyped(rl_dest, kCoreReg); @@ -1861,7 +2115,7 @@ void X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, } StoreFinalValueWide(rl_dest, rl_dest); - return; + return true; } rl_src1 = LoadValueWide(rl_src1, kCoreReg); @@ -1879,6 +2133,7 @@ void 
X86Mir2Lir::GenLongLongImm(RegLocation rl_dest, RegLocation rl_src1, } StoreFinalValueWide(rl_dest, rl_result); + return true; } // For final classes there are no sub-classes to check and so we can answer the instance-of @@ -2239,7 +2494,8 @@ void X86Mir2Lir::GenArithOpInt(Instruction::Code opcode, RegLocation rl_dest, // We should be careful with order here // If rl_dest and rl_lhs points to the same VR we should load first // If the are different we should find a register first for dest - if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) == mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) { + if (mir_graph_->SRegToVReg(rl_dest.s_reg_low) == + mir_graph_->SRegToVReg(rl_lhs.s_reg_low)) { rl_lhs = LoadValue(rl_lhs, kCoreReg); rl_result = EvalLoc(rl_dest, kCoreReg, true); // No-op if these are the same. @@ -2289,4 +2545,82 @@ bool X86Mir2Lir::IsOperationSafeWithoutTemps(RegLocation rl_lhs, RegLocation rl_ // Everything will be fine :-). return true; } + +void X86Mir2Lir::GenIntToLong(RegLocation rl_dest, RegLocation rl_src) { + if (!Gen64Bit()) { + Mir2Lir::GenIntToLong(rl_dest, rl_src); + return; + } + RegLocation rl_result = EvalLoc(rl_dest, kCoreReg, true); + if (rl_src.location == kLocPhysReg) { + NewLIR2(kX86MovsxdRR, rl_result.reg.GetReg(), rl_src.reg.GetReg()); + } else { + int displacement = SRegOffset(rl_src.s_reg_low); + LIR *m = NewLIR3(kX86MovsxdRM, rl_result.reg.GetReg(), rs_rX86_SP.GetReg(), + displacement + LOWORD_OFFSET); + AnnotateDalvikRegAccess(m, (displacement + LOWORD_OFFSET) >> 2, + true /* is_load */, true /* is_64bit */); + } + StoreValueWide(rl_dest, rl_result); +} + +void X86Mir2Lir::GenShiftOpLong(Instruction::Code opcode, RegLocation rl_dest, + RegLocation rl_src1, RegLocation rl_shift) { + if (!Gen64Bit()) { + Mir2Lir::GenShiftOpLong(opcode, rl_dest, rl_src1, rl_shift); + return; + } + + bool is_two_addr = false; + OpKind op = kOpBkpt; + RegLocation rl_result; + + switch (opcode) { + case Instruction::SHL_LONG_2ADDR: + is_two_addr = true; + // Fallthrough + case Instruction::SHL_LONG: + op = kOpLsl; + break; + case Instruction::SHR_LONG_2ADDR: + is_two_addr = true; + // Fallthrough + case Instruction::SHR_LONG: + op = kOpAsr; + break; + case Instruction::USHR_LONG_2ADDR: + is_two_addr = true; + // Fallthrough + case Instruction::USHR_LONG: + op = kOpLsr; + break; + default: + op = kOpBkpt; + } + + // X86 doesn't require masking and must use ECX. + RegStorage t_reg = TargetReg(kCount); // rCX + LoadValueDirectFixed(rl_shift, t_reg); + if (is_two_addr) { + // Can we do this directly into memory? + rl_result = UpdateLocWideTyped(rl_dest, kCoreReg); + if (rl_result.location != kLocPhysReg) { + // Okay, we can do this into memory + OpMemReg(op, rl_result, t_reg.GetReg()); + } else if (!rl_result.reg.IsFloat()) { + // Can do this directly into the result register + OpRegReg(op, rl_result.reg, t_reg); + StoreFinalValueWide(rl_dest, rl_result); + } + } else { + // Three address form, or we can't do directly. 
+ rl_src1 = LoadValueWide(rl_src1, kCoreReg); + rl_result = EvalLocWide(rl_dest, kCoreReg, true); + OpRegRegReg(op, rl_result.reg, rl_src1.reg, t_reg); + StoreFinalValueWide(rl_dest, rl_result); + } + + FreeTemp(t_reg); +} + } // namespace art diff --git a/compiler/dex/quick/x86/target_x86.cc b/compiler/dex/quick/x86/target_x86.cc index 4d8fd1b283..1ac15a21d8 100644 --- a/compiler/dex/quick/x86/target_x86.cc +++ b/compiler/dex/quick/x86/target_x86.cc @@ -132,10 +132,18 @@ X86NativeRegisterPool rX86_ARG0; X86NativeRegisterPool rX86_ARG1; X86NativeRegisterPool rX86_ARG2; X86NativeRegisterPool rX86_ARG3; +#ifdef TARGET_REX_SUPPORT +X86NativeRegisterPool rX86_ARG4; +X86NativeRegisterPool rX86_ARG5; +#endif X86NativeRegisterPool rX86_FARG0; X86NativeRegisterPool rX86_FARG1; X86NativeRegisterPool rX86_FARG2; X86NativeRegisterPool rX86_FARG3; +X86NativeRegisterPool rX86_FARG4; +X86NativeRegisterPool rX86_FARG5; +X86NativeRegisterPool rX86_FARG6; +X86NativeRegisterPool rX86_FARG7; X86NativeRegisterPool rX86_RET0; X86NativeRegisterPool rX86_RET1; X86NativeRegisterPool rX86_INVOKE_TGT; @@ -145,10 +153,16 @@ RegStorage rs_rX86_ARG0; RegStorage rs_rX86_ARG1; RegStorage rs_rX86_ARG2; RegStorage rs_rX86_ARG3; +RegStorage rs_rX86_ARG4; +RegStorage rs_rX86_ARG5; RegStorage rs_rX86_FARG0; RegStorage rs_rX86_FARG1; RegStorage rs_rX86_FARG2; RegStorage rs_rX86_FARG3; +RegStorage rs_rX86_FARG4; +RegStorage rs_rX86_FARG5; +RegStorage rs_rX86_FARG6; +RegStorage rs_rX86_FARG7; RegStorage rs_rX86_RET0; RegStorage rs_rX86_RET1; RegStorage rs_rX86_INVOKE_TGT; @@ -164,7 +178,7 @@ RegLocation X86Mir2Lir::LocCReturnRef() { } RegLocation X86Mir2Lir::LocCReturnWide() { - return x86_loc_c_return_wide; + return Gen64Bit() ? x86_64_loc_c_return_wide : x86_loc_c_return_wide; } RegLocation X86Mir2Lir::LocCReturnFloat() { @@ -188,35 +202,27 @@ RegStorage X86Mir2Lir::TargetReg(SpecialTargetRegister reg) { case kArg1: res_reg = rs_rX86_ARG1; break; case kArg2: res_reg = rs_rX86_ARG2; break; case kArg3: res_reg = rs_rX86_ARG3; break; + case kArg4: res_reg = rs_rX86_ARG4; break; + case kArg5: res_reg = rs_rX86_ARG5; break; case kFArg0: res_reg = rs_rX86_FARG0; break; case kFArg1: res_reg = rs_rX86_FARG1; break; case kFArg2: res_reg = rs_rX86_FARG2; break; case kFArg3: res_reg = rs_rX86_FARG3; break; + case kFArg4: res_reg = rs_rX86_FARG4; break; + case kFArg5: res_reg = rs_rX86_FARG5; break; + case kFArg6: res_reg = rs_rX86_FARG6; break; + case kFArg7: res_reg = rs_rX86_FARG7; break; case kRet0: res_reg = rs_rX86_RET0; break; case kRet1: res_reg = rs_rX86_RET1; break; case kInvokeTgt: res_reg = rs_rX86_INVOKE_TGT; break; case kHiddenArg: res_reg = rs_rAX; break; case kHiddenFpArg: res_reg = rs_fr0; break; case kCount: res_reg = rs_rX86_COUNT; break; + default: res_reg = RegStorage::InvalidReg(); } return res_reg; } -RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) { - // For the 32-bit internal ABI, the first 3 arguments are passed in registers. - // TODO: This is not 64-bit compliant and depends on new internal ABI. - switch (arg_num) { - case 0: - return rs_rX86_ARG1; - case 1: - return rs_rX86_ARG2; - case 2: - return rs_rX86_ARG3; - default: - return RegStorage::InvalidReg(); - } -} - /* * Decode the register id. 
*/ @@ -482,6 +488,20 @@ void X86Mir2Lir::LockCallTemps() { LockTemp(rs_rX86_ARG1); LockTemp(rs_rX86_ARG2); LockTemp(rs_rX86_ARG3); +#ifdef TARGET_REX_SUPPORT + if (Gen64Bit()) { + LockTemp(rs_rX86_ARG4); + LockTemp(rs_rX86_ARG5); + LockTemp(rs_rX86_FARG0); + LockTemp(rs_rX86_FARG1); + LockTemp(rs_rX86_FARG2); + LockTemp(rs_rX86_FARG3); + LockTemp(rs_rX86_FARG4); + LockTemp(rs_rX86_FARG5); + LockTemp(rs_rX86_FARG6); + LockTemp(rs_rX86_FARG7); + } +#endif } /* To be used when explicitly managing register use */ @@ -490,6 +510,20 @@ void X86Mir2Lir::FreeCallTemps() { FreeTemp(rs_rX86_ARG1); FreeTemp(rs_rX86_ARG2); FreeTemp(rs_rX86_ARG3); +#ifdef TARGET_REX_SUPPORT + if (Gen64Bit()) { + FreeTemp(rs_rX86_ARG4); + FreeTemp(rs_rX86_ARG5); + FreeTemp(rs_rX86_FARG0); + FreeTemp(rs_rX86_FARG1); + FreeTemp(rs_rX86_FARG2); + FreeTemp(rs_rX86_FARG3); + FreeTemp(rs_rX86_FARG4); + FreeTemp(rs_rX86_FARG5); + FreeTemp(rs_rX86_FARG6); + FreeTemp(rs_rX86_FARG7); + } +#endif } bool X86Mir2Lir::ProvidesFullMemoryBarrier(X86OpCode opcode) { @@ -653,6 +687,14 @@ bool X86Mir2Lir::SupportsVolatileLoadStore(OpSize size) { } RegisterClass X86Mir2Lir::RegClassForFieldLoadStore(OpSize size, bool is_volatile) { + // X86_64 can handle any size. + if (Gen64Bit()) { + if (size == kReference) { + return kRefReg; + } + return kCoreReg; + } + if (UNLIKELY(is_volatile)) { // On x86, atomic 64-bit load/store requires an fp register. // Smaller aligned load/store is atomic for both core and fp registers. @@ -688,11 +730,37 @@ X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* rs_rX86_ARG1 = rs_rSI; rs_rX86_ARG2 = rs_rDX; rs_rX86_ARG3 = rs_rCX; +#ifdef TARGET_REX_SUPPORT + rs_rX86_ARG4 = rs_r8; + rs_rX86_ARG5 = rs_r9; +#else + rs_rX86_ARG4 = RegStorage::InvalidReg(); + rs_rX86_ARG5 = RegStorage::InvalidReg(); +#endif + rs_rX86_FARG0 = rs_fr0; + rs_rX86_FARG1 = rs_fr1; + rs_rX86_FARG2 = rs_fr2; + rs_rX86_FARG3 = rs_fr3; + rs_rX86_FARG4 = rs_fr4; + rs_rX86_FARG5 = rs_fr5; + rs_rX86_FARG6 = rs_fr6; + rs_rX86_FARG7 = rs_fr7; rX86_ARG0 = rDI; rX86_ARG1 = rSI; rX86_ARG2 = rDX; rX86_ARG3 = rCX; - // TODO: ARG4(r8), ARG5(r9), floating point args. 
+#ifdef TARGET_REX_SUPPORT + rX86_ARG4 = r8; + rX86_ARG5 = r9; +#endif + rX86_FARG0 = fr0; + rX86_FARG1 = fr1; + rX86_FARG2 = fr2; + rX86_FARG3 = fr3; + rX86_FARG4 = fr4; + rX86_FARG5 = fr5; + rX86_FARG6 = fr6; + rX86_FARG7 = fr7; } else { rs_rX86_SP = rs_rX86_SP_32; @@ -700,23 +768,32 @@ X86Mir2Lir::X86Mir2Lir(CompilationUnit* cu, MIRGraph* mir_graph, ArenaAllocator* rs_rX86_ARG1 = rs_rCX; rs_rX86_ARG2 = rs_rDX; rs_rX86_ARG3 = rs_rBX; + rs_rX86_ARG4 = RegStorage::InvalidReg(); + rs_rX86_ARG5 = RegStorage::InvalidReg(); + rs_rX86_FARG0 = rs_rAX; + rs_rX86_FARG1 = rs_rCX; + rs_rX86_FARG2 = rs_rDX; + rs_rX86_FARG3 = rs_rBX; + rs_rX86_FARG4 = RegStorage::InvalidReg(); + rs_rX86_FARG5 = RegStorage::InvalidReg(); + rs_rX86_FARG6 = RegStorage::InvalidReg(); + rs_rX86_FARG7 = RegStorage::InvalidReg(); rX86_ARG0 = rAX; rX86_ARG1 = rCX; rX86_ARG2 = rDX; rX86_ARG3 = rBX; + rX86_FARG0 = rAX; + rX86_FARG1 = rCX; + rX86_FARG2 = rDX; + rX86_FARG3 = rBX; + // TODO(64): Initialize with invalid reg +// rX86_ARG4 = RegStorage::InvalidReg(); +// rX86_ARG5 = RegStorage::InvalidReg(); } - rs_rX86_FARG0 = rs_rAX; - rs_rX86_FARG1 = rs_rCX; - rs_rX86_FARG2 = rs_rDX; - rs_rX86_FARG3 = rs_rBX; rs_rX86_RET0 = rs_rAX; rs_rX86_RET1 = rs_rDX; rs_rX86_INVOKE_TGT = rs_rAX; rs_rX86_COUNT = rs_rCX; - rX86_FARG0 = rAX; - rX86_FARG1 = rCX; - rX86_FARG2 = rDX; - rX86_FARG3 = rBX; rX86_RET0 = rAX; rX86_RET1 = rDX; rX86_INVOKE_TGT = rAX; @@ -1356,7 +1433,11 @@ void X86Mir2Lir::GenConst128(BasicBlock* bb, MIR* mir) { // Address the start of the method. RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - rl_method = LoadValue(rl_method, kCoreReg); + if (rl_method.wide) { + rl_method = LoadValueWide(rl_method, kCoreReg); + } else { + rl_method = LoadValue(rl_method, kCoreReg); + } // Load the proper value from the literal area. // We don't know the proper offset for the value, so pick one that will force @@ -1676,4 +1757,458 @@ LIR *X86Mir2Lir::AddVectorLiteral(MIR *mir) { return new_value; } +// ------------ ABI support: mapping of args to physical registers ------------- +RegStorage X86Mir2Lir::InToRegStorageX86_64Mapper::GetNextReg(bool is_double_or_float, bool is_wide) { + const RegStorage coreArgMappingToPhysicalReg[] = {rs_rX86_ARG1, rs_rX86_ARG2, rs_rX86_ARG3, rs_rX86_ARG4, rs_rX86_ARG5}; + const int coreArgMappingToPhysicalRegSize = sizeof(coreArgMappingToPhysicalReg) / sizeof(RegStorage); + const RegStorage fpArgMappingToPhysicalReg[] = {rs_rX86_FARG0, rs_rX86_FARG1, rs_rX86_FARG2, rs_rX86_FARG3, + rs_rX86_FARG4, rs_rX86_FARG5, rs_rX86_FARG6, rs_rX86_FARG7}; + const int fpArgMappingToPhysicalRegSize = sizeof(fpArgMappingToPhysicalReg) / sizeof(RegStorage); + + RegStorage result = RegStorage::InvalidReg(); + if (is_double_or_float) { + if (cur_fp_reg_ < fpArgMappingToPhysicalRegSize) { + result = fpArgMappingToPhysicalReg[cur_fp_reg_++]; + if (result.Valid()) { + result = is_wide ? RegStorage::FloatSolo64(result.GetReg()) : RegStorage::FloatSolo32(result.GetReg()); + } + } + } else { + if (cur_core_reg_ < coreArgMappingToPhysicalRegSize) { + result = coreArgMappingToPhysicalReg[cur_core_reg_++]; + if (result.Valid()) { + result = is_wide ? RegStorage::Solo64(result.GetReg()) : RegStorage::Solo32(result.GetReg()); + } + } + } + return result; +} + +RegStorage X86Mir2Lir::InToRegStorageMapping::Get(int in_position) { + DCHECK(IsInitialized()); + auto res = mapping_.find(in_position); + return res != mapping_.end() ? 
res->second : RegStorage::InvalidReg(); +} + +void X86Mir2Lir::InToRegStorageMapping::Initialize(RegLocation* arg_locs, int count, InToRegStorageMapper* mapper) { + DCHECK(mapper != nullptr); + max_mapped_in_ = -1; + is_there_stack_mapped_ = false; + for (int in_position = 0; in_position < count; in_position++) { + RegStorage reg = mapper->GetNextReg(arg_locs[in_position].fp, arg_locs[in_position].wide); + if (reg.Valid()) { + mapping_[in_position] = reg; + max_mapped_in_ = std::max(max_mapped_in_, in_position); + if (reg.Is64BitSolo()) { + // We covered 2 args, so skip the next one + in_position++; + } + } else { + is_there_stack_mapped_ = true; + } + } + initialized_ = true; +} + +RegStorage X86Mir2Lir::GetArgMappingToPhysicalReg(int arg_num) { + if (!Gen64Bit()) { + return GetCoreArgMappingToPhysicalReg(arg_num); + } + + if (!in_to_reg_storage_mapping_.IsInitialized()) { + int start_vreg = cu_->num_dalvik_registers - cu_->num_ins; + RegLocation* arg_locs = &mir_graph_->reg_location_[start_vreg]; + + InToRegStorageX86_64Mapper mapper; + in_to_reg_storage_mapping_.Initialize(arg_locs, cu_->num_ins, &mapper); + } + return in_to_reg_storage_mapping_.Get(arg_num); +} + +RegStorage X86Mir2Lir::GetCoreArgMappingToPhysicalReg(int core_arg_num) { + // For the 32-bit internal ABI, the first 3 arguments are passed in registers. + // Not used for 64-bit, TODO: Move X86_32 to the same framework + switch (core_arg_num) { + case 0: + return rs_rX86_ARG1; + case 1: + return rs_rX86_ARG2; + case 2: + return rs_rX86_ARG3; + default: + return RegStorage::InvalidReg(); + } +} + +// ---------End of ABI support: mapping of args to physical registers ------------- + +/* + * If there are any ins passed in registers that have not been promoted + * to a callee-save register, flush them to the frame. Perform initial + * assignment of promoted arguments. + * + * ArgLocs is an array of location records describing the incoming arguments + * with one location record per word of argument. + */ +void X86Mir2Lir::FlushIns(RegLocation* ArgLocs, RegLocation rl_method) { + if (!Gen64Bit()) return Mir2Lir::FlushIns(ArgLocs, rl_method); + /* + * Dummy up a RegLocation for the incoming Method* + * It will attempt to keep kArg0 live (or copy it to home location + * if promoted). + */ + + RegLocation rl_src = rl_method; + rl_src.location = kLocPhysReg; + rl_src.reg = TargetReg(kArg0); + rl_src.home = false; + MarkLive(rl_src); + StoreValue(rl_method, rl_src); + // If Method* has been promoted, explicitly flush + if (rl_method.location == kLocPhysReg) { + StoreRefDisp(TargetReg(kSp), 0, TargetReg(kArg0)); + } + + if (cu_->num_ins == 0) { + return; + } + + int start_vreg = cu_->num_dalvik_registers - cu_->num_ins; + /* + * Copy incoming arguments to their proper home locations. + * NOTE: an older version of dx had an issue in which + * it would reuse static method argument registers. + * This could result in the same Dalvik virtual register + * being promoted to both core and fp regs. To account for this, + * we only copy to the corresponding promoted physical register + * if it matches the type of the SSA name for the incoming + * argument. It is also possible that long and double arguments + * end up half-promoted. In those cases, we must flush the promoted + * half to memory as well. 
+ */ + for (int i = 0; i < cu_->num_ins; i++) { + PromotionMap* v_map = &promotion_map_[start_vreg + i]; + RegStorage reg = RegStorage::InvalidReg(); + // get reg corresponding to input + reg = GetArgMappingToPhysicalReg(i); + + if (reg.Valid()) { + // If arriving in register + bool need_flush = true; + RegLocation* t_loc = &ArgLocs[i]; + if ((v_map->core_location == kLocPhysReg) && !t_loc->fp) { + OpRegCopy(RegStorage::Solo32(v_map->core_reg), reg); + need_flush = false; + } else if ((v_map->fp_location == kLocPhysReg) && t_loc->fp) { + OpRegCopy(RegStorage::Solo32(v_map->FpReg), reg); + need_flush = false; + } else { + need_flush = true; + } + + // For wide args, force flush if not fully promoted + if (t_loc->wide) { + PromotionMap* p_map = v_map + (t_loc->high_word ? -1 : +1); + // Is only half promoted? + need_flush |= (p_map->core_location != v_map->core_location) || + (p_map->fp_location != v_map->fp_location); + } + if (need_flush) { + if (t_loc->wide && t_loc->fp) { + StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, k64); + // Increment i to skip the next one + i++; + } else if (t_loc->wide && !t_loc->fp) { + StoreBaseDisp(TargetReg(kSp), SRegOffset(start_vreg + i), reg, k64); + // Increment i to skip the next one + i++; + } else { + Store32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), reg); + } + } + } else { + // If arriving in frame & promoted + if (v_map->core_location == kLocPhysReg) { + Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->core_reg)); + } + if (v_map->fp_location == kLocPhysReg) { + Load32Disp(TargetReg(kSp), SRegOffset(start_vreg + i), RegStorage::Solo32(v_map->FpReg)); + } + } + } +} + +/* + * Load up to 5 arguments, the first three of which will be in + * kArg1 .. kArg3. On entry kArg0 contains the current method pointer, + * and as part of the load sequence, it must be replaced with + * the target method pointer. Note, this may also be called + * for "range" variants if the number of arguments is 5 or fewer. + */ +int X86Mir2Lir::GenDalvikArgsNoRange(CallInfo* info, + int call_state, LIR** pcrLabel, NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, uintptr_t direct_code, + uintptr_t direct_method, InvokeType type, bool skip_this) { + if (!Gen64Bit()) { + return Mir2Lir::GenDalvikArgsNoRange(info, + call_state, pcrLabel, next_call_insn, + target_method, + vtable_idx, direct_code, + direct_method, type, skip_this); + } + return GenDalvikArgsRange(info, + call_state, pcrLabel, next_call_insn, + target_method, + vtable_idx, direct_code, + direct_method, type, skip_this); +} + +/* + * May have 0+ arguments (also used for jumbo). Note that + * source virtual registers may be in physical registers, so may + * need to be flushed to home location before copying. This + * applies to arg3 and above (see below). 
+ * + * Two general strategies: + * If < 20 arguments + * Pass args 3-18 using vldm/vstm block copy + * Pass arg0, arg1 & arg2 in kArg1-kArg3 + * If 20+ arguments + * Pass args arg19+ using memcpy block copy + * Pass arg0, arg1 & arg2 in kArg1-kArg3 + * + */ +int X86Mir2Lir::GenDalvikArgsRange(CallInfo* info, int call_state, + LIR** pcrLabel, NextCallInsn next_call_insn, + const MethodReference& target_method, + uint32_t vtable_idx, uintptr_t direct_code, uintptr_t direct_method, + InvokeType type, bool skip_this) { + if (!Gen64Bit()) { + return Mir2Lir::GenDalvikArgsRange(info, call_state, + pcrLabel, next_call_insn, + target_method, + vtable_idx, direct_code, direct_method, + type, skip_this); + } + + /* If no arguments, just return */ + if (info->num_arg_words == 0) + return call_state; + + const int start_index = skip_this ? 1 : 0; + + InToRegStorageX86_64Mapper mapper; + InToRegStorageMapping in_to_reg_storage_mapping; + in_to_reg_storage_mapping.Initialize(info->args, info->num_arg_words, &mapper); + const int last_mapped_in = in_to_reg_storage_mapping.GetMaxMappedIn(); + const int size_of_the_last_mapped = last_mapped_in == -1 ? 1 : + in_to_reg_storage_mapping.Get(last_mapped_in).Is64BitSolo() ? 2 : 1; + int regs_left_to_pass_via_stack = info->num_arg_words - (last_mapped_in + size_of_the_last_mapped); + + // First of all, check whether it makes sense to use bulk copying + // Optimization is applicable only for the range case + // TODO: make a constant instead of 2 + if (info->is_range && regs_left_to_pass_via_stack >= 2) { + // Scan the rest of the args - if in phys_reg flush to memory + for (int next_arg = last_mapped_in + size_of_the_last_mapped; next_arg < info->num_arg_words;) { + RegLocation loc = info->args[next_arg]; + if (loc.wide) { + loc = UpdateLocWide(loc); + if (loc.location == kLocPhysReg) { + StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k64); + } + next_arg += 2; + } else { + loc = UpdateLoc(loc); + if (loc.location == kLocPhysReg) { + StoreBaseDisp(TargetReg(kSp), SRegOffset(loc.s_reg_low), loc.reg, k32); + } + next_arg++; + } + } + + // Logic below assumes that Method pointer is at offset zero from SP. + DCHECK_EQ(VRegOffset(static_cast<int>(kVRegMethodPtrBaseReg)), 0); + + // The rest can be copied together + int start_offset = SRegOffset(info->args[last_mapped_in + size_of_the_last_mapped].s_reg_low); + int outs_offset = StackVisitor::GetOutVROffset(last_mapped_in + size_of_the_last_mapped, cu_->instruction_set); + + int current_src_offset = start_offset; + int current_dest_offset = outs_offset; + + while (regs_left_to_pass_via_stack > 0) { + // This is based on the knowledge that the stack itself is 16-byte aligned. + bool src_is_16b_aligned = (current_src_offset & 0xF) == 0; + bool dest_is_16b_aligned = (current_dest_offset & 0xF) == 0; + size_t bytes_to_move; + + /* + * The amount to move defaults to 32-bit. If there are 4 registers left to move, then do a + * 128-bit move because we won't get the chance to try to align. If there are more than + * 4 registers left to move, consider doing a 128-bit move only if either src or dest is aligned. + * We do this because we could potentially do a smaller move to align. + */ + if (regs_left_to_pass_via_stack == 4 || + (regs_left_to_pass_via_stack > 4 && (src_is_16b_aligned || dest_is_16b_aligned))) { + // Moving 128-bits via xmm register. + bytes_to_move = sizeof(uint32_t) * 4; + + // Allocate a free xmm temp. 
Since we are working through the calling sequence, + // we expect to have an xmm temporary available. AllocTempDouble will abort if + // there are no free registers. + RegStorage temp = AllocTempDouble(); + + LIR* ld1 = nullptr; + LIR* ld2 = nullptr; + LIR* st1 = nullptr; + LIR* st2 = nullptr; + + /* + * The logic is similar for both loads and stores. If we have 16-byte alignment, + * do an aligned move. If we have 8-byte alignment, then do the move in two + * parts. This approach prevents possible cache line splits. Finally, fall back + * to doing an unaligned move. In most cases we likely won't split the cache + * line but we cannot prove it and thus take a conservative approach. + */ + bool src_is_8b_aligned = (current_src_offset & 0x7) == 0; + bool dest_is_8b_aligned = (current_dest_offset & 0x7) == 0; + + if (src_is_16b_aligned) { + ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovA128FP); + } else if (src_is_8b_aligned) { + ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovLo128FP); + ld2 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset + (bytes_to_move >> 1), + kMovHi128FP); + } else { + ld1 = OpMovRegMem(temp, TargetReg(kSp), current_src_offset, kMovU128FP); + } + + if (dest_is_16b_aligned) { + st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovA128FP); + } else if (dest_is_8b_aligned) { + st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovLo128FP); + st2 = OpMovMemReg(TargetReg(kSp), current_dest_offset + (bytes_to_move >> 1), + temp, kMovHi128FP); + } else { + st1 = OpMovMemReg(TargetReg(kSp), current_dest_offset, temp, kMovU128FP); + } + + // TODO If we could keep track of aliasing information for memory accesses that are wider + // than 64-bit, we wouldn't need to set up a barrier. + if (ld1 != nullptr) { + if (ld2 != nullptr) { + // For 64-bit load we can actually set up the aliasing information. + AnnotateDalvikRegAccess(ld1, current_src_offset >> 2, true, true); + AnnotateDalvikRegAccess(ld2, (current_src_offset + (bytes_to_move >> 1)) >> 2, true, true); + } else { + // Set barrier for 128-bit load. + SetMemRefType(ld1, true /* is_load */, kDalvikReg); + ld1->u.m.def_mask = ENCODE_ALL; + } + } + if (st1 != nullptr) { + if (st2 != nullptr) { + // For 64-bit store we can actually set up the aliasing information. + AnnotateDalvikRegAccess(st1, current_dest_offset >> 2, false, true); + AnnotateDalvikRegAccess(st2, (current_dest_offset + (bytes_to_move >> 1)) >> 2, false, true); + } else { + // Set barrier for 128-bit store. + SetMemRefType(st1, false /* is_load */, kDalvikReg); + st1->u.m.def_mask = ENCODE_ALL; + } + } + + // Free the temporary used for the data movement. + FreeTemp(temp); + } else { + // Moving 32-bits via general purpose register. + bytes_to_move = sizeof(uint32_t); + + // Instead of allocating a new temp, simply reuse one of the registers being used + // for argument passing. + RegStorage temp = TargetReg(kArg3); + + // Now load the argument VR and store to the outs. 
+ Load32Disp(TargetReg(kSp), current_src_offset, temp); + Store32Disp(TargetReg(kSp), current_dest_offset, temp); + } + + current_src_offset += bytes_to_move; + current_dest_offset += bytes_to_move; + regs_left_to_pass_via_stack -= (bytes_to_move >> 2); + } + DCHECK_EQ(regs_left_to_pass_via_stack, 0); + } + + // Now handle any arguments that were not mapped to registers + if (in_to_reg_storage_mapping.IsThereStackMapped()) { + RegStorage regSingle = TargetReg(kArg2); + RegStorage regWide = RegStorage::Solo64(TargetReg(kArg3).GetReg()); + for (int i = start_index; i <= last_mapped_in + regs_left_to_pass_via_stack; i++) { + RegLocation rl_arg = info->args[i]; + rl_arg = UpdateRawLoc(rl_arg); + RegStorage reg = in_to_reg_storage_mapping.Get(i); + if (!reg.Valid()) { + int out_offset = StackVisitor::GetOutVROffset(i, cu_->instruction_set); + + if (rl_arg.wide) { + if (rl_arg.location == kLocPhysReg) { + StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k64); + } else { + LoadValueDirectWideFixed(rl_arg, regWide); + StoreBaseDisp(TargetReg(kSp), out_offset, regWide, k64); + } + i++; + } else { + if (rl_arg.location == kLocPhysReg) { + StoreBaseDisp(TargetReg(kSp), out_offset, rl_arg.reg, k32); + } else { + LoadValueDirectFixed(rl_arg, regSingle); + StoreBaseDisp(TargetReg(kSp), out_offset, regSingle, k32); + } + } + call_state = next_call_insn(cu_, info, call_state, target_method, + vtable_idx, direct_code, direct_method, type); + } + } + } + + // Finish with mapped registers + for (int i = start_index; i <= last_mapped_in; i++) { + RegLocation rl_arg = info->args[i]; + rl_arg = UpdateRawLoc(rl_arg); + RegStorage reg = in_to_reg_storage_mapping.Get(i); + if (reg.Valid()) { + if (rl_arg.wide) { + LoadValueDirectWideFixed(rl_arg, reg); + i++; + } else { + LoadValueDirectFixed(rl_arg, reg); + } + call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, + direct_code, direct_method, type); + } + } + + call_state = next_call_insn(cu_, info, call_state, target_method, vtable_idx, + direct_code, direct_method, type); + if (pcrLabel) { + if (Runtime::Current()->ExplicitNullChecks()) { + *pcrLabel = GenExplicitNullCheck(TargetReg(kArg1), info->opt_flags); + } else { + *pcrLabel = nullptr; + // In lieu of generating a check for kArg1 being null, we need to + // perform a load when doing implicit checks. + RegStorage tmp = AllocTemp(); + Load32Disp(TargetReg(kArg1), 0, tmp); + MarkPossibleNullPointerException(info->opt_flags); + FreeTemp(tmp); + } + } + return call_state; +} + } // namespace art + diff --git a/compiler/dex/quick/x86/utility_x86.cc b/compiler/dex/quick/x86/utility_x86.cc index 618b3a5987..d074d8104d 100644 --- a/compiler/dex/quick/x86/utility_x86.cc +++ b/compiler/dex/quick/x86/utility_x86.cc @@ -89,11 +89,8 @@ LIR* X86Mir2Lir::LoadConstantNoClobber(RegStorage r_dest, int value) { res = NewLIR2(kX86Xor32RR, r_dest.GetReg(), r_dest.GetReg()); } else { // Note, there is no byte immediate form of a 32 bit immediate move. 
- if (r_dest.Is64Bit()) { - res = NewLIR2(kX86Mov64RI, r_dest.GetReg(), value); - } else { - res = NewLIR2(kX86Mov32RI, r_dest.GetReg(), value); - } + // 64-bit immediate is not supported by LIR structure + res = NewLIR2(kX86Mov32RI, r_dest.GetReg(), value); } if (r_dest_save.IsFloat()) { @@ -120,8 +117,8 @@ LIR* X86Mir2Lir::OpCondBranch(ConditionCode cc, LIR* target) { LIR* X86Mir2Lir::OpReg(OpKind op, RegStorage r_dest_src) { X86OpCode opcode = kX86Bkpt; switch (op) { - case kOpNeg: opcode = kX86Neg32R; break; - case kOpNot: opcode = kX86Not32R; break; + case kOpNeg: opcode = r_dest_src.Is64Bit() ? kX86Neg64R : kX86Neg32R; break; + case kOpNot: opcode = r_dest_src.Is64Bit() ? kX86Not64R : kX86Not32R; break; case kOpRev: opcode = kX86Bswap32R; break; case kOpBlx: opcode = kX86CallR; break; default: @@ -138,6 +135,9 @@ LIR* X86Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) { switch (op) { case kOpAdd: opcode = byte_imm ? kX86Add64RI8 : kX86Add64RI; break; case kOpSub: opcode = byte_imm ? kX86Sub64RI8 : kX86Sub64RI; break; + case kOpLsl: opcode = kX86Sal64RI; break; + case kOpLsr: opcode = kX86Shr64RI; break; + case kOpAsr: opcode = kX86Sar64RI; break; default: LOG(FATAL) << "Bad case in OpRegImm (64-bit) " << op; } @@ -189,6 +189,7 @@ LIR* X86Mir2Lir::OpRegImm(OpKind op, RegStorage r_dest_src1, int value) { } LIR* X86Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) { + bool is64Bit = r_dest_src1.Is64Bit(); X86OpCode opcode = kX86Nop; bool src2_must_be_cx = false; switch (op) { @@ -207,33 +208,34 @@ LIR* X86Mir2Lir::OpRegReg(OpKind op, RegStorage r_dest_src1, RegStorage r_src2) OpReg(kOpRev, r_dest_src1); return OpRegImm(kOpAsr, r_dest_src1, 16); // X86 binary opcodes - case kOpSub: opcode = kX86Sub32RR; break; - case kOpSbc: opcode = kX86Sbb32RR; break; - case kOpLsl: opcode = kX86Sal32RC; src2_must_be_cx = true; break; - case kOpLsr: opcode = kX86Shr32RC; src2_must_be_cx = true; break; - case kOpAsr: opcode = kX86Sar32RC; src2_must_be_cx = true; break; - case kOpMov: opcode = kX86Mov32RR; break; - case kOpCmp: opcode = kX86Cmp32RR; break; - case kOpAdd: opcode = kX86Add32RR; break; - case kOpAdc: opcode = kX86Adc32RR; break; - case kOpAnd: opcode = kX86And32RR; break; - case kOpOr: opcode = kX86Or32RR; break; - case kOpXor: opcode = kX86Xor32RR; break; + case kOpSub: opcode = is64Bit ? kX86Sub64RR : kX86Sub32RR; break; + case kOpSbc: opcode = is64Bit ? kX86Sbb64RR : kX86Sbb32RR; break; + case kOpLsl: opcode = is64Bit ? kX86Sal64RC : kX86Sal32RC; src2_must_be_cx = true; break; + case kOpLsr: opcode = is64Bit ? kX86Shr64RC : kX86Shr32RC; src2_must_be_cx = true; break; + case kOpAsr: opcode = is64Bit ? kX86Sar64RC : kX86Sar32RC; src2_must_be_cx = true; break; + case kOpMov: opcode = is64Bit ? kX86Mov64RR : kX86Mov32RR; break; + case kOpCmp: opcode = is64Bit ? kX86Cmp64RR : kX86Cmp32RR; break; + case kOpAdd: opcode = is64Bit ? kX86Add64RR : kX86Add32RR; break; + case kOpAdc: opcode = is64Bit ? kX86Adc64RR : kX86Adc32RR; break; + case kOpAnd: opcode = is64Bit ? kX86And64RR : kX86And32RR; break; + case kOpOr: opcode = is64Bit ? kX86Or64RR : kX86Or32RR; break; + case kOpXor: opcode = is64Bit ? kX86Xor64RR : kX86Xor32RR; break; case kOp2Byte: // TODO: there are several instances of this check. A utility function perhaps? // TODO: Similar to Arm's reg < 8 check. Perhaps add attribute checks to RegStorage? // Use shifts instead of a byte operand if the source can't be byte accessed. 
if (r_src2.GetRegNum() >= rs_rX86_SP.GetRegNum()) { - NewLIR2(kX86Mov32RR, r_dest_src1.GetReg(), r_src2.GetReg()); - NewLIR2(kX86Sal32RI, r_dest_src1.GetReg(), 24); - return NewLIR2(kX86Sar32RI, r_dest_src1.GetReg(), 24); + NewLIR2(is64Bit ? kX86Mov64RR : kX86Mov32RR, r_dest_src1.GetReg(), r_src2.GetReg()); + NewLIR2(is64Bit ? kX86Sal64RI : kX86Sal32RI, r_dest_src1.GetReg(), is64Bit ? 56 : 24); + return NewLIR2(is64Bit ? kX86Sar64RI : kX86Sar32RI, r_dest_src1.GetReg(), + is64Bit ? 56 : 24); } else { - opcode = kX86Movsx8RR; + opcode = is64Bit ? kX86Bkpt : kX86Movsx8RR; } break; - case kOp2Short: opcode = kX86Movsx16RR; break; - case kOp2Char: opcode = kX86Movzx16RR; break; - case kOpMul: opcode = kX86Imul32RR; break; + case kOp2Short: opcode = is64Bit ? kX86Bkpt : kX86Movsx16RR; break; + case kOp2Char: opcode = is64Bit ? kX86Bkpt : kX86Movzx16RR; break; + case kOpMul: opcode = is64Bit ? kX86Bkpt : kX86Imul32RR; break; default: LOG(FATAL) << "Bad case in OpRegReg " << op; break; @@ -354,16 +356,17 @@ LIR* X86Mir2Lir::OpCondRegReg(OpKind op, ConditionCode cc, RegStorage r_dest, Re } LIR* X86Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int offset) { + bool is64Bit = r_dest.Is64Bit(); X86OpCode opcode = kX86Nop; switch (op) { // X86 binary opcodes - case kOpSub: opcode = kX86Sub32RM; break; - case kOpMov: opcode = kX86Mov32RM; break; - case kOpCmp: opcode = kX86Cmp32RM; break; - case kOpAdd: opcode = kX86Add32RM; break; - case kOpAnd: opcode = kX86And32RM; break; - case kOpOr: opcode = kX86Or32RM; break; - case kOpXor: opcode = kX86Xor32RM; break; + case kOpSub: opcode = is64Bit ? kX86Sub64RM : kX86Sub32RM; break; + case kOpMov: opcode = is64Bit ? kX86Mov64RM : kX86Mov32RM; break; + case kOpCmp: opcode = is64Bit ? kX86Cmp64RM : kX86Cmp32RM; break; + case kOpAdd: opcode = is64Bit ? kX86Add64RM : kX86Add32RM; break; + case kOpAnd: opcode = is64Bit ? kX86And64RM : kX86And32RM; break; + case kOpOr: opcode = is64Bit ? kX86Or64RM : kX86Or32RM; break; + case kOpXor: opcode = is64Bit ? kX86Xor64RM : kX86Xor32RM; break; case kOp2Byte: opcode = kX86Movsx8RM; break; case kOp2Short: opcode = kX86Movsx16RM; break; case kOp2Char: opcode = kX86Movzx16RM; break; @@ -382,63 +385,68 @@ LIR* X86Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegStorage r_base, int o LIR* X86Mir2Lir::OpMemReg(OpKind op, RegLocation rl_dest, int r_value) { DCHECK_NE(rl_dest.location, kLocPhysReg); int displacement = SRegOffset(rl_dest.s_reg_low); + bool is64Bit = rl_dest.wide != 0; X86OpCode opcode = kX86Nop; switch (op) { - case kOpSub: opcode = kX86Sub32MR; break; - case kOpMov: opcode = kX86Mov32MR; break; - case kOpCmp: opcode = kX86Cmp32MR; break; - case kOpAdd: opcode = kX86Add32MR; break; - case kOpAnd: opcode = kX86And32MR; break; - case kOpOr: opcode = kX86Or32MR; break; - case kOpXor: opcode = kX86Xor32MR; break; - case kOpLsl: opcode = kX86Sal32MC; break; - case kOpLsr: opcode = kX86Shr32MC; break; - case kOpAsr: opcode = kX86Sar32MC; break; + case kOpSub: opcode = is64Bit ? kX86Sub64MR : kX86Sub32MR; break; + case kOpMov: opcode = is64Bit ? kX86Mov64MR : kX86Mov32MR; break; + case kOpCmp: opcode = is64Bit ? kX86Cmp64MR : kX86Cmp32MR; break; + case kOpAdd: opcode = is64Bit ? kX86Add64MR : kX86Add32MR; break; + case kOpAnd: opcode = is64Bit ? kX86And64MR : kX86And32MR; break; + case kOpOr: opcode = is64Bit ? kX86Or64MR : kX86Or32MR; break; + case kOpXor: opcode = is64Bit ? kX86Xor64MR : kX86Xor32MR; break; + case kOpLsl: opcode = is64Bit ? 
kX86Sal64MC : kX86Sal32MC; break; + case kOpLsr: opcode = is64Bit ? kX86Shr64MC : kX86Shr32MC; break; + case kOpAsr: opcode = is64Bit ? kX86Sar64MC : kX86Sar32MC; break; default: LOG(FATAL) << "Bad case in OpMemReg " << op; break; } LIR *l = NewLIR3(opcode, rs_rX86_SP.GetReg(), displacement, r_value); - AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, false /* is_64bit */); - AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, false /* is_64bit */); + AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is64Bit /* is_64bit */); + AnnotateDalvikRegAccess(l, displacement >> 2, false /* is_load */, is64Bit /* is_64bit */); return l; } LIR* X86Mir2Lir::OpRegMem(OpKind op, RegStorage r_dest, RegLocation rl_value) { DCHECK_NE(rl_value.location, kLocPhysReg); + bool is64Bit = r_dest.Is64Bit(); int displacement = SRegOffset(rl_value.s_reg_low); X86OpCode opcode = kX86Nop; switch (op) { - case kOpSub: opcode = kX86Sub32RM; break; - case kOpMov: opcode = kX86Mov32RM; break; - case kOpCmp: opcode = kX86Cmp32RM; break; - case kOpAdd: opcode = kX86Add32RM; break; - case kOpAnd: opcode = kX86And32RM; break; - case kOpOr: opcode = kX86Or32RM; break; - case kOpXor: opcode = kX86Xor32RM; break; - case kOpMul: opcode = kX86Imul32RM; break; + case kOpSub: opcode = is64Bit ? kX86Sub64RM : kX86Sub32RM; break; + case kOpMov: opcode = is64Bit ? kX86Mov64RM : kX86Mov32RM; break; + case kOpCmp: opcode = is64Bit ? kX86Cmp64RM : kX86Cmp32RM; break; + case kOpAdd: opcode = is64Bit ? kX86Add64RM : kX86Add32RM; break; + case kOpAnd: opcode = is64Bit ? kX86And64RM : kX86And32RM; break; + case kOpOr: opcode = is64Bit ? kX86Or64RM : kX86Or32RM; break; + case kOpXor: opcode = is64Bit ? kX86Xor64RM : kX86Xor32RM; break; + case kOpMul: opcode = is64Bit ? kX86Bkpt : kX86Imul32RM; break; default: LOG(FATAL) << "Bad case in OpRegMem " << op; break; } LIR *l = NewLIR3(opcode, r_dest.GetReg(), rs_rX86_SP.GetReg(), displacement); - AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, false /* is_64bit */); + AnnotateDalvikRegAccess(l, displacement >> 2, true /* is_load */, is64Bit /* is_64bit */); return l; } LIR* X86Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, RegStorage r_src2) { + bool is64Bit = r_dest.Is64Bit(); if (r_dest != r_src1 && r_dest != r_src2) { if (op == kOpAdd) { // lea special case, except can't encode rbp as base if (r_src1 == r_src2) { OpRegCopy(r_dest, r_src1); return OpRegImm(kOpLsl, r_dest, 1); } else if (r_src1 != rs_rBP) { - return NewLIR5(kX86Lea32RA, r_dest.GetReg(), r_src1.GetReg() /* base */, - r_src2.GetReg() /* index */, 0 /* scale */, 0 /* disp */); + return NewLIR5(is64Bit ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(), + r_src1.GetReg() /* base */, r_src2.GetReg() /* index */, + 0 /* scale */, 0 /* disp */); } else { - return NewLIR5(kX86Lea32RA, r_dest.GetReg(), r_src2.GetReg() /* base */, - r_src1.GetReg() /* index */, 0 /* scale */, 0 /* disp */); + return NewLIR5(is64Bit ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(), + r_src2.GetReg() /* base */, r_src1.GetReg() /* index */, + 0 /* scale */, 0 /* disp */); } } else { OpRegCopy(r_dest, r_src1); @@ -476,10 +484,10 @@ LIR* X86Mir2Lir::OpRegRegReg(OpKind op, RegStorage r_dest, RegStorage r_src1, } LIR* X86Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src, int value) { - if (op == kOpMul) { + if (op == kOpMul && !Gen64Bit()) { X86OpCode opcode = IS_SIMM8(value) ? 
kX86Imul32RRI8 : kX86Imul32RRI; return NewLIR3(opcode, r_dest.GetReg(), r_src.GetReg(), value); - } else if (op == kOpAnd) { + } else if (op == kOpAnd && !Gen64Bit()) { if (value == 0xFF && r_src.Low4()) { return NewLIR2(kX86Movzx8RR, r_dest.GetReg(), r_src.GetReg()); } else if (value == 0xFFFF) { @@ -492,8 +500,9 @@ LIR* X86Mir2Lir::OpRegRegImm(OpKind op, RegStorage r_dest, RegStorage r_src, int return NewLIR5(kX86Lea32RA, r_dest.GetReg(), r5sib_no_base /* base */, r_src.GetReg() /* index */, value /* scale */, 0 /* disp */); } else if (op == kOpAdd) { // lea add special case - return NewLIR5(kX86Lea32RA, r_dest.GetReg(), r_src.GetReg() /* base */, - rs_rX86_SP.GetReg()/*r4sib_no_index*/ /* index */, 0 /* scale */, value /* disp */); + return NewLIR5(Gen64Bit() ? kX86Lea64RA : kX86Lea32RA, r_dest.GetReg(), + r_src.GetReg() /* base */, rs_rX86_SP.GetReg()/*r4sib_no_index*/ /* index */, + 0 /* scale */, value /* disp */); } OpRegCopy(r_dest, r_src); } @@ -556,7 +565,11 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { // Address the start of the method RegLocation rl_method = mir_graph_->GetRegLocation(base_of_code_->s_reg_low); - rl_method = LoadValue(rl_method, kCoreReg); + if (rl_method.wide) { + rl_method = LoadValueWide(rl_method, kCoreReg); + } else { + rl_method = LoadValue(rl_method, kCoreReg); + } // Load the proper value from the literal area. // We don't know the proper offset for the value, so pick one that will force @@ -582,8 +595,20 @@ LIR* X86Mir2Lir::LoadConstantWide(RegStorage r_dest, int64_t value) { } } } else { - res = LoadConstantNoClobber(r_dest.GetLow(), val_lo); - LoadConstantNoClobber(r_dest.GetHigh(), val_hi); + if (r_dest.IsPair()) { + res = LoadConstantNoClobber(r_dest.GetLow(), val_lo); + LoadConstantNoClobber(r_dest.GetHigh(), val_hi); + } else { + // TODO(64) make int64_t value parameter of LoadConstantNoClobber + if (val_lo < 0) { + val_hi += 1; + } + res = LoadConstantNoClobber(RegStorage::Solo32(r_dest.GetReg()), val_hi); + NewLIR2(kX86Sal64RI, r_dest.GetReg(), 32); + if (val_lo != 0) { + NewLIR2(kX86Add64RI, r_dest.GetReg(), val_lo); + } + } } return res; } @@ -601,6 +626,8 @@ LIR* X86Mir2Lir::LoadBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int case kDouble: if (r_dest.IsFloat()) { opcode = is_array ? kX86MovsdRA : kX86MovsdRM; + } else if (!pair) { + opcode = is_array ? kX86Mov64RA : kX86Mov64RM; } else { opcode = is_array ? kX86Mov32RA : kX86Mov32RM; } @@ -742,13 +769,10 @@ LIR* X86Mir2Lir::StoreBaseIndexedDisp(RegStorage r_base, RegStorage r_index, int case kDouble: if (r_src.IsFloat()) { opcode = is_array ? kX86MovsdAR : kX86MovsdMR; + } else if (!pair) { + opcode = is_array ? kX86Mov64AR : kX86Mov64MR; } else { - if (Gen64Bit()) { - opcode = is_array ? kX86Mov64AR : kX86Mov64MR; - } else { - // TODO(64): pair = true; - opcode = is_array ? kX86Mov32AR : kX86Mov32MR; - } + opcode = is_array ? kX86Mov32AR : kX86Mov32MR; } // TODO: double store is to unaligned address DCHECK_EQ((displacement & 0x3), 0); @@ -855,7 +879,7 @@ void X86Mir2Lir::AnalyzeMIR() { // Did we need a pointer to the method code? 
if (store_method_addr_) { - base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempVR, false); + base_of_code_ = mir_graph_->GetNewCompilerTemp(kCompilerTempVR, Gen64Bit() == true); } else { base_of_code_ = nullptr; } @@ -971,6 +995,7 @@ RegLocation X86Mir2Lir::UpdateLocTyped(RegLocation loc, int reg_class) { loc.location = kLocDalvikFrame; } } + DCHECK(CheckCorePoolSanity()); return loc; } @@ -984,7 +1009,7 @@ RegLocation X86Mir2Lir::UpdateLocWideTyped(RegLocation loc, int reg_class) { loc.location = kLocDalvikFrame; } } + DCHECK(CheckCorePoolSanity()); return loc; } - } // namespace art diff --git a/compiler/dex/quick/x86/x86_lir.h b/compiler/dex/quick/x86/x86_lir.h index bb8df893f8..e550488a03 100644 --- a/compiler/dex/quick/x86/x86_lir.h +++ b/compiler/dex/quick/x86/x86_lir.h @@ -334,10 +334,18 @@ extern X86NativeRegisterPool rX86_ARG0; extern X86NativeRegisterPool rX86_ARG1; extern X86NativeRegisterPool rX86_ARG2; extern X86NativeRegisterPool rX86_ARG3; +#ifdef TARGET_REX_SUPPORT +extern X86NativeRegisterPool rX86_ARG4; +extern X86NativeRegisterPool rX86_ARG5; +#endif extern X86NativeRegisterPool rX86_FARG0; extern X86NativeRegisterPool rX86_FARG1; extern X86NativeRegisterPool rX86_FARG2; extern X86NativeRegisterPool rX86_FARG3; +extern X86NativeRegisterPool rX86_FARG4; +extern X86NativeRegisterPool rX86_FARG5; +extern X86NativeRegisterPool rX86_FARG6; +extern X86NativeRegisterPool rX86_FARG7; extern X86NativeRegisterPool rX86_RET0; extern X86NativeRegisterPool rX86_RET1; extern X86NativeRegisterPool rX86_INVOKE_TGT; @@ -347,10 +355,16 @@ extern RegStorage rs_rX86_ARG0; extern RegStorage rs_rX86_ARG1; extern RegStorage rs_rX86_ARG2; extern RegStorage rs_rX86_ARG3; +extern RegStorage rs_rX86_ARG4; +extern RegStorage rs_rX86_ARG5; extern RegStorage rs_rX86_FARG0; extern RegStorage rs_rX86_FARG1; extern RegStorage rs_rX86_FARG2; extern RegStorage rs_rX86_FARG3; +extern RegStorage rs_rX86_FARG4; +extern RegStorage rs_rX86_FARG5; +extern RegStorage rs_rX86_FARG6; +extern RegStorage rs_rX86_FARG7; extern RegStorage rs_rX86_RET0; extern RegStorage rs_rX86_RET1; extern RegStorage rs_rX86_INVOKE_TGT; @@ -363,6 +377,9 @@ const RegLocation x86_loc_c_return const RegLocation x86_loc_c_return_wide {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, RegStorage(RegStorage::k64BitPair, rAX, rDX), INVALID_SREG, INVALID_SREG}; +const RegLocation x86_64_loc_c_return_wide + {kLocPhysReg, 1, 0, 0, 0, 0, 0, 0, 1, + RegStorage(RegStorage::k64BitSolo, rAX), INVALID_SREG, INVALID_SREG}; const RegLocation x86_loc_c_return_float {kLocPhysReg, 0, 0, 0, 1, 0, 0, 0, 1, RegStorage(RegStorage::k32BitSolo, fr0), INVALID_SREG, INVALID_SREG}; @@ -505,6 +522,7 @@ enum X86OpCode { UnaryOpcode(kX86Divmod, DaR, DaM, DaA), UnaryOpcode(kX86Idivmod, DaR, DaM, DaA), kx86Cdq32Da, + kx86Cqo64Da, kX86Bswap32R, kX86Push32R, kX86Pop32R, #undef UnaryOpcode @@ -518,8 +536,12 @@ enum X86OpCode { kX86MovssAR, Binary0fOpCode(kX86Cvtsi2sd), // int to double Binary0fOpCode(kX86Cvtsi2ss), // int to float + Binary0fOpCode(kX86Cvtsqi2sd), // long to double + Binary0fOpCode(kX86Cvtsqi2ss), // long to float Binary0fOpCode(kX86Cvttsd2si), // truncating double to int Binary0fOpCode(kX86Cvttss2si), // truncating float to int + Binary0fOpCode(kX86Cvttsd2sqi), // truncating double to long + Binary0fOpCode(kX86Cvttss2sqi), // truncating float to long Binary0fOpCode(kX86Cvtsd2si), // rounding double to int Binary0fOpCode(kX86Cvtss2si), // rounding float to int Binary0fOpCode(kX86Ucomisd), // unordered double compare @@ -587,11 +609,15 @@ enum X86OpCode { 
kX86MovhpsRM, kX86MovhpsRA, // load packed single FP values from m64 to high quadword of xmm kX86MovhpsMR, kX86MovhpsAR, // store packed single FP values from high quadword of xmm to m64 Binary0fOpCode(kX86Movdxr), // move into xmm from gpr + Binary0fOpCode(kX86Movqxr), // move into xmm from 64 bit gpr + kX86MovqrxRR, kX86MovqrxMR, kX86MovqrxAR, // move into 64 bit reg from xmm kX86MovdrxRR, kX86MovdrxMR, kX86MovdrxAR, // move into reg from xmm + kX86MovsxdRR, kX86MovsxdRM, kX86MovsxdRA, // move 32 bit to 64 bit with sign extension kX86Set8R, kX86Set8M, kX86Set8A, // set byte depending on condition operand kX86Mfence, // memory barrier Binary0fOpCode(kX86Imul16), // 16bit multiply Binary0fOpCode(kX86Imul32), // 32bit multiply + Binary0fOpCode(kX86Imul64), // 64bit multiply kX86CmpxchgRR, kX86CmpxchgMR, kX86CmpxchgAR, // compare and exchange kX86LockCmpxchgMR, kX86LockCmpxchgAR, // locked compare and exchange kX86LockCmpxchg8bM, kX86LockCmpxchg8bA, // locked compare and exchange
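
The constant-shift lowering in GenShiftImmOpLong (int_x86.cc above) splits a long shift over 32-bit register pairs into three cases: a shift of exactly 32 just moves the low word into the high word, a shift above 31 additionally shifts the moved word by (amount - 32), and smaller shifts use an SHLD/SAL (or SHRD/SAR, SHRD/SHR) pair. Below is a minimal, stand-alone C++ sketch of that case split for the left-shift flavour, checked against a native 64-bit shift; ShlLongViaPair is a made-up name for illustration, not ART code.

#include <cassert>
#include <cstdint>

// Recombine a 64-bit left shift from 32-bit halves, mirroring the three cases
// (shift == 32, shift > 31, shift < 32) handled by the generated SHLD/SAL sequences.
static uint64_t ShlLongViaPair(uint32_t lo, uint32_t hi, unsigned shift) {
  uint32_t r_lo, r_hi;
  if (shift == 32) {            // high word takes the low word, low word becomes zero
    r_hi = lo;
    r_lo = 0;
  } else if (shift > 31) {      // as above, plus a shift of the moved word by (shift - 32)
    r_hi = lo << (shift - 32);
    r_lo = 0;
  } else {                      // SHLD-style: high word receives the bits shifted out of the low word
    r_hi = (hi << shift) | (shift == 0 ? 0u : lo >> (32 - shift));
    r_lo = lo << shift;
  }
  return (static_cast<uint64_t>(r_hi) << 32) | r_lo;
}

int main() {
  uint64_t v = 0x0000000180000001ULL;
  for (unsigned s = 0; s < 64; ++s) {
    assert(ShlLongViaPair(static_cast<uint32_t>(v), static_cast<uint32_t>(v >> 32), s) == (v << s));
  }
  return 0;
}

The new argument-mapping code in target_x86.cc (InToRegStorageX86_64Mapper and InToRegStorageMapping) walks the incoming arguments in order, hands out up to five core argument registers (ARG1..ARG5) and eight XMM registers (FARG0..FARG7), skips the second in-position of a wide argument once it has received a 64-bit solo register, and records whether anything is left to pass on the stack. The sketch below models only that allocation policy, under simplified stand-in names (ArgLoc, X86_64Mapper, string register labels); it is not the actual RegStorage/RegLocation API.

#include <cstdio>
#include <map>
#include <string>
#include <vector>

// Simplified stand-in for RegLocation: only the bits the mapping policy looks at.
struct ArgLoc { bool fp; bool wide; };

// Hands out core argument registers (ARG1..ARG5) and XMM registers (XMM0..XMM7) in order;
// an exhausted pool means the argument is passed on the stack.
class X86_64Mapper {
 public:
  std::string GetNextReg(bool is_fp, bool is_wide) {
    static const char* kCore[] = {"ARG1", "ARG2", "ARG3", "ARG4", "ARG5"};
    static const char* kFp[] = {"XMM0", "XMM1", "XMM2", "XMM3", "XMM4", "XMM5", "XMM6", "XMM7"};
    if (is_fp) {
      if (cur_fp_ < 8) return std::string(kFp[cur_fp_++]) + (is_wide ? "(64)" : "(32)");
    } else {
      if (cur_core_ < 5) return std::string(kCore[cur_core_++]) + (is_wide ? "(64)" : "(32)");
    }
    return "";  // no register left for this argument
  }
 private:
  int cur_core_ = 0;
  int cur_fp_ = 0;
};

int main() {
  // Ins for a static (J, F, I) signature: long low word, long high word, float, int.
  std::vector<ArgLoc> ins = {{false, true}, {false, true}, {true, false}, {false, false}};
  X86_64Mapper mapper;
  std::map<int, std::string> mapping;
  bool stack_needed = false;
  for (size_t i = 0; i < ins.size(); ++i) {
    std::string reg = mapper.GetNextReg(ins[i].fp, ins[i].wide);
    if (reg.empty()) {
      stack_needed = true;  // this in-position must be flushed to the outs area instead
      continue;
    }
    mapping[static_cast<int>(i)] = reg;
    if (ins[i].wide) {
      ++i;                  // a wide argument covers two in-positions; skip the high word
    }
  }
  for (const auto& kv : mapping) {
    std::printf("in[%d] -> %s\n", kv.first, kv.second.c_str());
  }
  std::printf("stack needed: %s\n", stack_needed ? "yes" : "no");
  return 0;
}

With these stand-in names the example prints in[0] -> ARG1(64), in[2] -> XMM0(32), in[3] -> ARG2(32) and no stack spill, which corresponds to the split that GenDalvikArgsRange above uses to decide between its mapped-register pass and its stack-store pass.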