From 71fb52fee246b7d511f520febbd73dc7a9bbca79 Mon Sep 17 00:00:00 2001
From: Andreas Gampe
Date: Mon, 29 Dec 2014 17:43:08 -0800
Subject: ART: Optimizing compiler intrinsics

Add intrinsics infrastructure to the optimizing compiler.

Add almost all intrinsics supported by Quick to the x86-64 backend.
Further intrinsics require more assembler support.

Change-Id: I48de9b44c82886bb298d16e74e12a9506b8e8807
---
 compiler/optimizing/builder.cc               |   2 +-
 compiler/optimizing/code_generator_arm.cc    |   2 +-
 compiler/optimizing/code_generator_arm64.cc  |   2 +-
 compiler/optimizing/code_generator_x86.cc    |   2 +-
 compiler/optimizing/code_generator_x86_64.cc |  89 ++-
 compiler/optimizing/code_generator_x86_64.h  |  19 +-
 compiler/optimizing/inliner.cc               |   4 +-
 compiler/optimizing/intrinsics.cc            | 366 ++++++++++
 compiler/optimizing/intrinsics.h             |  86 +++
 compiler/optimizing/intrinsics_list.h        |  87 +++
 compiler/optimizing/intrinsics_x86_64.cc     | 984 +++++++++++++++++++++++++++
 compiler/optimizing/intrinsics_x86_64.h      |  83 +++
 compiler/optimizing/locations.cc             |   7 +-
 compiler/optimizing/locations.h              |  11 +-
 compiler/optimizing/nodes.h                  |  61 +-
 compiler/optimizing/optimizing_compiler.cc   |   5 +
 compiler/optimizing/register_allocator.cc    |  15 +-
 17 files changed, 1758 insertions(+), 67 deletions(-)
 create mode 100644 compiler/optimizing/intrinsics.cc
 create mode 100644 compiler/optimizing/intrinsics.h
 create mode 100644 compiler/optimizing/intrinsics_list.h
 create mode 100644 compiler/optimizing/intrinsics_x86_64.cc
 create mode 100644 compiler/optimizing/intrinsics_x86_64.h

(limited to 'compiler/optimizing')

diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index f6ca6c740..9c2facb75 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -604,7 +604,7 @@ bool HGraphBuilder::BuildInvoke(const Instruction& instruction,
   HInvoke* invoke = nullptr;
   if (optimized_invoke_type == kVirtual) {
     invoke = new (arena_) HInvokeVirtual(
-        arena_, number_of_arguments, return_type, dex_pc, table_index);
+        arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index);
   } else if (optimized_invoke_type == kInterface) {
     invoke = new (arena_) HInvokeInterface(
         arena_, number_of_arguments, return_type, dex_pc, method_idx, table_index);
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 1862061bc..c4ba0fd3e 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1190,7 +1190,7 @@ void InstructionCodeGeneratorARM::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec
       kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
   // temp = temp[index_in_cache]
   __ LoadFromOffset(
-      kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache()));
+      kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()));
   // LR = temp[offset_of_quick_compiled_code]
   __ LoadFromOffset(kLoadWord, LR, temp,
                     mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 7b19f44e7..6d2c3de5d 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1975,7 +1975,7 @@ void InstructionCodeGeneratorARM64::VisitInvokeStaticOrDirect(HInvokeStaticOrDir
   // Make sure that ArtMethod* is passed in W0 as per the calling convention
   DCHECK(temp.Is(w0));
   size_t index_in_cache = mirror::Array::DataOffset(kHeapRefSize).SizeValue() +
-      invoke->GetIndexInDexCache() * kHeapRefSize;
+      invoke->GetDexMethodIndex() * kHeapRefSize;

   // TODO: Implement all kinds of calls:
   // 1) boot -> boot
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 04e36cc58..1a0df44ea 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1135,7 +1135,7 @@ void InstructionCodeGeneratorX86::VisitInvokeStaticOrDirect(HInvokeStaticOrDirec
   // temp = temp->dex_cache_resolved_methods_;
   __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
   // temp = temp[index_in_cache]
-  __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache())));
+  __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
   // (temp + offset_of_quick_compiled_code)()
   __ call(Address(
       temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 5fc24f71e..3d7f122d3 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -18,6 +18,8 @@

 #include "entrypoints/quick/quick_entrypoints.h"
 #include "gc/accounting/card_table.h"
+#include "intrinsics.h"
+#include "intrinsics_x86_64.h"
 #include "mirror/array-inl.h"
 #include "mirror/art_method.h"
 #include "mirror/class.h"
@@ -61,20 +63,6 @@ class InvokeRuntimeCallingConvention : public CallingConvention<Register, FloatR
 #define __ reinterpret_cast<X86_64Assembler*>(codegen->GetAssembler())->

-class SlowPathCodeX86_64 : public SlowPathCode {
- public:
-  SlowPathCodeX86_64() : entry_label_(), exit_label_() {}
-
-  Label* GetEntryLabel() { return &entry_label_; }
-  Label* GetExitLabel() { return &exit_label_; }
-
- private:
-  Label entry_label_;
-  Label exit_label_;
-
-  DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86_64);
-};
-
 class NullCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
  public:
   explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : instruction_(instruction) {}
@@ -375,6 +363,31 @@ inline Condition X86_64Condition(IfCondition cond) {
   return kEqual;
 }

+void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
+                                                     CpuRegister temp) {
+  // All registers are assumed to be correctly set up.
+
+  // TODO: Implement all kinds of calls:
+  // 1) boot -> boot
+  // 2) app -> boot
+  // 3) app -> app
+  //
+  // Currently we implement the app -> app logic, which looks up in the resolve cache.
+
+  // temp = method;
+  LoadCurrentMethod(temp);
+  // temp = temp->dex_cache_resolved_methods_;
+  __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
+  // temp = temp[index_in_cache]
+  __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
+  // (temp + offset_of_quick_compiled_code)()
+  __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+      kX86_64WordSize).SizeValue()));
+
+  DCHECK(!IsLeafMethod());
+  RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
   stream << X86_64ManagedRegister::FromCpuRegister(Register(reg));
 }
@@ -1123,30 +1136,31 @@ Location InvokeDexCallingConventionVisitor::GetNextLocation(Primitive::Type type
 }

 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena());
+  if (intrinsic.TryDispatch(invoke)) {
+    return;
+  }
+
   HandleInvoke(invoke);
 }

-void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
-  CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
-  // TODO: Implement all kinds of calls:
-  // 1) boot -> boot
-  // 2) app -> boot
-  // 3) app -> app
-  //
-  // Currently we implement the app -> app logic, which looks up in the resolve cache.
+static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
+  if (invoke->GetLocations()->Intrinsified()) {
+    IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
+    intrinsic.Dispatch(invoke);
+    return true;
+  }
+  return false;
+}

-  // temp = method;
-  codegen_->LoadCurrentMethod(temp);
-  // temp = temp->dex_cache_resolved_methods_;
-  __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
-  // temp = temp[index_in_cache]
-  __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetIndexInDexCache())));
-  // (temp + offset_of_quick_compiled_code)()
-  __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
-      kX86_64WordSize).SizeValue()));
+void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    return;
+  }

-  DCHECK(!codegen_->IsLeafMethod());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  codegen_->GenerateStaticOrDirectCall(
+      invoke,
+      invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>());
 }

 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
@@ -1182,10 +1196,19 @@ void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
 }

 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  IntrinsicLocationsBuilderX86_64 intrinsic(GetGraph()->GetArena());
+  if (intrinsic.TryDispatch(invoke)) {
+    return;
+  }
+
   HandleInvoke(invoke);
 }

 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    return;
+  }
+
   CpuRegister temp = invoke->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
   size_t method_offset = mirror::Class::EmbeddedVTableOffset().SizeValue() +
           invoke->GetVTableIndex() * sizeof(mirror::Class::VTableEntry);
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 343fba301..c501568a8 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -36,6 +36,8 @@ static constexpr FloatRegister kParameterFloatRegisters[] =
 static constexpr size_t kParameterCoreRegistersLength = arraysize(kParameterCoreRegisters);
 static constexpr size_t kParameterFloatRegistersLength = arraysize(kParameterFloatRegisters);

+static constexpr bool kCoalescedImplicitNullCheck = false;
+
 class InvokeDexCallingConvention : public CallingConvention<Register, FloatRegister> {
  public:
   InvokeDexCallingConvention() : CallingConvention(
@@ -67,7 +69,20 @@ class InvokeDexCallingConventionVisitor {
 };

 class CodeGeneratorX86_64;
-class SlowPathCodeX86_64;
+
+class SlowPathCodeX86_64 : public SlowPathCode {
+ public:
+  SlowPathCodeX86_64() : entry_label_(), exit_label_() {}
+
+  Label* GetEntryLabel() { return &entry_label_; }
+  Label* GetExitLabel() { return &exit_label_; }
+
+ private:
+  Label entry_label_;
+  Label exit_label_;
+
+  DISALLOW_COPY_AND_ASSIGN(SlowPathCodeX86_64);
+};

 class ParallelMoveResolverX86_64 : public ParallelMoveResolver {
  public:
@@ -226,6 +241,8 @@ class CodeGeneratorX86_64 : public CodeGenerator {
     return false;
   }

+  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, CpuRegister temp);
+
  private:
   // Labels for each block that will be compiled.
   GrowableArray
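
Note for readers: the x86-64 hunks above introduce a two-phase intrinsic
dispatch. IntrinsicLocationsBuilderX86_64::TryDispatch() claims a call while
locations are being built, and IntrinsicCodeGeneratorX86_64::Dispatch() later
emits the specialized code. The standalone C++ sketch below illustrates that
control flow only; Invoke, IntrinsicLocationsBuilder, and
IntrinsicCodeGenerator are simplified stand-ins for ART's HInvoke and the
x86-64 intrinsic classes, and the printf calls stand in for assembler
emission.

#include <cstdio>

enum class Intrinsics { kNone, kAbsInt, kFloatToRawIntBits };

struct Invoke {
  Intrinsics intrinsic = Intrinsics::kNone;
  bool intrinsified = false;  // mirrors LocationSummary::Intrinsified()
};

// Phase 1 (locations building): recognize the call and claim it, so the
// register allocator sees intrinsic-specific constraints instead of the
// generic calling-convention locations.
struct IntrinsicLocationsBuilder {
  bool TryDispatch(Invoke* invoke) {
    if (invoke->intrinsic == Intrinsics::kNone) {
      return false;  // fall through to the generic HandleInvoke() path
    }
    invoke->intrinsified = true;  // intrinsic-specific locations recorded here
    return true;
  }
};

// Phase 2 (code generation): emit specialized code instead of a method call.
struct IntrinsicCodeGenerator {
  void Dispatch(Invoke* invoke) {
    switch (invoke->intrinsic) {
      case Intrinsics::kAbsInt:
        std::printf("emit inline abs(int)\n");
        break;
      case Intrinsics::kFloatToRawIntBits:
        std::printf("emit inline floatToRawIntBits\n");
        break;
      case Intrinsics::kNone:
        break;
    }
  }
};

static bool TryGenerateIntrinsicCode(Invoke* invoke, IntrinsicCodeGenerator* gen) {
  if (invoke->intrinsified) {
    gen->Dispatch(invoke);
    return true;  // skip the regular call sequence
  }
  return false;
}

int main() {
  Invoke invoke;
  invoke.intrinsic = Intrinsics::kAbsInt;

  IntrinsicLocationsBuilder locations;
  locations.TryDispatch(&invoke);

  IntrinsicCodeGenerator gen;
  if (!TryGenerateIntrinsicCode(&invoke, &gen)) {
    std::printf("emit regular call\n");
  }
  return 0;
}

The design point mirrored here is that recognition happens once, while
locations are built, and is recorded on the LocationSummary (Intrinsified()),
so code generation never has to re-derive which calls are intrinsics.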
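The diffstat also adds intrinsics_list.h. Below is a plausible sketch of the
X-macro technique such a list header typically uses: a single table of
intrinsics that expands into the enum, a name table, and so on. The entry
names and macro shape are illustrative assumptions, not the verbatim contents
of the file.

#include <cstdio>

// One table, many expansions.
#define INTRINSICS_LIST(V) \
  V(AbsInt)                \
  V(AbsLong)               \
  V(FloatToRawIntBits)

// Expansion 1: the enum of recognized intrinsics.
enum class Intrinsics {
  kNone,
#define DECLARE_ENUM(Name) k##Name,
  INTRINSICS_LIST(DECLARE_ENUM)
#undef DECLARE_ENUM
};

// Expansion 2: a parallel name table, e.g. for logging unimplemented cases.
static const char* const kIntrinsicNames[] = {
    "None",
#define DECLARE_NAME(Name) #Name,
    INTRINSICS_LIST(DECLARE_NAME)
#undef DECLARE_NAME
};

int main() {
  // Prints "AbsInt": enum values and the name table stay in sync by
  // construction, because both expand from the same list.
  std::printf("%s\n", kIntrinsicNames[static_cast<int>(Intrinsics::kAbsInt)]);
  return 0;
}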