// Copyright 2010 the V8 project authors. All rights reserved. // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following // disclaimer in the documentation and/or other materials provided // with the distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "v8.h"

#include "api.h"
#include "ast.h"
#include "bootstrapper.h"
#include "codegen.h"
#include "compiler.h"
#include "func-name-inferrer.h"
#include "messages.h"
#include "parser.h"
#include "platform.h"
#include "preparser.h"
#include "runtime.h"
#include "scopeinfo.h"
#include "string-stream.h"

#include "ast-inl.h"
#include "jump-target-inl.h"

namespace v8 {
namespace internal {

// PositionStack is used for on-stack allocation of token positions for
// new expressions. Please look at ParseNewExpression.
//
// The stack is an intrusive singly-linked list of Element objects; the
// destructor asserts that the stack has been emptied unless *ok is false.
class PositionStack {
 public:
  explicit PositionStack(bool* ok) : top_(NULL), ok_(ok) {}
  ~PositionStack() { ASSERT(!*ok_ || is_empty()); }

  // One stack entry. Constructing an Element pushes it onto |stack|.
  class Element {
   public:
    Element(PositionStack* stack, int value) {
      previous_ = stack->top();
      value_ = value;
      stack->set_top(this);
    }

   private:
    Element* previous() { return previous_; }
    int value() { return value_; }
    friend class PositionStack;
    Element* previous_;
    int value_;
  };

  bool is_empty() { return top_ == NULL; }

  // Unlinks the top element and returns its value. The stack must not be
  // empty.
  int pop() {
    ASSERT(!is_empty());
    int result = top_->value();
    top_ = top_->previous();
    return result;
  }

 private:
  Element* top() { return top_; }
  void set_top(Element* value) { top_ = value; }
  Element* top_;
  bool* ok_;
};


// Starts with no pending empty term, no buffered characters and empty
// term/alternative lists.
RegExpBuilder::RegExpBuilder()
    : pending_empty_(false),
      characters_(NULL),
      terms_(),
      alternatives_()
#ifdef DEBUG
    , last_added_(ADD_NONE)
#endif
  {}


// Turns the buffered characters (if any) into a single RegExpAtom appended
// to text_, and clears the pending-empty flag.
void RegExpBuilder::FlushCharacters() {
  pending_empty_ = false;
  if (characters_ != NULL) {
    RegExpTree* atom = new RegExpAtom(characters_->ToConstVector());
    characters_ = NULL;
    text_.Add(atom);
    LAST(ADD_ATOM);
  }
}


// Flushes buffered characters, then moves the collected text elements into
// terms_: a single element is added as is, several are merged into one
// RegExpText.
void RegExpBuilder::FlushText() {
  FlushCharacters();
  int num_text = text_.length();
  if (num_text == 0) {
    return;
  } else if (num_text == 1) {
    terms_.Add(text_.last());
  } else {
    RegExpText* text = new RegExpText();
    for (int i = 0; i < num_text; i++)
      text_.Get(i)->AppendToText(text);
    terms_.Add(text);
  }
  text_.Clear();
}


// Appends |c| to the character buffer, creating the buffer on first use.
void RegExpBuilder::AddCharacter(uc16 c) {
  pending_empty_ = false;
  if (characters_ == NULL) {
    characters_ = new ZoneList<uc16>(4);
  }
  characters_->Add(c);
  LAST(ADD_CHAR);
}


// Records that the most recently added term was empty.
void RegExpBuilder::AddEmpty() {
  pending_empty_ = true;
}


// Adds |term|. Empty terms are only recorded as pending; text elements are
// collected in text_ (after flushing buffered characters); everything else
// goes straight to terms_ (after flushing collected text).
void RegExpBuilder::AddAtom(RegExpTree* term) {
  if (term->IsEmpty()) {
    AddEmpty();
    return;
  }
  if (term->IsTextElement()) {
    FlushCharacters();
    text_.Add(term);
  } else {
    FlushText();
    terms_.Add(term);
  }
  LAST(ADD_ATOM);
}


// Flushes collected text and appends the assertion to terms_.
void RegExpBuilder::AddAssertion(RegExpTree* assert) {
  FlushText();
  terms_.Add(assert);
  LAST(ADD_ASSERT);
}


// Closes the current alternative so that following terms start a new one.
void RegExpBuilder::NewAlternative() {
  FlushTerms();
}


// Folds the current terms into one alternative (the empty instance, the
// single term, or a RegExpAlternative), appends it to alternatives_ and
// clears terms_.
void RegExpBuilder::FlushTerms() {
  FlushText();
  int num_terms = terms_.length();
  RegExpTree* alternative;
  if (num_terms == 0) {
    alternative = RegExpEmpty::GetInstance();
  } else if (num_terms == 1) {
    alternative = terms_.last();
  } else {
    alternative = new RegExpAlternative(terms_.GetList());
  }
  alternatives_.Add(alternative);
  terms_.Clear();
  LAST(ADD_NONE);
}


// Finishes the expression: the empty instance for no alternatives, the
// alternative itself when there is exactly one, otherwise a
// RegExpDisjunction over all of them.
RegExpTree* RegExpBuilder::ToRegExp() {
  FlushTerms();
  int num_alternatives = alternatives_.length();
  if (num_alternatives == 0) {
    return RegExpEmpty::GetInstance();
  }
  if (num_alternatives == 1) {
    return alternatives_.last();
  }
  return new RegExpDisjunction(alternatives_.GetList());
}


// Wraps the most recently added atom in a quantifier {min, max} of the given
// type. A quantifier following a pending empty term is dropped.
void RegExpBuilder::AddQuantifierToAtom(int min,
                                        int max,
                                        RegExpQuantifier::Type type) {
  if (pending_empty_) {
    pending_empty_ = false;
    return;
  }
  RegExpTree* atom;
  if (characters_ != NULL) {
    ASSERT(last_added_ == ADD_CHAR);
    // Last atom was character. Only that last character is quantified; any
    // preceding buffered characters become a separate atom.
    Vector<const uc16> char_vector = characters_->ToConstVector();
    int num_chars = char_vector.length();
    if (num_chars > 1) {
      Vector<const uc16> prefix = char_vector.SubVector(0, num_chars - 1);
      text_.Add(new RegExpAtom(prefix));
      char_vector = char_vector.SubVector(num_chars - 1, num_chars);
    }
    characters_ = NULL;
    atom = new RegExpAtom(char_vector);
    FlushText();
  } else if (text_.length() > 0) {
    ASSERT(last_added_ == ADD_ATOM);
    atom = text_.RemoveLast();
    FlushText();
  } else if (terms_.length() > 0) {
    ASSERT(last_added_ == ADD_ATOM);
    atom = terms_.RemoveLast();
    if (atom->max_match() == 0) {
      // Guaranteed to only match an empty string. With min == 0 the atom is
      // dropped entirely; otherwise it is put back without a quantifier.
      LAST(ADD_TERM);
      if (min == 0) {
        return;
      }
      terms_.Add(atom);
      return;
    }
  } else {
    // Only call immediately after adding an atom or character!
    UNREACHABLE();
    return;
  }
  terms_.Add(new RegExpQuantifier(min, max, type, atom));
  LAST(ADD_TERM);
}


// A temporary scope stores information during parsing, just like
// a plain scope.  However, temporary scopes are not kept around
// after parsing or referenced by syntax trees so they can be stack-
// allocated and hence used by the pre-parser.
class TemporaryScope BASE_EMBEDDED {
 public:
  explicit TemporaryScope(TemporaryScope** variable);
  ~TemporaryScope();

  // Returns the index for the next materialized literal (offset by
  // JSFunction::kLiteralsPrefixSize) and bumps the literal count.
  int NextMaterializedLiteralIndex() {
    int next_index =
        materialized_literal_count_ + JSFunction::kLiteralsPrefixSize;
    materialized_literal_count_++;
    return next_index;
  }
  int materialized_literal_count() { return materialized_literal_count_; }

  void SetThisPropertyAssignmentInfo(
      bool only_simple_this_property_assignments,
      Handle<FixedArray> this_property_assignments) {
    only_simple_this_property_assignments_ =
        only_simple_this_property_assignments;
    this_property_assignments_ = this_property_assignments;
  }
  bool only_simple_this_property_assignments() {
    return only_simple_this_property_assignments_;
  }
  Handle<FixedArray> this_property_assignments() {
    return this_property_assignments_;
  }

  void AddProperty() { expected_property_count_++; }
  int expected_property_count() { return expected_property_count_; }

  void AddLoop() { loop_count_++; }
  bool ContainsLoops() const { return loop_count_ > 0; }

 private:
  // Captures the number of literals that need materialization in the
  // function.  Includes regexp literals, and boilerplate for object
  // and array literals.
  int materialized_literal_count_;

  // Properties count estimation.
  int expected_property_count_;

  // Keeps track of assignments to properties of this. Used for
  // optimizing constructors.
  bool only_simple_this_property_assignments_;
  Handle<FixedArray> this_property_assignments_;

  // Captures the number of loops inside the scope.
  int loop_count_;

  // Bookkeeping
  TemporaryScope** variable_;
  TemporaryScope* parent_;
};


// Installs this scope as the current one in *variable, remembering the
// scope that was current before.
TemporaryScope::TemporaryScope(TemporaryScope** variable)
    : materialized_literal_count_(0),
      expected_property_count_(0),
      only_simple_this_property_assignments_(false),
      this_property_assignments_(Factory::empty_fixed_array()),
      loop_count_(0),
      variable_(variable),
      parent_(*variable) {
  *variable = this;
}


// Reinstates the scope that was current when this one was created.
TemporaryScope::~TemporaryScope() {
  *variable_ = parent_;
}


// Looks up the symbol for |string|, going through the symbol cache only when
// |symbol_id| is a valid index into it.
// NOTE(review): the element type of |string| (const char) was restored from
// context and cannot be confirmed from this file alone - verify against the
// declaration in parser.h.
Handle<String> Parser::LookupSymbol(int symbol_id,
                                    Vector<const char> string) {
  // Length of symbol cache is the number of identified symbols.
  // If we are larger than that, or negative, it's not a cached symbol.
  // This might also happen if there is no preparser symbol data, even
  // if there is some preparser data.
  if (static_cast<unsigned>(symbol_id)
      >= static_cast<unsigned>(symbol_cache_.length())) {
    return Factory::LookupSymbol(string);
  }
  return LookupCachedSymbol(symbol_id, string);
}


// Returns the cached symbol for |symbol_id|, performing the real lookup and
// filling the cache slot on a miss. Cache hits bump the
// total_preparse_symbols_skipped counter.
Handle<String> Parser::LookupCachedSymbol(int symbol_id,
                                          Vector<const char> string) {
  // Make sure the cache is large enough to hold the symbol identifier.
  if (symbol_cache_.length() <= symbol_id) {
    // Increase length to index + 1.
    symbol_cache_.AddBlock(Handle<String>::null(),
                           symbol_id + 1 - symbol_cache_.length());
  }
  Handle<String> result = symbol_cache_.at(symbol_id);
  if (result.is_null()) {
    result = Factory::LookupSymbol(string);
    symbol_cache_.at(symbol_id) = result;
    return result;
  }
  Counters::total_preparse_symbols_skipped.Increment();
  return result;
}


// Returns the function entry at the current read position and advances past
// it, or a default-constructed FunctionEntry when the next entry does not
// fit in the store or does not begin at |start|.
FunctionEntry ScriptDataImpl::GetFunctionEntry(int start) {
  // The current pre-data entry must be a FunctionEntry with the given
  // start position.
  if ((function_index_ + FunctionEntry::kSize <= store_.length())
      && (static_cast<int>(store_[function_index_]) == start)) {
    int index = function_index_;
    function_index_ += FunctionEntry::kSize;
    return FunctionEntry(store_.SubVector(index,
                                          index + FunctionEntry::kSize));
  }
  return FunctionEntry();
}


// Reads the next number from the symbol data stream.
int ScriptDataImpl::GetSymbolIdentifier() {
  return ReadNumber(&symbol_data_);
}


// Validates the preparse data before any of it is trusted. Returns false as
// soon as an inconsistency is found.
bool ScriptDataImpl::SanityCheck() {
  // Check that the header data is valid and doesn't point to positions
  // outside the store.
  if (store_.length() < PreparseDataConstants::kHeaderSize) return false;
  if (magic() != PreparseDataConstants::kMagicNumber) return false;
  if (version() != PreparseDataConstants::kCurrentVersion) return false;
  if (has_error()) {
    // Extra sane sanity check for error message encoding.
    if (store_.length() <= PreparseDataConstants::kHeaderSize
                         + PreparseDataConstants::kMessageTextPos) {
      return false;
    }
    if (Read(PreparseDataConstants::kMessageStartPos) >
        Read(PreparseDataConstants::kMessageEndPos)) {
      return false;
    }
    unsigned arg_count = Read(PreparseDataConstants::kMessageArgCountPos);
    int pos = PreparseDataConstants::kMessageTextPos;
    // The message text plus arg_count arguments are stored back to back as
    // length-prefixed strings, hence arg_count + 1 iterations.
    for (unsigned int i = 0; i <= arg_count; i++) {
      if (store_.length() <= PreparseDataConstants::kHeaderSize + pos) {
        return false;
      }
      int length = static_cast<int>(Read(pos));
      if (length < 0) return false;
      // Reject a string that runs past the end of the store *before*
      // advancing. The check above guarantees that |remaining| is
      // non-negative, so a length close to INT_MAX can no longer overflow
      // |pos| and sneak past the bounds tests.
      int remaining =
          store_.length() - PreparseDataConstants::kHeaderSize - pos - 1;
      if (length > remaining) return false;
      pos += 1 + length;
    }
    if (store_.length() < PreparseDataConstants::kHeaderSize + pos) {
      return false;
    }
    return true;
  }
  // Check that the space allocated for function entries is sane.
  int functions_size =
      static_cast<int>(store_[PreparseDataConstants::kFunctionsSizeOffset]);
  if (functions_size < 0) return false;
  if (functions_size % FunctionEntry::kSize != 0) return false;
  // Check that the count of symbols is non-negative.
  int symbol_count =
      static_cast<int>(store_[PreparseDataConstants::kSymbolCountOffset]);
  if (symbol_count < 0) return false;
  // Check that the total size has room for header and function entries.
  int minimum_size =
      PreparseDataConstants::kHeaderSize + functions_size;
  if (store_.length() < minimum_size) return false;
  return true;
}


// Decodes a length-prefixed string: start[0] holds the length and each of
// the following words holds one character. Returns a freshly allocated,
// NUL-terminated copy and, if |chars| is non-NULL, stores the length there.
// NOTE(review): the result comes from NewArray; presumably the caller is
// responsible for releasing it - confirm at the call sites.
const char* ScriptDataImpl::ReadString(unsigned* start, int* chars) {
  int length = start[0];
  char* result = NewArray<char>(length + 1);
  for (int i = 0; i < length; i++) {
    result[i] = start[i + 1];
  }
  result[length] = '\0';
  if (chars != NULL) *chars = length;
  return result;
}


// Returns the source range recorded for the error message.
Scanner::Location ScriptDataImpl::MessageLocation() {
  int beg_pos = Read(PreparseDataConstants::kMessageStartPos);
  int end_pos = Read(PreparseDataConstants::kMessageEndPos);
  return Scanner::Location(beg_pos, end_pos);
}


// Returns a newly allocated copy of the recorded message text.
const char* ScriptDataImpl::BuildMessage() {
  unsigned* start = ReadAddress(PreparseDataConstants::kMessageTextPos);
  return ReadString(start, NULL);
}


// Returns newly allocated copies of the message arguments, which are stored
// as length-prefixed strings directly after the message text.
Vector<const char*> ScriptDataImpl::BuildArgs() {
  int arg_count = Read(PreparseDataConstants::kMessageArgCountPos);
  const char** array = NewArray<const char*>(arg_count);
  // Position after text found by skipping past length field and
  // length field content words.
  int pos = PreparseDataConstants::kMessageTextPos + 1
      + Read(PreparseDataConstants::kMessageTextPos);
  for (int i = 0; i < arg_count; i++) {
    int count = 0;
    array[i] = ReadString(ReadAddress(pos), &count);
    pos += count + 1;
  }
  return Vector<const char*>(array, arg_count);
}


// Reads the word at |position|, counted from the end of the header.
unsigned ScriptDataImpl::Read(int position) {
  return store_[PreparseDataConstants::kHeaderSize + position];
}


// Returns the address of the word at |position|, counted from the end of
// the header.
unsigned* ScriptDataImpl::ReadAddress(int position) {
  return &store_[PreparseDataConstants::kHeaderSize + position];
}


// Creates a scope of the given type under |parent| and initializes it.
Scope* Parser::NewScope(Scope* parent, Scope::Type type, bool inside_with) {
  Scope* result = new Scope(parent, type);
  result->Initialize(inside_with);
  return result;
}

// ----------------------------------------------------------------------------
// Target is a support class to facilitate manipulation of the
// Parser's target_stack_ (the stack of potential 'break' and
// 'continue' statement targets). Upon construction, a new target is
// added; it is removed upon destruction.

class Target BASE_EMBEDDED { public: Target(Target** variable, AstNode* node) : variable_(variable), node_(node), previous_(*variable) { *variable = this; } ~Target() { *variable_ = previous_; } Target* previous() { return previous_; } AstNode* node() { return node_; } private: Target** variable_; AstNode* node_; Target* previous_; }; class TargetScope BASE_EMBEDDED { public: explicit TargetScope(Target** variable) : variable_(variable), previous_(*variable) { *variable = NULL; } ~TargetScope() { *variable_ = previous_; } private: Target** variable_; Target* previous_; }; // ---------------------------------------------------------------------------- // LexicalScope is a support class to facilitate manipulation of the // Parser's scope stack. The constructor sets the parser's top scope // to the incoming scope, and the destructor resets it. class LexicalScope BASE_EMBEDDED { public: LexicalScope(Scope** scope_variable, int* with_nesting_level_variable, Scope* scope) : scope_variable_(scope_variable), with_nesting_level_variable_(with_nesting_level_variable), prev_scope_(*scope_variable), prev_level_(*with_nesting_level_variable) { *scope_variable = scope; *with_nesting_level_variable = 0; } ~LexicalScope() { (*scope_variable_)->Leave(); *scope_variable_ = prev_scope_; *with_nesting_level_variable_ = prev_level_; } private: Scope** scope_variable_; int* with_nesting_level_variable_; Scope* prev_scope_; int prev_level_; }; // ---------------------------------------------------------------------------- // The CHECK_OK macro is a convenient macro to enforce error // handling for functions that may fail (by returning !*ok). // // CAUTION: This macro appends extra statements after a call, // thus it must never be used where only a single statement // is correct (e.g. an if statement branch w/o braces)! 
#define CHECK_OK ok); \ if (!*ok) return NULL; \ ((void)0 #define DUMMY ) // to make indentation work #undef DUMMY #define CHECK_FAILED /**/); \ if (failed_) return NULL; \ ((void)0 #define DUMMY ) // to make indentation work #undef DUMMY // ---------------------------------------------------------------------------- // Implementation of Parser Parser::Parser(Handle