1 files changed, 172 insertions, 181 deletions
diff --git a/src/scanner.h b/src/scanner.h
index 045e7d27..16c3a427 100644
--- a/src/scanner.h
+++ b/src/scanner.h
@@ -41,26 +41,6 @@
 namespace v8 {
 namespace internal {
 
-
-// General collection of (multi-)bit-flags that can be passed to scanners and
-// parsers to signify their (initial) mode of operation.
-enum ParsingFlags {
-  kNoParsingFlags = 0,
-  // Embed LanguageMode values in parsing flags, i.e., equivalent to:
-  // CLASSIC_MODE = 0,
-  // STRICT_MODE,
-  // EXTENDED_MODE,
-  kLanguageModeMask = 0x03,
-  kAllowLazy = 0x04,
-  kAllowNativesSyntax = 0x08,
-  kAllowModules = 0x10
-};
-
-STATIC_ASSERT((kLanguageModeMask & CLASSIC_MODE) == CLASSIC_MODE);
-STATIC_ASSERT((kLanguageModeMask & STRICT_MODE) == STRICT_MODE);
-STATIC_ASSERT((kLanguageModeMask & EXTENDED_MODE) == EXTENDED_MODE);
-
-
 // Returns the value (0 .. 15) of a hexadecimal character c.
 // If c is not a legal hexadecimal character, returns a value < 0.
 inline int HexValue(uc32 c) {
@@ -73,17 +53,15 @@ inline int HexValue(uc32 c) {
 
 
 // ---------------------------------------------------------------------
-// Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
-// A code unit is a 16 bit value representing either a 16 bit code point
-// or one part of a surrogate pair that make a single 21 bit code point.
+// Buffered stream of characters, using an internal UC16 buffer.
 
-class Utf16CharacterStream {
+class UC16CharacterStream {
  public:
-  Utf16CharacterStream() : pos_(0) { }
-  virtual ~Utf16CharacterStream() { }
+  UC16CharacterStream() : pos_(0) { }
+  virtual ~UC16CharacterStream() { }
 
-  // Returns and advances past the next UTF-16 code unit in the input
-  // stream. If there are no more code units, it returns a negative
+  // Returns and advances past the next UC16 character in the input
+  // stream. If there are no more characters, it returns a negative
   // value.
   inline uc32 Advance() {
     if (buffer_cursor_ < buffer_end_ || ReadBlock()) {
@@ -92,47 +70,47 @@ class Utf16CharacterStream {
     }
     // Note: currently the following increment is necessary to avoid a
     // parser problem! The scanner treats the final kEndOfInput as
-    // a code unit with a position, and does math relative to that
+    // a character with a position, and does math relative to that
     // position.
     pos_++;
 
     return kEndOfInput;
   }
 
-  // Return the current position in the code unit stream.
+  // Return the current position in the character stream.
   // Starts at zero.
   inline unsigned pos() const { return pos_; }
 
-  // Skips forward past the next code_unit_count UTF-16 code units
+  // Skips forward past the next character_count UC16 characters
   // in the input, or until the end of input if that comes sooner.
-  // Returns the number of code units actually skipped. If less
-  // than code_unit_count,
-  inline unsigned SeekForward(unsigned code_unit_count) {
+  // Returns the number of characters actually skipped. If less
+  // than character_count, the end of the input was reached.
+  inline unsigned SeekForward(unsigned character_count) {
     unsigned buffered_chars =
         static_cast<unsigned>(buffer_end_ - buffer_cursor_);
-    if (code_unit_count <= buffered_chars) {
-      buffer_cursor_ += code_unit_count;
-      pos_ += code_unit_count;
-      return code_unit_count;
+    if (character_count <= buffered_chars) {
+      buffer_cursor_ += character_count;
+      pos_ += character_count;
+      return character_count;
     }
-    return SlowSeekForward(code_unit_count);
+    return SlowSeekForward(character_count);
   }
 
-  // Pushes back the most recently read UTF-16 code unit (or negative
+  // Pushes back the most recently read UC16 character (or negative
   // value if at end of input), i.e., the value returned by the most recent
   // call to Advance.
   // Must not be used right after calling SeekForward.
-  virtual void PushBack(int32_t code_unit) = 0;
+  virtual void PushBack(int32_t character) = 0;
 
  protected:
   static const uc32 kEndOfInput = -1;
 
-  // Ensures that the buffer_cursor_ points to the code_unit at
+  // Ensures that the buffer_cursor_ points to the character at
   // position pos_ of the input, if possible. If the position
   // is at or after the end of the input, return false. If there
-  // are more code_units available, return true.
+  // are more characters available, return true.
   virtual bool ReadBlock() = 0;
-  virtual unsigned SlowSeekForward(unsigned code_unit_count) = 0;
+  virtual unsigned SlowSeekForward(unsigned character_count) = 0;
 
   const uc16* buffer_cursor_;
   const uc16* buffer_end_;
@@ -180,24 +158,23 @@ class LiteralBuffer {
     }
   }
 
-  INLINE(void AddChar(uint32_t code_unit)) {
+  inline void AddChar(uc16 character) {
     if (position_ >= backing_store_.length()) ExpandBuffer();
     if (is_ascii_) {
-      if (code_unit < kMaxAsciiCharCodeU) {
-        backing_store_[position_] = static_cast<byte>(code_unit);
+      if (character < kMaxAsciiCharCodeU) {
+        backing_store_[position_] = static_cast<byte>(character);
         position_ += kASCIISize;
         return;
       }
-      ConvertToUtf16();
+      ConvertToUC16();
     }
-    ASSERT(code_unit < 0x10000u);
-    *reinterpret_cast<uc16*>(&backing_store_[position_]) = code_unit;
+    *reinterpret_cast<uc16*>(&backing_store_[position_]) = character;
     position_ += kUC16Size;
   }
 
   bool is_ascii() { return is_ascii_; }
 
-  Vector<const uc16> utf16_literal() {
+  Vector<const uc16> uc16_literal() {
     ASSERT(!is_ascii_);
     ASSERT((position_ & 0x1) == 0);
     return Vector<const uc16>(
@@ -239,13 +216,13 @@ class LiteralBuffer {
     backing_store_ = new_store;
   }
 
-  void ConvertToUtf16() {
+  void ConvertToUC16() {
     ASSERT(is_ascii_);
     Vector<byte> new_store;
     int new_content_size = position_ * kUC16Size;
     if (new_content_size >= backing_store_.length()) {
-      // Ensure room for all currently read code units as UC16 as well
-      // as the code unit about to be stored.
+      // Ensure room for all currently read characters as UC16 as well
+      // as the character about to be stored.
       new_store = Vector<byte>::New(NewCapacity(new_content_size));
     } else {
       new_store = backing_store_;
@@ -272,32 +249,35 @@ class LiteralBuffer {
 
 
 // ----------------------------------------------------------------------------
-// JavaScript Scanner.
+// Scanner base-class.
 
+// Generic functionality used by both JSON and JavaScript scanners.
 class Scanner {
  public:
-  // Scoped helper for literal recording. Automatically drops the literal
-  // if aborting the scanning before it's complete.
+  // -1 is outside of the range of any real source code.
+  static const int kNoOctalLocation = -1;
+
+  typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
+
   class LiteralScope {
    public:
-    explicit LiteralScope(Scanner* self)
-        : scanner_(self), complete_(false) {
-      scanner_->StartLiteral();
-    }
-     ~LiteralScope() {
-       if (!complete_) scanner_->DropLiteral();
-     }
-    void Complete() {
-      scanner_->TerminateLiteral();
-      complete_ = true;
-    }
+    explicit LiteralScope(Scanner* self);
+    ~LiteralScope();
+    void Complete();
 
    private:
     Scanner* scanner_;
     bool complete_;
   };
 
-  // Representation of an interval of source positions.
+  explicit Scanner(UnicodeCache* scanner_contants);
+
+  // Returns the current token again.
+  Token::Value current_token() { return current_.token; }
+
+  // One token look-ahead (past the token returned by Next()).
+  Token::Value peek() const { return next_.token; }
+
   struct Location {
     Location(int b, int e) : beg_pos(b), end_pos(e) { }
     Location() : beg_pos(0), end_pos(0) { }
@@ -312,39 +292,28 @@ class Scanner {
     int end_pos;
   };
 
-  // -1 is outside of the range of any real source code.
-  static const int kNoOctalLocation = -1;
-
-  typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
-
-  explicit Scanner(UnicodeCache* scanner_contants);
-
-  void Initialize(Utf16CharacterStream* source);
-
-  // Returns the next token and advances input.
-  Token::Value Next();
-  // Returns the current token again.
-  Token::Value current_token() { return current_.token; }
   // Returns the location information for the current token
-  // (the token last returned by Next()).
+  // (the token returned by Next()).
   Location location() const { return current_.location; }
+  Location peek_location() const { return next_.location; }
+
   // Returns the literal string, if any, for the current token (the
-  // token last returned by Next()). The string is 0-terminated.
-  // Literal strings are collected for identifiers, strings, and
-  // numbers.
+  // token returned by Next()). The string is 0-terminated and in
+  // UTF-8 format; it may contain 0-characters. Literal strings are
+  // collected for identifiers, strings, and numbers.
   // These functions only give the correct result if the literal
   // was scanned between calls to StartLiteral() and TerminateLiteral().
-  Vector<const char> literal_ascii_string() {
+  bool is_literal_ascii() {
     ASSERT_NOT_NULL(current_.literal_chars);
-    return current_.literal_chars->ascii_literal();
+    return current_.literal_chars->is_ascii();
   }
-  Vector<const uc16> literal_utf16_string() {
+  Vector<const char> literal_ascii_string() {
     ASSERT_NOT_NULL(current_.literal_chars);
-    return current_.literal_chars->utf16_literal();
+    return current_.literal_chars->ascii_literal();
   }
-  bool is_literal_ascii() {
+  Vector<const uc16> literal_uc16_string() {
     ASSERT_NOT_NULL(current_.literal_chars);
-    return current_.literal_chars->is_ascii();
+    return current_.literal_chars->uc16_literal();
   }
   int literal_length() const {
     ASSERT_NOT_NULL(current_.literal_chars);
@@ -361,26 +330,19 @@ class Scanner {
     return current_.literal_chars->length() != source_length;
   }
 
-  // Similar functions for the upcoming token.
-
-  // One token look-ahead (past the token returned by Next()).
-  Token::Value peek() const { return next_.token; }
-
-  Location peek_location() const { return next_.location; }
-
   // Returns the literal string for the next token (the token that
   // would be returned if Next() were called).
-  Vector<const char> next_literal_ascii_string() {
+  bool is_next_literal_ascii() {
     ASSERT_NOT_NULL(next_.literal_chars);
-    return next_.literal_chars->ascii_literal();
+    return next_.literal_chars->is_ascii();
   }
-  Vector<const uc16> next_literal_utf16_string() {
+  Vector<const char> next_literal_ascii_string() {
     ASSERT_NOT_NULL(next_.literal_chars);
-    return next_.literal_chars->utf16_literal();
+    return next_.literal_chars->ascii_literal();
   }
-  bool is_next_literal_ascii() {
+  Vector<const uc16> next_literal_uc16_string() {
     ASSERT_NOT_NULL(next_.literal_chars);
-    return next_.literal_chars->is_ascii();
+    return next_.literal_chars->uc16_literal();
   }
   int next_literal_length() const {
     ASSERT_NOT_NULL(next_.literal_chars);
@@ -391,52 +353,7 @@ class Scanner {
 
   static const int kCharacterLookaheadBufferSize = 1;
 
-  // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
-  uc32 ScanOctalEscape(uc32 c, int length);
-
-  // Returns the location of the last seen octal literal.
-  Location octal_position() const { return octal_pos_; }
-  void clear_octal_position() { octal_pos_ = Location::invalid(); }
-
-  // Seek forward to the given position.  This operation does not
-  // work in general, for instance when there are pushed back
-  // characters, but works for seeking forward until simple delimiter
-  // tokens, which is what it is used for.
-  void SeekForward(int pos);
-
-  bool HarmonyScoping() const {
-    return harmony_scoping_;
-  }
-  void SetHarmonyScoping(bool scoping) {
-    harmony_scoping_ = scoping;
-  }
-  bool HarmonyModules() const {
-    return harmony_modules_;
-  }
-  void SetHarmonyModules(bool modules) {
-    harmony_modules_ = modules;
-  }
-
-
-  // Returns true if there was a line terminator before the peek'ed token,
-  // possibly inside a multi-line comment.
-  bool HasAnyLineTerminatorBeforeNext() const {
-    return has_line_terminator_before_next_ ||
-           has_multiline_comment_before_next_;
-  }
-
-  // Scans the input as a regular expression pattern, previous
-  // character(s) must be /(=). Returns true if a pattern is scanned.
-  bool ScanRegExpPattern(bool seen_equal);
-  // Returns true if regexp flags are scanned (always since flags can
-  // be empty).
-  bool ScanRegExpFlags();
-
-  // Tells whether the buffer contains an identifier (no escapes).
-  // Used for checking if a property name is an identifier.
-  static bool IsIdentifier(unibrow::CharacterStream* buffer);
-
- private:
+ protected:
   // The current and look-ahead token.
   struct TokenDesc {
     Token::Value token;
@@ -461,7 +378,7 @@ class Scanner {
     next_.literal_chars = free_buffer;
   }
 
-  INLINE(void AddLiteralChar(uc32 c)) {
+  inline void AddLiteralChar(uc32 c) {
     ASSERT_NOT_NULL(next_.literal_chars);
     next_.literal_chars->AddChar(c);
   }
@@ -506,14 +423,107 @@ class Scanner {
 
   uc32 ScanHexNumber(int expected_length);
 
-  // Scans a single JavaScript token.
-  void Scan();
+  // Return the current source position.
+  int source_pos() {
+    return source_->pos() - kCharacterLookaheadBufferSize;
+  }
+
+  UnicodeCache* unicode_cache_;
+
+  // Buffers collecting literal strings, numbers, etc.
+  LiteralBuffer literal_buffer1_;
+  LiteralBuffer literal_buffer2_;
+
+  TokenDesc current_;  // desc for current token (as returned by Next())
+  TokenDesc next_;     // desc for next token (one token look-ahead)
+
+  // Input stream. Must be initialized to a UC16CharacterStream.
+  UC16CharacterStream* source_;
+
+  // One Unicode character look-ahead; c0_ < 0 at the end of the input.
+  uc32 c0_;
+};
+
+// ----------------------------------------------------------------------------
+// JavaScriptScanner - base logic for JavaScript scanning.
+
+class JavaScriptScanner : public Scanner {
+ public:
+  // Scoped helper for literal recording. Calls StartLiteral() on
+  // construction and automatically drops the literal if the scope is
+  // destroyed before Complete() has been called.
+  class LiteralScope {
+   public:
+    explicit LiteralScope(JavaScriptScanner* self)
+        : scanner_(self), complete_(false) {
+      scanner_->StartLiteral();
+    }
+     ~LiteralScope() {
+       if (!complete_) scanner_->DropLiteral();
+     }
+    void Complete() {
+      scanner_->TerminateLiteral();
+      complete_ = true;
+    }
+
+   private:
+    JavaScriptScanner* scanner_;
+    bool complete_;
+  };
+
+  explicit JavaScriptScanner(UnicodeCache* scanner_contants);
+
+  void Initialize(UC16CharacterStream* source);
 
+  // Returns the next token.
+  Token::Value Next();
+
+  // Returns true if there was a line terminator before the peek'ed token,
+  // possibly inside a multi-line comment.
+  bool HasAnyLineTerminatorBeforeNext() const {
+    return has_line_terminator_before_next_ ||
+           has_multiline_comment_before_next_;
+  }
+
+  // Scans the input as a regular expression pattern, previous
+  // character(s) must be /(=). Returns true if a pattern is scanned.
+  bool ScanRegExpPattern(bool seen_equal);
+  // Returns true if regexp flags are scanned (always since flags can
+  // be empty).
+  bool ScanRegExpFlags();
+
+  // Tells whether the buffer contains an identifier (no escapes).
+  // Used for checking if a property name is an identifier.
+  static bool IsIdentifier(unibrow::CharacterStream* buffer);
+
+  // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
+  uc32 ScanOctalEscape(uc32 c, int length);
+
+  // Returns the location of the last seen octal literal.
+  Location octal_position() const { return octal_pos_; }
+  void clear_octal_position() { octal_pos_ = Location::invalid(); }
+
+  // Seek forward to the given position.  This operation does not
+  // work in general, for instance when there are pushed back
+  // characters, but works for seeking forward until simple delimiter
+  // tokens, which is what it is used for.
+  void SeekForward(int pos);
+
+  bool HarmonyBlockScoping() const {
+    return harmony_block_scoping_;
+  }
+  void SetHarmonyBlockScoping(bool block_scoping) {
+    harmony_block_scoping_ = block_scoping;
+  }
+
+
+ protected:
   bool SkipWhiteSpace();
   Token::Value SkipSingleLineComment();
   Token::Value SkipMultiLineComment();
-  // Scans a possible HTML comment -- begins with '<!'.
-  Token::Value ScanHtmlComment();
+
+  // Scans a single JavaScript token.
+  void Scan();
 
   void ScanDecimalDigits();
   Token::Value ScanNumber(bool seen_period);
@@ -523,6 +533,9 @@ class Scanner {
   void ScanEscape();
   Token::Value ScanString();
 
+  // Scans a possible HTML comment -- begins with '<!'.
+  Token::Value ScanHtmlComment();
+
   // Decodes a unicode escape-sequence which is part of an identifier.
   // If the escape sequence cannot be decoded the result is kBadChar.
   uc32 ScanIdentifierUnicodeEscape();
@@ -531,30 +544,9 @@ class Scanner {
   // flags.
   bool ScanLiteralUnicodeEscape();
 
-  // Return the current source position.
-  int source_pos() {
-    return source_->pos() - kCharacterLookaheadBufferSize;
-  }
-
-  UnicodeCache* unicode_cache_;
-
-  // Buffers collecting literal strings, numbers, etc.
-  LiteralBuffer literal_buffer1_;
-  LiteralBuffer literal_buffer2_;
-
-  TokenDesc current_;  // desc for current token (as returned by Next())
-  TokenDesc next_;     // desc for next token (one token look-ahead)
-
-  // Input stream. Must be initialized to an Utf16CharacterStream.
-  Utf16CharacterStream* source_;
-
-
   // Start position of the octal literal last scanned.
   Location octal_pos_;
 
-  // One Unicode character look-ahead; c0_ < 0 at the end of the input.
-  uc32 c0_;
-
   // Whether there is a line terminator whitespace character after
   // the current token, and  before the next. Does not count newlines
   // inside multiline comments.
@@ -562,10 +554,9 @@ class Scanner {
   // Whether there is a multi-line comment that contains a
   // line-terminator after the current token, and before the next.
   bool has_multiline_comment_before_next_;
-  // Whether we scan 'let' as a keyword for harmony block-scoped let bindings.
-  bool harmony_scoping_;
-  // Whether we scan 'module', 'import', 'export' as keywords.
-  bool harmony_modules_;
+  // Whether we scan 'let' as a keyword for harmony block scoped
+  // let bindings.
+  bool harmony_block_scoping_;
 };
 
 } }  // namespace v8::internal