summaryrefslogtreecommitdiffstats
path: root/src/jsregexp.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/jsregexp.cc')
-rw-r--r--src/jsregexp.cc183
1 files changed, 106 insertions, 77 deletions
diff --git a/src/jsregexp.cc b/src/jsregexp.cc
index b271b027..06aae352 100644
--- a/src/jsregexp.cc
+++ b/src/jsregexp.cc
@@ -35,7 +35,6 @@
#include "platform.h"
#include "string-search.h"
#include "runtime.h"
-#include "top.h"
#include "compilation-cache.h"
#include "string-stream.h"
#include "parser.h"
@@ -51,6 +50,8 @@
#include "x64/regexp-macro-assembler-x64.h"
#elif V8_TARGET_ARCH_ARM
#include "arm/regexp-macro-assembler-arm.h"
+#elif V8_TARGET_ARCH_MIPS
+#include "mips/regexp-macro-assembler-mips.h"
#else
#error Unsupported target architecture.
#endif
@@ -62,7 +63,6 @@
namespace v8 {
namespace internal {
-
Handle<Object> RegExpImpl::CreateRegExpLiteral(Handle<JSFunction> constructor,
Handle<String> pattern,
Handle<String> flags,
@@ -97,12 +97,14 @@ static inline void ThrowRegExpException(Handle<JSRegExp> re,
Handle<String> pattern,
Handle<String> error_text,
const char* message) {
- Handle<FixedArray> elements = Factory::NewFixedArray(2);
+ Isolate* isolate = re->GetIsolate();
+ Factory* factory = isolate->factory();
+ Handle<FixedArray> elements = factory->NewFixedArray(2);
elements->set(0, *pattern);
elements->set(1, *error_text);
- Handle<JSArray> array = Factory::NewJSArrayWithElements(elements);
- Handle<Object> regexp_err = Factory::NewSyntaxError(message, array);
- Top::Throw(*regexp_err);
+ Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
+ Handle<Object> regexp_err = factory->NewSyntaxError(message, array);
+ isolate->Throw(*regexp_err);
}
@@ -112,10 +114,12 @@ static inline void ThrowRegExpException(Handle<JSRegExp> re,
Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
Handle<String> pattern,
Handle<String> flag_str) {
+ Isolate* isolate = re->GetIsolate();
JSRegExp::Flags flags = RegExpFlagsFromString(flag_str);
- Handle<FixedArray> cached = CompilationCache::LookupRegExp(pattern, flags);
+ CompilationCache* compilation_cache = isolate->compilation_cache();
+ Handle<FixedArray> cached = compilation_cache->LookupRegExp(pattern, flags);
bool in_cache = !cached.is_null();
- LOG(RegExpCompileEvent(re, in_cache));
+ LOG(isolate, RegExpCompileEvent(re, in_cache));
Handle<Object> result;
if (in_cache) {
@@ -124,9 +128,9 @@ Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
}
pattern = FlattenGetString(pattern);
CompilationZoneScope zone_scope(DELETE_ON_EXIT);
- PostponeInterruptsScope postpone;
+ PostponeInterruptsScope postpone(isolate);
RegExpCompileData parse_result;
- FlatStringReader reader(pattern);
+ FlatStringReader reader(isolate, pattern);
if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
&parse_result)) {
// Throw an exception if we fail to parse the pattern.
@@ -145,7 +149,8 @@ Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
parse_result.capture_count == 0) {
RegExpAtom* atom = parse_result.tree->AsAtom();
Vector<const uc16> atom_pattern = atom->data();
- Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern);
+ Handle<String> atom_string =
+ isolate->factory()->NewStringFromTwoByte(atom_pattern);
AtomCompile(re, pattern, flags, atom_string);
} else {
IrregexpInitialize(re, pattern, flags, parse_result.capture_count);
@@ -154,7 +159,7 @@ Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
// Compilation succeeded so the data is set on the regexp
// and we can store it in the cache.
Handle<FixedArray> data(FixedArray::cast(re->data()));
- CompilationCache::PutRegExp(pattern, flags, data);
+ compilation_cache->PutRegExp(pattern, flags, data);
return re;
}
@@ -170,7 +175,7 @@ Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
case JSRegExp::IRREGEXP: {
Handle<Object> result =
IrregexpExec(regexp, subject, index, last_match_info);
- ASSERT(!result.is_null() || Top::has_pending_exception());
+ ASSERT(!result.is_null() || Isolate::Current()->has_pending_exception());
return result;
}
default:
@@ -187,11 +192,11 @@ void RegExpImpl::AtomCompile(Handle<JSRegExp> re,
Handle<String> pattern,
JSRegExp::Flags flags,
Handle<String> match_pattern) {
- Factory::SetRegExpAtomData(re,
- JSRegExp::ATOM,
- pattern,
- flags,
- match_pattern);
+ re->GetIsolate()->factory()->SetRegExpAtomData(re,
+ JSRegExp::ATOM,
+ pattern,
+ flags,
+ match_pattern);
}
@@ -224,6 +229,8 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
Handle<String> subject,
int index,
Handle<JSArray> last_match_info) {
+ Isolate* isolate = re->GetIsolate();
+
ASSERT(0 <= index);
ASSERT(index <= subject->length());
@@ -237,24 +244,30 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
int needle_len = needle->length();
if (needle_len != 0) {
- if (index + needle_len > subject->length()) return Factory::null_value();
+ if (index + needle_len > subject->length())
+ return isolate->factory()->null_value();
+
// dispatch on type of strings
index = (needle->IsAsciiRepresentation()
? (seq_sub->IsAsciiRepresentation()
- ? SearchString(seq_sub->ToAsciiVector(),
+ ? SearchString(isolate,
+ seq_sub->ToAsciiVector(),
needle->ToAsciiVector(),
index)
- : SearchString(seq_sub->ToUC16Vector(),
+ : SearchString(isolate,
+ seq_sub->ToUC16Vector(),
needle->ToAsciiVector(),
index))
: (seq_sub->IsAsciiRepresentation()
- ? SearchString(seq_sub->ToAsciiVector(),
+ ? SearchString(isolate,
+ seq_sub->ToAsciiVector(),
needle->ToUC16Vector(),
index)
- : SearchString(seq_sub->ToUC16Vector(),
+ : SearchString(isolate,
+ seq_sub->ToUC16Vector(),
needle->ToUC16Vector(),
index)));
- if (index == -1) return Factory::null_value();
+ if (index == -1) return FACTORY->null_value();
}
ASSERT(last_match_info->HasFastElements());
@@ -288,13 +301,14 @@ bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii) {
bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, bool is_ascii) {
// Compile the RegExp.
+ Isolate* isolate = re->GetIsolate();
CompilationZoneScope zone_scope(DELETE_ON_EXIT);
- PostponeInterruptsScope postpone;
+ PostponeInterruptsScope postpone(isolate);
Object* entry = re->DataAt(JSRegExp::code_index(is_ascii));
if (entry->IsJSObject()) {
// If it's a JSObject, a previous compilation failed and threw this object.
// Re-throw the object without trying again.
- Top::Throw(entry);
+ isolate->Throw(entry);
return false;
}
ASSERT(entry->IsTheHole());
@@ -307,7 +321,7 @@ bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, bool is_ascii) {
}
RegExpCompileData compile_data;
- FlatStringReader reader(pattern);
+ FlatStringReader reader(isolate, pattern);
if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
&compile_data)) {
// Throw an exception if we fail to parse the pattern.
@@ -326,15 +340,16 @@ bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, bool is_ascii) {
is_ascii);
if (result.error_message != NULL) {
// Unable to compile regexp.
- Handle<FixedArray> elements = Factory::NewFixedArray(2);
+ Factory* factory = isolate->factory();
+ Handle<FixedArray> elements = factory->NewFixedArray(2);
elements->set(0, *pattern);
Handle<String> error_message =
- Factory::NewStringFromUtf8(CStrVector(result.error_message));
+ factory->NewStringFromUtf8(CStrVector(result.error_message));
elements->set(1, *error_message);
- Handle<JSArray> array = Factory::NewJSArrayWithElements(elements);
+ Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
Handle<Object> regexp_err =
- Factory::NewSyntaxError("malformed_regexp", array);
- Top::Throw(*regexp_err);
+ factory->NewSyntaxError("malformed_regexp", array);
+ isolate->Throw(*regexp_err);
re->SetDataAt(JSRegExp::code_index(is_ascii), *regexp_err);
return false;
}
@@ -386,11 +401,11 @@ void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re,
JSRegExp::Flags flags,
int capture_count) {
// Initialize compiled code entries to null.
- Factory::SetRegExpIrregexpData(re,
- JSRegExp::IRREGEXP,
- pattern,
- flags,
- capture_count);
+ re->GetIsolate()->factory()->SetRegExpIrregexpData(re,
+ JSRegExp::IRREGEXP,
+ pattern,
+ flags,
+ capture_count);
}
@@ -428,7 +443,9 @@ RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(
Handle<String> subject,
int index,
Vector<int> output) {
- Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()));
+ Isolate* isolate = regexp->GetIsolate();
+
+ Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);
ASSERT(index >= 0);
ASSERT(index <= subject->length());
@@ -436,24 +453,24 @@ RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(
// A flat ASCII string might have a two-byte first part.
if (subject->IsConsString()) {
- subject = Handle<String>(ConsString::cast(*subject)->first());
+ subject = Handle<String>(ConsString::cast(*subject)->first(), isolate);
}
#ifndef V8_INTERPRETED_REGEXP
- ASSERT(output.length() >=
- (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
+ ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
do {
bool is_ascii = subject->IsAsciiRepresentation();
- Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii));
+ Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);
NativeRegExpMacroAssembler::Result res =
NativeRegExpMacroAssembler::Match(code,
subject,
output.start(),
output.length(),
- index);
+ index,
+ isolate);
if (res != NativeRegExpMacroAssembler::RETRY) {
ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION ||
- Top::has_pending_exception());
+ isolate->has_pending_exception());
STATIC_ASSERT(
static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS);
STATIC_ASSERT(
@@ -484,9 +501,10 @@ RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(
for (int i = number_of_capture_registers - 1; i >= 0; i--) {
register_vector[i] = -1;
}
- Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii));
+ Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate);
- if (IrregexpInterpreter::Match(byte_codes,
+ if (IrregexpInterpreter::Match(isolate,
+ byte_codes,
subject,
register_vector,
index)) {
@@ -516,7 +534,7 @@ Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject);
if (required_registers < 0) {
// Compiling failed with an exception.
- ASSERT(Top::has_pending_exception());
+ ASSERT(Isolate::Current()->has_pending_exception());
return Handle<Object>::null();
}
@@ -542,11 +560,11 @@ Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
return last_match_info;
}
if (res == RE_EXCEPTION) {
- ASSERT(Top::has_pending_exception());
+ ASSERT(Isolate::Current()->has_pending_exception());
return Handle<Object>::null();
}
ASSERT(res == RE_FAILURE);
- return Factory::null_value();
+ return Isolate::Current()->factory()->null_value();
}
@@ -860,9 +878,11 @@ RegExpEngine::CompilationResult RegExpCompiler::Assemble(
if (reg_exp_too_big_) return IrregexpRegExpTooBig();
Handle<Object> code = macro_assembler_->GetCode(pattern);
-
work_list_ = NULL;
#ifdef DEBUG
+ if (FLAG_print_code) {
+ Handle<Code>::cast(code)->Disassemble(*pattern->ToCString());
+ }
if (FLAG_trace_regexp_assembler) {
delete macro_assembler_;
}
@@ -1304,16 +1324,14 @@ void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
}
-static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize;
-static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange;
-
-
// Returns the number of characters in the equivalence class, omitting those
// that cannot occur in the source string because it is ASCII.
-static int GetCaseIndependentLetters(uc16 character,
+static int GetCaseIndependentLetters(Isolate* isolate,
+ uc16 character,
bool ascii_subject,
unibrow::uchar* letters) {
- int length = uncanonicalize.get(character, '\0', letters);
+ int length =
+ isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);
// Unibrow returns 0 or 1 for characters where case independence is
// trivial.
if (length == 0) {
@@ -1329,7 +1347,8 @@ static int GetCaseIndependentLetters(uc16 character,
}
-static inline bool EmitSimpleCharacter(RegExpCompiler* compiler,
+static inline bool EmitSimpleCharacter(Isolate* isolate,
+ RegExpCompiler* compiler,
uc16 c,
Label* on_failure,
int cp_offset,
@@ -1351,7 +1370,8 @@ static inline bool EmitSimpleCharacter(RegExpCompiler* compiler,
// Only emits non-letters (things that don't have case). Only used for case
// independent matches.
-static inline bool EmitAtomNonLetter(RegExpCompiler* compiler,
+static inline bool EmitAtomNonLetter(Isolate* isolate,
+ RegExpCompiler* compiler,
uc16 c,
Label* on_failure,
int cp_offset,
@@ -1360,7 +1380,7 @@ static inline bool EmitAtomNonLetter(RegExpCompiler* compiler,
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
bool ascii = compiler->ascii();
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
- int length = GetCaseIndependentLetters(c, ascii, chars);
+ int length = GetCaseIndependentLetters(isolate, c, ascii, chars);
if (length < 1) {
// This can't match. Must be an ASCII subject and a non-ASCII character.
// We do not need to do anything since the ASCII pass already handled this.
@@ -1422,7 +1442,8 @@ static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
}
-typedef bool EmitCharacterFunction(RegExpCompiler* compiler,
+typedef bool EmitCharacterFunction(Isolate* isolate,
+ RegExpCompiler* compiler,
uc16 c,
Label* on_failure,
int cp_offset,
@@ -1431,7 +1452,8 @@ typedef bool EmitCharacterFunction(RegExpCompiler* compiler,
// Only emits letters (things that have case). Only used for case independent
// matches.
-static inline bool EmitAtomLetter(RegExpCompiler* compiler,
+static inline bool EmitAtomLetter(Isolate* isolate,
+ RegExpCompiler* compiler,
uc16 c,
Label* on_failure,
int cp_offset,
@@ -1440,7 +1462,7 @@ static inline bool EmitAtomLetter(RegExpCompiler* compiler,
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
bool ascii = compiler->ascii();
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
- int length = GetCaseIndependentLetters(c, ascii, chars);
+ int length = GetCaseIndependentLetters(isolate, c, ascii, chars);
if (length <= 1) return false;
// We may not need to check against the end of the input string
// if this character lies before a character that matched.
@@ -1878,6 +1900,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler,
int characters_filled_in,
bool not_at_start) {
+ Isolate* isolate = Isolate::Current();
ASSERT(characters_filled_in < details->characters());
int characters = details->characters();
int char_mask;
@@ -1908,7 +1931,8 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
}
if (compiler->ignore_case()) {
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
- int length = GetCaseIndependentLetters(c, compiler->ascii(), chars);
+ int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(),
+ chars);
ASSERT(length != 0); // Can only happen if c > char_mask (see above).
if (length == 1) {
// This letter has no case equivalents, so it's nice and simple
@@ -2408,6 +2432,7 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
Trace* trace,
bool first_element_checked,
int* checked_up_to) {
+ Isolate* isolate = Isolate::Current();
RegExpMacroAssembler* assembler = compiler->macro_assembler();
bool ascii = compiler->ascii();
Label* backtrack = trace->backtrack();
@@ -2443,7 +2468,8 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
break;
}
if (emit_function != NULL) {
- bool bound_checked = emit_function(compiler,
+ bool bound_checked = emit_function(isolate,
+ compiler,
quarks[j],
backtrack,
cp_offset + j,
@@ -4083,13 +4109,15 @@ void CharacterRange::Split(ZoneList<CharacterRange>* base,
}
-static void AddUncanonicals(ZoneList<CharacterRange>* ranges,
+static void AddUncanonicals(Isolate* isolate,
+ ZoneList<CharacterRange>* ranges,
int bottom,
int top);
void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
bool is_ascii) {
+ Isolate* isolate = Isolate::Current();
uc16 bottom = from();
uc16 top = to();
if (is_ascii) {
@@ -4099,7 +4127,7 @@ void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
if (top == bottom) {
// If this is a singleton we just expand the one character.
- int length = uncanonicalize.get(bottom, '\0', chars);
+ int length = isolate->jsregexp_uncanonicalize()->get(bottom, '\0', chars);
for (int i = 0; i < length; i++) {
uc32 chr = chars[i];
if (chr != bottom) {
@@ -4128,7 +4156,7 @@ void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
unibrow::uchar range[unibrow::Ecma262UnCanonicalize::kMaxWidth];
int pos = bottom;
while (pos < top) {
- int length = canonrange.get(pos, '\0', range);
+ int length = isolate->jsregexp_canonrange()->get(pos, '\0', range);
uc16 block_end;
if (length == 0) {
block_end = pos;
@@ -4137,7 +4165,7 @@ void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
block_end = range[0];
}
int end = (block_end > top) ? top : block_end;
- length = uncanonicalize.get(block_end, '\0', range);
+ length = isolate->jsregexp_uncanonicalize()->get(block_end, '\0', range);
for (int i = 0; i < length; i++) {
uc32 c = range[i];
uc16 range_from = c - (block_end - pos);
@@ -4247,7 +4275,8 @@ SetRelation CharacterRange::WordCharacterRelation(
}
-static void AddUncanonicals(ZoneList<CharacterRange>* ranges,
+static void AddUncanonicals(Isolate* isolate,
+ ZoneList<CharacterRange>* ranges,
int bottom,
int top) {
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
@@ -4279,8 +4308,8 @@ static void AddUncanonicals(ZoneList<CharacterRange>* ranges,
// case mappings.
for (int i = 0; i < boundary_count; i++) {
if (bottom < boundaries[i] && top >= boundaries[i]) {
- AddUncanonicals(ranges, bottom, boundaries[i] - 1);
- AddUncanonicals(ranges, boundaries[i], top);
+ AddUncanonicals(isolate, ranges, bottom, boundaries[i] - 1);
+ AddUncanonicals(isolate, ranges, boundaries[i], top);
return;
}
}
@@ -4291,7 +4320,8 @@ static void AddUncanonicals(ZoneList<CharacterRange>* ranges,
#ifdef DEBUG
for (int j = bottom; j <= top; j++) {
unsigned current_char = j;
- int length = uncanonicalize.get(current_char, '\0', chars);
+ int length = isolate->jsregexp_uncanonicalize()->get(current_char,
+ '\0', chars);
for (int k = 0; k < length; k++) {
ASSERT(chars[k] == current_char);
}
@@ -4304,7 +4334,7 @@ static void AddUncanonicals(ZoneList<CharacterRange>* ranges,
// Step through the range finding equivalent characters.
ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100);
for (int i = bottom; i <= top; i++) {
- int length = uncanonicalize.get(i, '\0', chars);
+ int length = isolate->jsregexp_uncanonicalize()->get(i, '\0', chars);
for (int j = 0; j < length; j++) {
uc32 chr = chars[j];
if (chr != i && (chr < bottom || chr > top)) {
@@ -4826,7 +4856,7 @@ OutSet* DispatchTable::Get(uc16 value) {
void Analysis::EnsureAnalyzed(RegExpNode* that) {
- StackLimitCheck check;
+ StackLimitCheck check(Isolate::Current());
if (check.HasOverflowed()) {
fail("Stack overflow");
return;
@@ -5312,6 +5342,8 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data,
RegExpMacroAssemblerX64 macro_assembler(mode, (data->capture_count + 1) * 2);
#elif V8_TARGET_ARCH_ARM
RegExpMacroAssemblerARM macro_assembler(mode, (data->capture_count + 1) * 2);
+#elif V8_TARGET_ARCH_MIPS
+ RegExpMacroAssemblerMIPS macro_assembler(mode, (data->capture_count + 1) * 2);
#endif
#else // V8_INTERPRETED_REGEXP
@@ -5336,7 +5368,4 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data,
}
-int OffsetsVector::static_offsets_vector_[
- OffsetsVector::kStaticOffsetsVectorSize];
-
}} // namespace v8::internal