static void SetIrregexpMaxRegisterCount(FixedArray* re, int value);
static int IrregexpNumberOfCaptures(FixedArray* re);
static int IrregexpNumberOfRegisters(FixedArray* re);
- static ByteArray* IrregexpByteCode(FixedArray* re, bool is_ascii);
- static Code* IrregexpNativeCode(FixedArray* re, bool is_ascii);
+ static ByteArray* IrregexpByteCode(FixedArray* re, bool is_one_byte);
+ static Code* IrregexpNativeCode(FixedArray* re, bool is_one_byte);
// Limit the space regexps take up on the heap. In order to limit this we
// would like to keep track of the amount of regexp code on the heap. This
static const int kRegWxpCompiledLimit = 1 * MB;
private:
- static bool CompileIrregexp(
- Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii);
- static inline bool EnsureCompiledIrregexp(
- Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii);
+ static bool CompileIrregexp(Handle<JSRegExp> re,
+ Handle<String> sample_subject, bool is_one_byte);
+ static inline bool EnsureCompiledIrregexp(Handle<JSRegExp> re,
+ Handle<String> sample_subject,
+ bool is_one_byte);
};
// A range is valid when its upper bound is not below its lower bound.
bool is_valid() { return to_ >= from_; }
// True only when the range begins at 0 and its upper bound reaches at
// least |max|.
bool IsEverything(uc16 max) {
  if (from_ != 0) return false;
  return to_ >= max;
}
// True when both bounds are equal, i.e. the range holds one value.
bool IsSingleton() { return to_ == from_; }
- void AddCaseEquivalents(ZoneList<CharacterRange>* ranges, bool is_ascii,
+ void AddCaseEquivalents(ZoneList<CharacterRange>* ranges, bool is_one_byte,
Zone* zone);
static void Split(ZoneList<CharacterRange>* base,
Vector<const int> overlay,
#undef FORWARD_DECLARE
-class TextElement V8_FINAL BASE_EMBEDDED {
+class TextElement FINAL BASE_EMBEDDED {
public:
enum TextType {
ATOM,
class Trace;
-
+struct PreloadState;
+class GreedyLoopState;
+class AlternativeGenerationList;
struct NodeInfo {
NodeInfo()
mask_(0),
value_(0),
cannot_match_(false) { }
- bool Rationalize(bool ascii);
+ bool Rationalize(bool one_byte);
// Merge in the information from another branch of an alternation.
void Merge(QuickCheckDetails* other, int from_index);
// Advance the current position by some amount.
- void Advance(int by, bool ascii);
+ void Advance(int by, bool one_byte);
void Clear();
// Returns the current value of the cannot_match_ flag.
bool cannot_match() { return cannot_match_; }
// Raises the cannot_match_ flag; this setter has no way to lower it again.
void set_cannot_match() { cannot_match_ = true; }
// Falls through on certain failure, jumps to the label on possible success.
// If the node cannot make a quick check it does nothing and returns false.
bool EmitQuickCheck(RegExpCompiler* compiler,
+ Trace* bounds_check_trace,
Trace* trace,
bool preload_has_checked_bounds,
Label* on_possible_success,
UNREACHABLE();
}
- // If we know that the input is ASCII then there are some nodes that can
+ // If we know that the input is one-byte then there are some nodes that can
// never match. This method returns a node that can be substituted for
// itself, or NULL if the node can never match.
- virtual RegExpNode* FilterASCII(int depth, bool ignore_case) { return this; }
- // Helper for FilterASCII.
+ virtual RegExpNode* FilterOneByte(int depth, bool ignore_case) {  // Default: both arguments are ignored.
+ return this;  // The node stands in for itself.
+ }
+ // Helper for FilterOneByte.
RegExpNode* replacement() {
DCHECK(info()->replacement_calculated);
return replacement_;
: RegExpNode(on_success->zone()), on_success_(on_success) { }
// Returns the stored on_success_ node pointer.
RegExpNode* on_success() { return on_success_; }
// Replaces the stored on_success_ node pointer with |node|.
void set_on_success(RegExpNode* node) { on_success_ = node; }
- virtual RegExpNode* FilterASCII(int depth, bool ignore_case);
+ virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
virtual void FillInBMInfo(int offset,
int budget,
BoyerMooreLookahead* bm,
int characters_filled_in,
bool not_at_start);
// Returns the elms_ list pointer; the list itself is not copied.
ZoneList<TextElement>* elements() { return elms_; }
- void MakeCaseIndependent(bool is_ascii);
+ void MakeCaseIndependent(bool is_one_byte);
virtual int GreedyLoopTextLength();
virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
RegExpCompiler* compiler);
BoyerMooreLookahead* bm,
bool not_at_start);
void CalculateOffsets();
- virtual RegExpNode* FilterASCII(int depth, bool ignore_case);
+ virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
private:
enum TextEmitPassType {
- NON_ASCII_MATCH, // Check for characters that can't match.
+ NON_LATIN1_MATCH, // Check for characters that can't match.
SIMPLE_CHARACTER_MATCH, // Case-dependent single character check.
NON_LETTER_CHARACTER_MATCH, // Check characters that have no case equivs.
CASE_CHARACTER_MATCH, // Case-independent single character check.
// Returns the current value of the not_at_start_ flag.
bool not_at_start() { return not_at_start_; }
// Raises the not_at_start_ flag; this setter has no way to lower it again.
void set_not_at_start() { not_at_start_ = true; }
// Stores |b| in the being_calculated_ flag.
void set_being_calculated(bool b) { being_calculated_ = b; }
- virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; }
- virtual RegExpNode* FilterASCII(int depth, bool ignore_case);
+ virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {  // This version ignores is_first.
+ return true;  // Every alternative may get a quick check.
+ }
+ virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
protected:
int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative);
AlternativeGeneration* alt_gen,
int preload_characters,
bool next_expects_preload);
+ void SetUpPreLoad(RegExpCompiler* compiler,
+ Trace* current_trace,
+ PreloadState* preloads);
+ void AssertGuardsMentionRegisters(Trace* trace);
+ int EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, Trace* trace);
+ Trace* EmitGreedyLoop(RegExpCompiler* compiler,
+ Trace* trace,
+ AlternativeGenerationList* alt_gens,
+ PreloadState* preloads,
+ GreedyLoopState* greedy_loop_state,
+ int text_length);
+ void EmitChoices(RegExpCompiler* compiler,
+ AlternativeGenerationList* alt_gens,
+ int first_choice,
+ Trace* trace,
+ PreloadState* preloads);
DispatchTable* table_;
// If true, this node is never checked at the start of the input.
// Allows a new trace to start with at_start() set to false.
// starts by loading enough characters for the alternative that takes fewest
// characters, but on a negative lookahead the negative branch did not take
// part in that calculation (EatsAtLeast) so the assumptions don't hold.
- virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; }
- virtual RegExpNode* FilterASCII(int depth, bool ignore_case);
+ virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {  // Declines only for the first alternative.
+ return !is_first;  // Every later alternative may get a quick check.
+ }
+ virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
};
: ChoiceNode(2, zone),
loop_node_(NULL),
continue_node_(NULL),
- body_can_be_zero_length_(body_can_be_zero_length) { }
+ body_can_be_zero_length_(body_can_be_zero_length)
+ { }
void AddLoopAlternative(GuardedAlternative alt);
void AddContinueAlternative(GuardedAlternative alt);
virtual void Emit(RegExpCompiler* compiler, Trace* trace);
// Returns the stored continue_node_ pointer.
RegExpNode* continue_node() { return continue_node_; }
// Returns the stored body_can_be_zero_length_ flag.
bool body_can_be_zero_length() { return body_can_be_zero_length_; }
virtual void Accept(NodeVisitor* visitor);
- virtual RegExpNode* FilterASCII(int depth, bool ignore_case);
+ virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
private:
// AddAlternative is made private for loop nodes because alternatives
// Invokes SetAll for each index starting at |from_map| and stopping just
// before length_. Does nothing when |from_map| is already >= length_.
void SetRest(int from_map) {
  int index = from_map;
  while (index < length_) {
    SetAll(index);
    ++index;
  }
}
- bool EmitSkipInstructions(RegExpMacroAssembler* masm);
+ void EmitSkipInstructions(RegExpMacroAssembler* masm);
private:
// This is the value obtained by EatsAtLeast. If we do not have at least this
// point.
int length_;
RegExpCompiler* compiler_;
- // 0x7f for ASCII, 0xffff for UTF-16.
+ // 0xff for Latin1, 0xffff for UTF-16.
int max_char_;
ZoneList<BoyerMoorePositionInfo*>* bitmaps_;
};
+class GreedyLoopState {  // Owns one Label and one Trace; EmitGreedyLoop takes a pointer to this type.
+ public:
+ explicit GreedyLoopState(bool not_at_start);  // Defined out of line; how not_at_start is used is not visible here.
+
+ Label* label() { return &label_; }  // Address of the owned label_ member.
+ Trace* counter_backtrack_trace() { return &counter_backtrack_trace_; }  // Address of the owned trace member.
+
+ private:
+ Label label_;
+ Trace counter_backtrack_trace_;
+};
+
+
+struct PreloadState {  // Plain field bundle; SetUpPreLoad, EmitGreedyLoop and EmitChoices take a pointer to it.
+ static const int kEatsAtLeastNotYetInitialized = -1;  // Sentinel that init() stores in eats_at_least_.
+ bool preload_is_current_;
+ bool preload_has_checked_bounds_;
+ int preload_characters_;
+ int eats_at_least_;
+ void init() {  // Resets eats_at_least_ only; the other three fields are not written here.
+ eats_at_least_ = kEatsAtLeastNotYetInitialized;  // NOTE(review): presumably the remaining fields are assigned before being read - confirm.
+ }
+};
+
+
class NodeVisitor {
public:
virtual ~NodeVisitor() { }
// +-------+ +------------+
class Analysis: public NodeVisitor {
public:
- Analysis(bool ignore_case, bool is_ascii)
+ Analysis(bool ignore_case, bool is_one_byte)
: ignore_case_(ignore_case),
- is_ascii_(is_ascii),
- error_message_(NULL) { }
+ is_one_byte_(is_one_byte),
+ error_message_(NULL) {}
void EnsureAnalyzed(RegExpNode* node);
#define DECLARE_VISIT(Type) \
private:
bool ignore_case_;
- bool is_ascii_;
+ bool is_one_byte_;
const char* error_message_;
DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
int num_registers;
};
- static CompilationResult Compile(RegExpCompileData* input,
- bool ignore_case,
- bool global,
- bool multiline,
+ static CompilationResult Compile(RegExpCompileData* input, bool ignore_case,
+ bool global, bool multiline, bool sticky,
Handle<String> pattern,
Handle<String> sample_subject,
- bool is_ascii, Zone* zone);
+ bool is_one_byte, Zone* zone);
static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
};