1 // Copyright 2011 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #ifndef V8_DATEPARSER_H_
29 #define V8_DATEPARSER_H_
31 #include "allocation.h"
32 #include "char-predicates-inl.h"
37 class DateParser : public AllStatic {
39 // Parse the string as a date. If parsing succeeds, return true after
40 // filling out the output array as follows (all integers are Smis):
42 // [1]: month (0 = Jan, 1 = Feb, ...)
48 // [7]: UTC offset in seconds, or null value if no timezone specified
49 // If parsing fails, return false (content of output array is not defined).
50 template <typename Char>
51 static bool Parse(Vector<Char> str, FixedArray* output, UnicodeCache* cache);
54 YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, MILLISECOND, UTC_OFFSET, OUTPUT_SIZE
59 static inline bool Between(int x, int lo, int hi) {
60 return static_cast<unsigned>(x - lo) <= static_cast<unsigned>(hi - lo);
63 // Indicates a missing value.
64 static const int kNone = kMaxInt;
66 // Maximal number of digits used to build the value of a numeral.
67 // Remaining digits are ignored.
68 static const int kMaxSignificantDigits = 9;
70 // InputReader provides basic string parsing and character classification.
71 template <typename Char>
72 class InputReader BASE_EMBEDDED {
74 InputReader(UnicodeCache* unicode_cache, Vector<Char> s)
77 unicode_cache_(unicode_cache) {
81 int position() { return index_; }
83 // Advance to the next character of the string.
85 ch_ = (index_ < buffer_.length()) ? buffer_[index_] : 0;
// Read a string of digits as an unsigned number. Only the first
// kMaxSignificantDigits digits contribute to the value; any remaining
// digits of the numeral are skipped.
92 int ReadUnsignedNumeral() {
95 while (IsAsciiDigit()) {
96 if (i < kMaxSignificantDigits) n = n * 10 + ch_ - '0';
103 // Read a word (sequence of chars. >= 'A'), fill the given buffer with a
104 // lower-case prefix, and pad any remainder of the buffer with zeroes.
105 // Return word length.
106 int ReadWord(uint32_t* prefix, int prefix_size) {
108 for (len = 0; IsAsciiAlphaOrAbove(); Next(), len++) {
109 if (len < prefix_size) prefix[len] = AsciiAlphaToLower(ch_);
111 for (int i = len; i < prefix_size; i++) prefix[i] = 0;
115 // The skip methods return whether they actually skipped something.
116 bool Skip(uint32_t c) {
124 bool SkipWhiteSpace() {
125 if (unicode_cache_->IsWhiteSpace(ch_)) {
132 bool SkipParentheses() {
133 if (ch_ != '(') return false;
136 if (ch_ == ')') --balance;
137 else if (ch_ == '(') ++balance;
139 } while (balance > 0 && ch_);
143 // Character testing/classification. Non-ASCII digits are not supported.
144 bool Is(uint32_t c) const { return ch_ == c; }
145 bool IsEnd() const { return ch_ == 0; }
146 bool IsAsciiDigit() const { return IsDecimalDigit(ch_); }
147 bool IsAsciiAlphaOrAbove() const { return ch_ >= 'A'; }
148 bool IsAsciiSign() const { return ch_ == '+' || ch_ == '-'; }
150 // Return 1 for '+' and -1 for '-'.
151 int GetAsciiSignValue() const { return 44 - static_cast<int>(ch_); }
155 Vector<Char> buffer_;
157 UnicodeCache* unicode_cache_;
161 INVALID, MONTH_NAME, TIME_ZONE_NAME, TIME_SEPARATOR, AM_PM
166 bool IsInvalid() { return tag_ == kInvalidTokenTag; }
167 bool IsUnknown() { return tag_ == kUnknownTokenTag; }
168 bool IsNumber() { return tag_ == kNumberTag; }
169 bool IsSymbol() { return tag_ == kSymbolTag; }
170 bool IsWhiteSpace() { return tag_ == kWhiteSpaceTag; }
171 bool IsEndOfInput() { return tag_ == kEndOfInputTag; }
172 bool IsKeyword() { return tag_ >= kKeywordTagStart; }
174 int length() { return length_; }
180 KeywordType keyword_type() {
182 return static_cast<KeywordType>(tag_);
184 int keyword_value() {
190 return static_cast<char>(value_);
192 bool IsSymbol(char symbol) {
193 return IsSymbol() && this->symbol() == symbol;
195 bool IsKeywordType(KeywordType tag) {
198 bool IsFixedLengthNumber(int length) {
199 return IsNumber() && length_ == length;
202 return tag_ == kSymbolTag && (value_ == '-' || value_ == '+');
205 ASSERT(IsAsciiSign());
209 return IsKeywordType(TIME_ZONE_NAME) && length_ == 1 && value_ == 0;
211 bool IsUnknown(int character) {
212 return IsUnknown() && value_ == character;
214 // Factory functions.
215 static DateToken Keyword(KeywordType tag, int value, int length) {
216 return DateToken(tag, length, value);
218 static DateToken Number(int value, int length) {
219 return DateToken(kNumberTag, length, value);
221 static DateToken Symbol(char symbol) {
222 return DateToken(kSymbolTag, 1, symbol);
224 static DateToken EndOfInput() {
225 return DateToken(kEndOfInputTag, 0, -1);
227 static DateToken WhiteSpace(int length) {
228 return DateToken(kWhiteSpaceTag, length, -1);
230 static DateToken Unknown() {
231 return DateToken(kUnknownTokenTag, 1, -1);
233 static DateToken Invalid() {
234 return DateToken(kInvalidTokenTag, 0, -1);
239 kInvalidTokenTag = -6,
240 kUnknownTokenTag = -5,
247 DateToken(int tag, int length, int value)
253 int length_; // Number of characters.
257 template <typename Char>
258 class DateStringTokenizer {
260 explicit DateStringTokenizer(InputReader<Char>* in)
261 : in_(in), next_(Scan()) { }
263 DateToken result = next_;
271 bool SkipSymbol(char symbol) {
272 if (next_.IsSymbol(symbol)) {
282 InputReader<Char>* in_;
286 static int ReadMilliseconds(DateToken number);
288 // KeywordTable maps names of months, time zones, am/pm to numbers.
289 class KeywordTable : public AllStatic {
291 // Look up a word in the keyword table and return an index.
292 // 'pre' contains a prefix of the word, zero-padded to size kPrefixLength
293 // and 'len' is the word length.
294 static int Lookup(const uint32_t* pre, int len);
295 // Get the type of the keyword at index i.
296 static KeywordType GetType(int i) {
297 return static_cast<KeywordType>(array[i][kTypeOffset]);
299 // Get the value of the keyword at index i.
300 static int GetValue(int i) { return array[i][kValueOffset]; }
302 static const int kPrefixLength = 3;
303 static const int kTypeOffset = kPrefixLength;
304 static const int kValueOffset = kTypeOffset + 1;
305 static const int kEntrySize = kValueOffset + 1;
306 static const int8_t array[][kEntrySize];
309 class TimeZoneComposer BASE_EMBEDDED {
311 TimeZoneComposer() : sign_(kNone), hour_(kNone), minute_(kNone) {}
312 void Set(int offset_in_hours) {
313 sign_ = offset_in_hours < 0 ? -1 : 1;
314 hour_ = offset_in_hours * sign_;
317 void SetSign(int sign) { sign_ = sign < 0 ? -1 : 1; }
318 void SetAbsoluteHour(int hour) { hour_ = hour; }
319 void SetAbsoluteMinute(int minute) { minute_ = minute; }
320 bool IsExpecting(int n) const {
321 return hour_ != kNone && minute_ == kNone && TimeComposer::IsMinute(n);
323 bool IsUTC() const { return hour_ == 0 && minute_ == 0; }
324 bool Write(FixedArray* output);
325 bool IsEmpty() { return hour_ == kNone; }
332 class TimeComposer BASE_EMBEDDED {
334 TimeComposer() : index_(0), hour_offset_(kNone) {}
335 bool IsEmpty() const { return index_ == 0; }
336 bool IsExpecting(int n) const {
337 return (index_ == 1 && IsMinute(n)) ||
338 (index_ == 2 && IsSecond(n)) ||
339 (index_ == 3 && IsMillisecond(n));
342 return index_ < kSize ? (comp_[index_++] = n, true) : false;
344 bool AddFinal(int n) {
345 if (!Add(n)) return false;
346 while (index_ < kSize) comp_[index_++] = 0;
349 void SetHourOffset(int n) { hour_offset_ = n; }
350 bool Write(FixedArray* output);
352 static bool IsMinute(int x) { return Between(x, 0, 59); }
353 static bool IsHour(int x) { return Between(x, 0, 23); }
354 static bool IsSecond(int x) { return Between(x, 0, 59); }
357 static bool IsHour12(int x) { return Between(x, 0, 12); }
358 static bool IsMillisecond(int x) { return Between(x, 0, 999); }
360 static const int kSize = 4;
366 class DayComposer BASE_EMBEDDED {
368 DayComposer() : index_(0), named_month_(kNone), is_iso_date_(false) {}
369 bool IsEmpty() const { return index_ == 0; }
371 if (index_ < kSize) {
378 void SetNamedMonth(int n) { named_month_ = n; }
379 bool Write(FixedArray* output);
380 void set_iso_date() { is_iso_date_ = true; }
381 static bool IsMonth(int x) { return Between(x, 1, 12); }
382 static bool IsDay(int x) { return Between(x, 1, 31); }
385 static const int kSize = 3;
// If set, ensures that the date is always parsed in year-month-day order.
// Tries to parse an ES5 Date Time String. Returns the next token
// to continue with in the legacy date string parser. If parsing is
// complete, returns DateToken::EndOfInput(). If terminally unsuccessful,
// returns DateToken::Invalid(). Otherwise, parsing continues in the
// legacy parser, starting from the returned token.
398 template <typename Char>
399 static DateParser::DateToken ParseES5DateTime(
400 DateStringTokenizer<Char>* scanner,
403 TimeZoneComposer* tz);
407 } } // namespace v8::internal
409 #endif // V8_DATEPARSER_H_