2 * Copyright (C) 2011 Google Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
14 * * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 #include "core/html/track/vtt/VTTTokenizer.h"
35 #include "core/xml/parser/MarkupTokenizerInlines.h"
36 #include "wtf/text/StringBuilder.h"
37 #include "wtf/unicode/CharacterNames.h"
// Opens a tokenizer state. Expands to a `case` label followed by an ordinary
// label, both named after the state.
// NOTE(review): the ordinary label is presumably the jump target used by
// WEBVTT_ADVANCE_TO; that jump is not visible in this view - confirm.
41 #define WEBVTT_BEGIN_STATE(stateName) case stateName: stateName:
// Consumes the current input character and reloads `cc`: asserts that m_input
// is not empty, advances m_inputStreamPreprocessor over m_input, then stores
// the preprocessor's next input character in `cc`.
// NOTE(review): the visible lines never reference `stateName` and the last
// one still ends in a continuation, so part of this macro (presumably the
// transfer of control to the named state) is missing from this view - confirm.
42 #define WEBVTT_ADVANCE_TO(stateName) \
45 ASSERT(!m_input.isEmpty()); \
46 m_inputStreamPreprocessor.advance(m_input); \
47 cc = m_inputStreamPreprocessor.nextInputCharacter(); \
51 template<unsigned charactersCount>
// Compares the contents of `s` with the character array `characters` via
// WTF::equal. The array is reinterpreted as LChar and the compared length is
// charactersCount - 1, i.e. the array's final element (the terminating NUL
// when a string literal is passed) is excluded.
52 ALWAYS_INLINE bool equalLiteral(const StringBuilder& s, const char (&characters)[charactersCount])
54 return WTF::equal(s, reinterpret_cast<const LChar*>(characters), charactersCount - 1);
// Appends the contents of `newClass` to the accumulated `classes` builder.
// NOTE(review): the statement guarded by the isEmpty() check is not visible in
// this view; presumably it inserts a separator before the new class name when
// `classes` already has content - confirm.
57 static void addNewClass(StringBuilder& classes, const StringBuilder& newClass)
59 if (!classes.isEmpty())
61 classes.append(newClass);
// Helper whose boolean result nextToken() and advanceAndEmitToken() return
// directly when a token is complete.
// NOTE(review): the body is not visible in this view; presumably it stores
// `token` into `resultToken` and reports success - confirm.
64 inline bool emitToken(VTTToken& resultToken, const VTTToken& token)
// Consumes the current character of `source` via advanceAndUpdateLineNumber()
// and then emits `token` through emitToken(), returning emitToken()'s result.
// Used for characters that terminate a token and must not be seen again.
70 inline bool advanceAndEmitToken(SegmentedString& source, VTTToken& resultToken, const VTTToken& token)
72 source.advanceAndUpdateLineNumber();
73 return emitToken(resultToken, token);
// Constructs a tokenizer over `input`. The input stream preprocessor is bound
// to this tokenizer, and a one-character segment holding kEndOfFileMarker is
// appended to m_input so the state machine can detect end of input as an
// ordinary character.
// NOTE(review): the initialisation of m_input and the call that closes the
// stream (mentioned in the comment below) are not visible in this view.
76 VTTTokenizer::VTTTokenizer(const String& input)
78 , m_inputStreamPreprocessor(this)
80 // Append an EOF marker and close the input "stream".
81 ASSERT(!m_input.isClosed());
82 m_input.append(SegmentedString(String(&kEndOfFileMarker, 1)));
// Produces the next cue-text token from m_input.
//
// The function is a character-driven state machine built from the
// WEBVTT_BEGIN_STATE / WEBVTT_ADVANCE_TO macros. Text is accumulated in
// `result`, escape sequences and class/annotation text in `buffer`, and class
// names in `classes`. A finished token leaves the function through emitToken()
// (current character left unconsumed) or advanceAndEmitToken() (current
// character consumed), as a StringToken, StartTag, EndTag or TimestampTag.
//
// NOTE(review): several lines of this function are not visible in this view
// (declarations of `result` and `buffer`, most of the state enum, the switch
// header, a number of branch conditions, bodies and returns). The comments
// below describe only the statements that are visible.
86 bool VTTTokenizer::nextToken(VTTToken& token)
// Guard: nothing to tokenize when the input is empty or the preprocessor
// cannot peek a character (the guarded statement is not visible here).
88 if (m_input.isEmpty() || !m_inputStreamPreprocessor.peek(m_input))
91 UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
// The EOF marker appended by the constructor is consumed here when it is the
// first character seen by this call.
92 if (cc == kEndOfFileMarker) {
93 m_inputStreamPreprocessor.advance(m_input);
// Accumulates the class names of the start tag being parsed (see addNewClass).
99 StringBuilder classes;
106 StartTagAnnotationState,
111 // 4.8.10.13.4 WebVTT cue text tokenizer
// DataState: plain cue text outside of any tag.
113 WEBVTT_BEGIN_STATE(DataState) {
// Start of an escape: the character is kept in `buffer` and parsing continues
// in EscapeState. NOTE(review): the condition guarding this branch is not
// visible; presumably it tests for '&' - confirm.
115 buffer.append(static_cast<LChar>(cc));
116 WEBVTT_ADVANCE_TO(EscapeState);
117 } else if (cc == '<') {
// '<' with no pending text starts a tag; with pending text, the text is
// emitted first and '<' is left in the input for the next call.
118 if (result.isEmpty()) {
119 WEBVTT_ADVANCE_TO(TagState);
121 // We don't want to advance input or perform a state transition - just return a (new) token.
122 // (On the next call to nextToken we will see '<' again, but take the other branch in this if instead.)
123 return emitToken(token, VTTToken::StringToken(result.toString()));
125 } else if (cc == kEndOfFileMarker) {
// End of input: consume the marker and emit whatever text was collected.
126 return advanceAndEmitToken(m_input, token, VTTToken::StringToken(result.toString()));
129 WEBVTT_ADVANCE_TO(DataState);
// EscapeState: `buffer` holds the escape seen so far; alphanumerics extend it.
134 WEBVTT_BEGIN_STATE(EscapeState) {
// Recognised escapes are replaced by the character they stand for.
// NOTE(review): the literals "&", "<", ">" and " " below do not match the
// "&lrm"/"&rlm" form of their siblings and look like entity-decoded versions
// of "&amp", "&lt", "&gt" and "&nbsp" - verify against the upstream file.
136 if (equalLiteral(buffer, "&")) {
138 } else if (equalLiteral(buffer, "<")) {
140 } else if (equalLiteral(buffer, ">")) {
142 } else if (equalLiteral(buffer, "&lrm")) {
143 result.append(leftToRightMark);
144 } else if (equalLiteral(buffer, "&rlm")) {
145 result.append(rightToLeftMark);
146 } else if (equalLiteral(buffer, " ")) {
147 result.append(noBreakSpace);
// No match above (the introducing branch line is not visible): the current
// character is added to `buffer` and the whole buffer is copied to `result`.
149 buffer.append(static_cast<LChar>(cc));
150 result.append(buffer);
153 WEBVTT_ADVANCE_TO(DataState);
154 } else if (isASCIIAlphanumeric(cc)) {
155 buffer.append(static_cast<LChar>(cc));
156 WEBVTT_ADVANCE_TO(EscapeState);
157 } else if (cc == '<') {
// A tag begins in the middle of an escape: flush the partial escape as text
// and emit it without consuming '<'.
158 result.append(buffer);
159 return emitToken(token, VTTToken::StringToken(result.toString()));
160 } else if (cc == kEndOfFileMarker) {
// Input ends in the middle of an escape: flush the partial escape as text.
161 result.append(buffer);
162 return advanceAndEmitToken(m_input, token, VTTToken::StringToken(result.toString()));
// Any other character: the partial escape is flushed to `result`; the visible
// statements then either restart EscapeState with the character in `buffer`
// or fall back to DataState (the selecting condition is not visible).
164 result.append(buffer);
168 buffer.append(static_cast<LChar>(cc));
169 WEBVTT_ADVANCE_TO(EscapeState);
172 WEBVTT_ADVANCE_TO(DataState);
// TagState: first character after '<'; `result` is expected to be empty.
177 WEBVTT_BEGIN_STATE(TagState) {
178 if (isTokenizerWhitespace(cc)) {
179 ASSERT(result.isEmpty());
180 WEBVTT_ADVANCE_TO(StartTagAnnotationState);
181 } else if (cc == '.') {
182 ASSERT(result.isEmpty());
183 WEBVTT_ADVANCE_TO(StartTagClassState);
184 } else if (cc == '/') {
185 WEBVTT_ADVANCE_TO(EndTagState);
186 } else if (WTF::isASCIIDigit(cc)) {
188 WEBVTT_ADVANCE_TO(TimestampTagState);
189 } else if (cc == '>' || cc == kEndOfFileMarker) {
// "<>" or '<' at end of input: emit a start tag built from the empty `result`.
190 ASSERT(result.isEmpty());
191 return advanceAndEmitToken(m_input, token, VTTToken::StartTag(result.toString()));
194 WEBVTT_ADVANCE_TO(StartTagState);
// StartTagState: inside the tag name, until whitespace, '.', '>' or EOF.
199 WEBVTT_BEGIN_STATE(StartTagState) {
200 if (isTokenizerWhitespace(cc)) {
201 WEBVTT_ADVANCE_TO(StartTagAnnotationState);
202 } else if (cc == '.') {
203 WEBVTT_ADVANCE_TO(StartTagClassState);
204 } else if (cc == '>' || cc == kEndOfFileMarker) {
205 return advanceAndEmitToken(m_input, token, VTTToken::StartTag(result.toString()));
208 WEBVTT_ADVANCE_TO(StartTagState);
// StartTagClassState: inside a ".class" suffix; `buffer` holds the current
// class name and is folded into `classes` whenever the name ends.
213 WEBVTT_BEGIN_STATE(StartTagClassState) {
214 if (isTokenizerWhitespace(cc)) {
215 addNewClass(classes, buffer);
217 WEBVTT_ADVANCE_TO(StartTagAnnotationState);
218 } else if (cc == '.') {
219 addNewClass(classes, buffer);
221 WEBVTT_ADVANCE_TO(StartTagClassState);
222 } else if (cc == '>' || cc == kEndOfFileMarker) {
223 addNewClass(classes, buffer);
225 return advanceAndEmitToken(m_input, token, VTTToken::StartTag(result.toString(), classes.toAtomicString()));
228 WEBVTT_ADVANCE_TO(StartTagClassState);
// StartTagAnnotationState: text after whitespace in a start tag; on '>' or EOF
// the start tag is emitted with `buffer` passed as its third argument.
233 WEBVTT_BEGIN_STATE(StartTagAnnotationState) {
234 if (cc == '>' || cc == kEndOfFileMarker) {
235 return advanceAndEmitToken(m_input, token, VTTToken::StartTag(result.toString(), classes.toAtomicString(), buffer.toAtomicString()));
238 WEBVTT_ADVANCE_TO(StartTagAnnotationState);
// EndTagState: after "</"; '>' or EOF emits an end tag built from `result`.
242 WEBVTT_BEGIN_STATE(EndTagState) {
243 if (cc == '>' || cc == kEndOfFileMarker)
244 return advanceAndEmitToken(m_input, token, VTTToken::EndTag(result.toString()));
246 WEBVTT_ADVANCE_TO(EndTagState);
// TimestampTagState: tag that started with a digit; '>' or EOF emits a
// timestamp tag built from `result`.
250 WEBVTT_BEGIN_STATE(TimestampTagState) {
251 if (cc == '>' || cc == kEndOfFileMarker)
252 return advanceAndEmitToken(m_input, token, VTTToken::TimestampTag(result.toString()));
254 WEBVTT_ADVANCE_TO(TimestampTagState);
// Every state either returns a token or jumps to another state, so control is
// not expected to arrive here.
260 ASSERT_NOT_REACHED();