Upstream version 5.34.92.0
[platform/framework/web/crosswalk.git] / src / third_party / WebKit / Source / core / html / track / vtt / VTTTokenizer.cpp
1 /*
2  * Copyright (C) 2011 Google Inc.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions are
6  * met:
7  *
8  *     * Redistributions of source code must retain the above copyright
9  * notice, this list of conditions and the following disclaimer.
10  *     * Redistributions in binary form must reproduce the above
11  * copyright notice, this list of conditions and the following disclaimer
12  * in the documentation and/or other materials provided with the
13  * distribution.
14  *     * Neither the name of Google Inc. nor the names of its
15  * contributors may be used to endorse or promote products derived from
16  * this software without specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29  */
30
31 #include "config.h"
32
33 #include "core/html/track/vtt/VTTTokenizer.h"
34
35 #include "core/xml/parser/MarkupTokenizerInlines.h"
36 #include "wtf/text/StringBuilder.h"
37 #include "wtf/unicode/CharacterNames.h"
38
39 namespace WebCore {
40
// Opens the code for one tokenizer state. The expansion produces both a
// switch `case` and an identically named goto label, so a state can be
// entered through the switch or through WEBVTT_ADVANCE_TO.
#define WEBVTT_BEGIN_STATE(label) case label: label:

// Consumes the current input character, loads the following one into |cc|,
// records |nextState| in |state| and jumps to that state's label.
// The expansion site must have |state|, |cc|, |m_input| and
// |m_inputStreamPreprocessor| in scope.
#define WEBVTT_ADVANCE_TO(nextState)                             \
    do {                                                         \
        ASSERT(!m_input.isEmpty());                              \
        m_inputStreamPreprocessor.advance(m_input);              \
        cc = m_inputStreamPreprocessor.nextInputCharacter();     \
        state = nextState;                                       \
        goto nextState;                                          \
    } while (false)
50
51 template<unsigned charactersCount>
52 ALWAYS_INLINE bool equalLiteral(const StringBuilder& s, const char (&characters)[charactersCount])
53 {
54     return WTF::equal(s, reinterpret_cast<const LChar*>(characters), charactersCount - 1);
55 }
56
57 static void addNewClass(StringBuilder& classes, const StringBuilder& newClass)
58 {
59     if (!classes.isEmpty())
60         classes.append(' ');
61     classes.append(newClass);
62 }
63
64 inline bool emitToken(VTTToken& resultToken, const VTTToken& token)
65 {
66     resultToken = token;
67     return true;
68 }
69
70 inline bool advanceAndEmitToken(SegmentedString& source, VTTToken& resultToken, const VTTToken& token)
71 {
72     source.advanceAndUpdateLineNumber();
73     return emitToken(resultToken, token);
74 }
75
76 VTTTokenizer::VTTTokenizer(const String& input)
77     : m_input(input)
78     , m_inputStreamPreprocessor(this)
79 {
80     // Append a EOF marker and close the input "stream".
81     ASSERT(!m_input.isClosed());
82     m_input.append(SegmentedString(String(&kEndOfFileMarker, 1)));
83     m_input.close();
84 }
85
86 bool VTTTokenizer::nextToken(VTTToken& token)
87 {
88     if (m_input.isEmpty() || !m_inputStreamPreprocessor.peek(m_input))
89         return false;
90
91     UChar cc = m_inputStreamPreprocessor.nextInputCharacter();
92     if (cc == kEndOfFileMarker) {
93         m_inputStreamPreprocessor.advance(m_input);
94         return false;
95     }
96
97     StringBuilder buffer;
98     StringBuilder result;
99     StringBuilder classes;
100     enum {
101         DataState,
102         EscapeState,
103         TagState,
104         StartTagState,
105         StartTagClassState,
106         StartTagAnnotationState,
107         EndTagState,
108         TimestampTagState,
109     } state = DataState;
110
111     // 4.8.10.13.4 WebVTT cue text tokenizer
112     switch (state) {
113         WEBVTT_BEGIN_STATE(DataState) {
114             if (cc == '&') {
115                 buffer.append(static_cast<LChar>(cc));
116                 WEBVTT_ADVANCE_TO(EscapeState);
117             } else if (cc == '<') {
118                 if (result.isEmpty()) {
119                     WEBVTT_ADVANCE_TO(TagState);
120                 } else {
121                     // We don't want to advance input or perform a state transition - just return a (new) token.
122                     // (On the next call to nextToken we will see '<' again, but take the other branch in this if instead.)
123                     return emitToken(token, VTTToken::StringToken(result.toString()));
124                 }
125             } else if (cc == kEndOfFileMarker) {
126                 return advanceAndEmitToken(m_input, token, VTTToken::StringToken(result.toString()));
127             } else {
128                 result.append(cc);
129                 WEBVTT_ADVANCE_TO(DataState);
130             }
131         }
132         END_STATE()
133
134         WEBVTT_BEGIN_STATE(EscapeState) {
135             if (cc == ';') {
136                 if (equalLiteral(buffer, "&amp")) {
137                     result.append('&');
138                 } else if (equalLiteral(buffer, "&lt")) {
139                     result.append('<');
140                 } else if (equalLiteral(buffer, "&gt")) {
141                     result.append('>');
142                 } else if (equalLiteral(buffer, "&lrm")) {
143                     result.append(leftToRightMark);
144                 } else if (equalLiteral(buffer, "&rlm")) {
145                     result.append(rightToLeftMark);
146                 } else if (equalLiteral(buffer, "&nbsp")) {
147                     result.append(noBreakSpace);
148                 } else {
149                     buffer.append(static_cast<LChar>(cc));
150                     result.append(buffer);
151                 }
152                 buffer.clear();
153                 WEBVTT_ADVANCE_TO(DataState);
154             } else if (isASCIIAlphanumeric(cc)) {
155                 buffer.append(static_cast<LChar>(cc));
156                 WEBVTT_ADVANCE_TO(EscapeState);
157             } else if (cc == '<') {
158                 result.append(buffer);
159                 return emitToken(token, VTTToken::StringToken(result.toString()));
160             } else if (cc == kEndOfFileMarker) {
161                 result.append(buffer);
162                 return advanceAndEmitToken(m_input, token, VTTToken::StringToken(result.toString()));
163             } else {
164                 result.append(buffer);
165                 buffer.clear();
166
167                 if (cc == '&') {
168                     buffer.append(static_cast<LChar>(cc));
169                     WEBVTT_ADVANCE_TO(EscapeState);
170                 }
171                 result.append(cc);
172                 WEBVTT_ADVANCE_TO(DataState);
173             }
174         }
175         END_STATE()
176
177         WEBVTT_BEGIN_STATE(TagState) {
178             if (isTokenizerWhitespace(cc)) {
179                 ASSERT(result.isEmpty());
180                 WEBVTT_ADVANCE_TO(StartTagAnnotationState);
181             } else if (cc == '.') {
182                 ASSERT(result.isEmpty());
183                 WEBVTT_ADVANCE_TO(StartTagClassState);
184             } else if (cc == '/') {
185                 WEBVTT_ADVANCE_TO(EndTagState);
186             } else if (WTF::isASCIIDigit(cc)) {
187                 result.append(cc);
188                 WEBVTT_ADVANCE_TO(TimestampTagState);
189             } else if (cc == '>' || cc == kEndOfFileMarker) {
190                 ASSERT(result.isEmpty());
191                 return advanceAndEmitToken(m_input, token, VTTToken::StartTag(result.toString()));
192             } else {
193                 result.append(cc);
194                 WEBVTT_ADVANCE_TO(StartTagState);
195             }
196         }
197         END_STATE()
198
199         WEBVTT_BEGIN_STATE(StartTagState) {
200             if (isTokenizerWhitespace(cc)) {
201                 WEBVTT_ADVANCE_TO(StartTagAnnotationState);
202             } else if (cc == '.') {
203                 WEBVTT_ADVANCE_TO(StartTagClassState);
204             } else if (cc == '>' || cc == kEndOfFileMarker) {
205                 return advanceAndEmitToken(m_input, token, VTTToken::StartTag(result.toString()));
206             } else {
207                 result.append(cc);
208                 WEBVTT_ADVANCE_TO(StartTagState);
209             }
210         }
211         END_STATE()
212
213         WEBVTT_BEGIN_STATE(StartTagClassState) {
214             if (isTokenizerWhitespace(cc)) {
215                 addNewClass(classes, buffer);
216                 buffer.clear();
217                 WEBVTT_ADVANCE_TO(StartTagAnnotationState);
218             } else if (cc == '.') {
219                 addNewClass(classes, buffer);
220                 buffer.clear();
221                 WEBVTT_ADVANCE_TO(StartTagClassState);
222             } else if (cc == '>' || cc == kEndOfFileMarker) {
223                 addNewClass(classes, buffer);
224                 buffer.clear();
225                 return advanceAndEmitToken(m_input, token, VTTToken::StartTag(result.toString(), classes.toAtomicString()));
226             } else {
227                 buffer.append(cc);
228                 WEBVTT_ADVANCE_TO(StartTagClassState);
229             }
230         }
231         END_STATE()
232
233         WEBVTT_BEGIN_STATE(StartTagAnnotationState) {
234             if (cc == '>' || cc == kEndOfFileMarker) {
235                 return advanceAndEmitToken(m_input, token, VTTToken::StartTag(result.toString(), classes.toAtomicString(), buffer.toAtomicString()));
236             }
237             buffer.append(cc);
238             WEBVTT_ADVANCE_TO(StartTagAnnotationState);
239         }
240         END_STATE()
241
242         WEBVTT_BEGIN_STATE(EndTagState) {
243             if (cc == '>' || cc == kEndOfFileMarker)
244                 return advanceAndEmitToken(m_input, token, VTTToken::EndTag(result.toString()));
245             result.append(cc);
246             WEBVTT_ADVANCE_TO(EndTagState);
247         }
248         END_STATE()
249
250         WEBVTT_BEGIN_STATE(TimestampTagState) {
251             if (cc == '>' || cc == kEndOfFileMarker)
252                 return advanceAndEmitToken(m_input, token, VTTToken::TimestampTag(result.toString()));
253             result.append(cc);
254             WEBVTT_ADVANCE_TO(TimestampTagState);
255         }
256         END_STATE()
257
258     }
259
260     ASSERT_NOT_REACHED();
261     return false;
262 }
263
264 }
265