+2010-12-25 Peter Rybin <peter.rybin@gmail.com>
+
+ Reviewed by Adam Barth.
+
+ Adds currentColumn (and currentLine) to SegmentedString. Switches
+ HTMLTreeBuilder to HTMLDocumentParser::textPosition when it
+ needs the position of a <script> tag.
+
+ SegmentedString should provide column position
+ https://bugs.webkit.org/show_bug.cgi?id=51311
+
+ * html/parser/HTMLDocumentParser.cpp:
+ (WebCore::HTMLDocumentParser::HTMLDocumentParser):
+ (WebCore::HTMLDocumentParser::textPosition):
+ * html/parser/HTMLInputStream.h:
+ (WebCore::HTMLInputStream::current):
+ (WebCore::InsertionPointRecord::InsertionPointRecord):
+ (WebCore::InsertionPointRecord::~InsertionPointRecord):
+ * html/parser/HTMLTreeBuilder.cpp:
+ (WebCore::HTMLTreeBuilder::HTMLTreeBuilder):
+ (WebCore::HTMLTreeBuilder::processScriptStartTag):
+ * html/parser/HTMLTreeBuilder.h:
+ (WebCore::HTMLTreeBuilder::create):
+ * platform/text/SegmentedString.cpp:
+ (WebCore::SegmentedString::operator=):
+ (WebCore::SegmentedString::numberOfCharactersConsumedSlow):
+ (WebCore::SegmentedString::advanceSlowCase):
+ (WebCore::SegmentedString::currentLine):
+ (WebCore::SegmentedString::currentColumn):
+ (WebCore::SegmentedString::setCurrentPosition):
+ * platform/text/SegmentedString.h:
+ (WebCore::SegmentedString::SegmentedString):
+ (WebCore::SegmentedString::advancePastNewline):
+ (WebCore::SegmentedString::advance):
+ (WebCore::SegmentedString::numberOfCharactersConsumed):
+
2010-12-25 Andreas Kling <kling@webkit.org>
Reviewed by Kenneth Rohde Christiansen.
: ScriptableDocumentParser(document)
, m_tokenizer(HTMLTokenizer::create(usePreHTML5ParserQuirks(document)))
, m_scriptRunner(HTMLScriptRunner::create(document, this))
- , m_treeBuilder(HTMLTreeBuilder::create(m_tokenizer.get(), document, reportErrors, usePreHTML5ParserQuirks(document)))
+ , m_treeBuilder(HTMLTreeBuilder::create(this, document, reportErrors, usePreHTML5ParserQuirks(document)))
, m_parserScheduler(HTMLParserScheduler::create(this))
, m_endWasDelayed(false)
, m_writeNestingLevel(0)
HTMLDocumentParser::HTMLDocumentParser(DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission)
: ScriptableDocumentParser(fragment->document())
, m_tokenizer(HTMLTokenizer::create(usePreHTML5ParserQuirks(fragment->document())))
- , m_treeBuilder(HTMLTreeBuilder::create(m_tokenizer.get(), fragment, contextElement, scriptingPermission, usePreHTML5ParserQuirks(fragment->document())))
+ , m_treeBuilder(HTMLTreeBuilder::create(this, fragment, contextElement, scriptingPermission, usePreHTML5ParserQuirks(fragment->document())))
, m_endWasDelayed(false)
, m_writeNestingLevel(0)
{
TextPosition0 HTMLDocumentParser::textPosition() const
{
- int lineZeroBased = m_tokenizer->lineNumber();
- int columnOneBased = m_tokenizer->columnNumber();
+ const SegmentedString& currentString = m_input.current();
+ WTF::ZeroBasedNumber line = currentString.currentLine();
+ WTF::ZeroBasedNumber column = currentString.currentColumn();
+ ASSERT(m_tokenizer->lineNumber() == line.zeroBasedInt());
- return TextPosition0(WTF::ZeroBasedNumber::fromZeroBasedInt(lineZeroBased),
- WTF::OneBasedNumber::fromOneBasedInt(columnOneBased).convertToZeroBased());
+ return TextPosition0(line, column);
}
bool HTMLDocumentParser::isWaitingForScripts() const
static bool usePreHTML5ParserQuirks(Document*);
+ HTMLTokenizer* tokenizer() const { return m_tokenizer.get(); }
+
+ virtual TextPosition0 textPosition() const;
+
protected:
virtual void insert(const SegmentedString&);
virtual void append(const SegmentedString&);
HTMLDocumentParser(HTMLDocument*, bool reportErrors);
HTMLDocumentParser(DocumentFragment*, Element* contextElement, FragmentScriptingPermission);
- HTMLTokenizer* tokenizer() const { return m_tokenizer.get(); }
HTMLTreeBuilder* treeBuilder() const { return m_treeBuilder.get(); }
private:
virtual bool isExecutingScript() const;
virtual void executeScriptsWaitingForStylesheets();
virtual int lineNumber() const;
- virtual TextPosition0 textPosition() const;
// HTMLScriptRunnerHost
virtual void watchForLoad(CachedResource*);
}
SegmentedString& current() { return m_first; }
+ const SegmentedString& current() const { return m_first; }
void splitInto(SegmentedString& next)
{
explicit InsertionPointRecord(HTMLInputStream& inputStream)
: m_inputStream(&inputStream)
{
+ m_line = m_inputStream->current().currentLine();
+ m_column = m_inputStream->current().currentColumn();
m_inputStream->splitInto(m_next);
+ // We 'fork' the current position (saved above, before the split) and reuse it for the script-generated part of the input.
+ // This is a bit odd, because generated text does not have a position of its own within the HTML document.
+ m_inputStream->current().setCurrentPosition(m_line, m_column, 0);
}
~InsertionPointRecord()
{
+ // Some inserted text may still remain in the input stream. E.g. if the script has written "&" or "<table",
+ // it stays in the buffer because it cannot be properly tokenized until the next part is seen.
+ int unparsedRemainderLength = m_inputStream->current().length();
m_inputStream->mergeFrom(m_next);
+ // Restore the saved position so that it applies to the character right after the unparsed remainder.
+ m_inputStream->current().setCurrentPosition(m_line, m_column, unparsedRemainderLength);
}
private:
HTMLInputStream* m_inputStream;
SegmentedString m_next;
+ WTF::ZeroBasedNumber m_line;
+ WTF::ZeroBasedNumber m_column;
};
}
#include "DocumentType.h"
#include "Frame.h"
#include "HTMLDocument.h"
+#include "HTMLDocumentParser.h"
#include "HTMLElementFactory.h"
#include "HTMLFormElement.h"
#include "HTMLHtmlElement.h"
};
-HTMLTreeBuilder::HTMLTreeBuilder(HTMLTokenizer* tokenizer, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks)
+HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks)
: m_framesetOk(true)
, m_document(document)
, m_tree(document, FragmentScriptingAllowed, false)
, m_isPaused(false)
, m_insertionMode(InitialMode)
, m_originalInsertionMode(InitialMode)
- , m_tokenizer(tokenizer)
+ , m_parser(parser)
, m_scriptToProcessStartPosition(uninitializedPositionValue1())
, m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
, m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
// FIXME: Member variables should be grouped into self-initializing structs to
// minimize code duplication between these constructors.
-HTMLTreeBuilder::HTMLTreeBuilder(HTMLTokenizer* tokenizer, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks)
+HTMLTreeBuilder::HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks)
: m_framesetOk(true)
, m_fragmentContext(fragment, contextElement, scriptingPermission)
, m_document(m_fragmentContext.document())
, m_isPaused(false)
, m_insertionMode(InitialMode)
, m_originalInsertionMode(InitialMode)
- , m_tokenizer(tokenizer)
+ , m_parser(parser)
, m_scriptToProcessStartPosition(uninitializedPositionValue1())
, m_lastScriptElementStartPosition(TextPosition0::belowRangePosition())
, m_usePreHTML5ParserQuirks(usePreHTML5ParserQuirks)
// Swallowing U+0000 characters isn't in the HTML5 spec, but turning all
// the U+0000 characters into replacement characters has compatibility
// problems.
- m_tokenizer->setForceNullCharacterReplacement(m_insertionMode == TextMode || m_insertionMode == InForeignContentMode);
- m_tokenizer->setShouldAllowCDATA(m_insertionMode == InForeignContentMode && m_tree.currentElement()->namespaceURI() != xhtmlNamespaceURI);
+ m_parser->tokenizer()->setForceNullCharacterReplacement(m_insertionMode == TextMode || m_insertionMode == InForeignContentMode);
+ m_parser->tokenizer()->setShouldAllowCDATA(m_insertionMode == InForeignContentMode && m_tree.currentElement()->namespaceURI() != xhtmlNamespaceURI);
}
void HTMLTreeBuilder::processToken(AtomicHTMLToken& token)
if (token.name() == preTag || token.name() == listingTag) {
processFakePEndTagIfPInButtonScope();
m_tree.insertHTMLElement(token);
- m_tokenizer->setSkipLeadingNewLineForListing(true);
+ m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
m_framesetOk = false;
return;
}
if (token.name() == plaintextTag) {
processFakePEndTagIfPInButtonScope();
m_tree.insertHTMLElement(token);
- m_tokenizer->setState(HTMLTokenizer::PLAINTEXTState);
+ m_parser->tokenizer()->setState(HTMLTokenizer::PLAINTEXTState);
return;
}
if (token.name() == buttonTag) {
}
if (token.name() == textareaTag) {
m_tree.insertHTMLElement(token);
- m_tokenizer->setSkipLeadingNewLineForListing(true);
- m_tokenizer->setState(HTMLTokenizer::RCDATAState);
+ m_parser->tokenizer()->setSkipLeadingNewLineForListing(true);
+ m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
m_originalInsertionMode = m_insertionMode;
m_framesetOk = false;
setInsertionMode(TextMode);
// self-closing script tag was encountered and pre-HTML5 parser
// quirks are enabled. We must set the tokenizer's state to
// DataState explicitly if the tokenizer didn't have a chance to.
- ASSERT(m_tokenizer->state() == HTMLTokenizer::DataState || m_usePreHTML5ParserQuirks);
- m_tokenizer->setState(HTMLTokenizer::DataState);
+ ASSERT(m_parser->tokenizer()->state() == HTMLTokenizer::DataState || m_usePreHTML5ParserQuirks);
+ m_parser->tokenizer()->setState(HTMLTokenizer::DataState);
return;
}
m_tree.openElements()->pop();
{
ASSERT(token.type() == HTMLToken::StartTag);
m_tree.insertHTMLElement(token);
- m_tokenizer->setState(HTMLTokenizer::RCDATAState);
+ m_parser->tokenizer()->setState(HTMLTokenizer::RCDATAState);
m_originalInsertionMode = m_insertionMode;
setInsertionMode(TextMode);
}
{
ASSERT(token.type() == HTMLToken::StartTag);
m_tree.insertHTMLElement(token);
- m_tokenizer->setState(HTMLTokenizer::RAWTEXTState);
+ m_parser->tokenizer()->setState(HTMLTokenizer::RAWTEXTState);
m_originalInsertionMode = m_insertionMode;
setInsertionMode(TextMode);
}
{
ASSERT(token.type() == HTMLToken::StartTag);
m_tree.insertScriptElement(token);
- m_tokenizer->setState(HTMLTokenizer::ScriptDataState);
+ m_parser->tokenizer()->setState(HTMLTokenizer::ScriptDataState);
m_originalInsertionMode = m_insertionMode;
- TextPosition0 position = TextPosition0(WTF::ZeroBasedNumber::fromZeroBasedInt(m_tokenizer->lineNumber()), WTF::ZeroBasedNumber::base());
+ TextPosition0 position = m_parser->textPosition();
+
+ ASSERT(position.m_line.zeroBasedInt() == m_parser->tokenizer()->lineNumber());
+
m_lastScriptElementStartPosition = position;
setInsertionMode(TextMode);
class HTMLToken;
class HTMLDocument;
class Node;
+class HTMLDocumentParser;
class HTMLTreeBuilder : public Noncopyable {
public:
- static PassOwnPtr<HTMLTreeBuilder> create(HTMLTokenizer* tokenizer, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks)
+ static PassOwnPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, HTMLDocument* document, bool reportErrors, bool usePreHTML5ParserQuirks)
{
- return adoptPtr(new HTMLTreeBuilder(tokenizer, document, reportErrors, usePreHTML5ParserQuirks));
+ return adoptPtr(new HTMLTreeBuilder(parser, document, reportErrors, usePreHTML5ParserQuirks));
}
- static PassOwnPtr<HTMLTreeBuilder> create(HTMLTokenizer* tokenizer, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks)
+ static PassOwnPtr<HTMLTreeBuilder> create(HTMLDocumentParser* parser, DocumentFragment* fragment, Element* contextElement, FragmentScriptingPermission scriptingPermission, bool usePreHTML5ParserQuirks)
{
- return adoptPtr(new HTMLTreeBuilder(tokenizer, fragment, contextElement, scriptingPermission, usePreHTML5ParserQuirks));
+ return adoptPtr(new HTMLTreeBuilder(parser, fragment, contextElement, scriptingPermission, usePreHTML5ParserQuirks));
}
~HTMLTreeBuilder();
AfterAfterFramesetMode,
};
- HTMLTreeBuilder(HTMLTokenizer*, HTMLDocument*, bool reportErrors, bool usePreHTML5ParserQuirks);
- HTMLTreeBuilder(HTMLTokenizer*, DocumentFragment*, Element* contextElement, FragmentScriptingPermission, bool usePreHTML5ParserQuirks);
+ HTMLTreeBuilder(HTMLDocumentParser* parser, HTMLDocument*, bool reportErrors, bool usePreHTML5ParserQuirks);
+ HTMLTreeBuilder(HTMLDocumentParser* parser, DocumentFragment*, Element* contextElement, FragmentScriptingPermission, bool usePreHTML5ParserQuirks);
void processToken(AtomicHTMLToken&);
// http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#pending-table-character-tokens
Vector<UChar> m_pendingTableCharacters;
- // HTML5 spec requires that we be able to change the state of the tokenizer
- // from within parser actions.
- HTMLTokenizer* m_tokenizer;
+ // We keep a pointer to the parser because the HTML5 spec requires that we be able to change the state of the tokenizer
+ // from within parser actions. We also need it to track the current position.
+ HTMLDocumentParser* m_parser;
RefPtr<Element> m_scriptToProcess; // <script> tag which needs processing before resuming the parser.
TextPosition1 m_scriptToProcessStartPosition; // Starting line number of the script tag needing processing.
m_currentChar = other.m_currentChar;
m_closed = other.m_closed;
m_numberOfCharactersConsumedPriorToCurrentString = other.m_numberOfCharactersConsumedPriorToCurrentString;
+ m_numberOfCharactersConsumedPriorToCurrentLine = other.m_numberOfCharactersConsumedPriorToCurrentLine;
+ m_currentLine = other.m_currentLine;
+
return *this;
}
}
}
+int SegmentedString::numberOfCharactersConsumedSlow() const
+{
+ int result = m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed();
+ if (m_pushedChar1) {
+ --result;
+ if (m_pushedChar2)
+ --result;
+ }
+ return result;
+}
+
String SegmentedString::toString() const
{
String result;
m_pushedChar1 = m_pushedChar2;
m_pushedChar2 = 0;
} else if (m_currentString.m_current) {
- if (*m_currentString.m_current++ == '\n' && m_currentString.doNotExcludeLineNumbers())
+ if (*m_currentString.m_current++ == '\n' && m_currentString.doNotExcludeLineNumbers()) {
++lineNumber;
+ ++m_currentLine;
+ m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed();
+ }
if (--m_currentString.m_length == 0)
advanceSubstring();
}
m_currentChar = m_pushedChar1 ? &m_pushedChar1 : m_currentString.m_current;
}
+WTF::ZeroBasedNumber SegmentedString::currentLine() const
+{
+ return WTF::ZeroBasedNumber::fromZeroBasedInt(m_currentLine);
+}
+
+WTF::ZeroBasedNumber SegmentedString::currentColumn() const
+{
+ int zeroBasedColumn = numberOfCharactersConsumedSlow() - m_numberOfCharactersConsumedPriorToCurrentLine;
+ return WTF::ZeroBasedNumber::fromZeroBasedInt(zeroBasedColumn);
+}
+
+void SegmentedString::setCurrentPosition(WTF::ZeroBasedNumber line, WTF::ZeroBasedNumber columnAftreProlog, int prologLength)
+{
+ m_currentLine = line.zeroBasedInt();
+ m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumedSlow() + prologLength - columnAftreProlog.zeroBasedInt();
+}
+
}
#include "PlatformString.h"
#include <wtf/Deque.h>
+#include <wtf/text/TextPosition.h>
namespace WebCore {
, m_pushedChar2(0)
, m_currentChar(0)
, m_numberOfCharactersConsumedPriorToCurrentString(0)
+ , m_numberOfCharactersConsumedPriorToCurrentLine(0)
+ , m_currentLine(0)
, m_composite(false)
, m_closed(false)
{
, m_currentString(str)
, m_currentChar(m_currentString.m_current)
, m_numberOfCharactersConsumedPriorToCurrentString(0)
+ , m_numberOfCharactersConsumedPriorToCurrentLine(0)
+ , m_currentLine(0)
, m_composite(false)
, m_closed(false)
{
{
ASSERT(*current() == '\n');
if (!m_pushedChar1 && m_currentString.m_length > 1) {
- lineNumber += m_currentString.doNotExcludeLineNumbers();
+ int newLineFlag = m_currentString.doNotExcludeLineNumbers();
+ lineNumber += newLineFlag;
+ m_currentLine += newLineFlag;
+ if (newLineFlag)
+ m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed();
--m_currentString.m_length;
m_currentChar = ++m_currentString.m_current;
return;
void advance(int& lineNumber)
{
if (!m_pushedChar1 && m_currentString.m_length > 1) {
- lineNumber += (*m_currentString.m_current == '\n') & m_currentString.doNotExcludeLineNumbers();
+ int newLineFlag = (*m_currentString.m_current == '\n') & m_currentString.doNotExcludeLineNumbers();
+ lineNumber += newLineFlag;
+ m_currentLine += newLineFlag;
+ if (newLineFlag)
+ m_numberOfCharactersConsumedPriorToCurrentLine = numberOfCharactersConsumed() + 1;
--m_currentString.m_length;
m_currentChar = ++m_currentString.m_current;
return;
bool escaped() const { return m_pushedChar1; }
- int numberOfCharactersConsumed()
+ int numberOfCharactersConsumed() const
{
// We don't currently handle the case when there are pushed character.
ASSERT(!m_pushedChar1);
return m_numberOfCharactersConsumedPriorToCurrentString + m_currentString.numberOfCharactersConsumed();
}
+ int numberOfCharactersConsumedSlow() const;
+
String toString() const;
const UChar& operator*() const { return *current(); }
const UChar* operator->() const { return current(); }
+
+ // This method is moderately slow compared to the currentLine method.
+ WTF::ZeroBasedNumber currentColumn() const;
+ WTF::ZeroBasedNumber currentLine() const;
+ // Sets the values of the line/column variables. The column is specified indirectly by the parameter columnAftreProlog,
+ // which is the column value that we should get after a prolog (the first prologLength characters) has been consumed.
+ void setCurrentPosition(WTF::ZeroBasedNumber line, WTF::ZeroBasedNumber columnAftreProlog, int prologLength);
+
private:
void append(const SegmentedSubstring&);
void prepend(const SegmentedSubstring&);
SegmentedSubstring m_currentString;
const UChar* m_currentChar;
int m_numberOfCharactersConsumedPriorToCurrentString;
+ int m_numberOfCharactersConsumedPriorToCurrentLine;
+ int m_currentLine;
Deque<SegmentedSubstring> m_substrings;
bool m_composite;
bool m_closed;