8222 lines
332 KiB
JavaScript
8222 lines
332 KiB
JavaScript
|
/**
 * Code points ending in FFFE or FFFF, one pair per plane from plane 0
 * (U+FFFE, U+FFFF) up to plane 16 (U+10FFFE, U+10FFFF).
 * Consulted by `isUndefinedCodePoint`.
 */
const UNDEFINED_CODE_POINTS = new Set([
    0xfffe, 0xffff,
    0x1fffe, 0x1ffff,
    0x2fffe, 0x2ffff,
    0x3fffe, 0x3ffff,
    0x4fffe, 0x4ffff,
    0x5fffe, 0x5ffff,
    0x6fffe, 0x6ffff,
    0x7fffe, 0x7ffff,
    0x8fffe, 0x8ffff,
    0x9fffe, 0x9ffff,
    0xafffe, 0xaffff,
    0xbfffe, 0xbffff,
    0xcfffe, 0xcffff,
    0xdfffe, 0xdffff,
    0xefffe, 0xeffff,
    0xffffe, 0xfffff,
    0x10fffe, 0x10ffff,
]);
|
||
|
/** U+FFFD REPLACEMENT CHARACTER as a one-character string. */
const REPLACEMENT_CHARACTER = '\uFFFD';
|
||
|
/**
 * Numeric enum of the code points the parser refers to by name.
 * Every entry is registered in both directions (name -> value and
 * value -> name). `EOF` is the sentinel -1 rather than a real code point.
 */
var CODE_POINTS;
(function (table) {
    const NAMED_CODE_POINTS = [
        ['EOF', -1],
        ['NULL', 0],
        ['TABULATION', 9],
        ['CARRIAGE_RETURN', 13],
        ['LINE_FEED', 10],
        ['FORM_FEED', 12],
        ['SPACE', 32],
        ['EXCLAMATION_MARK', 33],
        ['QUOTATION_MARK', 34],
        ['AMPERSAND', 38],
        ['APOSTROPHE', 39],
        ['HYPHEN_MINUS', 45],
        ['SOLIDUS', 47],
        ['DIGIT_0', 48],
        ['DIGIT_9', 57],
        ['SEMICOLON', 59],
        ['LESS_THAN_SIGN', 60],
        ['EQUALS_SIGN', 61],
        ['GREATER_THAN_SIGN', 62],
        ['QUESTION_MARK', 63],
        ['LATIN_CAPITAL_A', 65],
        ['LATIN_CAPITAL_Z', 90],
        ['RIGHT_SQUARE_BRACKET', 93],
        ['GRAVE_ACCENT', 96],
        ['LATIN_SMALL_A', 97],
        ['LATIN_SMALL_Z', 122],
    ];
    for (const [label, value] of NAMED_CODE_POINTS) {
        // Forward mapping first, then the reverse one, as a numeric enum does.
        table[label] = value;
        table[value] = label;
    }
})(CODE_POINTS || (CODE_POINTS = {}));
|
||
|
/**
 * Literal character sequences looked up by name.
 * The keyword entries are spelled in lower case.
 */
const SEQUENCES = {
    'DASH_DASH': '--',
    'CDATA_START': '[CDATA[',
    'DOCTYPE': 'doctype',
    'SCRIPT': 'script',
    'PUBLIC': 'public',
    'SYSTEM': 'system',
};
|
||
|
//Surrogates
|
||
|
/**
 * Tells whether `cp` lies in the UTF-16 surrogate range U+D800..U+DFFF
 * (high or low surrogate).
 *
 * @param cp Code unit / code point to test.
 * @returns `true` when `cp` is any surrogate.
 */
function isSurrogate(cp) {
    const FIRST_SURROGATE = 0xd800;
    const LAST_SURROGATE = 0xdfff;
    return cp <= LAST_SURROGATE && cp >= FIRST_SURROGATE;
}
|
||
|
/**
 * Tells whether `cp` is a LOW (trailing) surrogate, U+DC00..U+DFFF, i.e. a
 * code unit that can complete a surrogate pair.
 *
 * @param cp Code unit to test (normally the unit following a surrogate).
 * @returns `true` when `cp` is a low surrogate.
 */
function isSurrogatePair(cp) {
    const FIRST_LOW_SURROGATE = 0xdc00;
    const LAST_LOW_SURROGATE = 0xdfff;
    return cp <= LAST_LOW_SURROGATE && cp >= FIRST_LOW_SURROGATE;
}
|
||
|
/**
 * Combines the two halves of a UTF-16 surrogate pair into one code point.
 *
 * @param cp1 High (leading) surrogate.
 * @param cp2 Low (trailing) surrogate.
 * @returns The supplementary-plane code point the pair encodes.
 */
function getSurrogatePairCodePoint(cp1, cp2) {
    // 0x2400 === 0x10000 - 0xdc00: plane offset minus the low-surrogate base.
    const highContribution = (cp1 - 0xd800) * 0x400;
    return highContribution + 0x2400 + cp2;
}
|
||
|
/**
 * Tells whether `cp` is a control code point: U+0001..U+001F or U+007F..U+009F.
 * NULL and the ASCII whitespace characters (TAB, LF, FF, CR, SPACE) are
 * deliberately excluded.
 *
 * @param cp Code point to test.
 * @returns `true` for a control code point.
 */
function isControlCodePoint(cp) {
    if (cp >= 0x7f && cp <= 0x9f) {
        return true;
    }
    if (!(cp >= 0x01 && cp <= 0x1f)) {
        return false;
    }
    switch (cp) {
        case 0x09: // TAB
        case 0x0a: // LF
        case 0x0c: // FF
        case 0x0d: // CR
        case 0x20: // SPACE (outside the range above; kept for parity)
            return false;
        default:
            return true;
    }
}
|
||
|
/**
 * Tells whether `cp` is one of the code points treated as undefined:
 * the block U+FDD0..U+FDEF, or any member of `UNDEFINED_CODE_POINTS`.
 *
 * @param cp Code point to test.
 * @returns `true` when `cp` falls in either group.
 */
function isUndefinedCodePoint(cp) {
    const inFdd0Block = cp >= 0xfdd0 && cp <= 0xfdef;
    return inFdd0Block || UNDEFINED_CODE_POINTS.has(cp);
}
|
||
|
|
||
|
/**
 * String enum of parse-error codes. Each camelCase key maps to its
 * kebab-case error code string.
 */
var ERR;
(function (codes) {
    Object.assign(codes, {
        controlCharacterInInputStream: 'control-character-in-input-stream',
        noncharacterInInputStream: 'noncharacter-in-input-stream',
        surrogateInInputStream: 'surrogate-in-input-stream',
        nonVoidHtmlElementStartTagWithTrailingSolidus: 'non-void-html-element-start-tag-with-trailing-solidus',
        endTagWithAttributes: 'end-tag-with-attributes',
        endTagWithTrailingSolidus: 'end-tag-with-trailing-solidus',
        unexpectedSolidusInTag: 'unexpected-solidus-in-tag',
        unexpectedNullCharacter: 'unexpected-null-character',
        unexpectedQuestionMarkInsteadOfTagName: 'unexpected-question-mark-instead-of-tag-name',
        invalidFirstCharacterOfTagName: 'invalid-first-character-of-tag-name',
        unexpectedEqualsSignBeforeAttributeName: 'unexpected-equals-sign-before-attribute-name',
        missingEndTagName: 'missing-end-tag-name',
        unexpectedCharacterInAttributeName: 'unexpected-character-in-attribute-name',
        unknownNamedCharacterReference: 'unknown-named-character-reference',
        missingSemicolonAfterCharacterReference: 'missing-semicolon-after-character-reference',
        unexpectedCharacterAfterDoctypeSystemIdentifier: 'unexpected-character-after-doctype-system-identifier',
        unexpectedCharacterInUnquotedAttributeValue: 'unexpected-character-in-unquoted-attribute-value',
        eofBeforeTagName: 'eof-before-tag-name',
        eofInTag: 'eof-in-tag',
        missingAttributeValue: 'missing-attribute-value',
        missingWhitespaceBetweenAttributes: 'missing-whitespace-between-attributes',
        missingWhitespaceAfterDoctypePublicKeyword: 'missing-whitespace-after-doctype-public-keyword',
        missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers: 'missing-whitespace-between-doctype-public-and-system-identifiers',
        missingWhitespaceAfterDoctypeSystemKeyword: 'missing-whitespace-after-doctype-system-keyword',
        missingQuoteBeforeDoctypePublicIdentifier: 'missing-quote-before-doctype-public-identifier',
        missingQuoteBeforeDoctypeSystemIdentifier: 'missing-quote-before-doctype-system-identifier',
        missingDoctypePublicIdentifier: 'missing-doctype-public-identifier',
        missingDoctypeSystemIdentifier: 'missing-doctype-system-identifier',
        abruptDoctypePublicIdentifier: 'abrupt-doctype-public-identifier',
        abruptDoctypeSystemIdentifier: 'abrupt-doctype-system-identifier',
        cdataInHtmlContent: 'cdata-in-html-content',
        incorrectlyOpenedComment: 'incorrectly-opened-comment',
        eofInScriptHtmlCommentLikeText: 'eof-in-script-html-comment-like-text',
        eofInDoctype: 'eof-in-doctype',
        nestedComment: 'nested-comment',
        abruptClosingOfEmptyComment: 'abrupt-closing-of-empty-comment',
        eofInComment: 'eof-in-comment',
        incorrectlyClosedComment: 'incorrectly-closed-comment',
        eofInCdata: 'eof-in-cdata',
        absenceOfDigitsInNumericCharacterReference: 'absence-of-digits-in-numeric-character-reference',
        nullCharacterReference: 'null-character-reference',
        surrogateCharacterReference: 'surrogate-character-reference',
        characterReferenceOutsideUnicodeRange: 'character-reference-outside-unicode-range',
        controlCharacterReference: 'control-character-reference',
        noncharacterCharacterReference: 'noncharacter-character-reference',
        missingWhitespaceBeforeDoctypeName: 'missing-whitespace-before-doctype-name',
        missingDoctypeName: 'missing-doctype-name',
        invalidCharacterSequenceAfterDoctypeName: 'invalid-character-sequence-after-doctype-name',
        duplicateAttribute: 'duplicate-attribute',
        nonConformingDoctype: 'non-conforming-doctype',
        missingDoctype: 'missing-doctype',
        misplacedDoctype: 'misplaced-doctype',
        endTagWithoutMatchingOpenElement: 'end-tag-without-matching-open-element',
        closingOfElementWithOpenChildElements: 'closing-of-element-with-open-child-elements',
        disallowedContentInNoscriptInHead: 'disallowed-content-in-noscript-in-head',
        openElementsLeftAfterEof: 'open-elements-left-after-eof',
        abandonedHeadElementChild: 'abandoned-head-element-child',
        misplacedStartTagForHeadElement: 'misplaced-start-tag-for-head-element',
        nestedNoscriptInHead: 'nested-noscript-in-head',
        eofInElementThatCanContainOnlyText: 'eof-in-element-that-can-contain-only-text',
    });
})(ERR || (ERR = {}));
|
||
|
|
||
|
//Const
|
||
|
const DEFAULT_BUFFER_WATERLINE = 1 << 16;
|
||
|
//Preprocessor
|
||
|
//NOTE: HTML input preprocessing
|
||
|
//(see: http://www.whatwg.org/specs/web-apps/current-work/multipage/parsing.html#preprocessing-the-input-stream)
|
||
|
/**
 * Streaming reader over the HTML source text.
 *
 * Each `advance()` call returns the next code point of the buffered input:
 * CR and CRLF are reported as a single LF, surrogate pairs are combined into
 * one code point, and line / column / offset bookkeeping is maintained.
 * Input-stream problems (isolated surrogates, control characters,
 * noncharacters) are reported through `handler.onParseError` when it is set.
 */
class Preprocessor {
    /**
     * @param handler Object carrying an optional `onParseError(error)` callback.
     *     When `onParseError` is `null`, `advance()` skips the detailed
     *     character validation.
     */
    constructor(handler) {
        this.handler = handler;
        // Buffered source text (already-parsed text may be dropped from the front).
        this.html = '';
        // Index in `html` of the code unit most recently returned by `advance()`.
        this.pos = -1;
        // NOTE: Initial `lastGapPos` is -2, to ensure `col` on initialisation is 0
        this.lastGapPos = -2;
        // Previous `lastGapPos` values, restored by `retreat()`.
        this.gapStack = [];
        // Set after a CR so that an immediately following LF is swallowed.
        this.skipNextNewLine = false;
        this.lastChunkWritten = false;
        // Set when a read ran off the buffer while more chunks may still arrive.
        this.endOfChunkHit = false;
        this.bufferWaterline = DEFAULT_BUFFER_WATERLINE;
        // True when the last returned code point ended a line.
        this.isEol = false;
        this.lineStartPos = 0;
        // Number of code units removed from the front of `html` so far.
        this.droppedBufferSize = 0;
        this.line = 1;
        //NOTE: avoid reporting errors twice on advance/retreat
        this.lastErrOffset = -1;
    }

    /** The column on the current line. If we just saw a gap (eg. a surrogate pair), return the index before. */
    get col() {
        return this.pos - this.lineStartPos + Number(this.lastGapPos !== this.pos);
    }

    /** Offset of the current position, counting code units already dropped from the buffer. */
    get offset() {
        return this.droppedBufferSize + this.pos;
    }

    /**
     * Builds a zero-width error location at the current position.
     *
     * @param code Error code to store in the result.
     * @param cpOffset Delta added to the current column and offset.
     * @returns Location object whose start and end coincide.
     */
    getError(code, cpOffset) {
        const { line, col, offset } = this;
        const startCol = col + cpOffset;
        const startOffset = offset + cpOffset;
        return {
            code,
            startLine: line,
            endLine: line,
            startCol,
            endCol: startCol,
            startOffset,
            endOffset: startOffset,
        };
    }

    /**
     * Reports `code` to `handler.onParseError`, at most once per offset.
     *
     * @param code Error code to report.
     */
    _err(code) {
        if (this.handler.onParseError && this.lastErrOffset !== this.offset) {
            this.lastErrOffset = this.offset;
            this.handler.onParseError(this.getError(code, 0));
        }
    }

    /** Records the current position as a gap, remembering the previous gap position. */
    _addGap() {
        this.gapStack.push(this.lastGapPos);
        this.lastGapPos = this.pos;
    }

    /**
     * Handles a surrogate code unit found at the current position.
     *
     * A high surrogate followed by a low surrogate is combined into one
     * supplementary code point (consuming the second unit and recording a
     * gap). If the buffer ends right here and more chunks may follow,
     * `EOF` is returned with `endOfChunkHit` set. Anything else is an
     * isolated surrogate: an error is reported and `cp` is returned as is.
     *
     * @param cp Surrogate code unit at `this.pos`.
     * @returns Combined code point, `CODE_POINTS.EOF`, or `cp` itself.
     */
    _processSurrogate(cp) {
        //NOTE: try to peek a surrogate pair
        if (this.pos !== this.html.length - 1) {
            const nextCp = this.html.charCodeAt(this.pos + 1);
            // Only a HIGH surrogate may start a pair. Without this check two
            // consecutive low surrogates were "combined" into a value above
            // U+10FFFF, which is not a valid code point.
            const isHighSurrogate = cp >= 0xd800 && cp <= 0xdbff;
            // NOTE: `isSurrogatePair` tests for a low (trailing) surrogate.
            if (isHighSurrogate && isSurrogatePair(nextCp)) {
                //NOTE: we have a surrogate pair. Peek pair character and recalculate code point.
                this.pos++;
                //NOTE: add a gap that should be avoided during retreat
                this._addGap();
                return getSurrogatePairCodePoint(cp, nextCp);
            }
        }
        //NOTE: we are at the end of a chunk, therefore we can't infer the surrogate pair yet.
        else if (!this.lastChunkWritten) {
            this.endOfChunkHit = true;
            return CODE_POINTS.EOF;
        }
        //NOTE: isolated surrogate
        this._err(ERR.surrogateInInputStream);
        return cp;
    }

    /** @returns `true` when `pos` has moved past `bufferWaterline`. */
    willDropParsedChunk() {
        return this.pos > this.bufferWaterline;
    }

    /**
     * Discards the buffered text before `pos` once `willDropParsedChunk()`
     * holds, rebasing `pos` / `lineStartPos` and resetting gap tracking.
     */
    dropParsedChunk() {
        if (this.willDropParsedChunk()) {
            this.html = this.html.substring(this.pos);
            this.lineStartPos -= this.pos;
            this.droppedBufferSize += this.pos;
            this.pos = 0;
            this.lastGapPos = -2;
            this.gapStack.length = 0;
        }
    }

    /**
     * Appends a chunk of source text to the buffer.
     *
     * @param chunk Text to append.
     * @param isLastChunk Whether no further chunks will be written.
     */
    write(chunk, isLastChunk) {
        if (this.html.length > 0) {
            this.html += chunk;
        }
        else {
            this.html = chunk;
        }
        this.endOfChunkHit = false;
        this.lastChunkWritten = isLastChunk;
    }

    /**
     * Inserts `chunk` into the buffer immediately after the current position.
     *
     * @param chunk Text to insert.
     */
    insertHtmlAtCurrentPos(chunk) {
        this.html = this.html.substring(0, this.pos + 1) + chunk + this.html.substring(this.pos + 1);
        this.endOfChunkHit = false;
    }

    /**
     * Tests whether the buffer, starting at the current position, begins
     * with `pattern`.
     *
     * @param pattern Text to look for. In case-insensitive mode each buffer
     *     code unit is OR-ed with 0x20 before comparison, so the pattern is
     *     presumably expected in lower case - confirm at call sites.
     * @param caseSensitive Compare exactly when true.
     * @returns `true` on a match; `false` otherwise, including when the
     *     buffer is too short (then `endOfChunkHit` is set unless the last
     *     chunk was written).
     */
    startsWith(pattern, caseSensitive) {
        // Check if our buffer has enough characters
        if (this.pos + pattern.length > this.html.length) {
            this.endOfChunkHit = !this.lastChunkWritten;
            return false;
        }
        if (caseSensitive) {
            return this.html.startsWith(pattern, this.pos);
        }
        for (let i = 0; i < pattern.length; i++) {
            const cp = this.html.charCodeAt(this.pos + i) | 0x20;
            if (cp !== pattern.charCodeAt(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Reads the code unit `offset` positions ahead without moving.
     *
     * @param offset Distance from the current position.
     * @returns The code unit (CR is reported as LF), or `CODE_POINTS.EOF`
     *     when the position lies beyond the buffer.
     */
    peek(offset) {
        const pos = this.pos + offset;
        if (pos >= this.html.length) {
            this.endOfChunkHit = !this.lastChunkWritten;
            return CODE_POINTS.EOF;
        }
        const code = this.html.charCodeAt(pos);
        return code === CODE_POINTS.CARRIAGE_RETURN ? CODE_POINTS.LINE_FEED : code;
    }

    /**
     * Moves to the next code point and returns it.
     *
     * @returns The next code point (CR / CRLF normalised to LF, surrogate
     *     pairs combined), or `CODE_POINTS.EOF` when the buffer is exhausted.
     */
    advance() {
        this.pos++;
        //NOTE: LF should be in the last column of the line
        if (this.isEol) {
            this.isEol = false;
            this.line++;
            this.lineStartPos = this.pos;
        }
        if (this.pos >= this.html.length) {
            this.endOfChunkHit = !this.lastChunkWritten;
            return CODE_POINTS.EOF;
        }
        let cp = this.html.charCodeAt(this.pos);
        //NOTE: all U+000D CARRIAGE RETURN (CR) characters must be converted to U+000A LINE FEED (LF) characters
        if (cp === CODE_POINTS.CARRIAGE_RETURN) {
            this.isEol = true;
            this.skipNextNewLine = true;
            return CODE_POINTS.LINE_FEED;
        }
        //NOTE: any U+000A LINE FEED (LF) characters that immediately follow a U+000D CARRIAGE RETURN (CR) character
        //must be ignored.
        if (cp === CODE_POINTS.LINE_FEED) {
            this.isEol = true;
            if (this.skipNextNewLine) {
                // `line` will be bumped again in the recursive call.
                this.line--;
                this.skipNextNewLine = false;
                this._addGap();
                return this.advance();
            }
        }
        this.skipNextNewLine = false;
        if (isSurrogate(cp)) {
            cp = this._processSurrogate(cp);
        }
        //OPTIMIZATION: first check if code point is in the common allowed
        //range (ASCII alphanumeric, whitespaces, big chunk of BMP)
        //before going into detailed performance cost validation.
        const isCommonValidRange = this.handler.onParseError === null ||
            (cp > 0x1f && cp < 0x7f) ||
            cp === CODE_POINTS.LINE_FEED ||
            cp === CODE_POINTS.CARRIAGE_RETURN ||
            (cp > 0x9f && cp < 64976);
        if (!isCommonValidRange) {
            this._checkForProblematicCharacters(cp);
        }
        return cp;
    }

    /**
     * Reports an error when `cp` is a control code point or a noncharacter.
     *
     * @param cp Code point to validate.
     */
    _checkForProblematicCharacters(cp) {
        if (isControlCodePoint(cp)) {
            this._err(ERR.controlCharacterInInputStream);
        }
        else if (isUndefinedCodePoint(cp)) {
            this._err(ERR.noncharacterInInputStream);
        }
    }

    /**
     * Moves back by `count` positions, stepping one extra position for every
     * recorded gap that is crossed.
     *
     * @param count Number of positions to step back.
     */
    retreat(count) {
        this.pos -= count;
        while (this.pos < this.lastGapPos) {
            this.lastGapPos = this.gapStack.pop();
            this.pos--;
        }
        this.isEol = false;
    }
}
|
||
|
|
||
|
/**
 * Numeric enum of token kinds, numbered 0..8 in declaration order and
 * registered in both directions (name -> ordinal, ordinal -> name).
 */
var TokenType;
(function (registry) {
    const ORDERED_NAMES = [
        'CHARACTER',
        'NULL_CHARACTER',
        'WHITESPACE_CHARACTER',
        'START_TAG',
        'END_TAG',
        'COMMENT',
        'DOCTYPE',
        'EOF',
        'HIBERNATION',
    ];
    ORDERED_NAMES.forEach((label, ordinal) => {
        registry[label] = ordinal;
        registry[ordinal] = label;
    });
})(TokenType || (TokenType = {}));
|
||
|
/**
 * Looks up an attribute value on a token, scanning `token.attrs` from the
 * end so that the LAST attribute with a matching name wins.
 *
 * @param token Object with an `attrs` array of `{ name, value }` entries.
 * @param attrName Attribute name to find (exact match).
 * @returns The matching attribute's value, or `null` when none matches.
 */
function getTokenAttr(token, attrName) {
    let index = token.attrs.length;
    while (--index >= 0) {
        const candidate = token.attrs[index];
        if (candidate.name === attrName) {
            return candidate.value;
        }
    }
    return null;
}
|
||
|
|
||
|
// Generated using scripts/write-decode-map.ts
|
||
|
var htmlDecodeTree = new Uint16Array(
|
||
|
// prettier-ignore
|
||
|
"\u1d41<\xd5\u0131\u028a\u049d\u057b\u05d0\u0675\u06de\u07a2\u07d6\u080f\u0a4a\u0a91\u0da1\u0e6d\u0f09\u0f26\u10ca\u1228\u12e1\u1415\u149d\u14c3\u14df\u1525\0\0\0\0\0\0\u156b\u16cd\u198d\u1c12\u1ddd\u1f7e\u2060\u21b0\u228d\u23c0\u23fb\u2442\u2824\u2912\u2d08\u2e48\u2fce\u3016\u32ba\u3639\u37ac\u38fe\u3a28\u3a71\u3ae0\u3b2e\u0800EMabcfglmnoprstu\\bfms\x7f\x84\x8b\x90\x95\x98\xa6\xb3\xb9\xc8\xcflig\u803b\xc6\u40c6P\u803b&\u4026cute\u803b\xc1\u40c1reve;\u4102\u0100iyx}rc\u803b\xc2\u40c2;\u4410r;\uc000\ud835\udd04rave\u803b\xc0\u40c0pha;\u4391acr;\u4100d;\u6a53\u0100gp\x9d\xa1on;\u4104f;\uc000\ud835\udd38plyFunction;\u6061ing\u803b\xc5\u40c5\u0100cs\xbe\xc3r;\uc000\ud835\udc9cign;\u6254ilde\u803b\xc3\u40c3ml\u803b\xc4\u40c4\u0400aceforsu\xe5\xfb\xfe\u0117\u011c\u0122\u0127\u012a\u0100cr\xea\xf2kslash;\u6216\u0176\xf6\xf8;\u6ae7ed;\u6306y;\u4411\u0180crt\u0105\u010b\u0114ause;\u6235noullis;\u612ca;\u4392r;\uc000\ud835\udd05pf;\uc000\ud835\udd39eve;\u42d8c\xf2\u0113mpeq;\u624e\u0700HOacdefhilorsu\u014d\u0151\u0156\u0180\u019e\u01a2\u01b5\u01b7\u01ba\u01dc\u0215\u0273\u0278\u027ecy;\u4427PY\u803b\xa9\u40a9\u0180cpy\u015d\u0162\u017aute;\u4106\u0100;i\u0167\u0168\u62d2talDifferentialD;\u6145leys;\u612d\u0200aeio\u0189\u018e\u0194\u0198ron;\u410cdil\u803b\xc7\u40c7rc;\u4108nint;\u6230ot;\u410a\u0100dn\u01a7\u01adilla;\u40b8terDot;\u40b7\xf2\u017fi;\u43a7rcle\u0200DMPT\u01c7\u01cb\u01d1\u01d6ot;\u6299inus;\u6296lus;\u6295imes;\u6297o\u0100cs\u01e2\u01f8kwiseContourIntegral;\u6232eCurly\u0100DQ\u0203\u020foubleQuote;\u601duote;\u6019\u0200lnpu\u021e\u0228\u0247\u0255on\u0100;e\u0225\u0226\u6237;\u6a74\u0180git\u022f\u0236\u023aruent;\u6261nt;\u622fourIntegral;\u622e\u0100fr\u024c\u024e;\u6102oduct;\u6210nterClockwiseContourIntegral;\u6233oss;\u6a2fcr;\uc000\ud835\udc9ep\u0100;C\u0284\u0285\u62d3ap;\u624d\u0580DJSZacefios\u02a0\u02ac\u02b0\u02b4\u02b8\u02cb\u02d7\u02e1\u02e6\u0333\u048d\u0100;o\u0179\u02a5trahd;\u6911cy;\u4402cy;\u4405cy;\u440f\u0180grs\u02bf\u02c4\u02c7ger;\u6
021r;\u61a1hv;\u6ae4\u0100ay\u02d0\u02d5ron;\u410e;\u4414l\u0100;t\u02dd\u02de\u6207a;\u4394r;\uc000\ud835\udd07\u0100af\u02eb\u0327\u0100cm\u02f0\u0322ritical\u0200ADGT\u0300\u0306\u0316\u031ccute;\u40b4o\u0174\u030b\u030d;\u42d9bleAcute;\u42ddrave;\u4060ilde;\u42dcond;\u62c4ferentialD;\u6146\u0470\u033d\0\0\0\u0342\u0354\0\u0405f;\uc000\ud835\udd3b\u0180;DE\u0348\u0349\u034d\u40a8ot;\u60dcqual;\u6250ble\u0300CDLRUV\u0363\u0372\u0382\u03cf\u03e2\u03f8ontourIntegra\xec\u0239o\u0274\u0379\0\0\u037b\xbb\u0349nArrow;\u61d3\u0100eo\u0387\u03a4ft\u0180ART\u0390\u0396\u03a1rrow;\u61d0ightArrow;\u61d4e\xe5\u02cang\u0100LR\u03ab\u03c4eft\u0100AR\u03b3\u03b9rrow;\u67f8ightArrow;\u67faightArrow;\u67f9ight\u0100AT\u03d8\u03derrow;\u61d2ee;\u62a8p\u0241\u03e9\0\0\u03efrrow;\u61d1ownArrow;\u61d5erticalBar;\u6225n\u0300ABLRTa\u0412\u042a\u0430\u045e\u047f\u037crrow\u0180;BU\u041d\u041e\u0422\u6193ar;\u6913pArrow;\u61f5reve;\u4311eft\u02d2\u043a\0\u0446\0\u0450ightVector;\u6950eeVector;\u695eector\u0100;B\u0459\u045a\u61bdar;\u6956ight\u01d4\u0467\0\u0471eeVector;\u695fector\u0100;B\u047a\u047b\u61c1ar;\u6957ee\u0100;A\u0486\u0487\u62a4rrow;\u61a7\u0100ct\u0492\u0497r;\uc000\ud835\udc9frok;\u4110\u0800NTacdfglmopqstux\u04bd\u04c0\u04c4\u04cb\u04de\u04e2\u04e7\u04ee\u04f5\u0521\u052f\u0536\u0552\u055d\u0560\u0565G;\u414aH\u803b\xd0\u40d0cute\u803b\xc9\u40c9\u0180aiy\u04d2\u04d7\u04dcron;\u411arc\u803b\xca\u40ca;\u442dot;\u4116r;\uc000\ud835\udd08rave\u803b\xc8\u40c8ement;\u6208\u0100ap\u04fa\u04fecr;\u4112ty\u0253\u0506\0\0\u0512mallSquare;\u65fberySmallSquare;\u65ab\u0100gp\u0526\u052aon;\u4118f;\uc000\ud835\udd3csilon;\u4395u\u0100ai\u053c\u0549l\u0100;T\u0542\u0543\u6a75ilde;\u6242librium;\u61cc\u0100ci\u0557\u055ar;\u6130m;\u6a73a;\u4397ml\u803b\xcb\u40cb\u0100ip\u056a\u056fsts;\u6203onentialE;\u6147\u0280cfios\u0585\u0588\u058d\u05b2\u05ccy;\u4424r;\uc000\ud835\udd09lled\u0253\u0597\0\0\u05a3mallSquare;\u65fcerySmallSquare;\u65aa\u0370\u05ba\0\u05bf\0\0\u05c4f;\uc000\ud835\udd
3dAll;\u6200riertrf;\u6131c\xf2\u05cb\u0600JTabcdfgorst\u05e8\u05ec\u05ef\u05fa\u0600\u0612\u06
|
||
|
.split("")
|
||
|
.map((c) => c.charCodeAt(0)));
|
||
|
|
||
|
// Generated using scripts/write-decode-map.ts
|
||
|
new Uint16Array(
|
||
|
// prettier-ignore
|
||
|
"\u0200aglq\t\x15\x18\x1b\u026d\x0f\0\0\x12p;\u4026os;\u4027t;\u403et;\u403cuot;\u4022"
|
||
|
.split("")
|
||
|
.map((c) => c.charCodeAt(0)));
|
||
|
|
||
|
// Adapted from https://github.com/mathiasbynens/he/blob/36afe179392226cf1b6ccdb16ebbb7a5a844d93a/src/he.js#L106-L134
|
||
|
/**
 * Replacement table applied by `replaceCodePoint`: maps 0 to U+FFFD and
 * selected code points in the C1 control range (0x80..0x9F) to other
 * code points.
 */
const decodeMap = new Map([
    [0x00, 0xfffd],
    // C1 Unicode control character reference replacements
    [0x80, 0x20ac],
    [0x82, 0x201a],
    [0x83, 0x0192],
    [0x84, 0x201e],
    [0x85, 0x2026],
    [0x86, 0x2020],
    [0x87, 0x2021],
    [0x88, 0x02c6],
    [0x89, 0x2030],
    [0x8a, 0x0160],
    [0x8b, 0x2039],
    [0x8c, 0x0152],
    [0x8e, 0x017d],
    [0x91, 0x2018],
    [0x92, 0x2019],
    [0x93, 0x201c],
    [0x94, 0x201d],
    [0x95, 0x2022],
    [0x96, 0x2013],
    [0x97, 0x2014],
    [0x98, 0x02dc],
    [0x99, 0x2122],
    [0x9a, 0x0161],
    [0x9b, 0x203a],
    [0x9c, 0x0153],
    [0x9e, 0x017e],
    [0x9f, 0x0178],
]);
|
||
|
/**
 * Normalises a code point:
 *  - surrogates (U+D800..U+DFFF) and values above U+10FFFF become U+FFFD;
 *  - code points listed in `decodeMap` become their mapped replacement;
 *  - everything else is returned unchanged.
 *
 * @param codePoint Numeric code point to normalise.
 * @returns The code point to use in its place.
 */
function replaceCodePoint(codePoint) {
    const isSurrogateValue = codePoint >= 0xd800 && codePoint <= 0xdfff;
    if (isSurrogateValue || codePoint > 0x10ffff) {
        return 0xfffd;
    }
    const mapped = decodeMap.get(codePoint);
    if (mapped === null || mapped === undefined) {
        return codePoint;
    }
    return mapped;
}
|
||
|
|
||
|
/**
 * Numeric enum of the ASCII character codes used by the entity decoder,
 * registered in both directions (name -> code, code -> name).
 */
var CharCodes;
(function (codes) {
    // Each entry pairs the enum name with the character whose code it holds.
    const NAMED_CHARACTERS = [
        ['NUM', '#'],
        ['SEMI', ';'],
        ['EQUALS', '='],
        ['ZERO', '0'],
        ['NINE', '9'],
        ['LOWER_A', 'a'],
        ['LOWER_F', 'f'],
        ['LOWER_X', 'x'],
        ['LOWER_Z', 'z'],
        ['UPPER_A', 'A'],
        ['UPPER_F', 'F'],
        ['UPPER_Z', 'Z'],
    ];
    for (const [label, character] of NAMED_CHARACTERS) {
        const code = character.charCodeAt(0);
        codes[label] = code;
        codes[code] = label;
    }
})(CharCodes || (CharCodes = {}));
|
||
|
/** OR-ing an upper-case ASCII letter's code with this bit (0x20) yields the lower-case letter's code. */
const TO_LOWER_BIT = 0x20;
|
||
|
/**
 * Bit masks applied to a 16-bit decode-tree node, registered in both
 * directions (name -> mask, mask -> name).
 *
 *  - VALUE_LENGTH  0xC000: the two highest bits (read with `>> 14`).
 *  - BRANCH_LENGTH 0x3F80: the seven bits below them.
 *  - JUMP_TABLE    0x007F: the seven lowest bits.
 */
var BinTrieFlags;
(function (flags) {
    const MASKS = [
        ['VALUE_LENGTH', 0xc000],
        ['BRANCH_LENGTH', 0x3f80],
        ['JUMP_TABLE', 0x007f],
    ];
    for (const [label, mask] of MASKS) {
        flags[label] = mask;
        flags[mask] = label;
    }
})(BinTrieFlags || (BinTrieFlags = {}));
|
||
|
/**
 * Tells whether `code` is the character code of an ASCII digit `0`-`9`.
 *
 * @param code Character code to test.
 * @returns `true` for an ASCII decimal digit.
 */
function isNumber(code) {
    const { ZERO, NINE } = CharCodes;
    return code >= ZERO && code <= NINE;
}
|
||
|
/**
 * Tells whether `code` is an ASCII hexadecimal LETTER (`A`-`F` or `a`-`f`).
 * Decimal digits are not covered here; callers combine this with `isNumber`.
 *
 * @param code Character code to test.
 * @returns `true` for a hexadecimal letter.
 */
function isHexadecimalCharacter(code) {
    const isUpperHexLetter = code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_F;
    if (isUpperHexLetter) {
        return true;
    }
    return code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_F;
}
|
||
|
/**
 * Tells whether `code` is an ASCII letter (`A`-`Z`, `a`-`z`) or an ASCII
 * digit.
 *
 * @param code Character code to test.
 * @returns `true` for an ASCII alphanumeric character.
 */
function isAsciiAlphaNumeric$1(code) {
    if (code >= CharCodes.UPPER_A && code <= CharCodes.UPPER_Z) {
        return true;
    }
    if (code >= CharCodes.LOWER_A && code <= CharCodes.LOWER_Z) {
        return true;
    }
    return isNumber(code);
}
|
||
|
/**
 * Tells whether `code` is a character that, when it directly follows an
 * unterminated entity inside an attribute value, prevents that entity from
 * being decoded: `=` or any ASCII alphanumeric.
 *
 * Such attribute values are left undecoded and should not raise a parse
 * error. See the example in
 * https://html.spec.whatwg.org/multipage/parsing.html#named-character-reference-state
 *
 * @param code Character code following the entity.
 * @returns `true` when the entity must not be decoded.
 */
function isEntityInAttributeInvalidEnd(code) {
    if (code === CharCodes.EQUALS) {
        return true;
    }
    return isAsciiAlphaNumeric$1(code);
}
|
||
|
/**
 * Numeric enum of the entity decoder's states, numbered 0..4 in declaration
 * order and registered in both directions.
 */
var EntityDecoderState;
(function (states) {
    const ORDERED_STATES = [
        'EntityStart',
        'NumericStart',
        'NumericDecimal',
        'NumericHex',
        'NamedEntity',
    ];
    ORDERED_STATES.forEach((label, ordinal) => {
        states[label] = ordinal;
        states[ordinal] = label;
    });
})(EntityDecoderState || (EntityDecoderState = {}));
|
||
|
/**
 * Numeric enum selecting how leniently entities are decoded, registered in
 * both directions:
 *  - Legacy (0): entities in text nodes that can end with any character.
 *  - Strict (1): only allow entities terminated with a semicolon.
 *  - Attribute (2): entities in attributes have limitations on ending characters.
 */
var DecodingMode;
(function (modes) {
    const ORDERED_MODES = ['Legacy', 'Strict', 'Attribute'];
    ORDERED_MODES.forEach((label, ordinal) => {
        modes[label] = ordinal;
        modes[ordinal] = label;
    });
})(DecodingMode || (DecodingMode = {}));
|
||
|
/**
|
||
|
* Token decoder with support of writing partial entities.
|
||
|
*/
|
||
|
class EntityDecoder {
|
||
|
constructor(
|
||
|
/** The tree used to decode entities. */
|
||
|
decodeTree,
|
||
|
/**
|
||
|
* The function that is called when a codepoint is decoded.
|
||
|
*
|
||
|
* For multi-byte named entities, this will be called multiple times,
|
||
|
* with the second codepoint, and the same `consumed` value.
|
||
|
*
|
||
|
* @param codepoint The decoded codepoint.
|
||
|
* @param consumed The number of bytes consumed by the decoder.
|
||
|
*/
|
||
|
emitCodePoint,
|
||
|
/** An object that is used to produce errors. */
|
||
|
errors) {
|
||
|
this.decodeTree = decodeTree;
|
||
|
this.emitCodePoint = emitCodePoint;
|
||
|
this.errors = errors;
|
||
|
/** The current state of the decoder. */
|
||
|
this.state = EntityDecoderState.EntityStart;
|
||
|
/** Characters that were consumed while parsing an entity. */
|
||
|
this.consumed = 1;
|
||
|
/**
|
||
|
* The result of the entity.
|
||
|
*
|
||
|
* Either the result index of a numeric entity, or the codepoint of a
|
||
|
* numeric entity.
|
||
|
*/
|
||
|
this.result = 0;
|
||
|
/** The current index in the decode tree. */
|
||
|
this.treeIndex = 0;
|
||
|
/** The number of characters that were consumed in excess. */
|
||
|
this.excess = 1;
|
||
|
/** The mode in which the decoder is operating. */
|
||
|
this.decodeMode = DecodingMode.Strict;
|
||
|
}
|
||
|
/** Resets the instance to make it reusable. */
|
||
|
startEntity(decodeMode) {
|
||
|
this.decodeMode = decodeMode;
|
||
|
this.state = EntityDecoderState.EntityStart;
|
||
|
this.result = 0;
|
||
|
this.treeIndex = 0;
|
||
|
this.excess = 1;
|
||
|
this.consumed = 1;
|
||
|
}
|
||
|
/**
|
||
|
* Write an entity to the decoder. This can be called multiple times with partial entities.
|
||
|
* If the entity is incomplete, the decoder will return -1.
|
||
|
*
|
||
|
* Mirrors the implementation of `getDecoder`, but with the ability to stop decoding if the
|
||
|
* entity is incomplete, and resume when the next string is written.
|
||
|
*
|
||
|
* @param string The string containing the entity (or a continuation of the entity).
|
||
|
* @param offset The offset at which the entity begins. Should be 0 if this is not the first call.
|
||
|
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
||
|
*/
|
||
|
write(str, offset) {
|
||
|
switch (this.state) {
|
||
|
case EntityDecoderState.EntityStart: {
|
||
|
if (str.charCodeAt(offset) === CharCodes.NUM) {
|
||
|
this.state = EntityDecoderState.NumericStart;
|
||
|
this.consumed += 1;
|
||
|
return this.stateNumericStart(str, offset + 1);
|
||
|
}
|
||
|
this.state = EntityDecoderState.NamedEntity;
|
||
|
return this.stateNamedEntity(str, offset);
|
||
|
}
|
||
|
case EntityDecoderState.NumericStart: {
|
||
|
return this.stateNumericStart(str, offset);
|
||
|
}
|
||
|
case EntityDecoderState.NumericDecimal: {
|
||
|
return this.stateNumericDecimal(str, offset);
|
||
|
}
|
||
|
case EntityDecoderState.NumericHex: {
|
||
|
return this.stateNumericHex(str, offset);
|
||
|
}
|
||
|
case EntityDecoderState.NamedEntity: {
|
||
|
return this.stateNamedEntity(str, offset);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
/**
|
||
|
* Switches between the numeric decimal and hexadecimal states.
|
||
|
*
|
||
|
* Equivalent to the `Numeric character reference state` in the HTML spec.
|
||
|
*
|
||
|
* @param str The string containing the entity (or a continuation of the entity).
|
||
|
* @param offset The current offset.
|
||
|
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
||
|
*/
|
||
|
stateNumericStart(str, offset) {
|
||
|
if (offset >= str.length) {
|
||
|
return -1;
|
||
|
}
|
||
|
if ((str.charCodeAt(offset) | TO_LOWER_BIT) === CharCodes.LOWER_X) {
|
||
|
this.state = EntityDecoderState.NumericHex;
|
||
|
this.consumed += 1;
|
||
|
return this.stateNumericHex(str, offset + 1);
|
||
|
}
|
||
|
this.state = EntityDecoderState.NumericDecimal;
|
||
|
return this.stateNumericDecimal(str, offset);
|
||
|
}
|
||
|
addToNumericResult(str, start, end, base) {
|
||
|
if (start !== end) {
|
||
|
const digitCount = end - start;
|
||
|
this.result =
|
||
|
this.result * Math.pow(base, digitCount) +
|
||
|
parseInt(str.substr(start, digitCount), base);
|
||
|
this.consumed += digitCount;
|
||
|
}
|
||
|
}
|
||
|
/**
|
||
|
* Parses a hexadecimal numeric entity.
|
||
|
*
|
||
|
* Equivalent to the `Hexademical character reference state` in the HTML spec.
|
||
|
*
|
||
|
* @param str The string containing the entity (or a continuation of the entity).
|
||
|
* @param offset The current offset.
|
||
|
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
||
|
*/
|
||
|
stateNumericHex(str, offset) {
|
||
|
const startIdx = offset;
|
||
|
while (offset < str.length) {
|
||
|
const char = str.charCodeAt(offset);
|
||
|
if (isNumber(char) || isHexadecimalCharacter(char)) {
|
||
|
offset += 1;
|
||
|
}
|
||
|
else {
|
||
|
this.addToNumericResult(str, startIdx, offset, 16);
|
||
|
return this.emitNumericEntity(char, 3);
|
||
|
}
|
||
|
}
|
||
|
this.addToNumericResult(str, startIdx, offset, 16);
|
||
|
return -1;
|
||
|
}
|
||
|
/**
|
||
|
* Parses a decimal numeric entity.
|
||
|
*
|
||
|
* Equivalent to the `Decimal character reference state` in the HTML spec.
|
||
|
*
|
||
|
* @param str The string containing the entity (or a continuation of the entity).
|
||
|
* @param offset The current offset.
|
||
|
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
||
|
*/
|
||
|
stateNumericDecimal(str, offset) {
|
||
|
const startIdx = offset;
|
||
|
while (offset < str.length) {
|
||
|
const char = str.charCodeAt(offset);
|
||
|
if (isNumber(char)) {
|
||
|
offset += 1;
|
||
|
}
|
||
|
else {
|
||
|
this.addToNumericResult(str, startIdx, offset, 10);
|
||
|
return this.emitNumericEntity(char, 2);
|
||
|
}
|
||
|
}
|
||
|
this.addToNumericResult(str, startIdx, offset, 10);
|
||
|
return -1;
|
||
|
}
|
||
|
/**
|
||
|
* Validate and emit a numeric entity.
|
||
|
*
|
||
|
* Implements the logic from the `Hexademical character reference start
|
||
|
* state` and `Numeric character reference end state` in the HTML spec.
|
||
|
*
|
||
|
* @param lastCp The last code point of the entity. Used to see if the
|
||
|
* entity was terminated with a semicolon.
|
||
|
* @param expectedLength The minimum number of characters that should be
|
||
|
* consumed. Used to validate that at least one digit
|
||
|
* was consumed.
|
||
|
* @returns The number of characters that were consumed.
|
||
|
*/
|
||
|
emitNumericEntity(lastCp, expectedLength) {
|
||
|
var _a;
|
||
|
// Ensure we consumed at least one digit.
|
||
|
if (this.consumed <= expectedLength) {
|
||
|
(_a = this.errors) === null || _a === void 0 ? void 0 : _a.absenceOfDigitsInNumericCharacterReference(this.consumed);
|
||
|
return 0;
|
||
|
}
|
||
|
// Figure out if this is a legit end of the entity
|
||
|
if (lastCp === CharCodes.SEMI) {
|
||
|
this.consumed += 1;
|
||
|
}
|
||
|
else if (this.decodeMode === DecodingMode.Strict) {
|
||
|
return 0;
|
||
|
}
|
||
|
this.emitCodePoint(replaceCodePoint(this.result), this.consumed);
|
||
|
if (this.errors) {
|
||
|
if (lastCp !== CharCodes.SEMI) {
|
||
|
this.errors.missingSemicolonAfterCharacterReference();
|
||
|
}
|
||
|
this.errors.validateNumericCharacterReference(this.result);
|
||
|
}
|
||
|
return this.consumed;
|
||
|
}
|
||
|
/**
|
||
|
* Parses a named entity.
|
||
|
*
|
||
|
* Equivalent to the `Named character reference state` in the HTML spec.
|
||
|
*
|
||
|
* @param str The string containing the entity (or a continuation of the entity).
|
||
|
* @param offset The current offset.
|
||
|
* @returns The number of characters that were consumed, or -1 if the entity is incomplete.
|
||
|
*/
|
||
|
stateNamedEntity(str, offset) {
|
||
|
const { decodeTree } = this;
|
||
|
let current = decodeTree[this.treeIndex];
|
||
|
// The mask is the number of bytes of the value, including the current byte.
|
||
|
let valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
|
||
|
for (; offset < str.length; offset++, this.excess++) {
|
||
|
const char = str.charCodeAt(offset);
|
||
|
this.treeIndex = determineBranch(decodeTree, current, this.treeIndex + Math.max(1, valueLength), char);
|
||
|
if (this.treeIndex < 0) {
|
||
|
return this.result === 0 ||
|
||
|
// If we are parsing an attribute
|
||
|
(this.decodeMode === DecodingMode.Attribute &&
|
||
|
// We shouldn't have consumed any characters after the entity,
|
||
|
(valueLength === 0 ||
|
||
|
// And there should be no invalid characters.
|
||
|
isEntityInAttributeInvalidEnd(char)))
|
||
|
? 0
|
||
|
: this.emitNotTerminatedNamedEntity();
|
||
|
}
|
||
|
current = decodeTree[this.treeIndex];
|
||
|
valueLength = (current & BinTrieFlags.VALUE_LENGTH) >> 14;
|
||
|
// If the branch is a value, store it and continue
|
||
|
if (valueLength !== 0) {
|
||
|
// If the entity is terminated by a semicolon, we are done.
|
||
|
if (char === CharCodes.SEMI) {
|
||
|
return this.emitNamedEntityData(this.treeIndex, valueLength, this.consumed + this.excess);
|
||
|
}
|
||
|
// If we encounter a non-terminated (legacy) entity while parsing strictly, then ignore it.
|
||
|
if (this.decodeMode !== DecodingMode.Strict) {
|
||
|
this.result = this.treeIndex;
|
||
|
this.consumed += this.excess;
|
||
|
this.excess = 0;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
return -1;
|
||
|
}
|
||
|
/**
|
||
|
* Emit a named entity that was not terminated with a semicolon.
|
||
|
*
|
||
|
* @returns The number of characters consumed.
|
||
|
*/
|
||
|
emitNotTerminatedNamedEntity() {
|
||
|
var _a;
|
||
|
const { result, decodeTree } = this;
|
||
|
const valueLength = (decodeTree[result] & BinTrieFlags.VALUE_LENGTH) >> 14;
|
||
|
this.emitNamedEntityData(result, valueLength, this.consumed);
|
||
|
(_a = this.errors) === null || _a === void 0 ? void 0 : _a.missingSemicolonAfterCharacterReference();
|
||
|
return this.consumed;
|
||
|
}
|
||
|
/**
|
||
|
* Emit a named entity.
|
||
|
*
|
||
|
* @param result The index of the entity in the decode tree.
|
||
|
* @param valueLength The number of bytes in the entity.
|
||
|
* @param consumed The number of characters consumed.
|
||
|
*
|
||
|
* @returns The number of characters consumed.
|
||
|
*/
|
||
|
emitNamedEntityData(result, valueLength, consumed) {
|
||
|
const { decodeTree } = this;
|
||
|
this.emitCodePoint(valueLength === 1
|
||
|
? decodeTree[result] & ~BinTrieFlags.VALUE_LENGTH
|
||
|
: decodeTree[result + 1], consumed);
|
||
|
if (valueLength === 3) {
|
||
|
// For multi-byte values, we need to emit the second byte.
|
||
|
this.emitCodePoint(decodeTree[result + 2], consumed);
|
||
|
}
|
||
|
return consumed;
|
||
|
}
|
||
|
/**
|
||
|
* Signal to the parser that the end of the input was reached.
|
||
|
*
|
||
|
* Remaining data will be emitted and relevant errors will be produced.
|
||
|
*
|
||
|
* @returns The number of characters consumed.
|
||
|
*/
|
||
|
end() {
|
||
|
var _a;
|
||
|
switch (this.state) {
|
||
|
case EntityDecoderState.NamedEntity: {
|
||
|
// Emit a named entity if we have one.
|
||
|
return this.result !== 0 &&
|
||
|
(this.decodeMode !== DecodingMode.Attribute ||
|
||
|
this.result === this.treeIndex)
|
||
|
? this.emitNotTerminatedNamedEntity()
|
||
|
: 0;
|
||
|
}
|
||
|
// Otherwise, emit a numeric entity if we have one.
|
||
|
case EntityDecoderState.NumericDecimal: {
|
||
|
return this.emitNumericEntity(0, 2);
|
||
|
}
|
||
|
case EntityDecoderState.NumericHex: {
|
||
|
return this.emitNumericEntity(0, 3);
|
||
|
}
|
||
|
case EntityDecoderState.NumericStart: {
|
||
|
(_a = this.errors) === null || _a === void 0 ? void 0 : _a.absenceOfDigitsInNumericCharacterReference(this.consumed);
|
||
|
return 0;
|
||
|
}
|
||
|
case EntityDecoderState.EntityStart: {
|
||
|
// Return 0 if we have no entity.
|
||
|
return 0;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
/**
 * Picks the branch of the current trie node that matches the given
 * character. This function is used to traverse the trie.
 *
 * @param decodeTree The trie.
 * @param current The current node.
 * @param nodeIdx The index right after the current node and its value.
 * @param char The current character.
 * @returns The index of the next node, or -1 if no branch is taken.
 */
function determineBranch(decodeTree, current, nodeIdx, char) {
    const branchCount = (current & BinTrieFlags.BRANCH_LENGTH) >> 7;
    const jumpOffset = current & BinTrieFlags.JUMP_TABLE;
    // Case 1: a single branch, whose character is stored in the jump offset.
    if (branchCount === 0) {
        if (jumpOffset !== 0 && char === jumpOffset) {
            return nodeIdx;
        }
        return -1;
    }
    // Case 2: several branches stored as a jump table that starts at the
    // character `jumpOffset`.
    if (jumpOffset !== 0) {
        const slot = char - jumpOffset;
        if (slot < 0 || slot >= branchCount) {
            return -1;
        }
        return decodeTree[nodeIdx + slot] - 1;
    }
    // Case 3: several branches stored as a dictionary: `branchCount` keys
    // followed by `branchCount` targets. Binary search the keys.
    let low = nodeIdx;
    let high = nodeIdx + branchCount - 1;
    while (low <= high) {
        const middle = (low + high) >>> 1;
        const key = decodeTree[middle];
        if (key < char) {
            low = middle + 1;
            continue;
        }
        if (key > char) {
            high = middle - 1;
            continue;
        }
        return decodeTree[middle + branchCount];
    }
    return -1;
}
|
||
|
|
||
|
/** All valid namespaces in HTML, keyed by a short name. */
var NS;
(function (ns) {
    ns.HTML = "http://www.w3.org/1999/xhtml";
    ns.MATHML = "http://www.w3.org/1998/Math/MathML";
    ns.SVG = "http://www.w3.org/2000/svg";
    ns.XLINK = "http://www.w3.org/1999/xlink";
    ns.XML = "http://www.w3.org/XML/1998/namespace";
    ns.XMLNS = "http://www.w3.org/2000/xmlns/";
})(NS || (NS = {}));
|
||
|
/** Attribute names referenced by name elsewhere in the parser. */
var ATTRS;
(function (attrs) {
    attrs.TYPE = "type";
    attrs.ACTION = "action";
    attrs.ENCODING = "encoding";
    attrs.PROMPT = "prompt";
    attrs.NAME = "name";
    attrs.COLOR = "color";
    attrs.FACE = "face";
    attrs.SIZE = "size";
})(ATTRS || (ATTRS = {}));
|
||
|
/**
 * The mode of the document.
 *
 * @see {@link https://dom.spec.whatwg.org/#concept-document-limited-quirks}
 */
var DOCUMENT_MODE;
(function (modes) {
    modes.NO_QUIRKS = "no-quirks";
    modes.QUIRKS = "quirks";
    modes.LIMITED_QUIRKS = "limited-quirks";
})(DOCUMENT_MODE || (DOCUMENT_MODE = {}));
|
||
|
/**
 * Known tag names, keyed by an upper-case identifier.
 *
 * The entries are kept in their original declaration order (which is not
 * strictly alphabetical).
 */
var TAG_NAMES;
(function (names) {
    Object.assign(names, {
        A: "a", ADDRESS: "address", ANNOTATION_XML: "annotation-xml", APPLET: "applet", AREA: "area",
        ARTICLE: "article", ASIDE: "aside",
        B: "b", BASE: "base", BASEFONT: "basefont", BGSOUND: "bgsound", BIG: "big",
        BLOCKQUOTE: "blockquote", BODY: "body", BR: "br", BUTTON: "button",
        CAPTION: "caption", CENTER: "center", CODE: "code", COL: "col", COLGROUP: "colgroup",
        DD: "dd", DESC: "desc", DETAILS: "details", DIALOG: "dialog", DIR: "dir", DIV: "div",
        DL: "dl", DT: "dt",
        EM: "em", EMBED: "embed",
        FIELDSET: "fieldset", FIGCAPTION: "figcaption", FIGURE: "figure", FONT: "font",
        FOOTER: "footer", FOREIGN_OBJECT: "foreignObject", FORM: "form", FRAME: "frame",
        FRAMESET: "frameset",
        H1: "h1", H2: "h2", H3: "h3", H4: "h4", H5: "h5", H6: "h6",
        HEAD: "head", HEADER: "header", HGROUP: "hgroup", HR: "hr", HTML: "html",
        I: "i", IMG: "img", IMAGE: "image", INPUT: "input", IFRAME: "iframe",
        KEYGEN: "keygen",
        LABEL: "label", LI: "li", LINK: "link", LISTING: "listing",
        MAIN: "main", MALIGNMARK: "malignmark", MARQUEE: "marquee", MATH: "math", MENU: "menu",
        META: "meta", MGLYPH: "mglyph", MI: "mi", MO: "mo", MN: "mn", MS: "ms", MTEXT: "mtext",
        NAV: "nav", NOBR: "nobr", NOFRAMES: "noframes", NOEMBED: "noembed", NOSCRIPT: "noscript",
        OBJECT: "object", OL: "ol", OPTGROUP: "optgroup", OPTION: "option",
        P: "p", PARAM: "param", PLAINTEXT: "plaintext", PRE: "pre",
        RB: "rb", RP: "rp", RT: "rt", RTC: "rtc", RUBY: "ruby",
        S: "s", SCRIPT: "script", SEARCH: "search", SECTION: "section", SELECT: "select",
        SOURCE: "source", SMALL: "small", SPAN: "span", STRIKE: "strike", STRONG: "strong",
        STYLE: "style", SUB: "sub", SUMMARY: "summary", SUP: "sup",
        TABLE: "table", TBODY: "tbody", TEMPLATE: "template", TEXTAREA: "textarea",
        TFOOT: "tfoot", TD: "td", TH: "th", THEAD: "thead", TITLE: "title", TR: "tr",
        TRACK: "track", TT: "tt",
        U: "u", UL: "ul",
        SVG: "svg",
        VAR: "var", WBR: "wbr", XMP: "xmp",
    });
})(TAG_NAMES || (TAG_NAMES = {}));
|
||
|
/**
 * Tag IDs are numeric IDs for known tag names.
 *
 * We use tag IDs to improve the performance of tag name comparisons.
 *
 * The object maps in both directions: `TAG_ID.DIV === 27` and
 * `TAG_ID[27] === "DIV"`. An ID is the position of the name in the list
 * below, so the order of the list must not change.
 */
var TAG_ID;
(function (ids) {
    const orderedNames = [
        "UNKNOWN", "A", "ADDRESS", "ANNOTATION_XML", "APPLET", "AREA", "ARTICLE", "ASIDE", "B", "BASE", // 0-9
        "BASEFONT", "BGSOUND", "BIG", "BLOCKQUOTE", "BODY", "BR", "BUTTON", "CAPTION", "CENTER", "CODE", // 10-19
        "COL", "COLGROUP", "DD", "DESC", "DETAILS", "DIALOG", "DIR", "DIV", "DL", "DT", // 20-29
        "EM", "EMBED", "FIELDSET", "FIGCAPTION", "FIGURE", "FONT", "FOOTER", "FOREIGN_OBJECT", "FORM", "FRAME", // 30-39
        "FRAMESET", "H1", "H2", "H3", "H4", "H5", "H6", "HEAD", "HEADER", "HGROUP", // 40-49
        "HR", "HTML", "I", "IMG", "IMAGE", "INPUT", "IFRAME", "KEYGEN", "LABEL", "LI", // 50-59
        "LINK", "LISTING", "MAIN", "MALIGNMARK", "MARQUEE", "MATH", "MENU", "META", "MGLYPH", "MI", // 60-69
        "MO", "MN", "MS", "MTEXT", "NAV", "NOBR", "NOFRAMES", "NOEMBED", "NOSCRIPT", "OBJECT", // 70-79
        "OL", "OPTGROUP", "OPTION", "P", "PARAM", "PLAINTEXT", "PRE", "RB", "RP", "RT", // 80-89
        "RTC", "RUBY", "S", "SCRIPT", "SEARCH", "SECTION", "SELECT", "SOURCE", "SMALL", "SPAN", // 90-99
        "STRIKE", "STRONG", "STYLE", "SUB", "SUMMARY", "SUP", "TABLE", "TBODY", "TEMPLATE", "TEXTAREA", // 100-109
        "TFOOT", "TD", "TH", "THEAD", "TITLE", "TR", "TRACK", "TT", "U", "UL", // 110-119
        "SVG", "VAR", "WBR", "XMP", // 120-123
    ];
    for (let id = 0; id < orderedNames.length; id++) {
        const name = orderedNames[id];
        ids[name] = id; // name -> id
        ids[id] = name; // id -> name
    }
})(TAG_ID || (TAG_ID = {}));
|
||
|
/**
 * Lookup table from a tag name (e.g. "div") to its numeric tag ID.
 *
 * There is one entry per key of `TAG_NAMES`, in declaration order; each
 * value is the `TAG_ID` member with the same key.
 */
const TAG_NAME_TO_ID = new Map();
for (const key of Object.keys(TAG_NAMES)) {
    TAG_NAME_TO_ID.set(TAG_NAMES[key], TAG_ID[key]);
}
|
||
|
/**
 * Looks up the numeric ID of a tag name.
 *
 * @param tagName The tag name to look up in `TAG_NAME_TO_ID`.
 * @returns The ID stored for `tagName`, or `TAG_ID.UNKNOWN` when the lookup
 *          yields `null` or `undefined`.
 */
function getTagID(tagName) {
    const id = TAG_NAME_TO_ID.get(tagName);
    if (id === null || id === undefined) {
        return TAG_ID.UNKNOWN;
    }
    return id;
}
|
||
|
// Short alias for the tag ID enum.
const $ = TAG_ID;
/**
 * Sets of tag IDs, one set per namespace URI. The XLink, XML and XMLNS
 * namespaces have empty sets.
 */
const SPECIAL_ELEMENTS = {};
SPECIAL_ELEMENTS[NS.HTML] = new Set([
    $.ADDRESS, $.APPLET, $.AREA, $.ARTICLE, $.ASIDE,
    $.BASE, $.BASEFONT, $.BGSOUND, $.BLOCKQUOTE, $.BODY, $.BR, $.BUTTON,
    $.CAPTION, $.CENTER, $.COL, $.COLGROUP,
    $.DD, $.DETAILS, $.DIR, $.DIV, $.DL, $.DT,
    $.EMBED,
    $.FIELDSET, $.FIGCAPTION, $.FIGURE, $.FOOTER, $.FORM, $.FRAME, $.FRAMESET,
    $.H1, $.H2, $.H3, $.H4, $.H5, $.H6,
    $.HEAD, $.HEADER, $.HGROUP, $.HR, $.HTML,
    $.IFRAME, $.IMG, $.INPUT,
    $.LI, $.LINK, $.LISTING,
    $.MAIN, $.MARQUEE, $.MENU, $.META,
    $.NAV, $.NOEMBED, $.NOFRAMES, $.NOSCRIPT,
    $.OBJECT, $.OL,
    $.P, $.PARAM, $.PLAINTEXT, $.PRE,
    $.SCRIPT, $.SECTION, $.SELECT, $.SOURCE, $.STYLE, $.SUMMARY,
    $.TABLE, $.TBODY, $.TD, $.TEMPLATE, $.TEXTAREA, $.TFOOT, $.TH, $.THEAD, $.TITLE, $.TR, $.TRACK,
    $.UL,
    $.WBR,
    $.XMP,
]);
SPECIAL_ELEMENTS[NS.MATHML] = new Set([$.MI, $.MO, $.MN, $.MS, $.MTEXT, $.ANNOTATION_XML]);
SPECIAL_ELEMENTS[NS.SVG] = new Set([$.TITLE, $.FOREIGN_OBJECT, $.DESC]);
SPECIAL_ELEMENTS[NS.XLINK] = new Set();
SPECIAL_ELEMENTS[NS.XML] = new Set();
SPECIAL_ELEMENTS[NS.XMLNS] = new Set();
/** Tag IDs of the numbered heading elements `h1` to `h6`. */
const NUMBERED_HEADERS = new Set([
    $.H1,
    $.H2,
    $.H3,
    $.H4,
    $.H5,
    $.H6,
]);
|
||
|
|
||
|
// Tokenizer states.
// The object maps in both directions: `State.DATA === 0` and
// `State[0] === "DATA"`. A state's number is the position of its name in
// the list below, so the order of the list must not change.
var State;
(function (states) {
    const orderedNames = [
        // 0-7
        "DATA", "RCDATA", "RAWTEXT", "SCRIPT_DATA", "PLAINTEXT", "TAG_OPEN", "END_TAG_OPEN", "TAG_NAME",
        // 8-13
        "RCDATA_LESS_THAN_SIGN", "RCDATA_END_TAG_OPEN", "RCDATA_END_TAG_NAME",
        "RAWTEXT_LESS_THAN_SIGN", "RAWTEXT_END_TAG_OPEN", "RAWTEXT_END_TAG_NAME",
        // 14-18
        "SCRIPT_DATA_LESS_THAN_SIGN", "SCRIPT_DATA_END_TAG_OPEN", "SCRIPT_DATA_END_TAG_NAME",
        "SCRIPT_DATA_ESCAPE_START", "SCRIPT_DATA_ESCAPE_START_DASH",
        // 19-24
        "SCRIPT_DATA_ESCAPED", "SCRIPT_DATA_ESCAPED_DASH", "SCRIPT_DATA_ESCAPED_DASH_DASH",
        "SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN", "SCRIPT_DATA_ESCAPED_END_TAG_OPEN",
        "SCRIPT_DATA_ESCAPED_END_TAG_NAME",
        // 25-30
        "SCRIPT_DATA_DOUBLE_ESCAPE_START", "SCRIPT_DATA_DOUBLE_ESCAPED", "SCRIPT_DATA_DOUBLE_ESCAPED_DASH",
        "SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH", "SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN",
        "SCRIPT_DATA_DOUBLE_ESCAPE_END",
        // 31-39
        "BEFORE_ATTRIBUTE_NAME", "ATTRIBUTE_NAME", "AFTER_ATTRIBUTE_NAME", "BEFORE_ATTRIBUTE_VALUE",
        "ATTRIBUTE_VALUE_DOUBLE_QUOTED", "ATTRIBUTE_VALUE_SINGLE_QUOTED", "ATTRIBUTE_VALUE_UNQUOTED",
        "AFTER_ATTRIBUTE_VALUE_QUOTED", "SELF_CLOSING_START_TAG",
        // 40-51
        "BOGUS_COMMENT", "MARKUP_DECLARATION_OPEN", "COMMENT_START", "COMMENT_START_DASH", "COMMENT",
        "COMMENT_LESS_THAN_SIGN", "COMMENT_LESS_THAN_SIGN_BANG", "COMMENT_LESS_THAN_SIGN_BANG_DASH",
        "COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH", "COMMENT_END_DASH", "COMMENT_END", "COMMENT_END_BANG",
        // 52-55
        "DOCTYPE", "BEFORE_DOCTYPE_NAME", "DOCTYPE_NAME", "AFTER_DOCTYPE_NAME",
        // 56-61
        "AFTER_DOCTYPE_PUBLIC_KEYWORD", "BEFORE_DOCTYPE_PUBLIC_IDENTIFIER",
        "DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED", "DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED",
        "AFTER_DOCTYPE_PUBLIC_IDENTIFIER", "BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS",
        // 62-67
        "AFTER_DOCTYPE_SYSTEM_KEYWORD", "BEFORE_DOCTYPE_SYSTEM_IDENTIFIER",
        "DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED", "DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED",
        "AFTER_DOCTYPE_SYSTEM_IDENTIFIER", "BOGUS_DOCTYPE",
        // 68-72
        "CDATA_SECTION", "CDATA_SECTION_BRACKET", "CDATA_SECTION_END", "CHARACTER_REFERENCE",
        "AMBIGUOUS_AMPERSAND",
    ];
    for (let id = 0; id < orderedNames.length; id++) {
        const name = orderedNames[id];
        states[name] = id; // name -> number
        states[id] = name; // number -> name
    }
})(State || (State = {}));
|
||
|
// Initial tokenizer states for the different modes. Each mode has the
// value of the `State` member with the same name.
const TokenizerMode = (({ DATA, RCDATA, RAWTEXT, SCRIPT_DATA, PLAINTEXT, CDATA_SECTION }) => ({
    DATA,
    RCDATA,
    RAWTEXT,
    SCRIPT_DATA,
    PLAINTEXT,
    CDATA_SECTION,
}))(State);
|
||
|
//Utils
|
||
|
//OPTIMIZATION: these utility functions should not be moved out of this module. V8 Crankshaft will not inline
//these functions if they are situated in another module, due to the context switch.
//Always perform an inlining check before modifying these functions ('node --trace-inlining').
|
||
|
/**
 * Tells whether a code point lies between `CODE_POINTS.DIGIT_0` and
 * `CODE_POINTS.DIGIT_9` (both inclusive).
 *
 * @param cp The code point to test.
 * @returns `true` if `cp` is in that range, `false` otherwise.
 */
function isAsciiDigit(cp) {
    const { DIGIT_0: first, DIGIT_9: last } = CODE_POINTS;
    return first <= cp && cp <= last;
}
|
||
|
/**
 * Tells whether a code point lies between `CODE_POINTS.LATIN_CAPITAL_A` and
 * `CODE_POINTS.LATIN_CAPITAL_Z` (both inclusive).
 *
 * @param cp The code point to test.
 * @returns `true` if `cp` is in that range, `false` otherwise.
 */
function isAsciiUpper(cp) {
    const { LATIN_CAPITAL_A: first, LATIN_CAPITAL_Z: last } = CODE_POINTS;
    return first <= cp && cp <= last;
}
|
||
|
/**
 * Tells whether a code point lies in the inclusive range
 * `CODE_POINTS.LATIN_SMALL_A`..`CODE_POINTS.LATIN_SMALL_Z`.
 *
 * @param {number} cp Code point to test.
 * @returns {boolean} `true` when `cp` is a lower-case ASCII letter.
 */
function isAsciiLower(cp) {
    return CODE_POINTS.LATIN_SMALL_A <= cp && cp <= CODE_POINTS.LATIN_SMALL_Z;
}
|
||
|
/**
 * Tells whether a code point is an ASCII letter of either case.
 * The lower-case check runs first, then the upper-case check.
 *
 * @param {number} cp Code point to test.
 * @returns {boolean} `true` when `cp` is an ASCII letter.
 */
function isAsciiLetter(cp) {
    if (isAsciiLower(cp)) {
        return true;
    }
    return isAsciiUpper(cp);
}
|
||
|
/**
 * Tells whether a code point is an ASCII letter or an ASCII digit.
 * The letter check runs first, then the digit check.
 *
 * @param {number} cp Code point to test.
 * @returns {boolean} `true` when `cp` is an ASCII letter or digit.
 */
function isAsciiAlphaNumeric(cp) {
    if (isAsciiLetter(cp)) {
        return true;
    }
    return isAsciiDigit(cp);
}
|
||
|
/**
 * Shifts a code point up by 0x20, the distance between an upper-case ASCII
 * letter and its lower-case counterpart.
 *
 * The shift is unconditional: no range check is done here, so callers are
 * expected to pass only upper-case ASCII letters.
 *
 * @param {number} cp Code point to convert.
 * @returns {number} `cp + 32`.
 */
function toAsciiLower(cp) {
    return cp + 0x20;
}
|
||
|
/**
 * Tells whether a code point is one of the four whitespace code points the
 * tokenizer recognises here: space, line feed, tabulation or form feed.
 *
 * @param {number} cp Code point to test.
 * @returns {boolean} `true` when `cp` is one of those four code points.
 */
function isWhitespace(cp) {
    switch (cp) {
        case CODE_POINTS.SPACE:
        case CODE_POINTS.LINE_FEED:
        case CODE_POINTS.TABULATION:
        case CODE_POINTS.FORM_FEED:
            return true;
        default:
            return false;
    }
}
|
||
|
/**
 * Tells whether a code point is whitespace (per `isWhitespace`), a solidus
 * or a greater-than sign.
 *
 * @param {number} cp Code point to test.
 * @returns {boolean} `true` when `cp` is one of those code points.
 */
function isScriptDataDoubleEscapeSequenceEnd(cp) {
    if (isWhitespace(cp)) {
        return true;
    }
    return cp === CODE_POINTS.SOLIDUS || cp === CODE_POINTS.GREATER_THAN_SIGN;
}
|
||
|
/**
 * Picks the parse error, if any, that applies to the numeric value of a
 * character reference.
 *
 * Checks run in a fixed order and the first match wins: NULL, above
 * U+10FFFF, surrogate, noncharacter, then control character or carriage
 * return.
 *
 * @param {number} code Numeric value of the character reference.
 * @returns {*} The matching `ERR` member, or `null` when none applies.
 */
function getErrorForNumericCharacterReference(code) {
    if (code === CODE_POINTS.NULL) {
        return ERR.nullCharacterReference;
    }
    // 0x10FFFF (1114111) is the highest Unicode code point.
    if (code > 0x10ffff) {
        return ERR.characterReferenceOutsideUnicodeRange;
    }
    if (isSurrogate(code)) {
        return ERR.surrogateCharacterReference;
    }
    if (isUndefinedCodePoint(code)) {
        return ERR.noncharacterCharacterReference;
    }
    if (isControlCodePoint(code) || code === CODE_POINTS.CARRIAGE_RETURN) {
        return ERR.controlCharacterReference;
    }
    return null;
}
|
||
|
//Tokenizer
|
||
|
class Tokenizer {
|
||
|
constructor(options, handler) {
|
||
|
this.options = options;
|
||
|
this.handler = handler;
|
||
|
this.paused = false;
|
||
|
/** Ensures that the parsing loop isn't run multiple times at once. */
|
||
|
this.inLoop = false;
|
||
|
/**
|
||
|
* Indicates that the current adjusted node exists, is not an element in the HTML namespace,
|
||
|
* and that it is not an integration point for either MathML or HTML.
|
||
|
*
|
||
|
* @see {@link https://html.spec.whatwg.org/multipage/parsing.html#tree-construction}
|
||
|
*/
|
||
|
this.inForeignNode = false;
|
||
|
this.lastStartTagName = '';
|
||
|
this.active = false;
|
||
|
this.state = State.DATA;
|
||
|
this.returnState = State.DATA;
|
||
|
this.entityStartPos = 0;
|
||
|
this.consumedAfterSnapshot = -1;
|
||
|
this.currentCharacterToken = null;
|
||
|
this.currentToken = null;
|
||
|
this.currentAttr = { name: '', value: '' };
|
||
|
this.preprocessor = new Preprocessor(handler);
|
||
|
this.currentLocation = this.getCurrentLocation(-1);
|
||
|
this.entityDecoder = new EntityDecoder(htmlDecodeTree, (cp, consumed) => {
|
||
|
// Note: Set `pos` _before_ flushing, as flushing might drop
|
||
|
// the current chunk and invalidate `entityStartPos`.
|
||
|
this.preprocessor.pos = this.entityStartPos + consumed - 1;
|
||
|
this._flushCodePointConsumedAsCharacterReference(cp);
|
||
|
}, handler.onParseError
|
||
|
? {
|
||
|
missingSemicolonAfterCharacterReference: () => {
|
||
|
this._err(ERR.missingSemicolonAfterCharacterReference, 1);
|
||
|
},
|
||
|
absenceOfDigitsInNumericCharacterReference: (consumed) => {
|
||
|
this._err(ERR.absenceOfDigitsInNumericCharacterReference, this.entityStartPos - this.preprocessor.pos + consumed);
|
||
|
},
|
||
|
validateNumericCharacterReference: (code) => {
|
||
|
const error = getErrorForNumericCharacterReference(code);
|
||
|
if (error)
|
||
|
this._err(error, 1);
|
||
|
},
|
||
|
}
|
||
|
: undefined);
|
||
|
}
|
||
|
//Errors
|
||
|
_err(code, cpOffset = 0) {
|
||
|
var _a, _b;
|
||
|
(_b = (_a = this.handler).onParseError) === null || _b === void 0 ? void 0 : _b.call(_a, this.preprocessor.getError(code, cpOffset));
|
||
|
}
|
||
|
// NOTE: `offset` may never run across line boundaries.
|
||
|
getCurrentLocation(offset) {
|
||
|
if (!this.options.sourceCodeLocationInfo) {
|
||
|
return null;
|
||
|
}
|
||
|
return {
|
||
|
startLine: this.preprocessor.line,
|
||
|
startCol: this.preprocessor.col - offset,
|
||
|
startOffset: this.preprocessor.offset - offset,
|
||
|
endLine: -1,
|
||
|
endCol: -1,
|
||
|
endOffset: -1,
|
||
|
};
|
||
|
}
|
||
|
_runParsingLoop() {
|
||
|
if (this.inLoop)
|
||
|
return;
|
||
|
this.inLoop = true;
|
||
|
while (this.active && !this.paused) {
|
||
|
this.consumedAfterSnapshot = 0;
|
||
|
const cp = this._consume();
|
||
|
if (!this._ensureHibernation()) {
|
||
|
this._callState(cp);
|
||
|
}
|
||
|
}
|
||
|
this.inLoop = false;
|
||
|
}
|
||
|
//API
|
||
|
pause() {
|
||
|
this.paused = true;
|
||
|
}
|
||
|
resume(writeCallback) {
|
||
|
if (!this.paused) {
|
||
|
throw new Error('Parser was already resumed');
|
||
|
}
|
||
|
this.paused = false;
|
||
|
// Necessary for synchronous resume.
|
||
|
if (this.inLoop)
|
||
|
return;
|
||
|
this._runParsingLoop();
|
||
|
if (!this.paused) {
|
||
|
writeCallback === null || writeCallback === void 0 ? void 0 : writeCallback();
|
||
|
}
|
||
|
}
|
||
|
write(chunk, isLastChunk, writeCallback) {
|
||
|
this.active = true;
|
||
|
this.preprocessor.write(chunk, isLastChunk);
|
||
|
this._runParsingLoop();
|
||
|
if (!this.paused) {
|
||
|
writeCallback === null || writeCallback === void 0 ? void 0 : writeCallback();
|
||
|
}
|
||
|
}
|
||
|
insertHtmlAtCurrentPos(chunk) {
|
||
|
this.active = true;
|
||
|
this.preprocessor.insertHtmlAtCurrentPos(chunk);
|
||
|
this._runParsingLoop();
|
||
|
}
|
||
|
//Hibernation
|
||
|
_ensureHibernation() {
|
||
|
if (this.preprocessor.endOfChunkHit) {
|
||
|
this.preprocessor.retreat(this.consumedAfterSnapshot);
|
||
|
this.consumedAfterSnapshot = 0;
|
||
|
this.active = false;
|
||
|
return true;
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
//Consumption
|
||
|
_consume() {
|
||
|
this.consumedAfterSnapshot++;
|
||
|
return this.preprocessor.advance();
|
||
|
}
|
||
|
_advanceBy(count) {
|
||
|
this.consumedAfterSnapshot += count;
|
||
|
for (let i = 0; i < count; i++) {
|
||
|
this.preprocessor.advance();
|
||
|
}
|
||
|
}
|
||
|
_consumeSequenceIfMatch(pattern, caseSensitive) {
|
||
|
if (this.preprocessor.startsWith(pattern, caseSensitive)) {
|
||
|
// We will already have consumed one character before calling this method.
|
||
|
this._advanceBy(pattern.length - 1);
|
||
|
return true;
|
||
|
}
|
||
|
return false;
|
||
|
}
|
||
|
//Token creation
|
||
|
_createStartTagToken() {
|
||
|
this.currentToken = {
|
||
|
type: TokenType.START_TAG,
|
||
|
tagName: '',
|
||
|
tagID: TAG_ID.UNKNOWN,
|
||
|
selfClosing: false,
|
||
|
ackSelfClosing: false,
|
||
|
attrs: [],
|
||
|
location: this.getCurrentLocation(1),
|
||
|
};
|
||
|
}
|
||
|
_createEndTagToken() {
|
||
|
this.currentToken = {
|
||
|
type: TokenType.END_TAG,
|
||
|
tagName: '',
|
||
|
tagID: TAG_ID.UNKNOWN,
|
||
|
selfClosing: false,
|
||
|
ackSelfClosing: false,
|
||
|
attrs: [],
|
||
|
location: this.getCurrentLocation(2),
|
||
|
};
|
||
|
}
|
||
|
_createCommentToken(offset) {
|
||
|
this.currentToken = {
|
||
|
type: TokenType.COMMENT,
|
||
|
data: '',
|
||
|
location: this.getCurrentLocation(offset),
|
||
|
};
|
||
|
}
|
||
|
_createDoctypeToken(initialName) {
|
||
|
this.currentToken = {
|
||
|
type: TokenType.DOCTYPE,
|
||
|
name: initialName,
|
||
|
forceQuirks: false,
|
||
|
publicId: null,
|
||
|
systemId: null,
|
||
|
location: this.currentLocation,
|
||
|
};
|
||
|
}
|
||
|
_createCharacterToken(type, chars) {
|
||
|
this.currentCharacterToken = {
|
||
|
type,
|
||
|
chars,
|
||
|
location: this.currentLocation,
|
||
|
};
|
||
|
}
|
||
|
//Tag attributes
|
||
|
_createAttr(attrNameFirstCh) {
|
||
|
this.currentAttr = {
|
||
|
name: attrNameFirstCh,
|
||
|
value: '',
|
||
|
};
|
||
|
this.currentLocation = this.getCurrentLocation(0);
|
||
|
}
|
||
|
_leaveAttrName() {
|
||
|
var _a;
|
||
|
var _b;
|
||
|
const token = this.currentToken;
|
||
|
if (getTokenAttr(token, this.currentAttr.name) === null) {
|
||
|
token.attrs.push(this.currentAttr);
|
||
|
if (token.location && this.currentLocation) {
|
||
|
const attrLocations = ((_a = (_b = token.location).attrs) !== null && _a !== void 0 ? _a : (_b.attrs = Object.create(null)));
|
||
|
attrLocations[this.currentAttr.name] = this.currentLocation;
|
||
|
// Set end location
|
||
|
this._leaveAttrValue();
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
this._err(ERR.duplicateAttribute);
|
||
|
}
|
||
|
}
|
||
|
_leaveAttrValue() {
|
||
|
if (this.currentLocation) {
|
||
|
this.currentLocation.endLine = this.preprocessor.line;
|
||
|
this.currentLocation.endCol = this.preprocessor.col;
|
||
|
this.currentLocation.endOffset = this.preprocessor.offset;
|
||
|
}
|
||
|
}
|
||
|
//Token emission
|
||
|
prepareToken(ct) {
|
||
|
this._emitCurrentCharacterToken(ct.location);
|
||
|
this.currentToken = null;
|
||
|
if (ct.location) {
|
||
|
ct.location.endLine = this.preprocessor.line;
|
||
|
ct.location.endCol = this.preprocessor.col + 1;
|
||
|
ct.location.endOffset = this.preprocessor.offset + 1;
|
||
|
}
|
||
|
this.currentLocation = this.getCurrentLocation(-1);
|
||
|
}
|
||
|
emitCurrentTagToken() {
|
||
|
const ct = this.currentToken;
|
||
|
this.prepareToken(ct);
|
||
|
ct.tagID = getTagID(ct.tagName);
|
||
|
if (ct.type === TokenType.START_TAG) {
|
||
|
this.lastStartTagName = ct.tagName;
|
||
|
this.handler.onStartTag(ct);
|
||
|
}
|
||
|
else {
|
||
|
if (ct.attrs.length > 0) {
|
||
|
this._err(ERR.endTagWithAttributes);
|
||
|
}
|
||
|
if (ct.selfClosing) {
|
||
|
this._err(ERR.endTagWithTrailingSolidus);
|
||
|
}
|
||
|
this.handler.onEndTag(ct);
|
||
|
}
|
||
|
this.preprocessor.dropParsedChunk();
|
||
|
}
|
||
|
emitCurrentComment(ct) {
|
||
|
this.prepareToken(ct);
|
||
|
this.handler.onComment(ct);
|
||
|
this.preprocessor.dropParsedChunk();
|
||
|
}
|
||
|
emitCurrentDoctype(ct) {
|
||
|
this.prepareToken(ct);
|
||
|
this.handler.onDoctype(ct);
|
||
|
this.preprocessor.dropParsedChunk();
|
||
|
}
|
||
|
_emitCurrentCharacterToken(nextLocation) {
|
||
|
if (this.currentCharacterToken) {
|
||
|
//NOTE: if we have a pending character token, make it's end location equal to the
|
||
|
//current token's start location.
|
||
|
if (nextLocation && this.currentCharacterToken.location) {
|
||
|
this.currentCharacterToken.location.endLine = nextLocation.startLine;
|
||
|
this.currentCharacterToken.location.endCol = nextLocation.startCol;
|
||
|
this.currentCharacterToken.location.endOffset = nextLocation.startOffset;
|
||
|
}
|
||
|
switch (this.currentCharacterToken.type) {
|
||
|
case TokenType.CHARACTER: {
|
||
|
this.handler.onCharacter(this.currentCharacterToken);
|
||
|
break;
|
||
|
}
|
||
|
case TokenType.NULL_CHARACTER: {
|
||
|
this.handler.onNullCharacter(this.currentCharacterToken);
|
||
|
break;
|
||
|
}
|
||
|
case TokenType.WHITESPACE_CHARACTER: {
|
||
|
this.handler.onWhitespaceCharacter(this.currentCharacterToken);
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
this.currentCharacterToken = null;
|
||
|
}
|
||
|
}
|
||
|
_emitEOFToken() {
|
||
|
const location = this.getCurrentLocation(0);
|
||
|
if (location) {
|
||
|
location.endLine = location.startLine;
|
||
|
location.endCol = location.startCol;
|
||
|
location.endOffset = location.startOffset;
|
||
|
}
|
||
|
this._emitCurrentCharacterToken(location);
|
||
|
this.handler.onEof({ type: TokenType.EOF, location });
|
||
|
this.active = false;
|
||
|
}
|
||
|
//Characters emission
|
||
|
//OPTIMIZATION: The specification uses only one type of character token (one token per character).
//This causes a huge memory overhead and a lot of unnecessary parser loops. parse5 uses 3 groups of characters.
//If we have a sequence of characters that belong to the same group, the parser can process it
//as a single solid character token.
//So, there are 3 types of character tokens in parse5:
//1)TokenType.NULL_CHARACTER - \u0000-character sequences (e.g. '\u0000\u0000\u0000')
//2)TokenType.WHITESPACE_CHARACTER - any whitespace/new-line character sequences (e.g. '\n \r\t \f')
//3)TokenType.CHARACTER - any character sequence that doesn't belong to groups 1 and 2 (e.g. 'abcdef1234@@#$%^')
|
||
|
_appendCharToCurrentCharacterToken(type, ch) {
|
||
|
if (this.currentCharacterToken) {
|
||
|
if (this.currentCharacterToken.type === type) {
|
||
|
this.currentCharacterToken.chars += ch;
|
||
|
return;
|
||
|
}
|
||
|
else {
|
||
|
this.currentLocation = this.getCurrentLocation(0);
|
||
|
this._emitCurrentCharacterToken(this.currentLocation);
|
||
|
this.preprocessor.dropParsedChunk();
|
||
|
}
|
||
|
}
|
||
|
this._createCharacterToken(type, ch);
|
||
|
}
|
||
|
_emitCodePoint(cp) {
|
||
|
const type = isWhitespace(cp)
|
||
|
? TokenType.WHITESPACE_CHARACTER
|
||
|
: cp === CODE_POINTS.NULL
|
||
|
? TokenType.NULL_CHARACTER
|
||
|
: TokenType.CHARACTER;
|
||
|
this._appendCharToCurrentCharacterToken(type, String.fromCodePoint(cp));
|
||
|
}
|
||
|
//NOTE: used when we emit characters explicitly.
|
||
|
//This is always for non-whitespace and non-null characters, which allows us to avoid additional checks.
|
||
|
_emitChars(ch) {
|
||
|
this._appendCharToCurrentCharacterToken(TokenType.CHARACTER, ch);
|
||
|
}
|
||
|
// Character reference helpers
|
||
|
_startCharacterReference() {
|
||
|
this.returnState = this.state;
|
||
|
this.state = State.CHARACTER_REFERENCE;
|
||
|
this.entityStartPos = this.preprocessor.pos;
|
||
|
this.entityDecoder.startEntity(this._isCharacterReferenceInAttribute() ? DecodingMode.Attribute : DecodingMode.Legacy);
|
||
|
}
|
||
|
_isCharacterReferenceInAttribute() {
|
||
|
return (this.returnState === State.ATTRIBUTE_VALUE_DOUBLE_QUOTED ||
|
||
|
this.returnState === State.ATTRIBUTE_VALUE_SINGLE_QUOTED ||
|
||
|
this.returnState === State.ATTRIBUTE_VALUE_UNQUOTED);
|
||
|
}
|
||
|
_flushCodePointConsumedAsCharacterReference(cp) {
|
||
|
if (this._isCharacterReferenceInAttribute()) {
|
||
|
this.currentAttr.value += String.fromCodePoint(cp);
|
||
|
}
|
||
|
else {
|
||
|
this._emitCodePoint(cp);
|
||
|
}
|
||
|
}
|
||
|
// Calling states this way turns out to be much faster than any other approach.
|
||
|
_callState(cp) {
|
||
|
switch (this.state) {
|
||
|
case State.DATA: {
|
||
|
this._stateData(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.RCDATA: {
|
||
|
this._stateRcdata(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.RAWTEXT: {
|
||
|
this._stateRawtext(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.SCRIPT_DATA: {
|
||
|
this._stateScriptData(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.PLAINTEXT: {
|
||
|
this._statePlaintext(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.TAG_OPEN: {
|
||
|
this._stateTagOpen(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.END_TAG_OPEN: {
|
||
|
this._stateEndTagOpen(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.TAG_NAME: {
|
||
|
this._stateTagName(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.RCDATA_LESS_THAN_SIGN: {
|
||
|
this._stateRcdataLessThanSign(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.RCDATA_END_TAG_OPEN: {
|
||
|
this._stateRcdataEndTagOpen(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.RCDATA_END_TAG_NAME: {
|
||
|
this._stateRcdataEndTagName(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.RAWTEXT_LESS_THAN_SIGN: {
|
||
|
this._stateRawtextLessThanSign(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.RAWTEXT_END_TAG_OPEN: {
|
||
|
this._stateRawtextEndTagOpen(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.RAWTEXT_END_TAG_NAME: {
|
||
|
this._stateRawtextEndTagName(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.SCRIPT_DATA_LESS_THAN_SIGN: {
|
||
|
this._stateScriptDataLessThanSign(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.SCRIPT_DATA_END_TAG_OPEN: {
|
||
|
this._stateScriptDataEndTagOpen(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.SCRIPT_DATA_END_TAG_NAME: {
|
||
|
this._stateScriptDataEndTagName(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.SCRIPT_DATA_ESCAPE_START: {
|
||
|
this._stateScriptDataEscapeStart(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.SCRIPT_DATA_ESCAPE_START_DASH: {
|
||
|
this._stateScriptDataEscapeStartDash(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.SCRIPT_DATA_ESCAPED: {
|
||
|
this._stateScriptDataEscaped(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.SCRIPT_DATA_ESCAPED_DASH: {
|
||
|
this._stateScriptDataEscapedDash(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.SCRIPT_DATA_ESCAPED_DASH_DASH: {
|
||
|
this._stateScriptDataEscapedDashDash(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN: {
|
||
|
this._stateScriptDataEscapedLessThanSign(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN: {
|
||
|
this._stateScriptDataEscapedEndTagOpen(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.SCRIPT_DATA_ESCAPED_END_TAG_NAME: {
|
||
|
this._stateScriptDataEscapedEndTagName(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.SCRIPT_DATA_DOUBLE_ESCAPE_START: {
|
||
|
this._stateScriptDataDoubleEscapeStart(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.SCRIPT_DATA_DOUBLE_ESCAPED: {
|
||
|
this._stateScriptDataDoubleEscaped(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH: {
|
||
|
this._stateScriptDataDoubleEscapedDash(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH: {
|
||
|
this._stateScriptDataDoubleEscapedDashDash(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN: {
|
||
|
this._stateScriptDataDoubleEscapedLessThanSign(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.SCRIPT_DATA_DOUBLE_ESCAPE_END: {
|
||
|
this._stateScriptDataDoubleEscapeEnd(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.BEFORE_ATTRIBUTE_NAME: {
|
||
|
this._stateBeforeAttributeName(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.ATTRIBUTE_NAME: {
|
||
|
this._stateAttributeName(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.AFTER_ATTRIBUTE_NAME: {
|
||
|
this._stateAfterAttributeName(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.BEFORE_ATTRIBUTE_VALUE: {
|
||
|
this._stateBeforeAttributeValue(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.ATTRIBUTE_VALUE_DOUBLE_QUOTED: {
|
||
|
this._stateAttributeValueDoubleQuoted(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.ATTRIBUTE_VALUE_SINGLE_QUOTED: {
|
||
|
this._stateAttributeValueSingleQuoted(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.ATTRIBUTE_VALUE_UNQUOTED: {
|
||
|
this._stateAttributeValueUnquoted(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.AFTER_ATTRIBUTE_VALUE_QUOTED: {
|
||
|
this._stateAfterAttributeValueQuoted(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.SELF_CLOSING_START_TAG: {
|
||
|
this._stateSelfClosingStartTag(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.BOGUS_COMMENT: {
|
||
|
this._stateBogusComment(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.MARKUP_DECLARATION_OPEN: {
|
||
|
this._stateMarkupDeclarationOpen(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.COMMENT_START: {
|
||
|
this._stateCommentStart(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.COMMENT_START_DASH: {
|
||
|
this._stateCommentStartDash(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.COMMENT: {
|
||
|
this._stateComment(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.COMMENT_LESS_THAN_SIGN: {
|
||
|
this._stateCommentLessThanSign(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.COMMENT_LESS_THAN_SIGN_BANG: {
|
||
|
this._stateCommentLessThanSignBang(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.COMMENT_LESS_THAN_SIGN_BANG_DASH: {
|
||
|
this._stateCommentLessThanSignBangDash(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH: {
|
||
|
this._stateCommentLessThanSignBangDashDash(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.COMMENT_END_DASH: {
|
||
|
this._stateCommentEndDash(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.COMMENT_END: {
|
||
|
this._stateCommentEnd(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.COMMENT_END_BANG: {
|
||
|
this._stateCommentEndBang(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.DOCTYPE: {
|
||
|
this._stateDoctype(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.BEFORE_DOCTYPE_NAME: {
|
||
|
this._stateBeforeDoctypeName(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.DOCTYPE_NAME: {
|
||
|
this._stateDoctypeName(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.AFTER_DOCTYPE_NAME: {
|
||
|
this._stateAfterDoctypeName(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.AFTER_DOCTYPE_PUBLIC_KEYWORD: {
|
||
|
this._stateAfterDoctypePublicKeyword(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER: {
|
||
|
this._stateBeforeDoctypePublicIdentifier(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED: {
|
||
|
this._stateDoctypePublicIdentifierDoubleQuoted(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED: {
|
||
|
this._stateDoctypePublicIdentifierSingleQuoted(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER: {
|
||
|
this._stateAfterDoctypePublicIdentifier(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS: {
|
||
|
this._stateBetweenDoctypePublicAndSystemIdentifiers(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.AFTER_DOCTYPE_SYSTEM_KEYWORD: {
|
||
|
this._stateAfterDoctypeSystemKeyword(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER: {
|
||
|
this._stateBeforeDoctypeSystemIdentifier(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED: {
|
||
|
this._stateDoctypeSystemIdentifierDoubleQuoted(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED: {
|
||
|
this._stateDoctypeSystemIdentifierSingleQuoted(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER: {
|
||
|
this._stateAfterDoctypeSystemIdentifier(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.BOGUS_DOCTYPE: {
|
||
|
this._stateBogusDoctype(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.CDATA_SECTION: {
|
||
|
this._stateCdataSection(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.CDATA_SECTION_BRACKET: {
|
||
|
this._stateCdataSectionBracket(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.CDATA_SECTION_END: {
|
||
|
this._stateCdataSectionEnd(cp);
|
||
|
break;
|
||
|
}
|
||
|
case State.CHARACTER_REFERENCE: {
|
||
|
this._stateCharacterReference();
|
||
|
break;
|
||
|
}
|
||
|
case State.AMBIGUOUS_AMPERSAND: {
|
||
|
this._stateAmbiguousAmpersand(cp);
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
throw new Error('Unknown state');
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// State machine
|
||
|
// Data state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateData(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.LESS_THAN_SIGN: {
|
||
|
this.state = State.TAG_OPEN;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.AMPERSAND: {
|
||
|
this._startCharacterReference();
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
this._emitCodePoint(cp);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._emitCodePoint(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// RCDATA state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateRcdata(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.AMPERSAND: {
|
||
|
this._startCharacterReference();
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.LESS_THAN_SIGN: {
|
||
|
this.state = State.RCDATA_LESS_THAN_SIGN;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
this._emitChars(REPLACEMENT_CHARACTER);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._emitCodePoint(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// RAWTEXT state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateRawtext(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.LESS_THAN_SIGN: {
|
||
|
this.state = State.RAWTEXT_LESS_THAN_SIGN;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
this._emitChars(REPLACEMENT_CHARACTER);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._emitCodePoint(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Script data state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateScriptData(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.LESS_THAN_SIGN: {
|
||
|
this.state = State.SCRIPT_DATA_LESS_THAN_SIGN;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
this._emitChars(REPLACEMENT_CHARACTER);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._emitCodePoint(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// PLAINTEXT state
|
||
|
//------------------------------------------------------------------
|
||
|
_statePlaintext(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
this._emitChars(REPLACEMENT_CHARACTER);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._emitCodePoint(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Tag open state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateTagOpen(cp) {
|
||
|
if (isAsciiLetter(cp)) {
|
||
|
this._createStartTagToken();
|
||
|
this.state = State.TAG_NAME;
|
||
|
this._stateTagName(cp);
|
||
|
}
|
||
|
else
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.EXCLAMATION_MARK: {
|
||
|
this.state = State.MARKUP_DECLARATION_OPEN;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.SOLIDUS: {
|
||
|
this.state = State.END_TAG_OPEN;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.QUESTION_MARK: {
|
||
|
this._err(ERR.unexpectedQuestionMarkInsteadOfTagName);
|
||
|
this._createCommentToken(1);
|
||
|
this.state = State.BOGUS_COMMENT;
|
||
|
this._stateBogusComment(cp);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofBeforeTagName);
|
||
|
this._emitChars('<');
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._err(ERR.invalidFirstCharacterOfTagName);
|
||
|
this._emitChars('<');
|
||
|
this.state = State.DATA;
|
||
|
this._stateData(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// End tag open state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateEndTagOpen(cp) {
|
||
|
if (isAsciiLetter(cp)) {
|
||
|
this._createEndTagToken();
|
||
|
this.state = State.TAG_NAME;
|
||
|
this._stateTagName(cp);
|
||
|
}
|
||
|
else
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this._err(ERR.missingEndTagName);
|
||
|
this.state = State.DATA;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofBeforeTagName);
|
||
|
this._emitChars('</');
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._err(ERR.invalidFirstCharacterOfTagName);
|
||
|
this._createCommentToken(2);
|
||
|
this.state = State.BOGUS_COMMENT;
|
||
|
this._stateBogusComment(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Tag name state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateTagName(cp) {
|
||
|
const token = this.currentToken;
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.SPACE:
|
||
|
case CODE_POINTS.LINE_FEED:
|
||
|
case CODE_POINTS.TABULATION:
|
||
|
case CODE_POINTS.FORM_FEED: {
|
||
|
this.state = State.BEFORE_ATTRIBUTE_NAME;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.SOLIDUS: {
|
||
|
this.state = State.SELF_CLOSING_START_TAG;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this.state = State.DATA;
|
||
|
this.emitCurrentTagToken();
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
token.tagName += REPLACEMENT_CHARACTER;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInTag);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
token.tagName += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// RCDATA less-than sign state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateRcdataLessThanSign(cp) {
|
||
|
if (cp === CODE_POINTS.SOLIDUS) {
|
||
|
this.state = State.RCDATA_END_TAG_OPEN;
|
||
|
}
|
||
|
else {
|
||
|
this._emitChars('<');
|
||
|
this.state = State.RCDATA;
|
||
|
this._stateRcdata(cp);
|
||
|
}
|
||
|
}
|
||
|
// RCDATA end tag open state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateRcdataEndTagOpen(cp) {
|
||
|
if (isAsciiLetter(cp)) {
|
||
|
this.state = State.RCDATA_END_TAG_NAME;
|
||
|
this._stateRcdataEndTagName(cp);
|
||
|
}
|
||
|
else {
|
||
|
this._emitChars('</');
|
||
|
this.state = State.RCDATA;
|
||
|
this._stateRcdata(cp);
|
||
|
}
|
||
|
}
|
||
|
handleSpecialEndTag(_cp) {
|
||
|
if (!this.preprocessor.startsWith(this.lastStartTagName, false)) {
|
||
|
return !this._ensureHibernation();
|
||
|
}
|
||
|
this._createEndTagToken();
|
||
|
const token = this.currentToken;
|
||
|
token.tagName = this.lastStartTagName;
|
||
|
const cp = this.preprocessor.peek(this.lastStartTagName.length);
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.SPACE:
|
||
|
case CODE_POINTS.LINE_FEED:
|
||
|
case CODE_POINTS.TABULATION:
|
||
|
case CODE_POINTS.FORM_FEED: {
|
||
|
this._advanceBy(this.lastStartTagName.length);
|
||
|
this.state = State.BEFORE_ATTRIBUTE_NAME;
|
||
|
return false;
|
||
|
}
|
||
|
case CODE_POINTS.SOLIDUS: {
|
||
|
this._advanceBy(this.lastStartTagName.length);
|
||
|
this.state = State.SELF_CLOSING_START_TAG;
|
||
|
return false;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this._advanceBy(this.lastStartTagName.length);
|
||
|
this.emitCurrentTagToken();
|
||
|
this.state = State.DATA;
|
||
|
return false;
|
||
|
}
|
||
|
default: {
|
||
|
return !this._ensureHibernation();
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// RCDATA end tag name state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateRcdataEndTagName(cp) {
|
||
|
if (this.handleSpecialEndTag(cp)) {
|
||
|
this._emitChars('</');
|
||
|
this.state = State.RCDATA;
|
||
|
this._stateRcdata(cp);
|
||
|
}
|
||
|
}
|
||
|
// RAWTEXT less-than sign state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateRawtextLessThanSign(cp) {
|
||
|
if (cp === CODE_POINTS.SOLIDUS) {
|
||
|
this.state = State.RAWTEXT_END_TAG_OPEN;
|
||
|
}
|
||
|
else {
|
||
|
this._emitChars('<');
|
||
|
this.state = State.RAWTEXT;
|
||
|
this._stateRawtext(cp);
|
||
|
}
|
||
|
}
|
||
|
// RAWTEXT end tag open state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateRawtextEndTagOpen(cp) {
|
||
|
if (isAsciiLetter(cp)) {
|
||
|
this.state = State.RAWTEXT_END_TAG_NAME;
|
||
|
this._stateRawtextEndTagName(cp);
|
||
|
}
|
||
|
else {
|
||
|
this._emitChars('</');
|
||
|
this.state = State.RAWTEXT;
|
||
|
this._stateRawtext(cp);
|
||
|
}
|
||
|
}
|
||
|
// RAWTEXT end tag name state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateRawtextEndTagName(cp) {
|
||
|
if (this.handleSpecialEndTag(cp)) {
|
||
|
this._emitChars('</');
|
||
|
this.state = State.RAWTEXT;
|
||
|
this._stateRawtext(cp);
|
||
|
}
|
||
|
}
|
||
|
// Script data less-than sign state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateScriptDataLessThanSign(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.SOLIDUS: {
|
||
|
this.state = State.SCRIPT_DATA_END_TAG_OPEN;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EXCLAMATION_MARK: {
|
||
|
this.state = State.SCRIPT_DATA_ESCAPE_START;
|
||
|
this._emitChars('<!');
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._emitChars('<');
|
||
|
this.state = State.SCRIPT_DATA;
|
||
|
this._stateScriptData(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Script data end tag open state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateScriptDataEndTagOpen(cp) {
|
||
|
if (isAsciiLetter(cp)) {
|
||
|
this.state = State.SCRIPT_DATA_END_TAG_NAME;
|
||
|
this._stateScriptDataEndTagName(cp);
|
||
|
}
|
||
|
else {
|
||
|
this._emitChars('</');
|
||
|
this.state = State.SCRIPT_DATA;
|
||
|
this._stateScriptData(cp);
|
||
|
}
|
||
|
}
|
||
|
// Script data end tag name state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateScriptDataEndTagName(cp) {
|
||
|
if (this.handleSpecialEndTag(cp)) {
|
||
|
this._emitChars('</');
|
||
|
this.state = State.SCRIPT_DATA;
|
||
|
this._stateScriptData(cp);
|
||
|
}
|
||
|
}
|
||
|
// Script data escape start state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateScriptDataEscapeStart(cp) {
|
||
|
if (cp === CODE_POINTS.HYPHEN_MINUS) {
|
||
|
this.state = State.SCRIPT_DATA_ESCAPE_START_DASH;
|
||
|
this._emitChars('-');
|
||
|
}
|
||
|
else {
|
||
|
this.state = State.SCRIPT_DATA;
|
||
|
this._stateScriptData(cp);
|
||
|
}
|
||
|
}
|
||
|
// Script data escape start dash state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateScriptDataEscapeStartDash(cp) {
|
||
|
if (cp === CODE_POINTS.HYPHEN_MINUS) {
|
||
|
this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH;
|
||
|
this._emitChars('-');
|
||
|
}
|
||
|
else {
|
||
|
this.state = State.SCRIPT_DATA;
|
||
|
this._stateScriptData(cp);
|
||
|
}
|
||
|
}
|
||
|
// Script data escaped state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateScriptDataEscaped(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.HYPHEN_MINUS: {
|
||
|
this.state = State.SCRIPT_DATA_ESCAPED_DASH;
|
||
|
this._emitChars('-');
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.LESS_THAN_SIGN: {
|
||
|
this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
this._emitChars(REPLACEMENT_CHARACTER);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInScriptHtmlCommentLikeText);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._emitCodePoint(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Script data escaped dash state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateScriptDataEscapedDash(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.HYPHEN_MINUS: {
|
||
|
this.state = State.SCRIPT_DATA_ESCAPED_DASH_DASH;
|
||
|
this._emitChars('-');
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.LESS_THAN_SIGN: {
|
||
|
this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
this.state = State.SCRIPT_DATA_ESCAPED;
|
||
|
this._emitChars(REPLACEMENT_CHARACTER);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInScriptHtmlCommentLikeText);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this.state = State.SCRIPT_DATA_ESCAPED;
|
||
|
this._emitCodePoint(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Script data escaped dash dash state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateScriptDataEscapedDashDash(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.HYPHEN_MINUS: {
|
||
|
this._emitChars('-');
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.LESS_THAN_SIGN: {
|
||
|
this.state = State.SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this.state = State.SCRIPT_DATA;
|
||
|
this._emitChars('>');
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
this.state = State.SCRIPT_DATA_ESCAPED;
|
||
|
this._emitChars(REPLACEMENT_CHARACTER);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInScriptHtmlCommentLikeText);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this.state = State.SCRIPT_DATA_ESCAPED;
|
||
|
this._emitCodePoint(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Script data escaped less-than sign state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateScriptDataEscapedLessThanSign(cp) {
|
||
|
if (cp === CODE_POINTS.SOLIDUS) {
|
||
|
this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_OPEN;
|
||
|
}
|
||
|
else if (isAsciiLetter(cp)) {
|
||
|
this._emitChars('<');
|
||
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_START;
|
||
|
this._stateScriptDataDoubleEscapeStart(cp);
|
||
|
}
|
||
|
else {
|
||
|
this._emitChars('<');
|
||
|
this.state = State.SCRIPT_DATA_ESCAPED;
|
||
|
this._stateScriptDataEscaped(cp);
|
||
|
}
|
||
|
}
|
||
|
// Script data escaped end tag open state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateScriptDataEscapedEndTagOpen(cp) {
|
||
|
if (isAsciiLetter(cp)) {
|
||
|
this.state = State.SCRIPT_DATA_ESCAPED_END_TAG_NAME;
|
||
|
this._stateScriptDataEscapedEndTagName(cp);
|
||
|
}
|
||
|
else {
|
||
|
this._emitChars('</');
|
||
|
this.state = State.SCRIPT_DATA_ESCAPED;
|
||
|
this._stateScriptDataEscaped(cp);
|
||
|
}
|
||
|
}
|
||
|
// Script data escaped end tag name state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateScriptDataEscapedEndTagName(cp) {
|
||
|
if (this.handleSpecialEndTag(cp)) {
|
||
|
this._emitChars('</');
|
||
|
this.state = State.SCRIPT_DATA_ESCAPED;
|
||
|
this._stateScriptDataEscaped(cp);
|
||
|
}
|
||
|
}
|
||
|
// Script data double escape start state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateScriptDataDoubleEscapeStart(cp) {
|
||
|
if (this.preprocessor.startsWith(SEQUENCES.SCRIPT, false) &&
|
||
|
isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek(SEQUENCES.SCRIPT.length))) {
|
||
|
this._emitCodePoint(cp);
|
||
|
for (let i = 0; i < SEQUENCES.SCRIPT.length; i++) {
|
||
|
this._emitCodePoint(this._consume());
|
||
|
}
|
||
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
|
||
|
}
|
||
|
else if (!this._ensureHibernation()) {
|
||
|
this.state = State.SCRIPT_DATA_ESCAPED;
|
||
|
this._stateScriptDataEscaped(cp);
|
||
|
}
|
||
|
}
|
||
|
// Script data double escaped state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateScriptDataDoubleEscaped(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.HYPHEN_MINUS: {
|
||
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH;
|
||
|
this._emitChars('-');
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.LESS_THAN_SIGN: {
|
||
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
|
||
|
this._emitChars('<');
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
this._emitChars(REPLACEMENT_CHARACTER);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInScriptHtmlCommentLikeText);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._emitCodePoint(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Script data double escaped dash state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateScriptDataDoubleEscapedDash(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.HYPHEN_MINUS: {
|
||
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH;
|
||
|
this._emitChars('-');
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.LESS_THAN_SIGN: {
|
||
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
|
||
|
this._emitChars('<');
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
|
||
|
this._emitChars(REPLACEMENT_CHARACTER);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInScriptHtmlCommentLikeText);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
|
||
|
this._emitCodePoint(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Script data double escaped dash dash state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateScriptDataDoubleEscapedDashDash(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.HYPHEN_MINUS: {
|
||
|
this._emitChars('-');
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.LESS_THAN_SIGN: {
|
||
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN;
|
||
|
this._emitChars('<');
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this.state = State.SCRIPT_DATA;
|
||
|
this._emitChars('>');
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
|
||
|
this._emitChars(REPLACEMENT_CHARACTER);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInScriptHtmlCommentLikeText);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
|
||
|
this._emitCodePoint(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Script data double escaped less-than sign state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateScriptDataDoubleEscapedLessThanSign(cp) {
|
||
|
if (cp === CODE_POINTS.SOLIDUS) {
|
||
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPE_END;
|
||
|
this._emitChars('/');
|
||
|
}
|
||
|
else {
|
||
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
|
||
|
this._stateScriptDataDoubleEscaped(cp);
|
||
|
}
|
||
|
}
|
||
|
// Script data double escape end state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateScriptDataDoubleEscapeEnd(cp) {
|
||
|
if (this.preprocessor.startsWith(SEQUENCES.SCRIPT, false) &&
|
||
|
isScriptDataDoubleEscapeSequenceEnd(this.preprocessor.peek(SEQUENCES.SCRIPT.length))) {
|
||
|
this._emitCodePoint(cp);
|
||
|
for (let i = 0; i < SEQUENCES.SCRIPT.length; i++) {
|
||
|
this._emitCodePoint(this._consume());
|
||
|
}
|
||
|
this.state = State.SCRIPT_DATA_ESCAPED;
|
||
|
}
|
||
|
else if (!this._ensureHibernation()) {
|
||
|
this.state = State.SCRIPT_DATA_DOUBLE_ESCAPED;
|
||
|
this._stateScriptDataDoubleEscaped(cp);
|
||
|
}
|
||
|
}
|
||
|
// Before attribute name state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateBeforeAttributeName(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.SPACE:
|
||
|
case CODE_POINTS.LINE_FEED:
|
||
|
case CODE_POINTS.TABULATION:
|
||
|
case CODE_POINTS.FORM_FEED: {
|
||
|
// Ignore whitespace
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.SOLIDUS:
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN:
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this.state = State.AFTER_ATTRIBUTE_NAME;
|
||
|
this._stateAfterAttributeName(cp);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EQUALS_SIGN: {
|
||
|
this._err(ERR.unexpectedEqualsSignBeforeAttributeName);
|
||
|
this._createAttr('=');
|
||
|
this.state = State.ATTRIBUTE_NAME;
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._createAttr('');
|
||
|
this.state = State.ATTRIBUTE_NAME;
|
||
|
this._stateAttributeName(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Attribute name state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateAttributeName(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.SPACE:
|
||
|
case CODE_POINTS.LINE_FEED:
|
||
|
case CODE_POINTS.TABULATION:
|
||
|
case CODE_POINTS.FORM_FEED:
|
||
|
case CODE_POINTS.SOLIDUS:
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN:
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._leaveAttrName();
|
||
|
this.state = State.AFTER_ATTRIBUTE_NAME;
|
||
|
this._stateAfterAttributeName(cp);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EQUALS_SIGN: {
|
||
|
this._leaveAttrName();
|
||
|
this.state = State.BEFORE_ATTRIBUTE_VALUE;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.QUOTATION_MARK:
|
||
|
case CODE_POINTS.APOSTROPHE:
|
||
|
case CODE_POINTS.LESS_THAN_SIGN: {
|
||
|
this._err(ERR.unexpectedCharacterInAttributeName);
|
||
|
this.currentAttr.name += String.fromCodePoint(cp);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
this.currentAttr.name += REPLACEMENT_CHARACTER;
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this.currentAttr.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// After attribute name state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateAfterAttributeName(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.SPACE:
|
||
|
case CODE_POINTS.LINE_FEED:
|
||
|
case CODE_POINTS.TABULATION:
|
||
|
case CODE_POINTS.FORM_FEED: {
|
||
|
// Ignore whitespace
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.SOLIDUS: {
|
||
|
this.state = State.SELF_CLOSING_START_TAG;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EQUALS_SIGN: {
|
||
|
this.state = State.BEFORE_ATTRIBUTE_VALUE;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this.state = State.DATA;
|
||
|
this.emitCurrentTagToken();
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInTag);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._createAttr('');
|
||
|
this.state = State.ATTRIBUTE_NAME;
|
||
|
this._stateAttributeName(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Before attribute value state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateBeforeAttributeValue(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.SPACE:
|
||
|
case CODE_POINTS.LINE_FEED:
|
||
|
case CODE_POINTS.TABULATION:
|
||
|
case CODE_POINTS.FORM_FEED: {
|
||
|
// Ignore whitespace
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.QUOTATION_MARK: {
|
||
|
this.state = State.ATTRIBUTE_VALUE_DOUBLE_QUOTED;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.APOSTROPHE: {
|
||
|
this.state = State.ATTRIBUTE_VALUE_SINGLE_QUOTED;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this._err(ERR.missingAttributeValue);
|
||
|
this.state = State.DATA;
|
||
|
this.emitCurrentTagToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this.state = State.ATTRIBUTE_VALUE_UNQUOTED;
|
||
|
this._stateAttributeValueUnquoted(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Attribute value (double-quoted) state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateAttributeValueDoubleQuoted(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.QUOTATION_MARK: {
|
||
|
this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.AMPERSAND: {
|
||
|
this._startCharacterReference();
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
this.currentAttr.value += REPLACEMENT_CHARACTER;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInTag);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this.currentAttr.value += String.fromCodePoint(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Attribute value (single-quoted) state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateAttributeValueSingleQuoted(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.APOSTROPHE: {
|
||
|
this.state = State.AFTER_ATTRIBUTE_VALUE_QUOTED;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.AMPERSAND: {
|
||
|
this._startCharacterReference();
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
this.currentAttr.value += REPLACEMENT_CHARACTER;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInTag);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this.currentAttr.value += String.fromCodePoint(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Attribute value (unquoted) state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateAttributeValueUnquoted(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.SPACE:
|
||
|
case CODE_POINTS.LINE_FEED:
|
||
|
case CODE_POINTS.TABULATION:
|
||
|
case CODE_POINTS.FORM_FEED: {
|
||
|
this._leaveAttrValue();
|
||
|
this.state = State.BEFORE_ATTRIBUTE_NAME;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.AMPERSAND: {
|
||
|
this._startCharacterReference();
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this._leaveAttrValue();
|
||
|
this.state = State.DATA;
|
||
|
this.emitCurrentTagToken();
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
this.currentAttr.value += REPLACEMENT_CHARACTER;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.QUOTATION_MARK:
|
||
|
case CODE_POINTS.APOSTROPHE:
|
||
|
case CODE_POINTS.LESS_THAN_SIGN:
|
||
|
case CODE_POINTS.EQUALS_SIGN:
|
||
|
case CODE_POINTS.GRAVE_ACCENT: {
|
||
|
this._err(ERR.unexpectedCharacterInUnquotedAttributeValue);
|
||
|
this.currentAttr.value += String.fromCodePoint(cp);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInTag);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this.currentAttr.value += String.fromCodePoint(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// After attribute value (quoted) state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateAfterAttributeValueQuoted(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.SPACE:
|
||
|
case CODE_POINTS.LINE_FEED:
|
||
|
case CODE_POINTS.TABULATION:
|
||
|
case CODE_POINTS.FORM_FEED: {
|
||
|
this._leaveAttrValue();
|
||
|
this.state = State.BEFORE_ATTRIBUTE_NAME;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.SOLIDUS: {
|
||
|
this._leaveAttrValue();
|
||
|
this.state = State.SELF_CLOSING_START_TAG;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this._leaveAttrValue();
|
||
|
this.state = State.DATA;
|
||
|
this.emitCurrentTagToken();
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInTag);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._err(ERR.missingWhitespaceBetweenAttributes);
|
||
|
this.state = State.BEFORE_ATTRIBUTE_NAME;
|
||
|
this._stateBeforeAttributeName(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Self-closing start tag state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateSelfClosingStartTag(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
const token = this.currentToken;
|
||
|
token.selfClosing = true;
|
||
|
this.state = State.DATA;
|
||
|
this.emitCurrentTagToken();
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInTag);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._err(ERR.unexpectedSolidusInTag);
|
||
|
this.state = State.BEFORE_ATTRIBUTE_NAME;
|
||
|
this._stateBeforeAttributeName(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Bogus comment state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateBogusComment(cp) {
|
||
|
const token = this.currentToken;
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this.state = State.DATA;
|
||
|
this.emitCurrentComment(token);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this.emitCurrentComment(token);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
token.data += REPLACEMENT_CHARACTER;
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
token.data += String.fromCodePoint(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Markup declaration open state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateMarkupDeclarationOpen(cp) {
|
||
|
if (this._consumeSequenceIfMatch(SEQUENCES.DASH_DASH, true)) {
|
||
|
this._createCommentToken(SEQUENCES.DASH_DASH.length + 1);
|
||
|
this.state = State.COMMENT_START;
|
||
|
}
|
||
|
else if (this._consumeSequenceIfMatch(SEQUENCES.DOCTYPE, false)) {
|
||
|
// NOTE: Doctypes tokens are created without fixed offsets. We keep track of the moment a doctype *might* start here.
|
||
|
this.currentLocation = this.getCurrentLocation(SEQUENCES.DOCTYPE.length + 1);
|
||
|
this.state = State.DOCTYPE;
|
||
|
}
|
||
|
else if (this._consumeSequenceIfMatch(SEQUENCES.CDATA_START, true)) {
|
||
|
if (this.inForeignNode) {
|
||
|
this.state = State.CDATA_SECTION;
|
||
|
}
|
||
|
else {
|
||
|
this._err(ERR.cdataInHtmlContent);
|
||
|
this._createCommentToken(SEQUENCES.CDATA_START.length + 1);
|
||
|
this.currentToken.data = '[CDATA[';
|
||
|
this.state = State.BOGUS_COMMENT;
|
||
|
}
|
||
|
}
|
||
|
//NOTE: Sequence lookups can be abrupted by hibernation. In that case, lookup
|
||
|
//results are no longer valid and we will need to start over.
|
||
|
else if (!this._ensureHibernation()) {
|
||
|
this._err(ERR.incorrectlyOpenedComment);
|
||
|
this._createCommentToken(2);
|
||
|
this.state = State.BOGUS_COMMENT;
|
||
|
this._stateBogusComment(cp);
|
||
|
}
|
||
|
}
|
||
|
// Comment start state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateCommentStart(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.HYPHEN_MINUS: {
|
||
|
this.state = State.COMMENT_START_DASH;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this._err(ERR.abruptClosingOfEmptyComment);
|
||
|
this.state = State.DATA;
|
||
|
const token = this.currentToken;
|
||
|
this.emitCurrentComment(token);
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this.state = State.COMMENT;
|
||
|
this._stateComment(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Comment start dash state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateCommentStartDash(cp) {
|
||
|
const token = this.currentToken;
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.HYPHEN_MINUS: {
|
||
|
this.state = State.COMMENT_END;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this._err(ERR.abruptClosingOfEmptyComment);
|
||
|
this.state = State.DATA;
|
||
|
this.emitCurrentComment(token);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInComment);
|
||
|
this.emitCurrentComment(token);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
token.data += '-';
|
||
|
this.state = State.COMMENT;
|
||
|
this._stateComment(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Comment state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateComment(cp) {
|
||
|
const token = this.currentToken;
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.HYPHEN_MINUS: {
|
||
|
this.state = State.COMMENT_END_DASH;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.LESS_THAN_SIGN: {
|
||
|
token.data += '<';
|
||
|
this.state = State.COMMENT_LESS_THAN_SIGN;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
token.data += REPLACEMENT_CHARACTER;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInComment);
|
||
|
this.emitCurrentComment(token);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
token.data += String.fromCodePoint(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Comment less-than sign state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateCommentLessThanSign(cp) {
|
||
|
const token = this.currentToken;
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.EXCLAMATION_MARK: {
|
||
|
token.data += '!';
|
||
|
this.state = State.COMMENT_LESS_THAN_SIGN_BANG;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.LESS_THAN_SIGN: {
|
||
|
token.data += '<';
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this.state = State.COMMENT;
|
||
|
this._stateComment(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Comment less-than sign bang state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateCommentLessThanSignBang(cp) {
|
||
|
if (cp === CODE_POINTS.HYPHEN_MINUS) {
|
||
|
this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH;
|
||
|
}
|
||
|
else {
|
||
|
this.state = State.COMMENT;
|
||
|
this._stateComment(cp);
|
||
|
}
|
||
|
}
|
||
|
// Comment less-than sign bang dash state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateCommentLessThanSignBangDash(cp) {
|
||
|
if (cp === CODE_POINTS.HYPHEN_MINUS) {
|
||
|
this.state = State.COMMENT_LESS_THAN_SIGN_BANG_DASH_DASH;
|
||
|
}
|
||
|
else {
|
||
|
this.state = State.COMMENT_END_DASH;
|
||
|
this._stateCommentEndDash(cp);
|
||
|
}
|
||
|
}
|
||
|
// Comment less-than sign bang dash dash state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateCommentLessThanSignBangDashDash(cp) {
|
||
|
if (cp !== CODE_POINTS.GREATER_THAN_SIGN && cp !== CODE_POINTS.EOF) {
|
||
|
this._err(ERR.nestedComment);
|
||
|
}
|
||
|
this.state = State.COMMENT_END;
|
||
|
this._stateCommentEnd(cp);
|
||
|
}
|
||
|
// Comment end dash state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateCommentEndDash(cp) {
|
||
|
const token = this.currentToken;
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.HYPHEN_MINUS: {
|
||
|
this.state = State.COMMENT_END;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInComment);
|
||
|
this.emitCurrentComment(token);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
token.data += '-';
|
||
|
this.state = State.COMMENT;
|
||
|
this._stateComment(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Comment end state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateCommentEnd(cp) {
|
||
|
const token = this.currentToken;
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this.state = State.DATA;
|
||
|
this.emitCurrentComment(token);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EXCLAMATION_MARK: {
|
||
|
this.state = State.COMMENT_END_BANG;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.HYPHEN_MINUS: {
|
||
|
token.data += '-';
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInComment);
|
||
|
this.emitCurrentComment(token);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
token.data += '--';
|
||
|
this.state = State.COMMENT;
|
||
|
this._stateComment(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Comment end bang state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateCommentEndBang(cp) {
|
||
|
const token = this.currentToken;
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.HYPHEN_MINUS: {
|
||
|
token.data += '--!';
|
||
|
this.state = State.COMMENT_END_DASH;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this._err(ERR.incorrectlyClosedComment);
|
||
|
this.state = State.DATA;
|
||
|
this.emitCurrentComment(token);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInComment);
|
||
|
this.emitCurrentComment(token);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
token.data += '--!';
|
||
|
this.state = State.COMMENT;
|
||
|
this._stateComment(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// DOCTYPE state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateDoctype(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.SPACE:
|
||
|
case CODE_POINTS.LINE_FEED:
|
||
|
case CODE_POINTS.TABULATION:
|
||
|
case CODE_POINTS.FORM_FEED: {
|
||
|
this.state = State.BEFORE_DOCTYPE_NAME;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this.state = State.BEFORE_DOCTYPE_NAME;
|
||
|
this._stateBeforeDoctypeName(cp);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInDoctype);
|
||
|
this._createDoctypeToken(null);
|
||
|
const token = this.currentToken;
|
||
|
token.forceQuirks = true;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._err(ERR.missingWhitespaceBeforeDoctypeName);
|
||
|
this.state = State.BEFORE_DOCTYPE_NAME;
|
||
|
this._stateBeforeDoctypeName(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Before DOCTYPE name state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateBeforeDoctypeName(cp) {
|
||
|
if (isAsciiUpper(cp)) {
|
||
|
this._createDoctypeToken(String.fromCharCode(toAsciiLower(cp)));
|
||
|
this.state = State.DOCTYPE_NAME;
|
||
|
}
|
||
|
else
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.SPACE:
|
||
|
case CODE_POINTS.LINE_FEED:
|
||
|
case CODE_POINTS.TABULATION:
|
||
|
case CODE_POINTS.FORM_FEED: {
|
||
|
// Ignore whitespace
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
this._createDoctypeToken(REPLACEMENT_CHARACTER);
|
||
|
this.state = State.DOCTYPE_NAME;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this._err(ERR.missingDoctypeName);
|
||
|
this._createDoctypeToken(null);
|
||
|
const token = this.currentToken;
|
||
|
token.forceQuirks = true;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this.state = State.DATA;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInDoctype);
|
||
|
this._createDoctypeToken(null);
|
||
|
const token = this.currentToken;
|
||
|
token.forceQuirks = true;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._createDoctypeToken(String.fromCodePoint(cp));
|
||
|
this.state = State.DOCTYPE_NAME;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// DOCTYPE name state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateDoctypeName(cp) {
|
||
|
const token = this.currentToken;
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.SPACE:
|
||
|
case CODE_POINTS.LINE_FEED:
|
||
|
case CODE_POINTS.TABULATION:
|
||
|
case CODE_POINTS.FORM_FEED: {
|
||
|
this.state = State.AFTER_DOCTYPE_NAME;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this.state = State.DATA;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
token.name += REPLACEMENT_CHARACTER;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInDoctype);
|
||
|
token.forceQuirks = true;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
token.name += String.fromCodePoint(isAsciiUpper(cp) ? toAsciiLower(cp) : cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// After DOCTYPE name state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateAfterDoctypeName(cp) {
|
||
|
const token = this.currentToken;
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.SPACE:
|
||
|
case CODE_POINTS.LINE_FEED:
|
||
|
case CODE_POINTS.TABULATION:
|
||
|
case CODE_POINTS.FORM_FEED: {
|
||
|
// Ignore whitespace
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this.state = State.DATA;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInDoctype);
|
||
|
token.forceQuirks = true;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
if (this._consumeSequenceIfMatch(SEQUENCES.PUBLIC, false)) {
|
||
|
this.state = State.AFTER_DOCTYPE_PUBLIC_KEYWORD;
|
||
|
}
|
||
|
else if (this._consumeSequenceIfMatch(SEQUENCES.SYSTEM, false)) {
|
||
|
this.state = State.AFTER_DOCTYPE_SYSTEM_KEYWORD;
|
||
|
}
|
||
|
//NOTE: sequence lookup can be abrupted by hibernation. In that case lookup
|
||
|
//results are no longer valid and we will need to start over.
|
||
|
else if (!this._ensureHibernation()) {
|
||
|
this._err(ERR.invalidCharacterSequenceAfterDoctypeName);
|
||
|
token.forceQuirks = true;
|
||
|
this.state = State.BOGUS_DOCTYPE;
|
||
|
this._stateBogusDoctype(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// After DOCTYPE public keyword state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateAfterDoctypePublicKeyword(cp) {
|
||
|
const token = this.currentToken;
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.SPACE:
|
||
|
case CODE_POINTS.LINE_FEED:
|
||
|
case CODE_POINTS.TABULATION:
|
||
|
case CODE_POINTS.FORM_FEED: {
|
||
|
this.state = State.BEFORE_DOCTYPE_PUBLIC_IDENTIFIER;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.QUOTATION_MARK: {
|
||
|
this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword);
|
||
|
token.publicId = '';
|
||
|
this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.APOSTROPHE: {
|
||
|
this._err(ERR.missingWhitespaceAfterDoctypePublicKeyword);
|
||
|
token.publicId = '';
|
||
|
this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this._err(ERR.missingDoctypePublicIdentifier);
|
||
|
token.forceQuirks = true;
|
||
|
this.state = State.DATA;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInDoctype);
|
||
|
token.forceQuirks = true;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier);
|
||
|
token.forceQuirks = true;
|
||
|
this.state = State.BOGUS_DOCTYPE;
|
||
|
this._stateBogusDoctype(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Before DOCTYPE public identifier state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateBeforeDoctypePublicIdentifier(cp) {
|
||
|
const token = this.currentToken;
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.SPACE:
|
||
|
case CODE_POINTS.LINE_FEED:
|
||
|
case CODE_POINTS.TABULATION:
|
||
|
case CODE_POINTS.FORM_FEED: {
|
||
|
// Ignore whitespace
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.QUOTATION_MARK: {
|
||
|
token.publicId = '';
|
||
|
this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.APOSTROPHE: {
|
||
|
token.publicId = '';
|
||
|
this.state = State.DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this._err(ERR.missingDoctypePublicIdentifier);
|
||
|
token.forceQuirks = true;
|
||
|
this.state = State.DATA;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInDoctype);
|
||
|
token.forceQuirks = true;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._err(ERR.missingQuoteBeforeDoctypePublicIdentifier);
|
||
|
token.forceQuirks = true;
|
||
|
this.state = State.BOGUS_DOCTYPE;
|
||
|
this._stateBogusDoctype(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// DOCTYPE public identifier (double-quoted) state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateDoctypePublicIdentifierDoubleQuoted(cp) {
|
||
|
const token = this.currentToken;
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.QUOTATION_MARK: {
|
||
|
this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
token.publicId += REPLACEMENT_CHARACTER;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this._err(ERR.abruptDoctypePublicIdentifier);
|
||
|
token.forceQuirks = true;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this.state = State.DATA;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInDoctype);
|
||
|
token.forceQuirks = true;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
token.publicId += String.fromCodePoint(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// DOCTYPE public identifier (single-quoted) state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateDoctypePublicIdentifierSingleQuoted(cp) {
|
||
|
const token = this.currentToken;
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.APOSTROPHE: {
|
||
|
this.state = State.AFTER_DOCTYPE_PUBLIC_IDENTIFIER;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
token.publicId += REPLACEMENT_CHARACTER;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this._err(ERR.abruptDoctypePublicIdentifier);
|
||
|
token.forceQuirks = true;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this.state = State.DATA;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInDoctype);
|
||
|
token.forceQuirks = true;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
token.publicId += String.fromCodePoint(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// After DOCTYPE public identifier state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateAfterDoctypePublicIdentifier(cp) {
|
||
|
const token = this.currentToken;
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.SPACE:
|
||
|
case CODE_POINTS.LINE_FEED:
|
||
|
case CODE_POINTS.TABULATION:
|
||
|
case CODE_POINTS.FORM_FEED: {
|
||
|
this.state = State.BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this.state = State.DATA;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.QUOTATION_MARK: {
|
||
|
this._err(ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
|
||
|
token.systemId = '';
|
||
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.APOSTROPHE: {
|
||
|
this._err(ERR.missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
|
||
|
token.systemId = '';
|
||
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInDoctype);
|
||
|
token.forceQuirks = true;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
|
||
|
token.forceQuirks = true;
|
||
|
this.state = State.BOGUS_DOCTYPE;
|
||
|
this._stateBogusDoctype(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Between DOCTYPE public and system identifiers state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateBetweenDoctypePublicAndSystemIdentifiers(cp) {
|
||
|
const token = this.currentToken;
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.SPACE:
|
||
|
case CODE_POINTS.LINE_FEED:
|
||
|
case CODE_POINTS.TABULATION:
|
||
|
case CODE_POINTS.FORM_FEED: {
|
||
|
// Ignore whitespace
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this.state = State.DATA;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.QUOTATION_MARK: {
|
||
|
token.systemId = '';
|
||
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.APOSTROPHE: {
|
||
|
token.systemId = '';
|
||
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInDoctype);
|
||
|
token.forceQuirks = true;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
|
||
|
token.forceQuirks = true;
|
||
|
this.state = State.BOGUS_DOCTYPE;
|
||
|
this._stateBogusDoctype(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// After DOCTYPE system keyword state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateAfterDoctypeSystemKeyword(cp) {
|
||
|
const token = this.currentToken;
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.SPACE:
|
||
|
case CODE_POINTS.LINE_FEED:
|
||
|
case CODE_POINTS.TABULATION:
|
||
|
case CODE_POINTS.FORM_FEED: {
|
||
|
this.state = State.BEFORE_DOCTYPE_SYSTEM_IDENTIFIER;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.QUOTATION_MARK: {
|
||
|
this._err(ERR.missingWhitespaceAfterDoctypeSystemKeyword);
|
||
|
token.systemId = '';
|
||
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.APOSTROPHE: {
|
||
|
this._err(ERR.missingWhitespaceAfterDoctypeSystemKeyword);
|
||
|
token.systemId = '';
|
||
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this._err(ERR.missingDoctypeSystemIdentifier);
|
||
|
token.forceQuirks = true;
|
||
|
this.state = State.DATA;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInDoctype);
|
||
|
token.forceQuirks = true;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
|
||
|
token.forceQuirks = true;
|
||
|
this.state = State.BOGUS_DOCTYPE;
|
||
|
this._stateBogusDoctype(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Before DOCTYPE system identifier state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateBeforeDoctypeSystemIdentifier(cp) {
|
||
|
const token = this.currentToken;
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.SPACE:
|
||
|
case CODE_POINTS.LINE_FEED:
|
||
|
case CODE_POINTS.TABULATION:
|
||
|
case CODE_POINTS.FORM_FEED: {
|
||
|
// Ignore whitespace
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.QUOTATION_MARK: {
|
||
|
token.systemId = '';
|
||
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.APOSTROPHE: {
|
||
|
token.systemId = '';
|
||
|
this.state = State.DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this._err(ERR.missingDoctypeSystemIdentifier);
|
||
|
token.forceQuirks = true;
|
||
|
this.state = State.DATA;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInDoctype);
|
||
|
token.forceQuirks = true;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._err(ERR.missingQuoteBeforeDoctypeSystemIdentifier);
|
||
|
token.forceQuirks = true;
|
||
|
this.state = State.BOGUS_DOCTYPE;
|
||
|
this._stateBogusDoctype(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// DOCTYPE system identifier (double-quoted) state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateDoctypeSystemIdentifierDoubleQuoted(cp) {
|
||
|
const token = this.currentToken;
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.QUOTATION_MARK: {
|
||
|
this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
token.systemId += REPLACEMENT_CHARACTER;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this._err(ERR.abruptDoctypeSystemIdentifier);
|
||
|
token.forceQuirks = true;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this.state = State.DATA;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInDoctype);
|
||
|
token.forceQuirks = true;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
token.systemId += String.fromCodePoint(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// DOCTYPE system identifier (single-quoted) state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateDoctypeSystemIdentifierSingleQuoted(cp) {
|
||
|
const token = this.currentToken;
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.APOSTROPHE: {
|
||
|
this.state = State.AFTER_DOCTYPE_SYSTEM_IDENTIFIER;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
token.systemId += REPLACEMENT_CHARACTER;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this._err(ERR.abruptDoctypeSystemIdentifier);
|
||
|
token.forceQuirks = true;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this.state = State.DATA;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInDoctype);
|
||
|
token.forceQuirks = true;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
token.systemId += String.fromCodePoint(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// After DOCTYPE system identifier state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateAfterDoctypeSystemIdentifier(cp) {
|
||
|
const token = this.currentToken;
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.SPACE:
|
||
|
case CODE_POINTS.LINE_FEED:
|
||
|
case CODE_POINTS.TABULATION:
|
||
|
case CODE_POINTS.FORM_FEED: {
|
||
|
// Ignore whitespace
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this.state = State.DATA;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInDoctype);
|
||
|
token.forceQuirks = true;
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._err(ERR.unexpectedCharacterAfterDoctypeSystemIdentifier);
|
||
|
this.state = State.BOGUS_DOCTYPE;
|
||
|
this._stateBogusDoctype(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Bogus DOCTYPE state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateBogusDoctype(cp) {
|
||
|
const token = this.currentToken;
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this.state = State.DATA;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.NULL: {
|
||
|
this._err(ERR.unexpectedNullCharacter);
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this.emitCurrentDoctype(token);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
// Do nothing
|
||
|
}
|
||
|
}
|
||
|
// CDATA section state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateCdataSection(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.RIGHT_SQUARE_BRACKET: {
|
||
|
this.state = State.CDATA_SECTION_BRACKET;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.EOF: {
|
||
|
this._err(ERR.eofInCdata);
|
||
|
this._emitEOFToken();
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._emitCodePoint(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// CDATA section bracket state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateCdataSectionBracket(cp) {
|
||
|
if (cp === CODE_POINTS.RIGHT_SQUARE_BRACKET) {
|
||
|
this.state = State.CDATA_SECTION_END;
|
||
|
}
|
||
|
else {
|
||
|
this._emitChars(']');
|
||
|
this.state = State.CDATA_SECTION;
|
||
|
this._stateCdataSection(cp);
|
||
|
}
|
||
|
}
|
||
|
// CDATA section end state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateCdataSectionEnd(cp) {
|
||
|
switch (cp) {
|
||
|
case CODE_POINTS.GREATER_THAN_SIGN: {
|
||
|
this.state = State.DATA;
|
||
|
break;
|
||
|
}
|
||
|
case CODE_POINTS.RIGHT_SQUARE_BRACKET: {
|
||
|
this._emitChars(']');
|
||
|
break;
|
||
|
}
|
||
|
default: {
|
||
|
this._emitChars(']]');
|
||
|
this.state = State.CDATA_SECTION;
|
||
|
this._stateCdataSection(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
// Character reference state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateCharacterReference() {
|
||
|
let length = this.entityDecoder.write(this.preprocessor.html, this.preprocessor.pos);
|
||
|
if (length < 0) {
|
||
|
if (this.preprocessor.lastChunkWritten) {
|
||
|
length = this.entityDecoder.end();
|
||
|
}
|
||
|
else {
|
||
|
// Wait for the rest of the entity.
|
||
|
this.active = false;
|
||
|
// Mark the entire buffer as read.
|
||
|
this.preprocessor.pos = this.preprocessor.html.length - 1;
|
||
|
this.consumedAfterSnapshot = 0;
|
||
|
this.preprocessor.endOfChunkHit = true;
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
if (length === 0) {
|
||
|
// This was not a valid entity. Go back to the beginning, and
|
||
|
// figure out what to do.
|
||
|
this.preprocessor.pos = this.entityStartPos;
|
||
|
this._flushCodePointConsumedAsCharacterReference(CODE_POINTS.AMPERSAND);
|
||
|
this.state =
|
||
|
!this._isCharacterReferenceInAttribute() && isAsciiAlphaNumeric(this.preprocessor.peek(1))
|
||
|
? State.AMBIGUOUS_AMPERSAND
|
||
|
: this.returnState;
|
||
|
}
|
||
|
else {
|
||
|
// We successfully parsed an entity. Switch to the return state.
|
||
|
this.state = this.returnState;
|
||
|
}
|
||
|
}
|
||
|
// Ambiguous ampersand state
|
||
|
//------------------------------------------------------------------
|
||
|
_stateAmbiguousAmpersand(cp) {
|
||
|
if (isAsciiAlphaNumeric(cp)) {
|
||
|
this._flushCodePointConsumedAsCharacterReference(cp);
|
||
|
}
|
||
|
else {
|
||
|
if (cp === CODE_POINTS.SEMICOLON) {
|
||
|
this._err(ERR.unknownNamedCharacterReference);
|
||
|
}
|
||
|
this.state = this.returnState;
|
||
|
this._callState(cp);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
//Element utils
|
||
|
// Tag IDs popped by `generateImpliedEndTags()`.
const IMPLICIT_END_TAG_REQUIRED = new Set([
    TAG_ID.DD,
    TAG_ID.DT,
    TAG_ID.LI,
    TAG_ID.OPTGROUP,
    TAG_ID.OPTION,
    TAG_ID.P,
    TAG_ID.RB,
    TAG_ID.RP,
    TAG_ID.RT,
    TAG_ID.RTC,
]);
// The set above extended with table-structure tags; used by the "thoroughly" variants.
const IMPLICIT_END_TAG_REQUIRED_THOROUGHLY = new Set([
    ...IMPLICIT_END_TAG_REQUIRED,
    ...[TAG_ID.CAPTION, TAG_ID.COLGROUP, TAG_ID.TBODY, TAG_ID.TD],
    ...[TAG_ID.TFOOT, TAG_ID.TH, TAG_ID.THEAD, TAG_ID.TR],
]);
// HTML-namespace tag IDs that end a scope search.
const SCOPING_ELEMENTS_HTML = new Set([
    TAG_ID.APPLET,
    TAG_ID.CAPTION,
    TAG_ID.HTML,
    TAG_ID.MARQUEE,
    TAG_ID.OBJECT,
    TAG_ID.TABLE,
    TAG_ID.TD,
    TAG_ID.TEMPLATE,
    TAG_ID.TH,
]);
// List-item scope: the base HTML scope plus <ol> and <ul>.
const SCOPING_ELEMENTS_HTML_LIST = new Set(SCOPING_ELEMENTS_HTML).add(TAG_ID.OL).add(TAG_ID.UL);
// Button scope: the base HTML scope plus <button>.
const SCOPING_ELEMENTS_HTML_BUTTON = new Set(SCOPING_ELEMENTS_HTML).add(TAG_ID.BUTTON);
// MathML-namespace tag IDs that end a scope search.
const SCOPING_ELEMENTS_MATHML = new Set([
    TAG_ID.ANNOTATION_XML,
    TAG_ID.MI,
    TAG_ID.MN,
    TAG_ID.MO,
    TAG_ID.MS,
    TAG_ID.MTEXT,
]);
// SVG-namespace tag IDs that end a scope search.
const SCOPING_ELEMENTS_SVG = new Set([
    TAG_ID.DESC,
    TAG_ID.FOREIGN_OBJECT,
    TAG_ID.TITLE,
]);
// Stop sets for the `clearBackToTable*Context()` helpers.
const TABLE_ROW_CONTEXT = new Set([
    TAG_ID.TR,
    TAG_ID.TEMPLATE,
    TAG_ID.HTML,
]);
const TABLE_BODY_CONTEXT = new Set([
    TAG_ID.TBODY,
    TAG_ID.TFOOT,
    TAG_ID.THEAD,
    TAG_ID.TEMPLATE,
    TAG_ID.HTML,
]);
const TABLE_CONTEXT = new Set([
    TAG_ID.TABLE,
    TAG_ID.TEMPLATE,
    TAG_ID.HTML,
]);
// Table cell tags, used by `popUntilTableCellPopped()`.
const TABLE_CELLS = new Set([
    TAG_ID.TD,
    TAG_ID.TH,
]);
|
||
|
//Stack of open elements
|
||
|
/**
 * Stack of open elements.
 *
 * `items` and `tagIDs` are parallel arrays; only indices `0..stackTop` are live.
 * Popping just decrements `stackTop`, so the arrays may hold stale entries past
 * that index. `handler` is notified through `onItemPush` / `onItemPop`.
 */
class OpenElementStack {
    /**
     * The template content of the current element when it is an HTML
     * `<template>`, otherwise the current element itself.
     */
    get currentTmplContentOrNode() {
        return this._isInTemplate() ? this.treeAdapter.getTemplateContent(this.current) : this.current;
    }
    /**
     * @param document Node used as `current` while the stack is empty.
     * @param treeAdapter Adapter used to query namespaces and template content.
     * @param handler Object exposing `onItemPush(node, tagID, isTop)` and `onItemPop(node, isTop)`.
     */
    constructor(document, treeAdapter, handler) {
        this.treeAdapter = treeAdapter;
        this.handler = handler;
        this.items = [];
        this.tagIDs = [];
        this.stackTop = -1;
        this.tmplCount = 0;
        this.currentTagId = TAG_ID.UNKNOWN;
        this.current = document;
    }
    /**
     * Index of `element` within the live part of the stack, or -1.
     */
    _indexOf(element) {
        // With an empty stack `stackTop` is -1, which `lastIndexOf` would interpret
        // as "start from the last array slot" and match stale, already-popped entries.
        if (this.stackTop < 0) {
            return -1;
        }
        return this.items.lastIndexOf(element, this.stackTop);
    }
    /** True when the current element is a `<template>` in the HTML namespace. */
    _isInTemplate() {
        return this.currentTagId === TAG_ID.TEMPLATE && this.treeAdapter.getNamespaceURI(this.current) === NS.HTML;
    }
    /** Re-reads `current` / `currentTagId` from the slot at `stackTop`. */
    _updateCurrentElement() {
        this.current = this.items[this.stackTop];
        this.currentTagId = this.tagIDs[this.stackTop];
    }
    //Mutations
    /** Pushes `element` (with its tag ID) and makes it the current element. */
    push(element, tagID) {
        this.stackTop++;
        this.items[this.stackTop] = element;
        this.current = element;
        this.tagIDs[this.stackTop] = tagID;
        this.currentTagId = tagID;
        if (this._isInTemplate()) {
            this.tmplCount++;
        }
        this.handler.onItemPush(element, tagID, true);
    }
    /** Pops the current element. */
    pop() {
        const popped = this.current;
        if (this.tmplCount > 0 && this._isInTemplate()) {
            this.tmplCount--;
        }
        this.stackTop--;
        this._updateCurrentElement();
        this.handler.onItemPop(popped, true);
    }
    /**
     * Replaces `oldElement` with `newElement` in place (the tag ID slot is left
     * untouched). Does nothing when `oldElement` is not on the stack.
     */
    replace(oldElement, newElement) {
        const idx = this._indexOf(oldElement);
        if (idx < 0) {
            // Writing to index -1 would only add a junk property to the array.
            return;
        }
        this.items[idx] = newElement;
        if (idx === this.stackTop) {
            this.current = newElement;
        }
    }
    /**
     * Inserts `newElement` directly above `referenceElement`.
     * The handler is told about the inserted element; `isTop` says whether it
     * became the current element.
     */
    insertAfter(referenceElement, newElement, newElementID) {
        const insertionIdx = this._indexOf(referenceElement) + 1;
        this.items.splice(insertionIdx, 0, newElement);
        this.tagIDs.splice(insertionIdx, 0, newElementID);
        this.stackTop++;
        const isTop = insertionIdx === this.stackTop;
        if (isTop) {
            this._updateCurrentElement();
        }
        // Report the element that was actually inserted, not whatever is on top.
        this.handler.onItemPush(newElement, newElementID, isTop);
    }
    /**
     * Pops elements up to and including the topmost HTML-namespace element
     * whose tag ID equals `tagName`.
     */
    popUntilTagNamePopped(tagName) {
        let targetIdx = this.stackTop + 1;
        do {
            targetIdx = this.tagIDs.lastIndexOf(tagName, targetIdx - 1);
        } while (targetIdx > 0 && this.treeAdapter.getNamespaceURI(this.items[targetIdx]) !== NS.HTML);
        this.shortenToLength(targetIdx < 0 ? 0 : targetIdx);
    }
    /**
     * Pops until only `idx` elements remain. Only the last pop is reported to
     * the handler with `isTop === true`.
     */
    shortenToLength(idx) {
        while (this.stackTop >= idx) {
            const popped = this.current;
            if (this.tmplCount > 0 && this._isInTemplate()) {
                this.tmplCount -= 1;
            }
            this.stackTop--;
            this._updateCurrentElement();
            this.handler.onItemPop(popped, this.stackTop < idx);
        }
    }
    /** Pops up to and including `element`; empties the stack if it is absent. */
    popUntilElementPopped(element) {
        const idx = this._indexOf(element);
        this.shortenToLength(idx < 0 ? 0 : idx);
    }
    /**
     * Pops up to and including the topmost element whose tag ID is in
     * `tagNames` and whose namespace is `targetNS`; empties the stack if none matches.
     */
    popUntilPopped(tagNames, targetNS) {
        const idx = this._indexOfTagNames(tagNames, targetNS);
        this.shortenToLength(idx < 0 ? 0 : idx);
    }
    popUntilNumberedHeaderPopped() {
        this.popUntilPopped(NUMBERED_HEADERS, NS.HTML);
    }
    popUntilTableCellPopped() {
        this.popUntilPopped(TABLE_CELLS, NS.HTML);
    }
    popAllUpToHtmlElement() {
        //NOTE: here we assume that the root <html> element is always first in the open element stack, so
        //we perform this fast stack clean up.
        this.tmplCount = 0;
        this.shortenToLength(1);
    }
    /** Index of the topmost element matching `tagNames` in `namespace`, or -1. */
    _indexOfTagNames(tagNames, namespace) {
        for (let i = this.stackTop; i >= 0; i--) {
            if (tagNames.has(this.tagIDs[i]) && this.treeAdapter.getNamespaceURI(this.items[i]) === namespace) {
                return i;
            }
        }
        return -1;
    }
    /** Pops everything above the topmost matching element, keeping that element. */
    clearBackTo(tagNames, targetNS) {
        const idx = this._indexOfTagNames(tagNames, targetNS);
        this.shortenToLength(idx + 1);
    }
    clearBackToTableContext() {
        this.clearBackTo(TABLE_CONTEXT, NS.HTML);
    }
    clearBackToTableBodyContext() {
        this.clearBackTo(TABLE_BODY_CONTEXT, NS.HTML);
    }
    clearBackToTableRowContext() {
        this.clearBackTo(TABLE_ROW_CONTEXT, NS.HTML);
    }
    /** Removes `element` from anywhere in the stack; no-op if it is absent. */
    remove(element) {
        const idx = this._indexOf(element);
        if (idx < 0) {
            return;
        }
        if (idx === this.stackTop) {
            this.pop();
            return;
        }
        this.items.splice(idx, 1);
        this.tagIDs.splice(idx, 1);
        this.stackTop--;
        this._updateCurrentElement();
        this.handler.onItemPop(element, false);
    }
    //Search
    tryPeekProperlyNestedBodyElement() {
        //Properly nested <body> element (should be second element in stack).
        return this.stackTop >= 1 && this.tagIDs[1] === TAG_ID.BODY ? this.items[1] : null;
    }
    contains(element) {
        return this._indexOf(element) > -1;
    }
    /** Returns the element directly below `element`, or null if there is none. */
    getCommonAncestor(element) {
        const elementIdx = this._indexOf(element) - 1;
        return elementIdx >= 0 ? this.items[elementIdx] : null;
    }
    isRootHtmlElementCurrent() {
        return this.stackTop === 0 && this.tagIDs[0] === TAG_ID.HTML;
    }
    //Element in scope
    /**
     * Walks the stack from the top. Returns true on an HTML element with tag ID
     * `tagName`, false on a scope boundary (`htmlScope` for HTML elements, the
     * SVG / MathML scoping sets otherwise), and true if the stack is exhausted.
     */
    hasInDynamicScope(tagName, htmlScope) {
        for (let i = this.stackTop; i >= 0; i--) {
            const tn = this.tagIDs[i];
            switch (this.treeAdapter.getNamespaceURI(this.items[i])) {
                case NS.HTML: {
                    if (tn === tagName)
                        return true;
                    if (htmlScope.has(tn))
                        return false;
                    break;
                }
                case NS.SVG: {
                    if (SCOPING_ELEMENTS_SVG.has(tn))
                        return false;
                    break;
                }
                case NS.MATHML: {
                    if (SCOPING_ELEMENTS_MATHML.has(tn))
                        return false;
                    break;
                }
            }
        }
        return true;
    }
    hasInScope(tagName) {
        return this.hasInDynamicScope(tagName, SCOPING_ELEMENTS_HTML);
    }
    hasInListItemScope(tagName) {
        return this.hasInDynamicScope(tagName, SCOPING_ELEMENTS_HTML_LIST);
    }
    hasInButtonScope(tagName) {
        return this.hasInDynamicScope(tagName, SCOPING_ELEMENTS_HTML_BUTTON);
    }
    /** Same walk as `hasInDynamicScope`, matching any tag ID in `NUMBERED_HEADERS`. */
    hasNumberedHeaderInScope() {
        for (let i = this.stackTop; i >= 0; i--) {
            const tn = this.tagIDs[i];
            switch (this.treeAdapter.getNamespaceURI(this.items[i])) {
                case NS.HTML: {
                    if (NUMBERED_HEADERS.has(tn))
                        return true;
                    if (SCOPING_ELEMENTS_HTML.has(tn))
                        return false;
                    break;
                }
                case NS.SVG: {
                    if (SCOPING_ELEMENTS_SVG.has(tn))
                        return false;
                    break;
                }
                case NS.MATHML: {
                    if (SCOPING_ELEMENTS_MATHML.has(tn))
                        return false;
                    break;
                }
            }
        }
        return true;
    }
    /** Table scope: only HTML elements are inspected; `<table>` and `<html>` are boundaries. */
    hasInTableScope(tagName) {
        for (let i = this.stackTop; i >= 0; i--) {
            if (this.treeAdapter.getNamespaceURI(this.items[i]) !== NS.HTML) {
                continue;
            }
            switch (this.tagIDs[i]) {
                case tagName: {
                    return true;
                }
                case TAG_ID.TABLE:
                case TAG_ID.HTML: {
                    return false;
                }
            }
        }
        return true;
    }
    hasTableBodyContextInTableScope() {
        for (let i = this.stackTop; i >= 0; i--) {
            if (this.treeAdapter.getNamespaceURI(this.items[i]) !== NS.HTML) {
                continue;
            }
            switch (this.tagIDs[i]) {
                case TAG_ID.TBODY:
                case TAG_ID.THEAD:
                case TAG_ID.TFOOT: {
                    return true;
                }
                case TAG_ID.TABLE:
                case TAG_ID.HTML: {
                    return false;
                }
            }
        }
        return true;
    }
    /** Select scope: every HTML element other than `<option>` / `<optgroup>` is a boundary. */
    hasInSelectScope(tagName) {
        for (let i = this.stackTop; i >= 0; i--) {
            if (this.treeAdapter.getNamespaceURI(this.items[i]) !== NS.HTML) {
                continue;
            }
            switch (this.tagIDs[i]) {
                case tagName: {
                    return true;
                }
                case TAG_ID.OPTION:
                case TAG_ID.OPTGROUP: {
                    break;
                }
                default: {
                    return false;
                }
            }
        }
        return true;
    }
    //Implied end tags
    generateImpliedEndTags() {
        while (IMPLICIT_END_TAG_REQUIRED.has(this.currentTagId)) {
            this.pop();
        }
    }
    generateImpliedEndTagsThoroughly() {
        while (IMPLICIT_END_TAG_REQUIRED_THOROUGHLY.has(this.currentTagId)) {
            this.pop();
        }
    }
    /** Like `generateImpliedEndTagsThoroughly`, but stops at `exclusionId`. */
    generateImpliedEndTagsWithExclusion(exclusionId) {
        while (this.currentTagId !== exclusionId && IMPLICIT_END_TAG_REQUIRED_THOROUGHLY.has(this.currentTagId)) {
            this.pop();
        }
    }
}
|
||
|
|
||
|
//Const
|
||
|
// Threshold checked by `_ensureNoahArkCondition()` against entry and candidate counts.
const NOAH_ARK_CAPACITY = 3;
// Kinds of entries in the formatting element list (two-way name <-> value map).
var EntryType = EntryType || {};
EntryType.Marker = 0;
EntryType[0] = "Marker";
EntryType.Element = 1;
EntryType[1] = "Element";
// Shared marker entry; the list locates it by identity.
const MARKER = { type: EntryType.Marker };
|
||
|
//List of formatting elements
|
||
|
/**
 * List of active formatting elements.
 *
 * `entries` is ordered newest-first: new entries are added with `unshift`.
 * An entry is either the shared `MARKER` object or
 * `{ type: EntryType.Element, element, token }`.
 */
class FormattingElementList {
    /**
     * @param treeAdapter Adapter providing `getTagName`, `getNamespaceURI` and `getAttrList`.
     */
    constructor(treeAdapter) {
        this.treeAdapter = treeAdapter;
        this.entries = [];
        this.bookmark = null;
    }
    //Noah's Ark condition
    //OPTIMIZATION: at first we try to find possible candidates for exclusion using
    //lightweight heuristics without thorough attributes check.
    /**
     * Collects entries above the first marker that share tag name, namespace and
     * attribute count with `newElement`.
     *
     * @returns {{idx: number, attrs: Array}[]} Candidates in increasing `entries` index order.
     */
    _getNoahArkConditionCandidates(newElement, neAttrs) {
        const candidates = [];
        const neAttrsLength = neAttrs.length;
        const neTagName = this.treeAdapter.getTagName(newElement);
        const neNamespaceURI = this.treeAdapter.getNamespaceURI(newElement);
        for (let i = 0; i < this.entries.length; i++) {
            const entry = this.entries[i];
            if (entry.type === EntryType.Marker) {
                break;
            }
            const { element } = entry;
            if (this.treeAdapter.getTagName(element) === neTagName &&
                this.treeAdapter.getNamespaceURI(element) === neNamespaceURI) {
                const elementAttrs = this.treeAdapter.getAttrList(element);
                if (elementAttrs.length === neAttrsLength) {
                    candidates.push({ idx: i, attrs: elementAttrs });
                }
            }
        }
        return candidates;
    }
    /**
     * Ensures that, once `newElement` is added, at most `NOAH_ARK_CAPACITY`
     * identical entries (same tag name, namespace and attributes) exist above
     * the first marker. The newest matches are kept; older ones are removed.
     */
    _ensureNoahArkCondition(newElement) {
        if (this.entries.length < NOAH_ARK_CAPACITY)
            return;
        const neAttrs = this.treeAdapter.getAttrList(newElement);
        const candidates = this._getNoahArkConditionCandidates(newElement, neAttrs);
        if (candidates.length < NOAH_ARK_CAPACITY)
            return;
        //NOTE: build attrs map for the new element, so we can perform fast lookups
        const neAttrsMap = new Map(neAttrs.map((neAttr) => [neAttr.name, neAttr.value]));
        let validCandidates = 0;
        // Number of entries already spliced out; every removal shifts the
        // remaining (higher) candidate indices down by one.
        let removedCount = 0;
        //NOTE: remove bottommost candidates until the Noah's Ark condition is met
        for (const candidate of candidates) {
            // We know that `candidate.attrs.length === neAttrs.length`
            if (candidate.attrs.every((cAttr) => neAttrsMap.get(cAttr.name) === cAttr.value)) {
                validCandidates += 1;
                if (validCandidates >= NOAH_ARK_CAPACITY) {
                    this.entries.splice(candidate.idx - removedCount, 1);
                    removedCount += 1;
                }
            }
        }
    }
    //Mutations
    /** Adds the shared marker entry at the front of the list. */
    insertMarker() {
        this.entries.unshift(MARKER);
    }
    /** Adds an element entry at the front, enforcing the Noah's Ark condition first. */
    pushElement(element, token) {
        this._ensureNoahArkCondition(element);
        this.entries.unshift({
            type: EntryType.Element,
            element,
            token,
        });
    }
    /**
     * Inserts an element entry at the index currently held by `this.bookmark`.
     * NOTE(review): if the bookmark is not in `entries`, `indexOf` yields -1 and
     * `splice` inserts before the last entry — confirm callers always set it.
     */
    insertElementAfterBookmark(element, token) {
        const bookmarkIdx = this.entries.indexOf(this.bookmark);
        this.entries.splice(bookmarkIdx, 0, {
            type: EntryType.Element,
            element,
            token,
        });
    }
    /** Removes `entry` (matched by identity); no-op if it is absent. */
    removeEntry(entry) {
        const entryIndex = this.entries.indexOf(entry);
        if (entryIndex >= 0) {
            this.entries.splice(entryIndex, 1);
        }
    }
    /**
     * Clears the list of formatting elements up to the last marker.
     * When no marker is present the whole list is emptied.
     *
     * @see https://html.spec.whatwg.org/multipage/parsing.html#clear-the-list-of-active-formatting-elements-up-to-the-last-marker
     */
    clearToLastMarker() {
        const markerIdx = this.entries.indexOf(MARKER);
        if (markerIdx >= 0) {
            this.entries.splice(0, markerIdx + 1);
        }
        else {
            this.entries.length = 0;
        }
    }
    //Search
    /**
     * Returns the first element entry above the first marker whose tag name is
     * `tagName`, or null.
     */
    getElementEntryInScopeWithTagName(tagName) {
        const entry = this.entries.find((entry) => entry.type === EntryType.Marker || this.treeAdapter.getTagName(entry.element) === tagName);
        return entry && entry.type === EntryType.Element ? entry : null;
    }
    /** Returns the entry holding `element`, or undefined. */
    getElementEntry(element) {
        return this.entries.find((entry) => entry.type === EntryType.Element && entry.element === element);
    }
}
|
||
|
|
||
|
/**
 * Tree adapter that represents the document as plain objects:
 * container nodes carry a `childNodes` array, and non-document nodes carry a
 * `parentNode` back-reference.
 */
const defaultTreeAdapter = {
    //Node construction

    /** Creates an empty `#document` node in no-quirks mode. */
    createDocument() {
        const documentNode = {
            nodeName: '#document',
            mode: DOCUMENT_MODE.NO_QUIRKS,
            childNodes: [],
        };
        return documentNode;
    },

    /** Creates an empty `#document-fragment` node. */
    createDocumentFragment() {
        const fragmentNode = {
            nodeName: '#document-fragment',
            childNodes: [],
        };
        return fragmentNode;
    },

    /** Creates a detached element; `attrs` is stored by reference. */
    createElement(tagName, namespaceURI, attrs) {
        const elementNode = {
            nodeName: tagName,
            tagName,
            attrs,
            namespaceURI,
            childNodes: [],
            parentNode: null,
        };
        return elementNode;
    },

    /** Creates a detached `#comment` node holding `data`. */
    createCommentNode(data) {
        const commentNode = {
            nodeName: '#comment',
            data,
            parentNode: null,
        };
        return commentNode;
    },

    /** Creates a detached `#text` node holding `value`. */
    createTextNode(value) {
        const textNode = {
            nodeName: '#text',
            value,
            parentNode: null,
        };
        return textNode;
    },

    //Tree mutation

    /** Appends `newNode` as the last child of `parentNode`. */
    appendChild(parentNode, newNode) {
        const siblings = parentNode.childNodes;
        siblings.push(newNode);
        newNode.parentNode = parentNode;
    },

    /** Inserts `newNode` at the index where `referenceNode` is found in the parent's children. */
    insertBefore(parentNode, newNode, referenceNode) {
        const siblings = parentNode.childNodes;
        siblings.splice(siblings.indexOf(referenceNode), 0, newNode);
        newNode.parentNode = parentNode;
    },

    /** Stores `contentElement` as the `content` of a template element. */
    setTemplateContent(templateElement, contentElement) {
        templateElement.content = contentElement;
    },

    /** Returns the `content` stored on a template element. */
    getTemplateContent(templateElement) {
        return templateElement.content;
    },

    /**
     * Updates the document's existing `#documentType` child, or appends a new
     * one when the document has none.
     */
    setDocumentType(document, name, publicId, systemId) {
        const existing = document.childNodes.find((child) => child.nodeName === '#documentType');
        if (!existing) {
            defaultTreeAdapter.appendChild(document, {
                nodeName: '#documentType',
                name,
                publicId,
                systemId,
                parentNode: null,
            });
            return;
        }
        existing.name = name;
        existing.publicId = publicId;
        existing.systemId = systemId;
    },

    /** Stores the document mode on the document node. */
    setDocumentMode(document, mode) {
        document.mode = mode;
    },

    /** Reads the document mode from the document node. */
    getDocumentMode(document) {
        return document.mode;
    },

    /** Removes `node` from its parent's children; a parentless node is left untouched. */
    detachNode(node) {
        const parent = node.parentNode;
        if (!parent) {
            return;
        }
        const siblings = parent.childNodes;
        siblings.splice(siblings.indexOf(node), 1);
        node.parentNode = null;
    },

    /** Appends text to `parentNode`, extending the last child when it is already a text node. */
    insertText(parentNode, text) {
        const siblings = parentNode.childNodes;
        if (siblings.length > 0) {
            const lastChild = siblings[siblings.length - 1];
            if (defaultTreeAdapter.isTextNode(lastChild)) {
                lastChild.value += text;
                return;
            }
        }
        defaultTreeAdapter.appendChild(parentNode, defaultTreeAdapter.createTextNode(text));
    },

    /**
     * Inserts text in front of `referenceNode`, extending the node just before
     * it when that node is a text node.
     */
    insertTextBefore(parentNode, text, referenceNode) {
        const siblings = parentNode.childNodes;
        const precedingNode = siblings[siblings.indexOf(referenceNode) - 1];
        if (!precedingNode || !defaultTreeAdapter.isTextNode(precedingNode)) {
            defaultTreeAdapter.insertBefore(parentNode, defaultTreeAdapter.createTextNode(text), referenceNode);
            return;
        }
        precedingNode.value += text;
    },

    /** Copies onto `recipient` every attribute whose name it did not already have. */
    adoptAttributes(recipient, attrs) {
        // Names are snapshotted once, before any attribute is copied.
        const knownNames = new Set(recipient.attrs.map(({ name }) => name));
        for (const attr of attrs) {
            if (!knownNames.has(attr.name)) {
                recipient.attrs.push(attr);
            }
        }
    },

    //Tree traversing

    /** Returns the first child, or `undefined` when there are no children. */
    getFirstChild(node) {
        const { childNodes } = node;
        return childNodes[0];
    },

    /** Returns the live `childNodes` array. */
    getChildNodes(node) {
        return node.childNodes;
    },

    /** Returns the node's `parentNode`. */
    getParentNode(node) {
        return node.parentNode;
    },

    /** Returns the element's `attrs` array. */
    getAttrList(element) {
        return element.attrs;
    },

    //Node data

    /** Returns the element's `tagName`. */
    getTagName(element) {
        return element.tagName;
    },

    /** Returns the element's `namespaceURI`. */
    getNamespaceURI(element) {
        return element.namespaceURI;
    },

    /** Returns the text node's `value`. */
    getTextNodeContent(textNode) {
        return textNode.value;
    },

    /** Returns the comment node's `data`. */
    getCommentNodeContent(commentNode) {
        return commentNode.data;
    },

    /** Returns the doctype node's `name`. */
    getDocumentTypeNodeName(doctypeNode) {
        return doctypeNode.name;
    },

    /** Returns the doctype node's `publicId`. */
    getDocumentTypeNodePublicId(doctypeNode) {
        return doctypeNode.publicId;
    },

    /** Returns the doctype node's `systemId`. */
    getDocumentTypeNodeSystemId(doctypeNode) {
        return doctypeNode.systemId;
    },

    //Node types

    /** True when `nodeName` is `#text`. */
    isTextNode(node) {
        const { nodeName } = node;
        return nodeName === '#text';
    },

    /** True when `nodeName` is `#comment`. */
    isCommentNode(node) {
        const { nodeName } = node;
        return nodeName === '#comment';
    },

    /** True when `nodeName` is `#documentType`. */
    isDocumentTypeNode(node) {
        const { nodeName } = node;
        return nodeName === '#documentType';
    },

    /** True when the node has its own `tagName` property. */
    isElementNode(node) {
        const ownsProperty = Object.prototype.hasOwnProperty;
        return ownsProperty.call(node, 'tagName');
    },

    // Source code location

    /** Stores `location` as the node's `sourceCodeLocation`. */
    setNodeSourceCodeLocation(node, location) {
        node.sourceCodeLocation = location;
    },

    /** Returns the node's `sourceCodeLocation`. */
    getNodeSourceCodeLocation(node) {
        return node.sourceCodeLocation;
    },

    /** Replaces the node's location with a copy that has `endLocation`'s fields merged in. */
    updateNodeSourceCodeLocation(node, endLocation) {
        const merged = { ...node.sourceCodeLocation, ...endLocation };
        node.sourceCodeLocation = merged;
    },
};
|
||
|
|
||
|
//Const
|
||
|
/** Doctype name compared against `token.name`. */
const VALID_DOCTYPE_NAME = 'html';

/** System identifier compared against `token.systemId`. */
const VALID_SYSTEM_ID = 'about:legacy-compat';

/** System identifier (lower-case) checked when choosing quirks mode. */
const QUIRKS_MODE_SYSTEM_ID = 'http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd';

/** Lower-case public identifier prefixes, grouped by publisher. */
const QUIRKS_MODE_PUBLIC_ID_PREFIXES = [
    '+//silmaril//dtd html pro v0r11 19970101//',
    '-//as//dtd html 3.0 aswedit + extensions//',
    '-//advasoft ltd//dtd html 3.0 aswedit + extensions//',

    // IETF
    '-//ietf//dtd html 2.0 level 1//',
    '-//ietf//dtd html 2.0 level 2//',
    '-//ietf//dtd html 2.0 strict level 1//',
    '-//ietf//dtd html 2.0 strict level 2//',
    '-//ietf//dtd html 2.0 strict//',
    '-//ietf//dtd html 2.0//',
    '-//ietf//dtd html 2.1e//',
    '-//ietf//dtd html 3.0//',
    '-//ietf//dtd html 3.2 final//',
    '-//ietf//dtd html 3.2//',
    '-//ietf//dtd html 3//',
    '-//ietf//dtd html level 0//',
    '-//ietf//dtd html level 1//',
    '-//ietf//dtd html level 2//',
    '-//ietf//dtd html level 3//',
    '-//ietf//dtd html strict level 0//',
    '-//ietf//dtd html strict level 1//',
    '-//ietf//dtd html strict level 2//',
    '-//ietf//dtd html strict level 3//',
    '-//ietf//dtd html strict//',
    '-//ietf//dtd html//',

    '-//metrius//dtd metrius presentational//',

    // Microsoft
    '-//microsoft//dtd internet explorer 2.0 html strict//',
    '-//microsoft//dtd internet explorer 2.0 html//',
    '-//microsoft//dtd internet explorer 2.0 tables//',
    '-//microsoft//dtd internet explorer 3.0 html strict//',
    '-//microsoft//dtd internet explorer 3.0 html//',
    '-//microsoft//dtd internet explorer 3.0 tables//',

    // Netscape
    '-//netscape comm. corp.//dtd html//',
    '-//netscape comm. corp.//dtd strict html//',

    // O'Reilly
    "-//o'reilly and associates//dtd html 2.0//",
    "-//o'reilly and associates//dtd html extended 1.0//",
    "-//o'reilly and associates//dtd html extended relaxed 1.0//",

    // SoftQuad
    '-//sq//dtd html 2.0 hotmetal + extensions//',
    '-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//',
    '-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//',

    '-//spyglass//dtd html 2.0 extended//',

    // Sun
    '-//sun microsystems corp.//dtd hotjava html//',
    '-//sun microsystems corp.//dtd hotjava strict html//',

    // W3C / W3O
    '-//w3c//dtd html 3 1995-03-24//',
    '-//w3c//dtd html 3.2 draft//',
    '-//w3c//dtd html 3.2 final//',
    '-//w3c//dtd html 3.2//',
    '-//w3c//dtd html 3.2s draft//',
    '-//w3c//dtd html 4.0 frameset//',
    '-//w3c//dtd html 4.0 transitional//',
    '-//w3c//dtd html experimental 19960712//',
    '-//w3c//dtd html experimental 970421//',
    '-//w3c//dtd w3 html//',
    '-//w3o//dtd w3 html 3.0//',

    // WebTechs
    '-//webtechs//dtd mozilla html 2.0//',
    '-//webtechs//dtd mozilla html//',
];

/** The list above plus the two HTML 4.01 frameset/transitional prefixes. */
const QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES = QUIRKS_MODE_PUBLIC_ID_PREFIXES.concat([
    '-//w3c//dtd html 4.01 frameset//',
    '-//w3c//dtd html 4.01 transitional//',
]);

/** Lower-case public identifiers that are matched in full rather than by prefix. */
const QUIRKS_MODE_PUBLIC_IDS = new Set([
    '-//w3o//dtd w3 html strict 3.0//en//',
    '-/w3c/dtd html 4.0 transitional/en',
    'html',
]);

/** Lower-case XHTML 1.0 frameset/transitional public identifier prefixes. */
const LIMITED_QUIRKS_PUBLIC_ID_PREFIXES = [
    '-//w3c//dtd xhtml 1.0 frameset//',
    '-//w3c//dtd xhtml 1.0 transitional//',
];

/** The list above plus the two HTML 4.01 frameset/transitional prefixes. */
const LIMITED_QUIRKS_WITH_SYSTEM_ID_PUBLIC_ID_PREFIXES = LIMITED_QUIRKS_PUBLIC_ID_PREFIXES.concat([
    '-//w3c//dtd html 4.01 frameset//',
    '-//w3c//dtd html 4.01 transitional//',
]);
|
||
|
//Utils
|
||
|
/**
 * Tells whether `publicId` begins with at least one of the given prefixes.
 *
 * @param {string} publicId Identifier to test.
 * @param {string[]} prefixes Candidate prefixes.
 * @returns {boolean} `true` on the first matching prefix, `false` otherwise.
 */
function hasPrefix(publicId, prefixes) {
    for (const candidate of prefixes) {
        if (publicId.startsWith(candidate)) {
            return true;
        }
    }
    return false;
}
|
||
|
//API
|
||
|
/**
 * Checks a doctype token against the expected form: name equal to
 * `VALID_DOCTYPE_NAME`, a `null` public id, and a system id that is either
 * `null` or `VALID_SYSTEM_ID`.
 *
 * @param token Doctype token with `name`, `publicId` and `systemId`.
 * @returns {boolean} Whether all three conditions hold.
 */
function isConforming(token) {
    if (token.name !== VALID_DOCTYPE_NAME) {
        return false;
    }
    if (token.publicId !== null) {
        return false;
    }
    return token.systemId === null || token.systemId === VALID_SYSTEM_ID;
}
|
||
|
/**
 * Derives the document mode from a doctype token.
 *
 * Checks run in this order: doctype name, system id (case-insensitive),
 * exact public id, quirks prefixes, then limited-quirks prefixes. Which
 * prefix list applies depends on whether `systemId` is `null`.
 *
 * @param token Doctype token with `name`, `publicId` and `systemId`.
 * @returns One of the `DOCUMENT_MODE` values.
 */
function getDocumentMode(token) {
    if (token.name !== VALID_DOCTYPE_NAME) {
        return DOCUMENT_MODE.QUIRKS;
    }

    const systemId = token.systemId;
    if (systemId && systemId.toLowerCase() === QUIRKS_MODE_SYSTEM_ID) {
        return DOCUMENT_MODE.QUIRKS;
    }

    // Without a public id none of the remaining checks apply.
    if (token.publicId === null) {
        return DOCUMENT_MODE.NO_QUIRKS;
    }

    const loweredPublicId = token.publicId.toLowerCase();
    if (QUIRKS_MODE_PUBLIC_IDS.has(loweredPublicId)) {
        return DOCUMENT_MODE.QUIRKS;
    }

    const lacksSystemId = systemId === null;

    const quirksPrefixes = lacksSystemId
        ? QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES
        : QUIRKS_MODE_PUBLIC_ID_PREFIXES;
    if (hasPrefix(loweredPublicId, quirksPrefixes)) {
        return DOCUMENT_MODE.QUIRKS;
    }

    const limitedQuirksPrefixes = lacksSystemId
        ? LIMITED_QUIRKS_PUBLIC_ID_PREFIXES
        : LIMITED_QUIRKS_WITH_SYSTEM_ID_PUBLIC_ID_PREFIXES;
    return hasPrefix(loweredPublicId, limitedQuirksPrefixes)
        ? DOCUMENT_MODE.LIMITED_QUIRKS
        : DOCUMENT_MODE.NO_QUIRKS;
}
|
||
|
|
||
|
//MIME types
|
||
|
/** MIME type strings compared against the `encoding` attribute value. */
const MIME_TYPES = { TEXT_HTML: 'text/html', APPLICATION_XML: 'application/xhtml+xml' };

//Attributes

/** Lower-case attribute name as it appears on incoming tokens. */
const DEFINITION_URL_ATTR = 'definitionurl';

/** Mixed-case spelling written back onto the token. */
const ADJUSTED_DEFINITION_URL_ATTR = 'definitionURL';
|
||
|
/**
 * Maps the lower-cased form of each listed SVG attribute name to its
 * mixed-case spelling.
 */
const SVG_ATTRS_ADJUSTMENT_MAP = new Map();
for (const attrName of [
    'attributeName',
    'attributeType',
    'baseFrequency',
    'baseProfile',
    'calcMode',
    'clipPathUnits',
    'diffuseConstant',
    'edgeMode',
    'filterUnits',
    'glyphRef',
    'gradientTransform',
    'gradientUnits',
    'kernelMatrix',
    'kernelUnitLength',
    'keyPoints',
    'keySplines',
    'keyTimes',
    'lengthAdjust',
    'limitingConeAngle',
    'markerHeight',
    'markerUnits',
    'markerWidth',
    'maskContentUnits',
    'maskUnits',
    'numOctaves',
    'pathLength',
    'patternContentUnits',
    'patternTransform',
    'patternUnits',
    'pointsAtX',
    'pointsAtY',
    'pointsAtZ',
    'preserveAlpha',
    'preserveAspectRatio',
    'primitiveUnits',
    'refX',
    'refY',
    'repeatCount',
    'repeatDur',
    'requiredExtensions',
    'requiredFeatures',
    'specularConstant',
    'specularExponent',
    'spreadMethod',
    'startOffset',
    'stdDeviation',
    'stitchTiles',
    'surfaceScale',
    'systemLanguage',
    'tableValues',
    'targetX',
    'targetY',
    'textLength',
    'viewBox',
    'viewTarget',
    'xChannelSelector',
    'yChannelSelector',
    'zoomAndPan',
]) {
    SVG_ATTRS_ADJUSTMENT_MAP.set(attrName.toLowerCase(), attrName);
}
|
||
|
/**
 * Maps a qualified attribute name (`xlink:*`, `xml:*`, `xmlns`, `xmlns:xlink`)
 * to the `{ prefix, name, namespace }` triple written onto the attribute.
 */
const XML_ATTRS_ADJUSTMENT_MAP = new Map([
    ...['actuate', 'arcrole', 'href', 'role', 'show', 'title', 'type'].map((name) => [
        `xlink:${name}`,
        { prefix: 'xlink', name, namespace: NS.XLINK },
    ]),
    ...['lang', 'space'].map((name) => [`xml:${name}`, { prefix: 'xml', name, namespace: NS.XML }]),
    ['xmlns', { prefix: '', name: 'xmlns', namespace: NS.XMLNS }],
    ['xmlns:xlink', { prefix: 'xmlns', name: 'xlink', namespace: NS.XMLNS }],
]);
|
||
|
/**
 * SVG tag names adjustment map: lower-cased tag name -> mixed-case spelling.
 */
const SVG_TAG_NAMES_ADJUSTMENT_MAP = new Map();
for (const tagName of [
    'altGlyph',
    'altGlyphDef',
    'altGlyphItem',
    'animateColor',
    'animateMotion',
    'animateTransform',
    'clipPath',
    'feBlend',
    'feColorMatrix',
    'feComponentTransfer',
    'feComposite',
    'feConvolveMatrix',
    'feDiffuseLighting',
    'feDisplacementMap',
    'feDistantLight',
    'feFlood',
    'feFuncA',
    'feFuncB',
    'feFuncG',
    'feFuncR',
    'feGaussianBlur',
    'feImage',
    'feMerge',
    'feMergeNode',
    'feMorphology',
    'feOffset',
    'fePointLight',
    'feSpecularLighting',
    'feSpotLight',
    'feTile',
    'feTurbulence',
    'foreignObject',
    'glyphRef',
    'linearGradient',
    'radialGradient',
    'textPath',
]) {
    SVG_TAG_NAMES_ADJUSTMENT_MAP.set(tagName.toLowerCase(), tagName);
}
|
||
|
/** Tag IDs of start tags that cause an exit from foreign content. */
const EXITS_FOREIGN_CONTENT = new Set([
    TAG_ID.B, TAG_ID.BIG, TAG_ID.BLOCKQUOTE, TAG_ID.BODY, TAG_ID.BR,
    TAG_ID.CENTER, TAG_ID.CODE,
    TAG_ID.DD, TAG_ID.DIV, TAG_ID.DL, TAG_ID.DT,
    TAG_ID.EM, TAG_ID.EMBED,
    TAG_ID.H1, TAG_ID.H2, TAG_ID.H3, TAG_ID.H4, TAG_ID.H5, TAG_ID.H6,
    TAG_ID.HEAD, TAG_ID.HR,
    TAG_ID.I, TAG_ID.IMG,
    TAG_ID.LI, TAG_ID.LISTING,
    TAG_ID.MENU, TAG_ID.META,
    TAG_ID.NOBR,
    TAG_ID.OL,
    TAG_ID.P, TAG_ID.PRE,
    TAG_ID.RUBY,
    TAG_ID.S, TAG_ID.SMALL, TAG_ID.SPAN, TAG_ID.STRONG, TAG_ID.STRIKE, TAG_ID.SUB, TAG_ID.SUP,
    TAG_ID.TABLE, TAG_ID.TT,
    TAG_ID.U, TAG_ID.UL,
    TAG_ID.VAR,
]);
|
||
|
/**
 * Checks whether a start tag causes an exit from foreign content: either its
 * tag ID is in `EXITS_FOREIGN_CONTENT`, or it is a `font` tag carrying a
 * `color`, `size` or `face` attribute.
 *
 * @param startTagToken Start tag token with `tagID` and `attrs`.
 * @returns {boolean}
 */
function causesExit(startTagToken) {
    const { tagID, attrs } = startTagToken;
    if (EXITS_FOREIGN_CONTENT.has(tagID)) {
        return true;
    }
    if (tagID !== TAG_ID.FONT) {
        return false;
    }
    return attrs.some((attr) => attr.name === ATTRS.COLOR || attr.name === ATTRS.SIZE || attr.name === ATTRS.FACE);
}
|
||
|
//Token adjustments
|
||
|
/**
 * Renames the first `definitionurl` attribute on the token to `definitionURL`.
 * The token's attribute objects are modified in place.
 *
 * @param token Tag token with an `attrs` array.
 */
function adjustTokenMathMLAttrs(token) {
    const target = token.attrs.find((attr) => attr.name === DEFINITION_URL_ATTR);
    if (target) {
        target.name = ADJUSTED_DEFINITION_URL_ATTR;
    }
}
|
||
|
/**
 * Rewrites, in place, every attribute name on the token that has an entry in
 * `SVG_ATTRS_ADJUSTMENT_MAP`.
 *
 * @param token Tag token with an `attrs` array.
 */
function adjustTokenSVGAttrs(token) {
    for (const attr of token.attrs) {
        const replacement = SVG_ATTRS_ADJUSTMENT_MAP.get(attr.name);
        if (replacement != null) {
            attr.name = replacement;
        }
    }
}
|
||
|
/**
 * For every attribute whose name has an entry in `XML_ATTRS_ADJUSTMENT_MAP`,
 * overwrites its `prefix`, `name` and `namespace` with the mapped values.
 * Attributes are modified in place.
 *
 * @param token Tag token with an `attrs` array.
 */
function adjustTokenXMLAttrs(token) {
    for (const attr of token.attrs) {
        const adjustment = XML_ATTRS_ADJUSTMENT_MAP.get(attr.name);
        if (!adjustment) {
            continue;
        }
        attr.prefix = adjustment.prefix;
        attr.name = adjustment.name;
        attr.namespace = adjustment.namespace;
    }
}
|
||
|
/**
 * Replaces the token's tag name with its entry in
 * `SVG_TAG_NAMES_ADJUSTMENT_MAP`, when one exists, and refreshes `tagID` to match.
 *
 * @param token Tag token with `tagName` and `tagID`.
 */
function adjustTokenSVGTagName(token) {
    const replacement = SVG_TAG_NAMES_ADJUSTMENT_MAP.get(token.tagName);
    if (replacement == null) {
        return;
    }
    token.tagName = replacement;
    token.tagID = getTagID(token.tagName);
}
|
||
|
//Integration points
|
||
|
/**
 * True for the MathML elements `mi`, `mo`, `mn`, `ms` and `mtext`.
 *
 * @param tn Tag ID of the element.
 * @param ns Namespace URI of the element.
 * @returns {boolean}
 */
function isMathMLTextIntegrationPoint(tn, ns) {
    if (ns !== NS.MATHML) {
        return false;
    }
    switch (tn) {
        case TAG_ID.MI:
        case TAG_ID.MO:
        case TAG_ID.MN:
        case TAG_ID.MS:
        case TAG_ID.MTEXT:
            return true;
        default:
            return false;
    }
}
|
||
|
/**
 * Checks the two cases visible here:
 *  - a MathML `annotation-xml` element whose first `encoding` attribute equals
 *    `text/html` or `application/xhtml+xml` (case-insensitive);
 *  - an SVG `foreignObject`, `desc` or `title` element.
 *
 * @param tn Tag ID of the element.
 * @param ns Namespace URI of the element.
 * @param attrs Attribute list of the element.
 * @returns {boolean}
 */
function isHtmlIntegrationPoint(tn, ns, attrs) {
    if (ns === NS.MATHML && tn === TAG_ID.ANNOTATION_XML) {
        // Only the first `encoding` attribute decides the outcome.
        const encodingAttr = attrs.find((attr) => attr.name === ATTRS.ENCODING);
        if (encodingAttr) {
            const encoding = encodingAttr.value.toLowerCase();
            return encoding === MIME_TYPES.TEXT_HTML || encoding === MIME_TYPES.APPLICATION_XML;
        }
    }
    if (ns !== NS.SVG) {
        return false;
    }
    return tn === TAG_ID.FOREIGN_OBJECT || tn === TAG_ID.DESC || tn === TAG_ID.TITLE;
}
|
||
|
/**
 * Combines the HTML and MathML-text integration point checks.
 *
 * A falsy `foreignNS` runs both checks; `NS.HTML` runs only the HTML check and
 * `NS.MATHML` only the MathML-text check.
 *
 * @param tn Tag ID of the element.
 * @param ns Namespace URI of the element.
 * @param attrs Attribute list of the element.
 * @param foreignNS Optional namespace restricting which check is run.
 * @returns {boolean}
 */
function isIntegrationPoint(tn, ns, attrs, foreignNS) {
    const anyNamespace = !foreignNS;
    if ((anyNamespace || foreignNS === NS.HTML) && isHtmlIntegrationPoint(tn, ns, attrs)) {
        return true;
    }
    return (anyNamespace || foreignNS === NS.MATHML) && isMathMLTextIntegrationPoint(tn, ns);
}
|
||
|
|
||
|
//Misc constants
|
||
|
/** The string `'hidden'`, named for comparisons against an input's `type`. */
const HIDDEN_INPUT_TYPE = 'hidden';

//Adoption agency loops iteration count
/** Iteration count for the adoption agency outer loop. */
const AA_OUTER_LOOP_ITER = 8;
/** Iteration count for the adoption agency inner loop. */
const AA_INNER_LOOP_ITER = 3;
|
||
|
/**
 * Insertion modes, as a two-way enum object: each name maps to its ordinal
 * and each ordinal maps back to its name.
 */
var InsertionMode;
(function (InsertionMode) {
    // The position in this list is the numeric value of the mode.
    [
        'INITIAL',
        'BEFORE_HTML',
        'BEFORE_HEAD',
        'IN_HEAD',
        'IN_HEAD_NO_SCRIPT',
        'AFTER_HEAD',
        'IN_BODY',
        'TEXT',
        'IN_TABLE',
        'IN_TABLE_TEXT',
        'IN_CAPTION',
        'IN_COLUMN_GROUP',
        'IN_TABLE_BODY',
        'IN_ROW',
        'IN_CELL',
        'IN_SELECT',
        'IN_SELECT_IN_TABLE',
        'IN_TEMPLATE',
        'AFTER_BODY',
        'IN_FRAMESET',
        'AFTER_FRAMESET',
        'AFTER_AFTER_BODY',
        'AFTER_AFTER_FRAMESET',
    ].forEach((modeName, ordinal) => {
        InsertionMode[modeName] = ordinal;
        InsertionMode[ordinal] = modeName;
    });
})(InsertionMode || (InsertionMode = {}));
|
||
|
/** Placeholder location with every field set to -1. */
const BASE_LOC = {
    // start
    startLine: -1, startCol: -1, startOffset: -1,
    // end
    endLine: -1, endCol: -1, endOffset: -1,
};
|
||
|
/** Tag IDs of `table`, `tbody`, `tfoot`, `thead` and `tr`. */
const TABLE_STRUCTURE_TAGS = new Set([
    TAG_ID.TABLE,
    TAG_ID.TBODY,
    TAG_ID.TFOOT,
    TAG_ID.THEAD,
    TAG_ID.TR,
]);

/** Option defaults; values supplied by the caller are spread on top of these. */
const defaultParserOptions = {
    scriptingEnabled: true,
    sourceCodeLocationInfo: false,
    treeAdapter: defaultTreeAdapter,
    onParseError: null,
};
|
||
|
//Parser
|
||
|
class Parser {
|
||
|
constructor(options, document,
|
||
|
/** @internal */
|
||
|
fragmentContext = null,
|
||
|
/** @internal */
|
||
|
scriptHandler = null) {
|
||
|
this.fragmentContext = fragmentContext;
|
||
|
this.scriptHandler = scriptHandler;
|
||
|
this.currentToken = null;
|
||
|
this.stopped = false;
|
||
|
/** @internal */
|
||
|
this.insertionMode = InsertionMode.INITIAL;
|
||
|
/** @internal */
|
||
|
this.originalInsertionMode = InsertionMode.INITIAL;
|
||
|
/** @internal */
|
||
|
this.headElement = null;
|
||
|
/** @internal */
|
||
|
this.formElement = null;
|
||
|
/** Indicates that the current node is not an element in the HTML namespace */
|
||
|
this.currentNotInHTML = false;
|
||
|
/**
|
||
|
* The template insertion mode stack is maintained from the left.
|
||
|
* Ie. the topmost element will always have index 0.
|
||
|
*
|
||
|
* @internal
|
||
|
*/
|
||
|
this.tmplInsertionModeStack = [];
|
||
|
/** @internal */
|
||
|
this.pendingCharacterTokens = [];
|
||
|
/** @internal */
|
||
|
this.hasNonWhitespacePendingCharacterToken = false;
|
||
|
/** @internal */
|
||
|
this.framesetOk = true;
|
||
|
/** @internal */
|
||
|
this.skipNextNewLine = false;
|
||
|
/** @internal */
|
||
|
this.fosterParentingEnabled = false;
|
||
|
this.options = {
|
||
|
...defaultParserOptions,
|
||
|
...options,
|
||
|
};
|
||
|
this.treeAdapter = this.options.treeAdapter;
|
||
|
this.onParseError = this.options.onParseError;
|
||
|
// Always enable location info if we report parse errors.
|
||
|
if (this.onParseError) {
|
||
|
this.options.sourceCodeLocationInfo = true;
|
||
|
}
|
||
|
this.document = document !== null && document !== void 0 ? document : this.treeAdapter.createDocument();
|
||
|
this.tokenizer = new Tokenizer(this.options, this);
|
||
|
this.activeFormattingElements = new FormattingElementList(this.treeAdapter);
|
||
|
this.fragmentContextID = fragmentContext ? getTagID(this.treeAdapter.getTagName(fragmentContext)) : TAG_ID.UNKNOWN;
|
||
|
this._setContextModes(fragmentContext !== null && fragmentContext !== void 0 ? fragmentContext : this.document, this.fragmentContextID);
|
||
|
this.openElements = new OpenElementStack(this.document, this.treeAdapter, this);
|
||
|
}
|
||
|
// API
|
||
|
static parse(html, options) {
|
||
|
const parser = new this(options);
|
||
|
parser.tokenizer.write(html, true);
|
||
|
return parser.document;
|
||
|
}
|
||
|
static getFragmentParser(fragmentContext, options) {
|
||
|
const opts = {
|
||
|
...defaultParserOptions,
|
||
|
...options,
|
||
|
};
|
||
|
//NOTE: use a <template> element as the fragment context if no context element was provided,
|
||
|
//so we will parse in a "forgiving" manner
|
||
|
fragmentContext !== null && fragmentContext !== void 0 ? fragmentContext : (fragmentContext = opts.treeAdapter.createElement(TAG_NAMES.TEMPLATE, NS.HTML, []));
|
||
|
//NOTE: create a fake element which will be used as the `document` for fragment parsing.
|
||
|
//This is important for jsdom, where a new `document` cannot be created. This led to
|
||
|
//fragment parsing messing with the main `document`.
|
||
|
const documentMock = opts.treeAdapter.createElement('documentmock', NS.HTML, []);
|
||
|
const parser = new this(opts, documentMock, fragmentContext);
|
||
|
if (parser.fragmentContextID === TAG_ID.TEMPLATE) {
|
||
|
parser.tmplInsertionModeStack.unshift(InsertionMode.IN_TEMPLATE);
|
||
|
}
|
||
|
parser._initTokenizerForFragmentParsing();
|
||
|
parser._insertFakeRootElement();
|
||
|
parser._resetInsertionMode();
|
||
|
parser._findFormInFragmentContext();
|
||
|
return parser;
|
||
|
}
|
||
|
getFragment() {
|
||
|
const rootElement = this.treeAdapter.getFirstChild(this.document);
|
||
|
const fragment = this.treeAdapter.createDocumentFragment();
|
||
|
this._adoptNodes(rootElement, fragment);
|
||
|
return fragment;
|
||
|
}
|
||
|
//Errors
|
||
|
/** @internal */
|
||
|
_err(token, code, beforeToken) {
|
||
|
var _a;
|
||
|
if (!this.onParseError)
|
||
|
return;
|
||
|
const loc = (_a = token.location) !== null && _a !== void 0 ? _a : BASE_LOC;
|
||
|
const err = {
|
||
|
code,
|
||
|
startLine: loc.startLine,
|
||
|
startCol: loc.startCol,
|
||
|
startOffset: loc.startOffset,
|
||
|
endLine: beforeToken ? loc.startLine : loc.endLine,
|
||
|
endCol: beforeToken ? loc.startCol : loc.endCol,
|
||
|
endOffset: beforeToken ? loc.startOffset : loc.endOffset,
|
||
|
};
|
||
|
this.onParseError(err);
|
||
|
}
|
||
|
//Stack events
|
||
|
/** @internal */
|
||
|
onItemPush(node, tid, isTop) {
|
||
|
var _a, _b;
|
||
|
(_b = (_a = this.treeAdapter).onItemPush) === null || _b === void 0 ? void 0 : _b.call(_a, node);
|
||
|
if (isTop && this.openElements.stackTop > 0)
|
||
|
this._setContextModes(node, tid);
|
||
|
}
|
||
|
/** @internal */
|
||
|
onItemPop(node, isTop) {
|
||
|
var _a, _b;
|
||
|
if (this.options.sourceCodeLocationInfo) {
|
||
|
this._setEndLocation(node, this.currentToken);
|
||
|
}
|
||
|
(_b = (_a = this.treeAdapter).onItemPop) === null || _b === void 0 ? void 0 : _b.call(_a, node, this.openElements.current);
|
||
|
if (isTop) {
|
||
|
let current;
|
||
|
let currentTagId;
|
||
|
if (this.openElements.stackTop === 0 && this.fragmentContext) {
|
||
|
current = this.fragmentContext;
|
||
|
currentTagId = this.fragmentContextID;
|
||
|
}
|
||
|
else {
|
||
|
({ current, currentTagId } = this.openElements);
|
||
|
}
|
||
|
this._setContextModes(current, currentTagId);
|
||
|
}
|
||
|
}
|
||
|
_setContextModes(current, tid) {
|
||
|
const isHTML = current === this.document || this.treeAdapter.getNamespaceURI(current) === NS.HTML;
|
||
|
this.currentNotInHTML = !isHTML;
|
||
|
this.tokenizer.inForeignNode = !isHTML && !this._isIntegrationPoint(tid, current);
|
||
|
}
|
||
|
/** @protected */
|
||
|
_switchToTextParsing(currentToken, nextTokenizerState) {
|
||
|
this._insertElement(currentToken, NS.HTML);
|
||
|
this.tokenizer.state = nextTokenizerState;
|
||
|
this.originalInsertionMode = this.insertionMode;
|
||
|
this.insertionMode = InsertionMode.TEXT;
|
||
|
}
|
||
|
switchToPlaintextParsing() {
|
||
|
this.insertionMode = InsertionMode.TEXT;
|
||
|
this.originalInsertionMode = InsertionMode.IN_BODY;
|
||
|
this.tokenizer.state = TokenizerMode.PLAINTEXT;
|
||
|
}
|
||
|
//Fragment parsing
|
||
|
/** @protected */
|
||
|
_getAdjustedCurrentElement() {
|
||
|
return this.openElements.stackTop === 0 && this.fragmentContext
|
||
|
? this.fragmentContext
|
||
|
: this.openElements.current;
|
||
|
}
|
||
|
/** @protected */
|
||
|
_findFormInFragmentContext() {
|
||
|
let node = this.fragmentContext;
|
||
|
while (node) {
|
||
|
if (this.treeAdapter.getTagName(node) === TAG_NAMES.FORM) {
|
||
|
this.formElement = node;
|
||
|
break;
|
||
|
}
|
||
|
node = this.treeAdapter.getParentNode(node);
|
||
|
}
|
||
|
}
|
||
|
_initTokenizerForFragmentParsing() {
|
||
|
if (!this.fragmentContext || this.treeAdapter.getNamespaceURI(this.fragmentContext) !== NS.HTML) {
|
||
|
return;
|
||
|
}
|
||
|
switch (this.fragmentContextID) {
|
||
|
case TAG_ID.TITLE:
|
||
|
case TAG_ID.TEXTAREA: {
|
||
|
this.tokenizer.state = TokenizerMode.RCDATA;
|
||
|
break;
|
||
|
}
|
||
|
case TAG_ID.STYLE:
|
||
|
case TAG_ID.XMP:
|
||
|
case TAG_ID.IFRAME:
|
||
|
case TAG_ID.NOEMBED:
|
||
|
case TAG_ID.NOFRAMES:
|
||
|
case TAG_ID.NOSCRIPT: {
|
||
|
this.tokenizer.state = TokenizerMode.RAWTEXT;
|
||
|
break;
|
||
|
}
|
||
|
case TAG_ID.SCRIPT: {
|
||
|
this.tokenizer.state = TokenizerMode.SCRIPT_DATA;
|
||
|
break;
|
||
|
}
|
||
|
case TAG_ID.PLAINTEXT: {
|
||
|
this.tokenizer.state = TokenizerMode.PLAINTEXT;
|
||
|
break;
|
||
|
}
|
||
|
// Do nothing
|
||
|
}
|
||
|
}
|
||
|
//Tree mutation
|
||
|
/** @protected */
|
||
|
_setDocumentType(token) {
|
||
|
const name = token.name || '';
|
||
|
const publicId = token.publicId || '';
|
||
|
const systemId = token.systemId || '';
|
||
|
this.treeAdapter.setDocumentType(this.document, name, publicId, systemId);
|
||
|
if (token.location) {
|
||
|
const documentChildren = this.treeAdapter.getChildNodes(this.document);
|
||
|
const docTypeNode = documentChildren.find((node) => this.treeAdapter.isDocumentTypeNode(node));
|
||
|
if (docTypeNode) {
|
||
|
this.treeAdapter.setNodeSourceCodeLocation(docTypeNode, token.location);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
/** @protected */
|
||
|
_attachElementToTree(element, location) {
|
||
|
if (this.options.sourceCodeLocationInfo) {
|
||
|
const loc = location && {
|
||
|
...location,
|
||
|
startTag: location,
|
||
|
};
|
||
|
this.treeAdapter.setNodeSourceCodeLocation(element, loc);
|
||
|
}
|
||
|
if (this._shouldFosterParentOnInsertion()) {
|
||
|
this._fosterParentElement(element);
|
||
|
}
|
||
|
else {
|
||
|
const parent = this.openElements.currentTmplContentOrNode;
|
||
|
this.treeAdapter.appendChild(parent, element);
|
||
|
}
|
||
|
}
|
||
|
/**
|
||
|
* For self-closing tags. Add an element to the tree, but skip adding it
|
||
|
* to the stack.
|
||
|
*/
|
||
|
/** @protected */
|
||
|
_appendElement(token, namespaceURI) {
|
||
|
const element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs);
|
||
|
this._attachElementToTree(element, token.location);
|
||
|
}
|
||
|
/** @protected */
|
||
|
_insertElement(token, namespaceURI) {
|
||
|
const element = this.treeAdapter.createElement(token.tagName, namespaceURI, token.attrs);
|
||
|
this._attachElementToTree(element, token.location);
|
||
|
this.openElements.push(element, token.tagID);
|
||
|
}
|
||
|
/** @protected */
|
||
|
_insertFakeElement(tagName, tagID) {
|
||
|
const element = this.treeAdapter.createElement(tagName, NS.HTML, []);
|
||
|
this._attachElementToTree(element, null);
|
||
|
this.openElements.push(element, tagID);
|
||
|
}
|
||
|
/** @protected */
|
||
|
_insertTemplate(token) {
|
||
|
const tmpl = this.treeAdapter.createElement(token.tagName, NS.HTML, token.attrs);
|
||
|
const content = this.treeAdapter.createDocumentFragment();
|
||
|
this.treeAdapter.setTemplateContent(tmpl, content);
|
||
|
this._attachElementToTree(tmpl, token.location);
|
||
|
this.openElements.push(tmpl, token.tagID);
|
||
|
if (this.options.sourceCodeLocationInfo)
|
||
|
this.treeAdapter.setNodeSourceCodeLocation(content, null);
|
||
|
}
|
||
|
/** @protected */
|
||
|
_insertFakeRootElement() {
|
||
|
const element = this.treeAdapter.createElement(TAG_NAMES.HTML, NS.HTML, []);
|
||
|
if (this.options.sourceCodeLocationInfo)
|
||
|
this.treeAdapter.setNodeSourceCodeLocation(element, null);
|
||
|
this.treeAdapter.appendChild(this.openElements.current, element);
|
||
|
this.openElements.push(element, TAG_ID.HTML);
|
||
|
}
|
||
|
/** @protected */
|
||
|
_appendCommentNode(token, parent) {
|
||
|
const commentNode = this.treeAdapter.createCommentNode(token.data);
|
||
|
this.treeAdapter.appendChild(parent, commentNode);
|
||
|
if (this.options.sourceCodeLocationInfo) {
|
||
|
this.treeAdapter.setNodeSourceCodeLocation(commentNode, token.location);
|
||
|
}
|
||
|
}
|
||
|
/** @protected */
|
||
|
_insertCharacters(token) {
|
||
|
let parent;
|
||
|
let beforeElement;
|
||
|
if (this._shouldFosterParentOnInsertion()) {
|
||
|
({ parent, beforeElement } = this._findFosterParentingLocation());
|
||
|
if (beforeElement) {
|
||
|
this.treeAdapter.insertTextBefore(parent, token.chars, beforeElement);
|
||
|
}
|
||
|
else {
|
||
|
this.treeAdapter.insertText(parent, token.chars);
|
||
|
}
|
||
|
}
|
||
|
else {
|
||
|
parent = this.openElements.currentTmplContentOrNode;
|
||
|
this.treeAdapter.insertText(parent, token.chars);
|
||
|
}
|
||
|
if (!token.location)
|
||
|
return;
|
||
|
const siblings = this.treeAdapter.getChildNodes(parent);
|
||
|
const textNodeIdx = beforeElement ? siblings.lastIndexOf(beforeElement) : siblings.length;
|
||
|
const textNode = siblings[textNodeIdx - 1];
|
||
|
//NOTE: if we have a location assigned by another token, then just update the end position
|
||
|
const tnLoc = this.treeAdapter.getNodeSourceCodeLocation(textNode);
|
||
|
if (tnLoc) {
|
||
|
const { endLine, endCol, endOffset } = token.location;
|
||
|
this.treeAdapter.updateNodeSourceCodeLocation(textNode, { endLine, endCol, endOffset });
|
||
|
}
|
||
|
else if (this.options.sourceCodeLocationInfo) {
|
||
|
this.treeAdapter.setNodeSourceCodeLocation(textNode, token.location);
|
||
|
}
|
||
|
}
|
||
|
/** @protected */
|
||
|
_adoptNodes(donor, recipient) {
|
||
|
for (let child = this.treeAdapter.getFirstChild(donor); child; child = this.treeAdapter.getFirstChild(donor)) {
|
||
|
this.treeAdapter.detachNode(child);
|
||
|
this.treeAdapter.appendChild(recipient, child);
|
||
|
}
|
||
|
}
|
||
|
/** @protected */
|
||
|
_setEndLocation(element, closingToken) {
|
||
|
if (this.treeAdapter.getNodeSourceCodeLocation(element) && closingToken.location) {
|
||
|
const ctLoc = closingToken.location;
|
||
|
const tn = this.treeAdapter.getTagName(element);
|
||
|
const endLoc =
|
||
|
// NOTE: For cases like <p> <p> </p> - First 'p' closes without a closing
|
||
|
// tag and for cases like <td> <p> </td> - 'p' closes without a closing tag.
|
||
|
closingToken.type === TokenType.END_TAG && tn === closingToken.tagName
|
||
|
? {
|
||
|
endTag: { ...ctLoc },
|
||
|
endLine: ctLoc.endLine,
|
||
|
endCol: ctLoc.endCol,
|
||
|
endOffset: ctLoc.endOffset,
|
||
|
}
|
||
|
: {
|
||
|
endLine: ctLoc.startLine,
|
||
|
endCol: ctLoc.startCol,
|
||
|
endOffset: ctLoc.startOffset,
|
||
|
};
|
||
|
this.treeAdapter.updateNodeSourceCodeLocation(element, endLoc);
|
||
|
}
|
||
|
}
|
||
|
//Token processing
|
||
|
shouldProcessStartTagTokenInForeignContent(token) {
|
||
|
// Check that neither current === document, or ns === NS.HTML
|
||
|
if (!this.currentNotInHTML)
|
||
|
return false;
|
||
|
let current;
|
||
|
let currentTagId;
|
||
|
if (this.openElements.stackTop === 0 && this.fragmentContext) {
|
||
|
current = this.fragmentContext;
|
||
|
currentTagId = this.fragmentContextID;
|
||
|
}
|
||
|
else {
|
||
|
({ current, currentTagId } = this.openElements);
|
||
|
}
|
||
|
if (token.tagID === TAG_ID.SVG &&
|
||
|
this.treeAdapter.getTagName(current) === TAG_NAMES.ANNOTATION_XML &&
|
||
|
this.treeAdapter.getNamespaceURI(current) === NS.MATHML) {
|
||
|
return false;
|
||
|
}
|
||
|
return (
|
||
|
// Check that `current` is not an integration point for HTML or MathML elements.
|
||
|
this.tokenizer.inForeignNode ||
|
||
|
// If it _is_ an integration point, then we might have to check that it is not an HTML
|
||
|
// integration point.
|
||
|
((token.tagID === TAG_ID.MGLYPH || token.tagID === TAG_ID.MALIGNMARK) &&
|
||
|
!this._isIntegrationPoint(currentTagId, current, NS.HTML)));
|
||
|
}
|
||
|
/** @protected */
|
||
|
_processToken(token) {
|
||
|
switch (token.type) {
|
||
|
case TokenType.CHARACTER: {
|
||
|
this.onCharacter(token);
|
||
|
break;
|
||
|
}
|
||
|
case TokenType.NULL_CHARACTER: {
|
||
|
this.onNullCharacter(token);
|
||
|
break;
|
||
|
}
|
||
|
case TokenType.COMMENT: {
|
||
|
this.onComment(token);
|
||
|
break;
|
||
|
}
|
||
|
case TokenType.DOCTYPE: {
|
||
|
this.onDoctype(token);
|
||
|
break;
|
||
|
}
|
||
|
case TokenType.START_TAG: {
|
||
|
this._processStartTag(token);
|
||
|
break;
|
||
|
}
|
||
|
case TokenType.END_TAG: {
|
||
|
this.onEndTag(token);
|
||
|
break;
|
||
|
}
|
||
|
case TokenType.EOF: {
|
||
|
this.onEof(token);
|
||
|
break;
|
||
|
}
|
||
|
case TokenType.WHITESPACE_CHARACTER: {
|
||
|
this.onWhitespaceCharacter(token);
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
//Integration points
|
||
|
/** @protected */
|
||
|
_isIntegrationPoint(tid, element, foreignNS) {
|
||
|
const ns = this.treeAdapter.getNamespaceURI(element);
|
||
|
const attrs = this.treeAdapter.getAttrList(element);
|
||
|
return isIntegrationPoint(tid, ns, attrs, foreignNS);
|
||
|
}
|
||
|
//Active formatting elements reconstruction
|
||
|
/** @protected */
|
||
|
_reconstructActiveFormattingElements() {
|
||
|
const listLength = this.activeFormattingElements.entries.length;
|
||
|
if (listLength) {
|
||
|
const endIndex = this.activeFormattingElements.entries.findIndex((entry) => entry.type === EntryType.Marker || this.openElements.contains(entry.element));
|
||
|
const unopenIdx = endIndex < 0 ? listLength - 1 : endIndex - 1;
|
||
|
for (let i = unopenIdx; i >= 0; i--) {
|
||
|
const entry = this.activeFormattingElements.entries[i];
|
||
|
this._insertElement(entry.token, this.treeAdapter.getNamespaceURI(entry.element));
|
||
|
entry.element = this.openElements.current;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
//Close elements
|
||
|
/** @protected */
|
||
|
_closeTableCell() {
|
||
|
this.openElements.generateImpliedEndTags();
|
||
|
this.openElements.popUntilTableCellPopped();
|
||
|
this.activeFormattingElements.clearToLastMarker();
|
||
|
this.insertionMode = InsertionMode.IN_ROW;
|
||
|
}
|
||
|
/** @protected */
|
||
|
_closePElement() {
|
||
|
this.openElements.generateImpliedEndTagsWithExclusion(TAG_ID.P);
|
||
|
this.openElements.popUntilTagNamePopped(TAG_ID.P);
|
||
|
}
|
||
|
//Insertion modes
|
||
|
/** @protected */
|
||
|
_resetInsertionMode() {
|
||
|
for (let i = this.openElements.stackTop; i >= 0; i--) {
|
||
|
//Insertion mode reset map
|
||
|
switch (i === 0 && this.fragmentContext ? this.fragmentContextID : this.openElements.tagIDs[i]) {
|
||
|
case TAG_ID.TR: {
|
||
|
this.insertionMode = InsertionMode.IN_ROW;
|
||
|
return;
|
||
|
}
|
||
|
case TAG_ID.TBODY:
|
||
|
case TAG_ID.THEAD:
|
||
|
case TAG_ID.TFOOT: {
|
||
|
this.insertionMode = InsertionMode.IN_TABLE_BODY;
|
||
|
return;
|
||
|
}
|
||
|
case TAG_ID.CAPTION: {
|
||
|
this.insertionMode = InsertionMode.IN_CAPTION;
|
||
|
return;
|
||
|
}
|
||
|
case TAG_ID.COLGROUP: {
|
||
|
this.insertionMode = InsertionMode.IN_COLUMN_GROUP;
|
||
|
return;
|
||
|
}
|
||
|
case TAG_ID.TABLE: {
|
||
|
this.insertionMode = InsertionMode.IN_TABLE;
|
||
|
return;
|
||
|
}
|
||
|
case TAG_ID.BODY: {
|
||
|
this.insertionMode = InsertionMode.IN_BODY;
|
||
|
return;
|
||
|
}
|
||
|
case TAG_ID.FRAMESET: {
|
||
|
this.insertionMode = InsertionMode.IN_FRAMESET;
|
||
|
return;
|
||
|
}
|
||
|
case TAG_ID.SELECT: {
|
||
|
this._resetInsertionModeForSelect(i);
|
||
|
return;
|
||
|
}
|
||
|
case TAG_ID.TEMPLATE: {
|
||
|
this.insertionMode = this.tmplInsertionModeStack[0];
|
||
|
return;
|
||
|
}
|
||
|
case TAG_ID.HTML: {
|
||
|
this.insertionMode = this.headElement ? InsertionMode.AFTER_HEAD : InsertionMode.BEFORE_HEAD;
|
||
|
return;
|
||
|
}
|
||
|
case TAG_ID.TD:
|
||
|
case TAG_ID.TH: {
|
||
|
if (i > 0) {
|
||
|
this.insertionMode = InsertionMode.IN_CELL;
|
||
|
return;
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
case TAG_ID.HEAD: {
|
||
|
if (i > 0) {
|
||
|
this.insertionMode = InsertionMode.IN_HEAD;
|
||
|
return;
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
this.insertionMode = InsertionMode.IN_BODY;
|
||
|
}
|
||
|
/** @protected */
|
||
|
_resetInsertionModeForSelect(selectIdx) {
|
||
|
if (selectIdx > 0) {
|
||
|
for (let i = selectIdx - 1; i > 0; i--) {
|
||
|
const tn = this.openElements.tagIDs[i];
|
||
|
if (tn === TAG_ID.TEMPLATE) {
|
||
|
break;
|
||
|
}
|
||
|
else if (tn === TAG_ID.TABLE) {
|
||
|
this.insertionMode = InsertionMode.IN_SELECT_IN_TABLE;
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
this.insertionMode = InsertionMode.IN_SELECT;
|
||
|
}
|
||
|
//Foster parenting
|
||
|
/** @protected */
|
||
|
_isElementCausesFosterParenting(tn) {
|
||
|
return TABLE_STRUCTURE_TAGS.has(tn);
|
||
|
}
|
||
|
/** @protected */
|
||
|
_shouldFosterParentOnInsertion() {
|
||
|
return this.fosterParentingEnabled && this._isElementCausesFosterParenting(this.openElements.currentTagId);
|
||
|
}
|
||
|
/** @protected */
|
||
|
_findFosterParentingLocation() {
|
||
|
for (let i = this.openElements.stackTop; i >= 0; i--) {
|
||
|
const openElement = this.openElements.items[i];
|
||
|
switch (this.openElements.tagIDs[i]) {
|
||
|
case TAG_ID.TEMPLATE: {
|
||
|
if (this.treeAdapter.getNamespaceURI(openElement) === NS.HTML) {
|
||
|
return { parent: this.treeAdapter.getTemplateContent(openElement), beforeElement: null };
|
||
|
}
|
||
|
break;
|
||
|
}
|
||
|
case TAG_ID.TABLE: {
|
||
|
const parent = this.treeAdapter.getParentNode(openElement);
|
||
|
if (parent) {
|
||
|
return { parent, beforeElement: openElement };
|
||
|
}
|
||
|
return { parent: this.openElements.items[i - 1], beforeElement: null };
|
||
|
}
|
||
|
// Do nothing
|
||
|
}
|
||
|
}
|
||
|
return { parent: this.openElements.items[0], beforeElement: null };
|
||
|
}
|
||
|
/** @protected */
|
||
|
_fosterParentElement(element) {
|
||
|
const location = this._findFosterParentingLocation();
|
||
|
if (location.beforeElement) {
|
||
|
this.treeAdapter.insertBefore(location.parent, element, location.beforeElement);
|
||
|
}
|
||
|
else {
|
||
|
this.treeAdapter.appendChild(location.parent, element);
|
||
|
}
|
||
|
}
|
||
|
//Special elements
|
||
|
/** @protected */
|
||
|
_isSpecialElement(element, id) {
|
||
|
const ns = this.treeAdapter.getNamespaceURI(element);
|
||
|
return SPECIAL_ELEMENTS[ns].has(id);
|
||
|
}
|
||
|
/** @internal */
|
||
|
onCharacter(token) {
|
||
|
this.skipNextNewLine = false;
|
||
|
if (this.tokenizer.inForeignNode) {
|
||
|
characterInForeignContent(this, token);
|
||
|
return;
|
||
|
}
|
||
|
switch (this.insertionMode) {
|
||
|
case InsertionMode.INITIAL: {
|
||
|
tokenInInitialMode(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.BEFORE_HTML: {
|
||
|
tokenBeforeHtml(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.BEFORE_HEAD: {
|
||
|
tokenBeforeHead(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_HEAD: {
|
||
|
tokenInHead(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_HEAD_NO_SCRIPT: {
|
||
|
tokenInHeadNoScript(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.AFTER_HEAD: {
|
||
|
tokenAfterHead(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_BODY:
|
||
|
case InsertionMode.IN_CAPTION:
|
||
|
case InsertionMode.IN_CELL:
|
||
|
case InsertionMode.IN_TEMPLATE: {
|
||
|
characterInBody(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.TEXT:
|
||
|
case InsertionMode.IN_SELECT:
|
||
|
case InsertionMode.IN_SELECT_IN_TABLE: {
|
||
|
this._insertCharacters(token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_TABLE:
|
||
|
case InsertionMode.IN_TABLE_BODY:
|
||
|
case InsertionMode.IN_ROW: {
|
||
|
characterInTable(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_TABLE_TEXT: {
|
||
|
characterInTableText(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_COLUMN_GROUP: {
|
||
|
tokenInColumnGroup(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.AFTER_BODY: {
|
||
|
tokenAfterBody(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.AFTER_AFTER_BODY: {
|
||
|
tokenAfterAfterBody(this, token);
|
||
|
break;
|
||
|
}
|
||
|
// Do nothing
|
||
|
}
|
||
|
}
|
||
|
/** @internal */
|
||
|
onNullCharacter(token) {
|
||
|
this.skipNextNewLine = false;
|
||
|
if (this.tokenizer.inForeignNode) {
|
||
|
nullCharacterInForeignContent(this, token);
|
||
|
return;
|
||
|
}
|
||
|
switch (this.insertionMode) {
|
||
|
case InsertionMode.INITIAL: {
|
||
|
tokenInInitialMode(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.BEFORE_HTML: {
|
||
|
tokenBeforeHtml(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.BEFORE_HEAD: {
|
||
|
tokenBeforeHead(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_HEAD: {
|
||
|
tokenInHead(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_HEAD_NO_SCRIPT: {
|
||
|
tokenInHeadNoScript(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.AFTER_HEAD: {
|
||
|
tokenAfterHead(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.TEXT: {
|
||
|
this._insertCharacters(token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_TABLE:
|
||
|
case InsertionMode.IN_TABLE_BODY:
|
||
|
case InsertionMode.IN_ROW: {
|
||
|
characterInTable(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_COLUMN_GROUP: {
|
||
|
tokenInColumnGroup(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.AFTER_BODY: {
|
||
|
tokenAfterBody(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.AFTER_AFTER_BODY: {
|
||
|
tokenAfterAfterBody(this, token);
|
||
|
break;
|
||
|
}
|
||
|
// Do nothing
|
||
|
}
|
||
|
}
|
||
|
/** @internal */
|
||
|
onComment(token) {
|
||
|
this.skipNextNewLine = false;
|
||
|
if (this.currentNotInHTML) {
|
||
|
appendComment(this, token);
|
||
|
return;
|
||
|
}
|
||
|
switch (this.insertionMode) {
|
||
|
case InsertionMode.INITIAL:
|
||
|
case InsertionMode.BEFORE_HTML:
|
||
|
case InsertionMode.BEFORE_HEAD:
|
||
|
case InsertionMode.IN_HEAD:
|
||
|
case InsertionMode.IN_HEAD_NO_SCRIPT:
|
||
|
case InsertionMode.AFTER_HEAD:
|
||
|
case InsertionMode.IN_BODY:
|
||
|
case InsertionMode.IN_TABLE:
|
||
|
case InsertionMode.IN_CAPTION:
|
||
|
case InsertionMode.IN_COLUMN_GROUP:
|
||
|
case InsertionMode.IN_TABLE_BODY:
|
||
|
case InsertionMode.IN_ROW:
|
||
|
case InsertionMode.IN_CELL:
|
||
|
case InsertionMode.IN_SELECT:
|
||
|
case InsertionMode.IN_SELECT_IN_TABLE:
|
||
|
case InsertionMode.IN_TEMPLATE:
|
||
|
case InsertionMode.IN_FRAMESET:
|
||
|
case InsertionMode.AFTER_FRAMESET: {
|
||
|
appendComment(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_TABLE_TEXT: {
|
||
|
tokenInTableText(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.AFTER_BODY: {
|
||
|
appendCommentToRootHtmlElement(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.AFTER_AFTER_BODY:
|
||
|
case InsertionMode.AFTER_AFTER_FRAMESET: {
|
||
|
appendCommentToDocument(this, token);
|
||
|
break;
|
||
|
}
|
||
|
// Do nothing
|
||
|
}
|
||
|
}
|
||
|
/** @internal */
|
||
|
onDoctype(token) {
|
||
|
this.skipNextNewLine = false;
|
||
|
switch (this.insertionMode) {
|
||
|
case InsertionMode.INITIAL: {
|
||
|
doctypeInInitialMode(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.BEFORE_HEAD:
|
||
|
case InsertionMode.IN_HEAD:
|
||
|
case InsertionMode.IN_HEAD_NO_SCRIPT:
|
||
|
case InsertionMode.AFTER_HEAD: {
|
||
|
this._err(token, ERR.misplacedDoctype);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_TABLE_TEXT: {
|
||
|
tokenInTableText(this, token);
|
||
|
break;
|
||
|
}
|
||
|
// Do nothing
|
||
|
}
|
||
|
}
|
||
|
/** @internal */
|
||
|
onStartTag(token) {
|
||
|
this.skipNextNewLine = false;
|
||
|
this.currentToken = token;
|
||
|
this._processStartTag(token);
|
||
|
if (token.selfClosing && !token.ackSelfClosing) {
|
||
|
this._err(token, ERR.nonVoidHtmlElementStartTagWithTrailingSolidus);
|
||
|
}
|
||
|
}
|
||
|
/**
|
||
|
* Processes a given start tag.
|
||
|
*
|
||
|
* `onStartTag` checks if a self-closing tag was recognized. When a token
|
||
|
* is moved inbetween multiple insertion modes, this check for self-closing
|
||
|
* could lead to false positives. To avoid this, `_processStartTag` is used
|
||
|
* for nested calls.
|
||
|
*
|
||
|
* @param token The token to process.
|
||
|
* @protected
|
||
|
*/
|
||
|
_processStartTag(token) {
|
||
|
if (this.shouldProcessStartTagTokenInForeignContent(token)) {
|
||
|
startTagInForeignContent(this, token);
|
||
|
}
|
||
|
else {
|
||
|
this._startTagOutsideForeignContent(token);
|
||
|
}
|
||
|
}
|
||
|
/** @protected */
|
||
|
_startTagOutsideForeignContent(token) {
|
||
|
switch (this.insertionMode) {
|
||
|
case InsertionMode.INITIAL: {
|
||
|
tokenInInitialMode(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.BEFORE_HTML: {
|
||
|
startTagBeforeHtml(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.BEFORE_HEAD: {
|
||
|
startTagBeforeHead(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_HEAD: {
|
||
|
startTagInHead(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_HEAD_NO_SCRIPT: {
|
||
|
startTagInHeadNoScript(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.AFTER_HEAD: {
|
||
|
startTagAfterHead(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_BODY: {
|
||
|
startTagInBody(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_TABLE: {
|
||
|
startTagInTable(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_TABLE_TEXT: {
|
||
|
tokenInTableText(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_CAPTION: {
|
||
|
startTagInCaption(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_COLUMN_GROUP: {
|
||
|
startTagInColumnGroup(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_TABLE_BODY: {
|
||
|
startTagInTableBody(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_ROW: {
|
||
|
startTagInRow(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_CELL: {
|
||
|
startTagInCell(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_SELECT: {
|
||
|
startTagInSelect(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_SELECT_IN_TABLE: {
|
||
|
startTagInSelectInTable(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_TEMPLATE: {
|
||
|
startTagInTemplate(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.AFTER_BODY: {
|
||
|
startTagAfterBody(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_FRAMESET: {
|
||
|
startTagInFrameset(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.AFTER_FRAMESET: {
|
||
|
startTagAfterFrameset(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.AFTER_AFTER_BODY: {
|
||
|
startTagAfterAfterBody(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.AFTER_AFTER_FRAMESET: {
|
||
|
startTagAfterAfterFrameset(this, token);
|
||
|
break;
|
||
|
}
|
||
|
// Do nothing
|
||
|
}
|
||
|
}
|
||
|
/** @internal */
|
||
|
onEndTag(token) {
|
||
|
this.skipNextNewLine = false;
|
||
|
this.currentToken = token;
|
||
|
if (this.currentNotInHTML) {
|
||
|
endTagInForeignContent(this, token);
|
||
|
}
|
||
|
else {
|
||
|
this._endTagOutsideForeignContent(token);
|
||
|
}
|
||
|
}
|
||
|
/** @protected */
|
||
|
_endTagOutsideForeignContent(token) {
|
||
|
switch (this.insertionMode) {
|
||
|
case InsertionMode.INITIAL: {
|
||
|
tokenInInitialMode(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.BEFORE_HTML: {
|
||
|
endTagBeforeHtml(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.BEFORE_HEAD: {
|
||
|
endTagBeforeHead(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_HEAD: {
|
||
|
endTagInHead(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_HEAD_NO_SCRIPT: {
|
||
|
endTagInHeadNoScript(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.AFTER_HEAD: {
|
||
|
endTagAfterHead(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_BODY: {
|
||
|
endTagInBody(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.TEXT: {
|
||
|
endTagInText(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_TABLE: {
|
||
|
endTagInTable(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_TABLE_TEXT: {
|
||
|
tokenInTableText(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_CAPTION: {
|
||
|
endTagInCaption(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_COLUMN_GROUP: {
|
||
|
endTagInColumnGroup(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_TABLE_BODY: {
|
||
|
endTagInTableBody(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_ROW: {
|
||
|
endTagInRow(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_CELL: {
|
||
|
endTagInCell(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_SELECT: {
|
||
|
endTagInSelect(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_SELECT_IN_TABLE: {
|
||
|
endTagInSelectInTable(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_TEMPLATE: {
|
||
|
endTagInTemplate(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.AFTER_BODY: {
|
||
|
endTagAfterBody(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_FRAMESET: {
|
||
|
endTagInFrameset(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.AFTER_FRAMESET: {
|
||
|
endTagAfterFrameset(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.AFTER_AFTER_BODY: {
|
||
|
tokenAfterAfterBody(this, token);
|
||
|
break;
|
||
|
}
|
||
|
// Do nothing
|
||
|
}
|
||
|
}
|
||
|
/** @internal */
|
||
|
onEof(token) {
|
||
|
switch (this.insertionMode) {
|
||
|
case InsertionMode.INITIAL: {
|
||
|
tokenInInitialMode(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.BEFORE_HTML: {
|
||
|
tokenBeforeHtml(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.BEFORE_HEAD: {
|
||
|
tokenBeforeHead(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_HEAD: {
|
||
|
tokenInHead(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_HEAD_NO_SCRIPT: {
|
||
|
tokenInHeadNoScript(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.AFTER_HEAD: {
|
||
|
tokenAfterHead(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_BODY:
|
||
|
case InsertionMode.IN_TABLE:
|
||
|
case InsertionMode.IN_CAPTION:
|
||
|
case InsertionMode.IN_COLUMN_GROUP:
|
||
|
case InsertionMode.IN_TABLE_BODY:
|
||
|
case InsertionMode.IN_ROW:
|
||
|
case InsertionMode.IN_CELL:
|
||
|
case InsertionMode.IN_SELECT:
|
||
|
case InsertionMode.IN_SELECT_IN_TABLE: {
|
||
|
eofInBody(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.TEXT: {
|
||
|
eofInText(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_TABLE_TEXT: {
|
||
|
tokenInTableText(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_TEMPLATE: {
|
||
|
eofInTemplate(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.AFTER_BODY:
|
||
|
case InsertionMode.IN_FRAMESET:
|
||
|
case InsertionMode.AFTER_FRAMESET:
|
||
|
case InsertionMode.AFTER_AFTER_BODY:
|
||
|
case InsertionMode.AFTER_AFTER_FRAMESET: {
|
||
|
stopParsing(this, token);
|
||
|
break;
|
||
|
}
|
||
|
// Do nothing
|
||
|
}
|
||
|
}
|
||
|
/** @internal */
|
||
|
onWhitespaceCharacter(token) {
|
||
|
if (this.skipNextNewLine) {
|
||
|
this.skipNextNewLine = false;
|
||
|
if (token.chars.charCodeAt(0) === CODE_POINTS.LINE_FEED) {
|
||
|
if (token.chars.length === 1) {
|
||
|
return;
|
||
|
}
|
||
|
token.chars = token.chars.substr(1);
|
||
|
}
|
||
|
}
|
||
|
if (this.tokenizer.inForeignNode) {
|
||
|
this._insertCharacters(token);
|
||
|
return;
|
||
|
}
|
||
|
switch (this.insertionMode) {
|
||
|
case InsertionMode.IN_HEAD:
|
||
|
case InsertionMode.IN_HEAD_NO_SCRIPT:
|
||
|
case InsertionMode.AFTER_HEAD:
|
||
|
case InsertionMode.TEXT:
|
||
|
case InsertionMode.IN_COLUMN_GROUP:
|
||
|
case InsertionMode.IN_SELECT:
|
||
|
case InsertionMode.IN_SELECT_IN_TABLE:
|
||
|
case InsertionMode.IN_FRAMESET:
|
||
|
case InsertionMode.AFTER_FRAMESET: {
|
||
|
this._insertCharacters(token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_BODY:
|
||
|
case InsertionMode.IN_CAPTION:
|
||
|
case InsertionMode.IN_CELL:
|
||
|
case InsertionMode.IN_TEMPLATE:
|
||
|
case InsertionMode.AFTER_BODY:
|
||
|
case InsertionMode.AFTER_AFTER_BODY:
|
||
|
case InsertionMode.AFTER_AFTER_FRAMESET: {
|
||
|
whitespaceCharacterInBody(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_TABLE:
|
||
|
case InsertionMode.IN_TABLE_BODY:
|
||
|
case InsertionMode.IN_ROW: {
|
||
|
characterInTable(this, token);
|
||
|
break;
|
||
|
}
|
||
|
case InsertionMode.IN_TABLE_TEXT: {
|
||
|
whitespaceCharacterInTableText(this, token);
|
||
|
break;
|
||
|
}
|
||
|
// Do nothing
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
//Adoption agency algorithm
|
||
|
//(see: https://html.spec.whatwg.org/multipage/parsing.html#adoption-agency-algorithm)
|
||
|
//------------------------------------------------------------------
|
||
|
//Steps 5-8 of the algorithm
|
||
|
/**
 * Looks up the active formatting entry the end tag `token` refers to.
 *
 * - No entry with that tag name: the token is handled by
 *   `genericEndTagInBody` and the falsy lookup result is returned.
 * - Entry whose element is no longer on the stack of open elements: the
 *   entry is removed from the list and `null` is returned.
 * - Entry whose tag is not in scope: `null` is returned.
 *
 * @param p Parser instance.
 * @param token End tag token (`tagName`, `tagID`).
 * @returns The usable entry, or a falsy value when the algorithm must stop.
 */
function aaObtainFormattingElementEntry(p, token) {
    let formattingElementEntry = p.activeFormattingElements.getElementEntryInScopeWithTagName(token.tagName);
    if (formattingElementEntry) {
        if (!p.openElements.contains(formattingElementEntry.element)) {
            p.activeFormattingElements.removeEntry(formattingElementEntry);
            formattingElementEntry = null;
        }
        else if (!p.openElements.hasInScope(token.tagID)) {
            formattingElementEntry = null;
        }
    }
    else {
        genericEndTagInBody(p, token);
    }
    return formattingElementEntry;
}
|
||
|
//Steps 9 and 10 of the algorithm
|
||
|
/**
 * Finds the furthest block for a formatting element.
 *
 * The stack of open elements is walked from the top down to the formatting
 * element; the last special element seen on the way (the one closest to the
 * formatting element) is the result. When there is none, the stack is cut
 * back to the formatting element's index (0 if it was not found) and the
 * entry is removed from the active formatting elements.
 *
 * @param p Parser instance.
 * @param formattingElementEntry Entry of the formatting element.
 * @returns The furthest block, or `null`.
 */
function aaObtainFurthestBlock(p, formattingElementEntry) {
    let furthestBlock = null;
    let idx = p.openElements.stackTop;
    for (; idx >= 0; idx--) {
        const element = p.openElements.items[idx];
        if (element === formattingElementEntry.element) {
            break;
        }
        if (p._isSpecialElement(element, p.openElements.tagIDs[idx])) {
            furthestBlock = element;
        }
    }
    if (!furthestBlock) {
        // `idx` is -1 when the formatting element was not on the stack.
        p.openElements.shortenToLength(Math.max(idx, 0));
        p.activeFormattingElements.removeEntry(formattingElementEntry);
    }
    return furthestBlock;
}
|
||
|
//Step 13 of the algorithm
|
||
|
/**
 * Inner loop of the adoption agency algorithm.
 *
 * Walks from the furthest block towards the formatting element via
 * `getCommonAncestor`. Elements with no active formatting entry, or visited
 * once the iteration counter has reached `AA_INNER_LOOP_ITER`, are removed
 * from the stack of open elements. Every other element is re-created and
 * becomes the new parent of the previously handled node.
 *
 * @param p Parser instance.
 * @param furthestBlock Furthest block found for the formatting element.
 * @param formattingElement The formatting element itself.
 * @returns The last node handled (the furthest block if nothing was re-created).
 */
function aaInnerLoop(p, furthestBlock, formattingElement) {
    let lastElement = furthestBlock;
    let nextElement = p.openElements.getCommonAncestor(furthestBlock);
    for (let i = 0, element = nextElement; element !== formattingElement; i++, element = nextElement) {
        //NOTE: store the next element for the next loop iteration (it may be deleted from the stack by step 9.5)
        nextElement = p.openElements.getCommonAncestor(element);
        const elementEntry = p.activeFormattingElements.getElementEntry(element);
        const counterOverflow = elementEntry && i >= AA_INNER_LOOP_ITER;
        const shouldRemoveFromOpenElements = !elementEntry || counterOverflow;
        if (shouldRemoveFromOpenElements) {
            if (counterOverflow) {
                p.activeFormattingElements.removeEntry(elementEntry);
            }
            p.openElements.remove(element);
        }
        else {
            element = aaRecreateElementFromEntry(p, elementEntry);
            // The first re-created element marks where the replacement
            // formatting element is inserted later.
            if (lastElement === furthestBlock) {
                p.activeFormattingElements.bookmark = elementEntry;
            }
            p.treeAdapter.detachNode(lastElement);
            p.treeAdapter.appendChild(element, lastElement);
            lastElement = element;
        }
    }
    return lastElement;
}
|
||
|
//Step 13.7 of the algorithm
|
||
|
/**
 * Creates a fresh element from the token stored in `elementEntry`, in the
 * namespace of the entry's current element, and swaps it in for the old
 * element both on the stack of open elements and in the entry itself.
 *
 * @param p Parser instance.
 * @param elementEntry Active formatting entry (`element`, `token`).
 * @returns The newly created element.
 */
function aaRecreateElementFromEntry(p, elementEntry) {
    const ns = p.treeAdapter.getNamespaceURI(elementEntry.element);
    const newElement = p.treeAdapter.createElement(elementEntry.token.tagName, ns, elementEntry.token.attrs);
    p.openElements.replace(elementEntry.element, newElement);
    elementEntry.element = newElement;
    return newElement;
}
|
||
|
//Step 14 of the algorithm
|
||
|
/**
 * Inserts `lastElement` relative to `commonAncestor`.
 *
 * The node is foster parented when the ancestor's tag passes
 * `_isElementCausesFosterParenting`. Otherwise it is appended to the
 * ancestor, or to the ancestor's template content when the ancestor is an
 * HTML `<template>`.
 *
 * @param p Parser instance.
 * @param commonAncestor Common ancestor element.
 * @param lastElement Node to insert.
 */
function aaInsertLastNodeInCommonAncestor(p, commonAncestor, lastElement) {
    const tn = p.treeAdapter.getTagName(commonAncestor);
    const tid = getTagID(tn);
    if (p._isElementCausesFosterParenting(tid)) {
        p._fosterParentElement(lastElement);
    }
    else {
        const ns = p.treeAdapter.getNamespaceURI(commonAncestor);
        if (tid === TAG_ID.TEMPLATE && ns === NS.HTML) {
            commonAncestor = p.treeAdapter.getTemplateContent(commonAncestor);
        }
        p.treeAdapter.appendChild(commonAncestor, lastElement);
    }
}
|
||
|
//Steps 15-19 of the algorithm
|
||
|
/**
 * Replaces the formatting element with a fresh copy placed inside the
 * furthest block.
 *
 * The copy takes over all children of `furthestBlock`, becomes its only
 * appended child, is inserted into the active formatting elements after the
 * bookmark and onto the stack of open elements right after the furthest
 * block. The old entry and element are removed from both structures.
 *
 * @param p Parser instance.
 * @param furthestBlock Furthest block element.
 * @param formattingElementEntry Entry of the formatting element being replaced.
 */
function aaReplaceFormattingElement(p, furthestBlock, formattingElementEntry) {
    const ns = p.treeAdapter.getNamespaceURI(formattingElementEntry.element);
    const { token } = formattingElementEntry;
    const newElement = p.treeAdapter.createElement(token.tagName, ns, token.attrs);
    p._adoptNodes(furthestBlock, newElement);
    p.treeAdapter.appendChild(furthestBlock, newElement);
    p.activeFormattingElements.insertElementAfterBookmark(newElement, token);
    p.activeFormattingElements.removeEntry(formattingElementEntry);
    p.openElements.remove(formattingElementEntry.element);
    p.openElements.insertAfter(furthestBlock, newElement, token.tagID);
}
|
||
|
//Algorithm entry point
|
||
|
/**
 * Entry point of the adoption agency algorithm for an end tag token.
 *
 * Runs at most `AA_OUTER_LOOP_ITER` rounds. A round stops the algorithm as
 * soon as no formatting element entry or no furthest block is available.
 *
 * @param p Parser instance.
 * @param token End tag token that triggered the algorithm.
 */
function callAdoptionAgency(p, token) {
    for (let i = 0; i < AA_OUTER_LOOP_ITER; i++) {
        const formattingElementEntry = aaObtainFormattingElementEntry(p, token);
        if (!formattingElementEntry) {
            break;
        }
        const furthestBlock = aaObtainFurthestBlock(p, formattingElementEntry);
        if (!furthestBlock) {
            break;
        }
        p.activeFormattingElements.bookmark = formattingElementEntry;
        const lastElement = aaInnerLoop(p, furthestBlock, formattingElementEntry.element);
        const commonAncestor = p.openElements.getCommonAncestor(formattingElementEntry.element);
        p.treeAdapter.detachNode(lastElement);
        if (commonAncestor)
            aaInsertLastNodeInCommonAncestor(p, commonAncestor, lastElement);
        aaReplaceFormattingElement(p, furthestBlock, formattingElementEntry);
    }
}
|
||
|
//Generic token handlers
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Appends a comment for `token` to `p.openElements.currentTmplContentOrNode`.
 *
 * @param p Parser instance.
 * @param token Comment token.
 */
function appendComment(p, token) {
    p._appendCommentNode(token, p.openElements.currentTmplContentOrNode);
}
|
||
|
/**
 * Appends a comment for `token` to the bottommost entry of the stack of
 * open elements (`p.openElements.items[0]`).
 *
 * @param p Parser instance.
 * @param token Comment token.
 */
function appendCommentToRootHtmlElement(p, token) {
    p._appendCommentNode(token, p.openElements.items[0]);
}
|
||
|
/**
 * Appends a comment for `token` directly to `p.document`.
 *
 * @param p Parser instance.
 * @param token Comment token.
 */
function appendCommentToDocument(p, token) {
    p._appendCommentNode(token, p.document);
}
|
||
|
/**
 * Marks the parser as stopped and, when the token has a location, sets end
 * locations on the elements still on the stack of open elements.
 *
 * In fragment mode every stack entry is updated. Otherwise entries from
 * index 2 upwards are updated unconditionally, while the entries at index 0
 * and 1 (`html` and `body`) are only updated if they have a location without
 * an `endTag`; `body` is only looked at when `html` qualified.
 *
 * @param p Parser instance.
 * @param token Token that ends parsing.
 */
function stopParsing(p, token) {
    p.stopped = true;
    // NOTE: Set end locations for elements that remain on the open element stack.
    if (token.location) {
        // NOTE: If we are not in a fragment, `html` and `body` will stay on the stack.
        // This is a problem, as we might overwrite their end position here.
        const target = p.fragmentContext ? 0 : 2;
        for (let i = p.openElements.stackTop; i >= target; i--) {
            p._setEndLocation(p.openElements.items[i], token);
        }
        // Handle `html` and `body`
        if (!p.fragmentContext && p.openElements.stackTop >= 0) {
            const htmlElement = p.openElements.items[0];
            const htmlLocation = p.treeAdapter.getNodeSourceCodeLocation(htmlElement);
            if (htmlLocation && !htmlLocation.endTag) {
                p._setEndLocation(htmlElement, token);
                if (p.openElements.stackTop >= 1) {
                    const bodyElement = p.openElements.items[1];
                    const bodyLocation = p.treeAdapter.getNodeSourceCodeLocation(bodyElement);
                    if (bodyLocation && !bodyLocation.endTag) {
                        p._setEndLocation(bodyElement, token);
                    }
                }
            }
        }
    }
}
|
||
|
// The "initial" insertion mode
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Handles a DOCTYPE token in the "initial" insertion mode.
 *
 * Stores the doctype on the document, reports a non-conforming doctype,
 * sets the document mode (quirks when the token forces it, otherwise the
 * result of `getDocumentMode`) and switches to "before html".
 *
 * @param p Parser instance.
 * @param token DOCTYPE token.
 */
function doctypeInInitialMode(p, token) {
    p._setDocumentType(token);
    const mode = token.forceQuirks ? DOCUMENT_MODE.QUIRKS : getDocumentMode(token);
    if (!isConforming(token)) {
        p._err(token, ERR.nonConformingDoctype);
    }
    p.treeAdapter.setDocumentMode(p.document, mode);
    p.insertionMode = InsertionMode.BEFORE_HTML;
}
|
||
|
/**
 * Handles any non-DOCTYPE token in the "initial" insertion mode: reports a
 * missing doctype, puts the document into quirks mode, switches to
 * "before html" and reprocesses the token there.
 *
 * @param p Parser instance.
 * @param token Token to reprocess.
 */
function tokenInInitialMode(p, token) {
    p._err(token, ERR.missingDoctype, true);
    p.treeAdapter.setDocumentMode(p.document, DOCUMENT_MODE.QUIRKS);
    p.insertionMode = InsertionMode.BEFORE_HTML;
    p._processToken(token);
}
|
||
|
// The "before html" insertion mode
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Handles a start tag in the "before html" insertion mode.
 *
 * An `<html>` start tag is inserted and the parser moves to "before head";
 * every other start tag goes through `tokenBeforeHtml`.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function startTagBeforeHtml(p, token) {
    if (token.tagID !== TAG_ID.HTML) {
        tokenBeforeHtml(p, token);
        return;
    }
    p._insertElement(token, NS.HTML);
    p.insertionMode = InsertionMode.BEFORE_HEAD;
}
|
||
|
/**
 * Handles an end tag in the "before html" insertion mode.
 *
 * Only `</html>`, `</head>`, `</body>` and `</br>` are forwarded to
 * `tokenBeforeHtml`; any other end tag is dropped without further action.
 *
 * @param {object} p - Parser state.
 * @param {object} token - End tag token.
 */
function endTagBeforeHtml(p, token) {
    const tn = token.tagID;
    const isForwarded = tn === TAG_ID.HTML || tn === TAG_ID.HEAD || tn === TAG_ID.BODY || tn === TAG_ID.BR;
    if (isForwarded) {
        tokenBeforeHtml(p, token);
    }
}
|
||
|
/**
 * Fallback for the "before html" insertion mode.
 *
 * Inserts a fake root element, switches to "before head" and reprocesses the
 * token in the new mode.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Token being processed.
 */
function tokenBeforeHtml(p, token) {
    p._insertFakeRootElement();
    p.insertionMode = InsertionMode.BEFORE_HEAD;
    p._processToken(token);
}
|
||
|
// The "before head" insertion mode
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Handles a start tag in the "before head" insertion mode.
 *
 * `<html>` is delegated to the "in body" start-tag handler, `<head>` is inserted
 * and remembered as `p.headElement` before switching to "in head", and anything
 * else goes through `tokenBeforeHead`.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function startTagBeforeHead(p, token) {
    switch (token.tagID) {
        case TAG_ID.HTML:
            startTagInBody(p, token);
            break;
        case TAG_ID.HEAD:
            p._insertElement(token, NS.HTML);
            p.headElement = p.openElements.current;
            p.insertionMode = InsertionMode.IN_HEAD;
            break;
        default:
            tokenBeforeHead(p, token);
    }
}
|
||
|
/**
 * Handles an end tag in the "before head" insertion mode.
 *
 * `</head>`, `</body>`, `</html>` and `</br>` go through `tokenBeforeHead`;
 * every other end tag is reported as having no matching open element.
 *
 * @param {object} p - Parser state.
 * @param {object} token - End tag token.
 */
function endTagBeforeHead(p, token) {
    const tn = token.tagID;
    const isForwarded = tn === TAG_ID.HEAD || tn === TAG_ID.BODY || tn === TAG_ID.HTML || tn === TAG_ID.BR;
    if (isForwarded) {
        tokenBeforeHead(p, token);
    } else {
        p._err(token, ERR.endTagWithoutMatchingOpenElement);
    }
}
|
||
|
/**
 * Fallback for the "before head" insertion mode.
 *
 * Inserts a fake `head` element, records it as `p.headElement`, switches to
 * "in head" and reprocesses the token.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Token being processed.
 */
function tokenBeforeHead(p, token) {
    p._insertFakeElement(TAG_NAMES.HEAD, TAG_ID.HEAD);
    p.headElement = p.openElements.current;
    p.insertionMode = InsertionMode.IN_HEAD;
    p._processToken(token);
}
|
||
|
// The "in head" insertion mode
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Handles a start tag in the "in head" insertion mode.
 *
 * Dispatches on `token.tagID`: void metadata elements are appended and their
 * self-closing flag acknowledged, text-only elements switch the tokenizer via
 * `p._switchToTextParsing`, `<template>` opens a new template context, a nested
 * `<head>` is reported as an error, and anything else goes to `tokenInHead`.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function startTagInHead(p, token) {
    switch (token.tagID) {
        case TAG_ID.HTML:
            startTagInBody(p, token);
            break;
        case TAG_ID.BASE:
        case TAG_ID.BASEFONT:
        case TAG_ID.BGSOUND:
        case TAG_ID.LINK:
        case TAG_ID.META:
            p._appendElement(token, NS.HTML);
            token.ackSelfClosing = true;
            break;
        case TAG_ID.TITLE:
            p._switchToTextParsing(token, TokenizerMode.RCDATA);
            break;
        case TAG_ID.NOSCRIPT:
            // With scripting enabled the content is raw text; otherwise it is parsed
            // as markup in the dedicated "in head noscript" mode.
            if (p.options.scriptingEnabled) {
                p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
            } else {
                p._insertElement(token, NS.HTML);
                p.insertionMode = InsertionMode.IN_HEAD_NO_SCRIPT;
            }
            break;
        case TAG_ID.NOFRAMES:
        case TAG_ID.STYLE:
            p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
            break;
        case TAG_ID.SCRIPT:
            p._switchToTextParsing(token, TokenizerMode.SCRIPT_DATA);
            break;
        case TAG_ID.TEMPLATE:
            p._insertTemplate(token);
            p.activeFormattingElements.insertMarker();
            p.framesetOk = false;
            p.insertionMode = InsertionMode.IN_TEMPLATE;
            // The newest template mode lives at the front of the stack.
            p.tmplInsertionModeStack.unshift(InsertionMode.IN_TEMPLATE);
            break;
        case TAG_ID.HEAD:
            p._err(token, ERR.misplacedStartTagForHeadElement);
            break;
        default:
            tokenInHead(p, token);
    }
}
|
||
|
/**
 * Handles an end tag in the "in head" insertion mode.
 *
 * `</head>` pops the current element and switches to "after head"; `</body>`,
 * `</br>` and `</html>` go through `tokenInHead`; `</template>` is delegated to
 * `templateEndTagInHead`; any other end tag is reported as unmatched.
 *
 * @param {object} p - Parser state.
 * @param {object} token - End tag token.
 */
function endTagInHead(p, token) {
    switch (token.tagID) {
        case TAG_ID.HEAD:
            p.openElements.pop();
            p.insertionMode = InsertionMode.AFTER_HEAD;
            break;
        case TAG_ID.BODY:
        case TAG_ID.BR:
        case TAG_ID.HTML:
            tokenInHead(p, token);
            break;
        case TAG_ID.TEMPLATE:
            templateEndTagInHead(p, token);
            break;
        default:
            p._err(token, ERR.endTagWithoutMatchingOpenElement);
    }
}
|
||
|
/**
 * Handles a `</template>` end tag.
 *
 * When no template is open (`tmplCount` is 0) the tag is reported as unmatched.
 * Otherwise implied end tags are generated, an error is reported if the current
 * node is not the template, the stack is popped through the template, the active
 * formatting list is cleared to the last marker, the newest template insertion
 * mode is discarded and the insertion mode is reset.
 *
 * @param {object} p - Parser state.
 * @param {object} token - End tag token.
 */
function templateEndTagInHead(p, token) {
    if (p.openElements.tmplCount <= 0) {
        p._err(token, ERR.endTagWithoutMatchingOpenElement);
        return;
    }
    p.openElements.generateImpliedEndTagsThoroughly();
    if (p.openElements.currentTagId !== TAG_ID.TEMPLATE) {
        p._err(token, ERR.closingOfElementWithOpenChildElements);
    }
    p.openElements.popUntilTagNamePopped(TAG_ID.TEMPLATE);
    p.activeFormattingElements.clearToLastMarker();
    p.tmplInsertionModeStack.shift();
    p._resetInsertionMode();
}
|
||
|
/**
 * Fallback for the "in head" insertion mode.
 *
 * Pops the current element, switches to "after head" and reprocesses the token.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Token being processed.
 */
function tokenInHead(p, token) {
    p.openElements.pop();
    p.insertionMode = InsertionMode.AFTER_HEAD;
    p._processToken(token);
}
|
||
|
// The "in head no script" insertion mode
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Handles a start tag in the "in head noscript" insertion mode.
 *
 * `<html>` uses the "in body" handler, a fixed set of head-level tags is handled
 * as in "in head", a nested `<noscript>` is reported as an error, and everything
 * else goes through `tokenInHeadNoScript`.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function startTagInHeadNoScript(p, token) {
    switch (token.tagID) {
        case TAG_ID.HTML:
            startTagInBody(p, token);
            break;
        case TAG_ID.BASEFONT:
        case TAG_ID.BGSOUND:
        case TAG_ID.HEAD:
        case TAG_ID.LINK:
        case TAG_ID.META:
        case TAG_ID.NOFRAMES:
        case TAG_ID.STYLE:
            startTagInHead(p, token);
            break;
        case TAG_ID.NOSCRIPT:
            p._err(token, ERR.nestedNoscriptInHead);
            break;
        default:
            tokenInHeadNoScript(p, token);
    }
}
|
||
|
/**
 * Handles an end tag in the "in head noscript" insertion mode.
 *
 * `</noscript>` pops the current element and returns to "in head"; `</br>` goes
 * through `tokenInHeadNoScript`; any other end tag is reported as unmatched.
 *
 * @param {object} p - Parser state.
 * @param {object} token - End tag token.
 */
function endTagInHeadNoScript(p, token) {
    switch (token.tagID) {
        case TAG_ID.NOSCRIPT:
            p.openElements.pop();
            p.insertionMode = InsertionMode.IN_HEAD;
            break;
        case TAG_ID.BR:
            tokenInHeadNoScript(p, token);
            break;
        default:
            p._err(token, ERR.endTagWithoutMatchingOpenElement);
    }
}
|
||
|
/**
 * Fallback for the "in head noscript" insertion mode.
 *
 * Reports an error (a dedicated code for EOF tokens, "disallowed content"
 * otherwise), pops the current element, returns to "in head" and reprocesses
 * the token.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Token being processed.
 */
function tokenInHeadNoScript(p, token) {
    const isEof = token.type === TokenType.EOF;
    p._err(token, isEof ? ERR.openElementsLeftAfterEof : ERR.disallowedContentInNoscriptInHead);
    p.openElements.pop();
    p.insertionMode = InsertionMode.IN_HEAD;
    p._processToken(token);
}
|
||
|
// The "after head" insertion mode
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Handles a start tag in the "after head" insertion mode.
 *
 * `<body>` and `<frameset>` are inserted and select the matching insertion mode.
 * Head-level tags that show up late are reported, then processed with the saved
 * head element temporarily pushed back onto the open element stack. A second
 * `<head>` is an error; everything else goes through `tokenAfterHead`.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function startTagAfterHead(p, token) {
    switch (token.tagID) {
        case TAG_ID.HTML:
            startTagInBody(p, token);
            break;
        case TAG_ID.BODY:
            p._insertElement(token, NS.HTML);
            p.framesetOk = false;
            p.insertionMode = InsertionMode.IN_BODY;
            break;
        case TAG_ID.FRAMESET:
            p._insertElement(token, NS.HTML);
            p.insertionMode = InsertionMode.IN_FRAMESET;
            break;
        case TAG_ID.BASE:
        case TAG_ID.BASEFONT:
        case TAG_ID.BGSOUND:
        case TAG_ID.LINK:
        case TAG_ID.META:
        case TAG_ID.NOFRAMES:
        case TAG_ID.SCRIPT:
        case TAG_ID.STYLE:
        case TAG_ID.TEMPLATE:
        case TAG_ID.TITLE:
            p._err(token, ERR.abandonedHeadElementChild);
            // Re-open the head just long enough for the "in head" handler to run.
            p.openElements.push(p.headElement, TAG_ID.HEAD);
            startTagInHead(p, token);
            p.openElements.remove(p.headElement);
            break;
        case TAG_ID.HEAD:
            p._err(token, ERR.misplacedStartTagForHeadElement);
            break;
        default:
            tokenAfterHead(p, token);
    }
}
|
||
|
/**
 * Handles an end tag in the "after head" insertion mode.
 *
 * `</body>`, `</html>` and `</br>` go through `tokenAfterHead`; `</template>` is
 * delegated to `templateEndTagInHead`; any other end tag is reported as unmatched.
 *
 * @param {object} p - Parser state.
 * @param {object} token - End tag token.
 */
function endTagAfterHead(p, token) {
    switch (token.tagID) {
        case TAG_ID.BODY:
        case TAG_ID.HTML:
        case TAG_ID.BR:
            tokenAfterHead(p, token);
            break;
        case TAG_ID.TEMPLATE:
            templateEndTagInHead(p, token);
            break;
        default:
            p._err(token, ERR.endTagWithoutMatchingOpenElement);
    }
}
|
||
|
/**
 * Fallback for the "after head" insertion mode.
 *
 * Inserts a fake `body` element, switches to "in body" and hands the token
 * directly to `modeInBody`.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Token being processed.
 */
function tokenAfterHead(p, token) {
    p._insertFakeElement(TAG_NAMES.BODY, TAG_ID.BODY);
    p.insertionMode = InsertionMode.IN_BODY;
    modeInBody(p, token);
}
|
||
|
// The "in body" insertion mode
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Routes a token to the matching "in body" handler based on `token.type`.
 *
 * Token types without a case below are ignored.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Token being processed.
 */
function modeInBody(p, token) {
    switch (token.type) {
        case TokenType.CHARACTER:
            characterInBody(p, token);
            break;
        case TokenType.WHITESPACE_CHARACTER:
            whitespaceCharacterInBody(p, token);
            break;
        case TokenType.COMMENT:
            appendComment(p, token);
            break;
        case TokenType.START_TAG:
            startTagInBody(p, token);
            break;
        case TokenType.END_TAG:
            endTagInBody(p, token);
            break;
        case TokenType.EOF:
            eofInBody(p, token);
            break;
        // Do nothing
    }
}
|
||
|
/**
 * Handles whitespace characters in the "in body" insertion mode: reconstructs
 * the active formatting elements, then inserts the characters. Unlike
 * `characterInBody`, `p.framesetOk` is left untouched.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Whitespace character token.
 */
function whitespaceCharacterInBody(p, token) {
    p._reconstructActiveFormattingElements();
    p._insertCharacters(token);
}
|
||
|
/**
 * Handles non-whitespace characters in the "in body" insertion mode:
 * reconstructs the active formatting elements, inserts the characters and clears
 * `p.framesetOk`.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Character token.
 */
function characterInBody(p, token) {
    p._reconstructActiveFormattingElements();
    p._insertCharacters(token);
    p.framesetOk = false;
}
|
||
|
/**
 * Handles a stray `<html>` start tag in the "in body" insertion mode.
 *
 * Outside of templates, the token's attributes are adopted by the bottom-most
 * element of the open element stack (`items[0]`); inside a template the tag is
 * ignored.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function htmlStartTagInBody(p, token) {
    if (p.openElements.tmplCount !== 0) {
        return;
    }
    p.treeAdapter.adoptAttributes(p.openElements.items[0], token.attrs);
}
|
||
|
/**
 * Handles a stray `<body>` start tag in the "in body" insertion mode.
 *
 * When a properly nested body element exists and no template is open, the
 * frameset-ok flag is cleared and the token's attributes are adopted by that
 * body element. Otherwise the tag is ignored.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function bodyStartTagInBody(p, token) {
    const bodyElement = p.openElements.tryPeekProperlyNestedBodyElement();
    if (!bodyElement || p.openElements.tmplCount !== 0) {
        return;
    }
    p.framesetOk = false;
    p.treeAdapter.adoptAttributes(bodyElement, token.attrs);
}
|
||
|
/**
 * Handles a `<frameset>` start tag in the "in body" insertion mode.
 *
 * Only acts when `p.framesetOk` is still set and a properly nested body element
 * exists: the body is detached, the stack is popped back to the html element,
 * the frameset is inserted and the parser switches to "in frameset".
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function framesetStartTagInBody(p, token) {
    const bodyElement = p.openElements.tryPeekProperlyNestedBodyElement();
    if (!p.framesetOk || !bodyElement) {
        return;
    }
    p.treeAdapter.detachNode(bodyElement);
    p.openElements.popAllUpToHtmlElement();
    p._insertElement(token, NS.HTML);
    p.insertionMode = InsertionMode.IN_FRAMESET;
}
|
||
|
/**
 * Handles block-level start tags (the group dispatched here by `startTagInBody`):
 * closes a `p` element that is in button scope, then inserts the element.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function addressStartTagInBody(p, token) {
    if (p.openElements.hasInButtonScope(TAG_ID.P)) {
        p._closePElement();
    }
    p._insertElement(token, NS.HTML);
}
|
||
|
/**
 * Handles `<h1>`-`<h6>` start tags in the "in body" insertion mode.
 *
 * Closes a `p` element in button scope, pops the current node if it is itself a
 * numbered header (headers do not nest), then inserts the new header.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function numberedHeaderStartTagInBody(p, token) {
    if (p.openElements.hasInButtonScope(TAG_ID.P)) {
        p._closePElement();
    }
    if (NUMBERED_HEADERS.has(p.openElements.currentTagId)) {
        p.openElements.pop();
    }
    p._insertElement(token, NS.HTML);
}
|
||
|
/**
 * Handles `<pre>` and `<listing>` start tags in the "in body" insertion mode.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function preStartTagInBody(p, token) {
    if (p.openElements.hasInButtonScope(TAG_ID.P)) {
        p._closePElement();
    }
    p._insertElement(token, NS.HTML);
    // NOTE: If the next token is a U+000A LINE FEED (LF) character token, it is
    // ignored. (Newlines at the start of pre blocks are ignored as an authoring
    // convenience.)
    p.skipNextNewLine = true;
    p.framesetOk = false;
}
|
||
|
/**
 * Handles a `<form>` start tag in the "in body" insertion mode.
 *
 * The tag is ignored when a form element is already recorded and no template is
 * open. Otherwise a `p` in button scope is closed and the form is inserted;
 * outside of templates the new element is also stored as `p.formElement`.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function formStartTagInBody(p, token) {
    const inTemplate = p.openElements.tmplCount > 0;
    if (p.formElement && !inTemplate) {
        return;
    }
    if (p.openElements.hasInButtonScope(TAG_ID.P)) {
        p._closePElement();
    }
    p._insertElement(token, NS.HTML);
    if (!inTemplate) {
        p.formElement = p.openElements.current;
    }
}
|
||
|
/**
 * Handles `<li>`, `<dd>` and `<dt>` start tags in the "in body" insertion mode.
 *
 * Walks the open element stack from the top looking for an item the new tag
 * implicitly closes (`li` closes `li`; `dd`/`dt` close either `dd` or `dt`).
 * The walk stops at the first special element other than `address`, `div` or
 * `p`. Afterwards a `p` in button scope is closed and the new element inserted.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function listItemStartTagInBody(p, token) {
    p.framesetOk = false;
    const tn = token.tagID;
    // Loop-invariant parts of the "does this open element get closed" test.
    const isListItem = tn === TAG_ID.LI;
    const isDefinitionItem = tn === TAG_ID.DD || tn === TAG_ID.DT;
    for (let i = p.openElements.stackTop; i >= 0; i--) {
        const elementId = p.openElements.tagIDs[i];
        const closesElement = (isListItem && elementId === TAG_ID.LI) ||
            (isDefinitionItem && (elementId === TAG_ID.DD || elementId === TAG_ID.DT));
        if (closesElement) {
            p.openElements.generateImpliedEndTagsWithExclusion(elementId);
            p.openElements.popUntilTagNamePopped(elementId);
            break;
        }
        const isTransparent = elementId === TAG_ID.ADDRESS || elementId === TAG_ID.DIV || elementId === TAG_ID.P;
        if (!isTransparent && p._isSpecialElement(p.openElements.items[i], elementId)) {
            break;
        }
    }
    if (p.openElements.hasInButtonScope(TAG_ID.P)) {
        p._closePElement();
    }
    p._insertElement(token, NS.HTML);
}
|
||
|
/**
 * Handles a `<plaintext>` start tag: closes a `p` in button scope, inserts the
 * element and switches the tokenizer to the PLAINTEXT state.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function plaintextStartTagInBody(p, token) {
    if (p.openElements.hasInButtonScope(TAG_ID.P)) {
        p._closePElement();
    }
    p._insertElement(token, NS.HTML);
    p.tokenizer.state = TokenizerMode.PLAINTEXT;
}
|
||
|
/**
 * Handles a `<button>` start tag in the "in body" insertion mode.
 *
 * An already open button in scope is closed first (implied end tags, then pop
 * through the button); the new button is then inserted and `framesetOk` cleared.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function buttonStartTagInBody(p, token) {
    if (p.openElements.hasInScope(TAG_ID.BUTTON)) {
        p.openElements.generateImpliedEndTags();
        p.openElements.popUntilTagNamePopped(TAG_ID.BUTTON);
    }
    p._reconstructActiveFormattingElements();
    p._insertElement(token, NS.HTML);
    p.framesetOk = false;
}
|
||
|
/**
 * Handles an `<a>` start tag in the "in body" insertion mode.
 *
 * If the active formatting list already holds an `a` entry in scope, the
 * adoption agency algorithm is run and that entry's element is removed from
 * both the open element stack and the formatting list. The new element is then
 * inserted and registered as an active formatting element.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function aStartTagInBody(p, token) {
    const activeElementEntry = p.activeFormattingElements.getElementEntryInScopeWithTagName(TAG_NAMES.A);
    if (activeElementEntry) {
        callAdoptionAgency(p, token);
        p.openElements.remove(activeElementEntry.element);
        p.activeFormattingElements.removeEntry(activeElementEntry);
    }
    p._reconstructActiveFormattingElements();
    p._insertElement(token, NS.HTML);
    p.activeFormattingElements.pushElement(p.openElements.current, token);
}
|
||
|
/**
 * Handles formatting start tags (the group dispatched here by `startTagInBody`):
 * reconstructs the active formatting elements, inserts the element and pushes
 * it onto the active formatting list.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function bStartTagInBody(p, token) {
    p._reconstructActiveFormattingElements();
    p._insertElement(token, NS.HTML);
    p.activeFormattingElements.pushElement(p.openElements.current, token);
}
|
||
|
/**
 * Handles a `<nobr>` start tag in the "in body" insertion mode.
 *
 * If a `nobr` is already in scope, the adoption agency algorithm runs and the
 * active formatting elements are reconstructed a second time before the new
 * element is inserted and registered as an active formatting element.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function nobrStartTagInBody(p, token) {
    p._reconstructActiveFormattingElements();
    if (p.openElements.hasInScope(TAG_ID.NOBR)) {
        callAdoptionAgency(p, token);
        p._reconstructActiveFormattingElements();
    }
    p._insertElement(token, NS.HTML);
    p.activeFormattingElements.pushElement(p.openElements.current, token);
}
|
||
|
/**
 * Handles `<applet>`, `<object>` and `<marquee>` start tags: inserts the element,
 * places a marker on the active formatting list and clears `framesetOk`.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function appletStartTagInBody(p, token) {
    p._reconstructActiveFormattingElements();
    p._insertElement(token, NS.HTML);
    p.activeFormattingElements.insertMarker();
    p.framesetOk = false;
}
|
||
|
/**
 * Handles a `<table>` start tag in the "in body" insertion mode.
 *
 * Outside of quirks mode a `p` in button scope is closed first; in quirks mode
 * the paragraph is left open. The table is then inserted, `framesetOk` cleared
 * and the parser switched to "in table".
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function tableStartTagInBody(p, token) {
    const isQuirks = p.treeAdapter.getDocumentMode(p.document) === DOCUMENT_MODE.QUIRKS;
    if (!isQuirks && p.openElements.hasInButtonScope(TAG_ID.P)) {
        p._closePElement();
    }
    p._insertElement(token, NS.HTML);
    p.framesetOk = false;
    p.insertionMode = InsertionMode.IN_TABLE;
}
|
||
|
/**
 * Handles void start tags (the group dispatched here by `startTagInBody`):
 * appends the element without pushing it onto the stack, clears `framesetOk`
 * and acknowledges the self-closing flag.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function areaStartTagInBody(p, token) {
    p._reconstructActiveFormattingElements();
    p._appendElement(token, NS.HTML);
    p.framesetOk = false;
    token.ackSelfClosing = true;
}
|
||
|
/**
 * Tells whether a start tag token carries a `type` attribute whose value equals
 * `HIDDEN_INPUT_TYPE`, compared case-insensitively.
 *
 * @param {object} token - Start tag token.
 * @returns {boolean} `true` for a hidden input, `false` otherwise (including
 *   when the attribute is missing).
 */
function isHiddenInput(token) {
    const inputType = getTokenAttr(token, ATTRS.TYPE);
    if (inputType == null) {
        return false;
    }
    return inputType.toLowerCase() === HIDDEN_INPUT_TYPE;
}
|
||
|
/**
 * Handles an `<input>` start tag in the "in body" insertion mode.
 *
 * The element is appended as a void element. `framesetOk` is cleared unless the
 * input is hidden, and the self-closing flag is acknowledged.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function inputStartTagInBody(p, token) {
    p._reconstructActiveFormattingElements();
    p._appendElement(token, NS.HTML);
    if (!isHiddenInput(token)) {
        p.framesetOk = false;
    }
    token.ackSelfClosing = true;
}
|
||
|
/**
 * Handles `<param>`, `<track>` and `<source>` start tags: appends the void
 * element and acknowledges the self-closing flag.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function paramStartTagInBody(p, token) {
    p._appendElement(token, NS.HTML);
    token.ackSelfClosing = true;
}
|
||
|
/**
 * Handles an `<hr>` start tag: closes a `p` in button scope, appends the void
 * element, clears `framesetOk` and acknowledges the self-closing flag.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function hrStartTagInBody(p, token) {
    if (p.openElements.hasInButtonScope(TAG_ID.P)) {
        p._closePElement();
    }
    p._appendElement(token, NS.HTML);
    p.framesetOk = false;
    token.ackSelfClosing = true;
}
|
||
|
/**
 * Handles the non-standard `<image>` start tag by rewriting the token in place
 * to an `img` tag and processing it like any other void element.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token; its `tagName` and `tagID` are mutated.
 */
function imageStartTagInBody(p, token) {
    token.tagName = TAG_NAMES.IMG;
    token.tagID = TAG_ID.IMG;
    areaStartTagInBody(p, token);
}
|
||
|
/**
 * Handles a `<textarea>` start tag in the "in body" insertion mode.
 *
 * Inserts the element, switches the tokenizer to RCDATA, remembers the current
 * insertion mode as the one to return to and enters the "text" mode.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function textareaStartTagInBody(p, token) {
    p._insertElement(token, NS.HTML);
    // NOTE: If the next token is a U+000A LINE FEED (LF) character token, it is
    // ignored. (Newlines at the start of textarea elements are ignored as an
    // authoring convenience.)
    p.skipNextNewLine = true;
    p.tokenizer.state = TokenizerMode.RCDATA;
    p.originalInsertionMode = p.insertionMode;
    p.framesetOk = false;
    p.insertionMode = InsertionMode.TEXT;
}
|
||
|
/**
 * Handles an `<xmp>` start tag: closes a `p` in button scope, reconstructs the
 * active formatting elements, clears `framesetOk` and parses the content as
 * raw text.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function xmpStartTagInBody(p, token) {
    if (p.openElements.hasInButtonScope(TAG_ID.P)) {
        p._closePElement();
    }
    p._reconstructActiveFormattingElements();
    p.framesetOk = false;
    p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
}
|
||
|
/**
 * Handles an `<iframe>` start tag: clears `framesetOk` and parses the content
 * as raw text.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function iframeStartTagInBody(p, token) {
    p.framesetOk = false;
    p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
}
|
||
|
//NOTE: here we assume that we always act as a user agent with enabled plugins/frames, so we parse
|
||
|
//<noembed>/<noframes> as rawtext.
|
||
|
/**
 * Parses the content of the element opened by `token` as raw text. Used by
 * `startTagInBody` for `<noembed>`, `<noframes>` and, with scripting enabled,
 * `<noscript>`.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function rawTextStartTagInBody(p, token) {
    p._switchToTextParsing(token, TokenizerMode.RAWTEXT);
}
|
||
|
/**
 * Handles a `<select>` start tag in the "in body" insertion mode.
 *
 * After inserting the element, the next insertion mode depends on the current
 * one: any of the table-related modes leads to "in select in table", everything
 * else to "in select".
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function selectStartTagInBody(p, token) {
    p._reconstructActiveFormattingElements();
    p._insertElement(token, NS.HTML);
    p.framesetOk = false;
    const mode = p.insertionMode;
    const isTableMode = mode === InsertionMode.IN_TABLE ||
        mode === InsertionMode.IN_CAPTION ||
        mode === InsertionMode.IN_TABLE_BODY ||
        mode === InsertionMode.IN_ROW ||
        mode === InsertionMode.IN_CELL;
    p.insertionMode = isTableMode ? InsertionMode.IN_SELECT_IN_TABLE : InsertionMode.IN_SELECT;
}
|
||
|
/**
 * Handles `<option>` and `<optgroup>` start tags: pops the current node if it is
 * an `option`, then inserts the new element.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function optgroupStartTagInBody(p, token) {
    if (p.openElements.currentTagId === TAG_ID.OPTION) {
        p.openElements.pop();
    }
    p._reconstructActiveFormattingElements();
    p._insertElement(token, NS.HTML);
}
|
||
|
/**
 * Handles `<rb>` and `<rtc>` start tags: when a `ruby` element is in scope,
 * implied end tags are generated before the element is inserted.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function rbStartTagInBody(p, token) {
    if (p.openElements.hasInScope(TAG_ID.RUBY)) {
        p.openElements.generateImpliedEndTags();
    }
    p._insertElement(token, NS.HTML);
}
|
||
|
/**
 * Handles `<rt>` and `<rp>` start tags: when a `ruby` element is in scope,
 * implied end tags are generated with `rtc` excluded, then the element is
 * inserted.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function rtStartTagInBody(p, token) {
    if (p.openElements.hasInScope(TAG_ID.RUBY)) {
        p.openElements.generateImpliedEndTagsWithExclusion(TAG_ID.RTC);
    }
    p._insertElement(token, NS.HTML);
}
|
||
|
/**
 * Handles a `<math>` start tag in the "in body" insertion mode.
 *
 * Adjusts MathML and XML attributes on the token, then adds the element in the
 * MathML namespace: appended when self-closing, inserted (pushed on the stack)
 * otherwise. The self-closing flag is always acknowledged.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function mathStartTagInBody(p, token) {
    p._reconstructActiveFormattingElements();
    adjustTokenMathMLAttrs(token);
    adjustTokenXMLAttrs(token);
    if (token.selfClosing) {
        p._appendElement(token, NS.MATHML);
    } else {
        p._insertElement(token, NS.MATHML);
    }
    token.ackSelfClosing = true;
}
|
||
|
/**
 * Handles an `<svg>` start tag in the "in body" insertion mode.
 *
 * Adjusts SVG and XML attributes on the token, then adds the element in the SVG
 * namespace: appended when self-closing, inserted (pushed on the stack)
 * otherwise. The self-closing flag is always acknowledged.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function svgStartTagInBody(p, token) {
    p._reconstructActiveFormattingElements();
    adjustTokenSVGAttrs(token);
    adjustTokenXMLAttrs(token);
    if (token.selfClosing) {
        p._appendElement(token, NS.SVG);
    } else {
        p._insertElement(token, NS.SVG);
    }
    token.ackSelfClosing = true;
}
|
||
|
/**
 * Default start-tag handling in the "in body" insertion mode: reconstructs the
 * active formatting elements and inserts the element in the HTML namespace.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function genericStartTagInBody(p, token) {
    p._reconstructActiveFormattingElements();
    p._insertElement(token, NS.HTML);
}
|
||
|
/**
 * Dispatches a start tag in the "in body" insertion mode to the handler for its
 * tag group, keyed on `token.tagID`.
 *
 * Table-structure tags, `<head>` and `<frame>` are ignored here; tags without a
 * dedicated case use `genericStartTagInBody`.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function startTagInBody(p, token) {
    switch (token.tagID) {
        case TAG_ID.I:
        case TAG_ID.S:
        case TAG_ID.B:
        case TAG_ID.U:
        case TAG_ID.EM:
        case TAG_ID.TT:
        case TAG_ID.BIG:
        case TAG_ID.CODE:
        case TAG_ID.FONT:
        case TAG_ID.SMALL:
        case TAG_ID.STRIKE:
        case TAG_ID.STRONG:
            bStartTagInBody(p, token);
            break;
        case TAG_ID.A:
            aStartTagInBody(p, token);
            break;
        case TAG_ID.H1:
        case TAG_ID.H2:
        case TAG_ID.H3:
        case TAG_ID.H4:
        case TAG_ID.H5:
        case TAG_ID.H6:
            numberedHeaderStartTagInBody(p, token);
            break;
        case TAG_ID.P:
        case TAG_ID.DL:
        case TAG_ID.OL:
        case TAG_ID.UL:
        case TAG_ID.DIV:
        case TAG_ID.DIR:
        case TAG_ID.NAV:
        case TAG_ID.MAIN:
        case TAG_ID.MENU:
        case TAG_ID.ASIDE:
        case TAG_ID.CENTER:
        case TAG_ID.FIGURE:
        case TAG_ID.FOOTER:
        case TAG_ID.HEADER:
        case TAG_ID.HGROUP:
        case TAG_ID.DIALOG:
        case TAG_ID.DETAILS:
        case TAG_ID.ADDRESS:
        case TAG_ID.ARTICLE:
        case TAG_ID.SEARCH:
        case TAG_ID.SECTION:
        case TAG_ID.SUMMARY:
        case TAG_ID.FIELDSET:
        case TAG_ID.BLOCKQUOTE:
        case TAG_ID.FIGCAPTION:
            addressStartTagInBody(p, token);
            break;
        case TAG_ID.LI:
        case TAG_ID.DD:
        case TAG_ID.DT:
            listItemStartTagInBody(p, token);
            break;
        case TAG_ID.BR:
        case TAG_ID.IMG:
        case TAG_ID.WBR:
        case TAG_ID.AREA:
        case TAG_ID.EMBED:
        case TAG_ID.KEYGEN:
            areaStartTagInBody(p, token);
            break;
        case TAG_ID.HR:
            hrStartTagInBody(p, token);
            break;
        case TAG_ID.RB:
        case TAG_ID.RTC:
            rbStartTagInBody(p, token);
            break;
        case TAG_ID.RT:
        case TAG_ID.RP:
            rtStartTagInBody(p, token);
            break;
        case TAG_ID.PRE:
        case TAG_ID.LISTING:
            preStartTagInBody(p, token);
            break;
        case TAG_ID.XMP:
            xmpStartTagInBody(p, token);
            break;
        case TAG_ID.SVG:
            svgStartTagInBody(p, token);
            break;
        case TAG_ID.HTML:
            htmlStartTagInBody(p, token);
            break;
        case TAG_ID.BASE:
        case TAG_ID.LINK:
        case TAG_ID.META:
        case TAG_ID.STYLE:
        case TAG_ID.TITLE:
        case TAG_ID.SCRIPT:
        case TAG_ID.BGSOUND:
        case TAG_ID.BASEFONT:
        case TAG_ID.TEMPLATE:
            startTagInHead(p, token);
            break;
        case TAG_ID.BODY:
            bodyStartTagInBody(p, token);
            break;
        case TAG_ID.FORM:
            formStartTagInBody(p, token);
            break;
        case TAG_ID.NOBR:
            nobrStartTagInBody(p, token);
            break;
        case TAG_ID.MATH:
            mathStartTagInBody(p, token);
            break;
        case TAG_ID.TABLE:
            tableStartTagInBody(p, token);
            break;
        case TAG_ID.INPUT:
            inputStartTagInBody(p, token);
            break;
        case TAG_ID.PARAM:
        case TAG_ID.TRACK:
        case TAG_ID.SOURCE:
            paramStartTagInBody(p, token);
            break;
        case TAG_ID.IMAGE:
            imageStartTagInBody(p, token);
            break;
        case TAG_ID.BUTTON:
            buttonStartTagInBody(p, token);
            break;
        case TAG_ID.APPLET:
        case TAG_ID.OBJECT:
        case TAG_ID.MARQUEE:
            appletStartTagInBody(p, token);
            break;
        case TAG_ID.IFRAME:
            iframeStartTagInBody(p, token);
            break;
        case TAG_ID.SELECT:
            selectStartTagInBody(p, token);
            break;
        case TAG_ID.OPTION:
        case TAG_ID.OPTGROUP:
            optgroupStartTagInBody(p, token);
            break;
        case TAG_ID.NOEMBED:
        case TAG_ID.NOFRAMES:
            rawTextStartTagInBody(p, token);
            break;
        case TAG_ID.FRAMESET:
            framesetStartTagInBody(p, token);
            break;
        case TAG_ID.TEXTAREA:
            textareaStartTagInBody(p, token);
            break;
        case TAG_ID.NOSCRIPT:
            // Raw text with scripting enabled, an ordinary element otherwise.
            if (p.options.scriptingEnabled) {
                rawTextStartTagInBody(p, token);
            } else {
                genericStartTagInBody(p, token);
            }
            break;
        case TAG_ID.PLAINTEXT:
            plaintextStartTagInBody(p, token);
            break;
        case TAG_ID.COL:
        case TAG_ID.TH:
        case TAG_ID.TD:
        case TAG_ID.TR:
        case TAG_ID.HEAD:
        case TAG_ID.FRAME:
        case TAG_ID.TBODY:
        case TAG_ID.TFOOT:
        case TAG_ID.THEAD:
        case TAG_ID.CAPTION:
        case TAG_ID.COLGROUP:
            // Ignore token
            break;
        default:
            genericStartTagInBody(p, token);
    }
}
|
||
|
/**
 * Handles a `</body>` end tag in the "in body" insertion mode.
 *
 * Ignored unless a `body` element is in scope. Otherwise the parser switches to
 * "after body" and, when source location tracking is enabled, stamps the end
 * location on the body element.
 *
 * @param {object} p - Parser state.
 * @param {object} token - End tag token.
 */
function bodyEndTagInBody(p, token) {
    if (!p.openElements.hasInScope(TAG_ID.BODY)) {
        return;
    }
    p.insertionMode = InsertionMode.AFTER_BODY;
    // NOTE: <body> is never popped from the stack, so we need to update
    // the end location explicitly.
    if (!p.options.sourceCodeLocationInfo) {
        return;
    }
    const bodyElement = p.openElements.tryPeekProperlyNestedBodyElement();
    if (bodyElement) {
        p._setEndLocation(bodyElement, token);
    }
}
|
||
|
/**
 * Handles an `</html>` end tag in the "in body" insertion mode: when a `body`
 * is in scope, switches to "after body" and lets `endTagAfterBody` process the
 * same token. Otherwise the tag is ignored.
 *
 * @param {object} p - Parser state.
 * @param {object} token - End tag token.
 */
function htmlEndTagInBody(p, token) {
    if (!p.openElements.hasInScope(TAG_ID.BODY)) {
        return;
    }
    p.insertionMode = InsertionMode.AFTER_BODY;
    endTagAfterBody(p, token);
}
|
||
|
/**
 * Handles block-level end tags (the group dispatched here by `endTagInBody`):
 * if an element with the token's tag is in scope, implied end tags are generated
 * and the stack is popped through that element.
 *
 * @param {object} p - Parser state.
 * @param {object} token - End tag token.
 */
function addressEndTagInBody(p, token) {
    const tn = token.tagID;
    if (!p.openElements.hasInScope(tn)) {
        return;
    }
    p.openElements.generateImpliedEndTags();
    p.openElements.popUntilTagNamePopped(tn);
}
|
||
|
/**
 * Handles a `</form>` end tag in the "in body" insertion mode.
 *
 * Outside of templates the recorded form element is cleared first. If there was
 * a form element (or a template is open) and a `form` is in scope, implied end
 * tags are generated; inside a template the stack is popped through the form,
 * otherwise only the recorded form element is removed from the stack.
 *
 * @param {object} p - Parser state.
 */
function formEndTagInBody(p) {
    const inTemplate = p.openElements.tmplCount > 0;
    const { formElement } = p;
    if (!inTemplate) {
        p.formElement = null;
    }
    if (!(formElement || inTemplate) || !p.openElements.hasInScope(TAG_ID.FORM)) {
        return;
    }
    p.openElements.generateImpliedEndTags();
    if (inTemplate) {
        p.openElements.popUntilTagNamePopped(TAG_ID.FORM);
    } else if (formElement) {
        p.openElements.remove(formElement);
    }
}
|
||
|
/**
 * Handles a `</p>` end tag in the "in body" insertion mode. When no `p` is in
 * button scope a fake one is inserted first, so there is always a paragraph
 * to close.
 *
 * @param {object} p - Parser state.
 */
function pEndTagInBody(p) {
    const hasOpenParagraph = p.openElements.hasInButtonScope(TAG_ID.P);
    if (!hasOpenParagraph) {
        p._insertFakeElement(TAG_NAMES.P, TAG_ID.P);
    }
    p._closePElement();
}
|
||
|
/**
 * Handles an `</li>` end tag: if an `li` is in list-item scope, implied end tags
 * (excluding `li`) are generated and the stack is popped through the `li`.
 *
 * @param {object} p - Parser state.
 */
function liEndTagInBody(p) {
    if (!p.openElements.hasInListItemScope(TAG_ID.LI)) {
        return;
    }
    p.openElements.generateImpliedEndTagsWithExclusion(TAG_ID.LI);
    p.openElements.popUntilTagNamePopped(TAG_ID.LI);
}
|
||
|
/**
 * Handles `</dd>` and `</dt>` end tags: if an element with the token's tag is in
 * scope, implied end tags (excluding that tag) are generated and the stack is
 * popped through it.
 *
 * @param {object} p - Parser state.
 * @param {object} token - End tag token.
 */
function ddEndTagInBody(p, token) {
    const tn = token.tagID;
    if (!p.openElements.hasInScope(tn)) {
        return;
    }
    p.openElements.generateImpliedEndTagsWithExclusion(tn);
    p.openElements.popUntilTagNamePopped(tn);
}
|
||
|
/**
 * Handles `</h1>`-`</h6>` end tags: if any numbered header is in scope, implied
 * end tags are generated and the stack is popped through the nearest one.
 *
 * @param {object} p - Parser state.
 */
function numberedHeaderEndTagInBody(p) {
    if (!p.openElements.hasNumberedHeaderInScope()) {
        return;
    }
    p.openElements.generateImpliedEndTags();
    p.openElements.popUntilNumberedHeaderPopped();
}
|
||
|
/**
 * Handles `</applet>`, `</object>` and `</marquee>` end tags: if the element is
 * in scope, implied end tags are generated, the stack is popped through it and
 * the active formatting list is cleared back to the last marker.
 *
 * @param {object} p - Parser state.
 * @param {object} token - End tag token.
 */
function appletEndTagInBody(p, token) {
    const tn = token.tagID;
    if (!p.openElements.hasInScope(tn)) {
        return;
    }
    p.openElements.generateImpliedEndTags();
    p.openElements.popUntilTagNamePopped(tn);
    p.activeFormattingElements.clearToLastMarker();
}
|
||
|
/**
 * Handles a `</br>` end tag by treating it as a `<br>` start tag: a fake `br`
 * element is inserted and immediately popped, and `framesetOk` is cleared.
 *
 * @param {object} p - Parser state.
 */
function brEndTagInBody(p) {
    p._reconstructActiveFormattingElements();
    p._insertFakeElement(TAG_NAMES.BR, TAG_ID.BR);
    p.openElements.pop();
    p.framesetOk = false;
}
|
||
|
/**
 * Default end-tag handling in the "in body" insertion mode.
 *
 * Walks the open element stack from the top down to (but not including) index 0
 * looking for an element matching the token. On a match, implied end tags
 * (excluding the token's tag) are generated and the stack is shortened so the
 * match is removed. The walk stops early at the first special element.
 *
 * @param {object} p - Parser state.
 * @param {object} token - End tag token.
 */
function genericEndTagInBody(p, token) {
    const tn = token.tagName;
    const tid = token.tagID;
    for (let i = p.openElements.stackTop; i > 0; i--) {
        const element = p.openElements.items[i];
        const elementId = p.openElements.tagIDs[i];
        // Compare the tag name here, as the tag might not be a known tag with an ID.
        const isMatch = tid === elementId &&
            (tid !== TAG_ID.UNKNOWN || p.treeAdapter.getTagName(element) === tn);
        if (isMatch) {
            p.openElements.generateImpliedEndTagsWithExclusion(tid);
            // Skip the truncation if generating implied end tags already
            // brought the stack below the matched index.
            if (p.openElements.stackTop >= i) {
                p.openElements.shortenToLength(i);
            }
            break;
        }
        if (p._isSpecialElement(element, elementId)) {
            break;
        }
    }
}
|
||
|
/**
 * Dispatches an end tag in the "in body" insertion mode to the handler for its
 * tag group, keyed on `token.tagID`. Tags without a dedicated case use
 * `genericEndTagInBody`.
 *
 * @param {object} p - Parser state.
 * @param {object} token - End tag token.
 */
function endTagInBody(p, token) {
    switch (token.tagID) {
        case TAG_ID.A:
        case TAG_ID.B:
        case TAG_ID.I:
        case TAG_ID.S:
        case TAG_ID.U:
        case TAG_ID.EM:
        case TAG_ID.TT:
        case TAG_ID.BIG:
        case TAG_ID.CODE:
        case TAG_ID.FONT:
        case TAG_ID.NOBR:
        case TAG_ID.SMALL:
        case TAG_ID.STRIKE:
        case TAG_ID.STRONG:
            callAdoptionAgency(p, token);
            break;
        case TAG_ID.P:
            pEndTagInBody(p);
            break;
        case TAG_ID.DL:
        case TAG_ID.UL:
        case TAG_ID.OL:
        case TAG_ID.DIR:
        case TAG_ID.DIV:
        case TAG_ID.NAV:
        case TAG_ID.PRE:
        case TAG_ID.MAIN:
        case TAG_ID.MENU:
        case TAG_ID.ASIDE:
        case TAG_ID.BUTTON:
        case TAG_ID.CENTER:
        case TAG_ID.FIGURE:
        case TAG_ID.FOOTER:
        case TAG_ID.HEADER:
        case TAG_ID.HGROUP:
        case TAG_ID.DIALOG:
        case TAG_ID.ADDRESS:
        case TAG_ID.ARTICLE:
        case TAG_ID.DETAILS:
        case TAG_ID.SEARCH:
        case TAG_ID.SECTION:
        case TAG_ID.SUMMARY:
        case TAG_ID.LISTING:
        case TAG_ID.FIELDSET:
        case TAG_ID.BLOCKQUOTE:
        case TAG_ID.FIGCAPTION:
            addressEndTagInBody(p, token);
            break;
        case TAG_ID.LI:
            liEndTagInBody(p);
            break;
        case TAG_ID.DD:
        case TAG_ID.DT:
            ddEndTagInBody(p, token);
            break;
        case TAG_ID.H1:
        case TAG_ID.H2:
        case TAG_ID.H3:
        case TAG_ID.H4:
        case TAG_ID.H5:
        case TAG_ID.H6:
            numberedHeaderEndTagInBody(p);
            break;
        case TAG_ID.BR:
            brEndTagInBody(p);
            break;
        case TAG_ID.BODY:
            bodyEndTagInBody(p, token);
            break;
        case TAG_ID.HTML:
            htmlEndTagInBody(p, token);
            break;
        case TAG_ID.FORM:
            formEndTagInBody(p);
            break;
        case TAG_ID.APPLET:
        case TAG_ID.OBJECT:
        case TAG_ID.MARQUEE:
            appletEndTagInBody(p, token);
            break;
        case TAG_ID.TEMPLATE:
            templateEndTagInHead(p, token);
            break;
        default:
            genericEndTagInBody(p, token);
    }
}
|
||
|
/**
 * Handles EOF in the "in body" insertion mode: while template insertion modes
 * are still stacked the token is handled by `eofInTemplate`, otherwise parsing
 * is stopped.
 *
 * @param {object} p - Parser state.
 * @param {object} token - EOF token.
 */
function eofInBody(p, token) {
    const hasOpenTemplate = p.tmplInsertionModeStack.length > 0;
    if (hasOpenTemplate) {
        eofInTemplate(p, token);
    } else {
        stopParsing(p, token);
    }
}
|
||
|
// The "text" insertion mode
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Handles an end tag in the "text" insertion mode.
 *
 * For `</script>` the optional `p.scriptHandler` callback is invoked (with `p`
 * as `this`) on the current element. The current element is then popped and
 * the insertion mode saved in `p.originalInsertionMode` is restored.
 *
 * @param {object} p - Parser state.
 * @param {object} token - End tag token.
 */
function endTagInText(p, token) {
    if (token.tagID === TAG_ID.SCRIPT) {
        const handler = p.scriptHandler;
        // The handler is optional: skip it when it is null or undefined.
        if (handler !== null && handler !== undefined) {
            handler.call(p, p.openElements.current);
        }
    }
    p.openElements.pop();
    p.insertionMode = p.originalInsertionMode;
}
|
||
|
/**
 * Handles EOF in the "text" insertion mode: reports the error, pops the current
 * element, restores the saved insertion mode and forwards the EOF token to
 * `p.onEof` so it is handled again in the restored mode.
 *
 * @param {object} p - Parser state.
 * @param {object} token - EOF token.
 */
function eofInText(p, token) {
    p._err(token, ERR.eofInElementThatCanContainOnlyText);
    p.openElements.pop();
    p.insertionMode = p.originalInsertionMode;
    p.onEof(token);
}
|
||
|
// The "in table" insertion mode
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Handles character tokens in the "in table" insertion mode.
 *
 * When the current node is one of `TABLE_STRUCTURE_TAGS`, pending character
 * state is reset, the current mode is saved and the parser enters "in table
 * text", where the token is processed straight away (token types other than
 * CHARACTER and WHITESPACE_CHARACTER are dropped). Otherwise the token goes
 * through `tokenInTable`.
 *
 * @param {object} p - Parser state.
 * @param {object} token - Character token.
 */
function characterInTable(p, token) {
    if (!TABLE_STRUCTURE_TAGS.has(p.openElements.currentTagId)) {
        tokenInTable(p, token);
        return;
    }
    p.pendingCharacterTokens.length = 0;
    p.hasNonWhitespacePendingCharacterToken = false;
    p.originalInsertionMode = p.insertionMode;
    p.insertionMode = InsertionMode.IN_TABLE_TEXT;
    switch (token.type) {
        case TokenType.CHARACTER:
            characterInTableText(p, token);
            break;
        case TokenType.WHITESPACE_CHARACTER:
            whitespaceCharacterInTableText(p, token);
            break;
        // Ignore null
    }
}
|
||
|
/**
 * Handles a `<caption>` start tag in the "in table" insertion mode: clears the
 * stack back to a table context, adds a formatting marker, inserts the element
 * and switches to "in caption".
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function captionStartTagInTable(p, token) {
    p.openElements.clearBackToTableContext();
    p.activeFormattingElements.insertMarker();
    p._insertElement(token, NS.HTML);
    p.insertionMode = InsertionMode.IN_CAPTION;
}
|
||
|
/**
 * Handles a `<colgroup>` start tag in the "in table" insertion mode: clears the
 * stack back to a table context, inserts the element and switches to
 * "in column group".
 *
 * @param {object} p - Parser state.
 * @param {object} token - Start tag token.
 */
function colgroupStartTagInTable(p, token) {
    p.openElements.clearBackToTableContext();
    p._insertElement(token, NS.HTML);
    p.insertionMode = InsertionMode.IN_COLUMN_GROUP;
}
|
||
|
/**
 * Handles a `col` start tag in the "in table" insertion mode.
 *
 * A synthetic `colgroup` element is inserted first, then the token is
 * reprocessed by the "in column group" start-tag handler.
 *
 * @param p Parser state.
 * @param token Start tag token.
 */
function colStartTagInTable(p, token) {
    const { openElements } = p;
    openElements.clearBackToTableContext();
    // Imply the missing <colgroup> wrapper.
    p._insertFakeElement(TAG_NAMES.COLGROUP, TAG_ID.COLGROUP);
    p.insertionMode = InsertionMode.IN_COLUMN_GROUP;
    startTagInColumnGroup(p, token);
}
|
||
|
/**
 * Handles a table-section start tag (dispatched for `tbody`, `tfoot` and
 * `thead`) in the "in table" insertion mode: clears the stack back to a table
 * context, inserts the element and switches to "in table body".
 *
 * @param p Parser state.
 * @param token Start tag token.
 */
function tbodyStartTagInTable(p, token) {
    const { openElements } = p;
    openElements.clearBackToTableContext();
    p._insertElement(token, NS.HTML);
    p.insertionMode = InsertionMode.IN_TABLE_BODY;
}
|
||
|
/**
 * Handles a row or cell start tag (dispatched for `td`, `th` and `tr`) in the
 * "in table" insertion mode.
 *
 * A synthetic `tbody` element is inserted first, then the token is
 * reprocessed by the "in table body" start-tag handler.
 *
 * @param p Parser state.
 * @param token Start tag token.
 */
function tdStartTagInTable(p, token) {
    const { openElements } = p;
    openElements.clearBackToTableContext();
    // Imply the missing <tbody> wrapper.
    p._insertFakeElement(TAG_NAMES.TBODY, TAG_ID.TBODY);
    p.insertionMode = InsertionMode.IN_TABLE_BODY;
    startTagInTableBody(p, token);
}
|
||
|
/**
 * Handles a nested `table` start tag in the "in table" insertion mode.
 *
 * If a `table` is in table scope, it is closed, the insertion mode is reset
 * and the start tag is reprocessed. Otherwise the token is ignored.
 *
 * @param p Parser state.
 * @param token Start tag token.
 */
function tableStartTagInTable(p, token) {
    const { openElements } = p;
    if (!openElements.hasInTableScope(TAG_ID.TABLE)) {
        return;
    }
    openElements.popUntilTagNamePopped(TAG_ID.TABLE);
    p._resetInsertionMode();
    p._processStartTag(token);
}
|
||
|
/**
 * Handles an `input` start tag in the "in table" insertion mode.
 *
 * Inputs accepted by `isHiddenInput` are appended directly; any other input
 * goes through `tokenInTable`. The self-closing flag is acknowledged in both
 * cases.
 *
 * @param p Parser state.
 * @param token Start tag token.
 */
function inputStartTagInTable(p, token) {
    if (!isHiddenInput(token)) {
        tokenInTable(p, token);
    } else {
        p._appendElement(token, NS.HTML);
    }
    token.ackSelfClosing = true;
}
|
||
|
/**
 * Handles a `form` start tag in the "in table" insertion mode.
 *
 * The tag is ignored when a form element is already tracked or when a
 * template is on the stack. Otherwise the element is inserted, recorded as
 * `p.formElement` and immediately popped.
 *
 * @param p Parser state.
 * @param token Start tag token.
 */
function formStartTagInTable(p, token) {
    if (p.formElement || p.openElements.tmplCount !== 0) {
        return;
    }
    p._insertElement(token, NS.HTML);
    p.formElement = p.openElements.current;
    p.openElements.pop();
}
|
||
|
/**
 * Dispatches a start tag in the "in table" insertion mode to the dedicated
 * per-tag handler. Tags without a dedicated handler fall through to
 * `tokenInTable`.
 *
 * @param p Parser state.
 * @param token Start tag token.
 */
function startTagInTable(p, token) {
    switch (token.tagID) {
        case TAG_ID.CAPTION:
            captionStartTagInTable(p, token);
            return;
        case TAG_ID.COLGROUP:
            colgroupStartTagInTable(p, token);
            return;
        case TAG_ID.COL:
            colStartTagInTable(p, token);
            return;
        case TAG_ID.TBODY:
        case TAG_ID.TFOOT:
        case TAG_ID.THEAD:
            tbodyStartTagInTable(p, token);
            return;
        case TAG_ID.TD:
        case TAG_ID.TH:
        case TAG_ID.TR:
            tdStartTagInTable(p, token);
            return;
        case TAG_ID.TABLE:
            tableStartTagInTable(p, token);
            return;
        case TAG_ID.STYLE:
        case TAG_ID.SCRIPT:
        case TAG_ID.TEMPLATE:
            // These are handled by the "in head" start-tag handler.
            startTagInHead(p, token);
            return;
        case TAG_ID.INPUT:
            inputStartTagInTable(p, token);
            return;
        case TAG_ID.FORM:
            formStartTagInTable(p, token);
            return;
        default:
            tokenInTable(p, token);
    }
}
|
||
|
/**
 * Handles an end tag in the "in table" insertion mode.
 *
 * - `table`: closes the table (if one is in table scope) and resets the
 *   insertion mode.
 * - `template`: delegated to `templateEndTagInHead`.
 * - `body`, `caption`, `col`, `colgroup`, `html`, `tbody`, `td`, `tfoot`,
 *   `th`, `thead`, `tr`: ignored.
 * - anything else: processed through `tokenInTable`.
 *
 * @param p Parser state.
 * @param token End tag token.
 */
function endTagInTable(p, token) {
    switch (token.tagID) {
        case TAG_ID.TABLE:
            if (p.openElements.hasInTableScope(TAG_ID.TABLE)) {
                p.openElements.popUntilTagNamePopped(TAG_ID.TABLE);
                p._resetInsertionMode();
            }
            return;
        case TAG_ID.TEMPLATE:
            templateEndTagInHead(p, token);
            return;
        case TAG_ID.BODY:
        case TAG_ID.CAPTION:
        case TAG_ID.COL:
        case TAG_ID.COLGROUP:
        case TAG_ID.HTML:
        case TAG_ID.TBODY:
        case TAG_ID.TD:
        case TAG_ID.TFOOT:
        case TAG_ID.TH:
        case TAG_ID.THEAD:
        case TAG_ID.TR:
            // Ignore token
            return;
        default:
            tokenInTable(p, token);
    }
}
|
||
|
/**
 * Processes a token with the "in body" rules while foster parenting is
 * switched on, then restores the previous foster-parenting flag.
 *
 * @param p Parser state.
 * @param token Token to process.
 */
function tokenInTable(p, token) {
    const previousFosterParenting = p.fosterParentingEnabled;
    p.fosterParentingEnabled = true;
    // Delegate to the "in body" mode handler.
    modeInBody(p, token);
    p.fosterParentingEnabled = previousFosterParenting;
}
|
||
|
// The "in table text" insertion mode
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Buffers a whitespace character token while in the "in table text"
 * insertion mode. The non-whitespace flag is left untouched.
 *
 * @param p Parser state; `p.pendingCharacterTokens` receives the token.
 * @param token Whitespace character token.
 */
function whitespaceCharacterInTableText(p, token) {
    const { pendingCharacterTokens } = p;
    pendingCharacterTokens.push(token);
}
|
||
|
/**
 * Buffers a character token while in the "in table text" insertion mode and
 * records that the pending run now contains a non-whitespace token.
 *
 * @param p Parser state; `p.pendingCharacterTokens` receives the token.
 * @param token Character token.
 */
function characterInTableText(p, token) {
    const { pendingCharacterTokens } = p;
    pendingCharacterTokens.push(token);
    p.hasNonWhitespacePendingCharacterToken = true;
}
|
||
|
/**
 * Handles any non-character token in the "in table text" insertion mode.
 *
 * The buffered character tokens are flushed first: through `tokenInTable`
 * when the run contains non-whitespace, otherwise inserted directly. The
 * saved insertion mode is then restored and the triggering token is
 * reprocessed.
 *
 * @param p Parser state.
 * @param token Token that ended the pending character run.
 */
function tokenInTableText(p, token) {
    // The flag is sampled once, before any buffered token is flushed.
    const flush = p.hasNonWhitespacePendingCharacterToken
        ? (pending) => tokenInTable(p, pending)
        : (pending) => p._insertCharacters(pending);
    for (let idx = 0; idx < p.pendingCharacterTokens.length; idx += 1) {
        flush(p.pendingCharacterTokens[idx]);
    }
    p.insertionMode = p.originalInsertionMode;
    p._processToken(token);
}
|
||
|
// The "in caption" insertion mode
|
||
|
//------------------------------------------------------------------
|
||
|
// Tag IDs that `startTagInCaption` and `startTagInCell` test a start tag
// against: when the tag is in this set, the open caption/cell is closed first
// (if one is in table scope) and the token is reprocessed by the enclosing
// table mode; otherwise the tag is handled with the "in body" rules.
// NOTE(review): despite the name, the entries are table-structure tags, not
// HTML void elements.
const TABLE_VOID_ELEMENTS = new Set([TAG_ID.CAPTION, TAG_ID.COL, TAG_ID.COLGROUP, TAG_ID.TBODY, TAG_ID.TD, TAG_ID.TFOOT, TAG_ID.TH, TAG_ID.THEAD, TAG_ID.TR]);
|
||
|
/**
 * Handles a start tag in the "in caption" insertion mode.
 *
 * Tags outside `TABLE_VOID_ELEMENTS` are handled with the "in body" rules.
 * For tags in that set, the caption is closed (if one is in table scope) and
 * the token is reprocessed in "in table"; with no caption in scope the token
 * is ignored.
 *
 * @param p Parser state.
 * @param token Start tag token.
 */
function startTagInCaption(p, token) {
    if (!TABLE_VOID_ELEMENTS.has(token.tagID)) {
        startTagInBody(p, token);
        return;
    }
    if (!p.openElements.hasInTableScope(TAG_ID.CAPTION)) {
        return;
    }
    p.openElements.generateImpliedEndTags();
    p.openElements.popUntilTagNamePopped(TAG_ID.CAPTION);
    p.activeFormattingElements.clearToLastMarker();
    p.insertionMode = InsertionMode.IN_TABLE;
    startTagInTable(p, token);
}
|
||
|
/**
 * Handles an end tag in the "in caption" insertion mode.
 *
 * - `caption` / `table`: closes the caption when one is in table scope and
 *   switches to "in table"; a `table` end tag is then reprocessed there.
 * - `body`, `col`, `colgroup`, `html`, `tbody`, `td`, `tfoot`, `th`, `thead`,
 *   `tr`: ignored.
 * - anything else: handled with the "in body" end-tag rules.
 *
 * @param p Parser state.
 * @param token End tag token.
 */
function endTagInCaption(p, token) {
    const tagId = token.tagID;
    switch (tagId) {
        case TAG_ID.CAPTION:
        case TAG_ID.TABLE:
            if (!p.openElements.hasInTableScope(TAG_ID.CAPTION)) {
                return;
            }
            p.openElements.generateImpliedEndTags();
            p.openElements.popUntilTagNamePopped(TAG_ID.CAPTION);
            p.activeFormattingElements.clearToLastMarker();
            p.insertionMode = InsertionMode.IN_TABLE;
            if (tagId === TAG_ID.TABLE) {
                endTagInTable(p, token);
            }
            return;
        case TAG_ID.BODY:
        case TAG_ID.COL:
        case TAG_ID.COLGROUP:
        case TAG_ID.HTML:
        case TAG_ID.TBODY:
        case TAG_ID.TD:
        case TAG_ID.TFOOT:
        case TAG_ID.TH:
        case TAG_ID.THEAD:
        case TAG_ID.TR:
            // Ignore token
            return;
        default:
            endTagInBody(p, token);
    }
}
|
||
|
// The "in column group" insertion mode
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Handles a start tag in the "in column group" insertion mode.
 *
 * `html` uses the "in body" rules, `col` is appended and its self-closing
 * flag acknowledged, `template` uses the "in head" rules, and everything else
 * goes through `tokenInColumnGroup`.
 *
 * @param p Parser state.
 * @param token Start tag token.
 */
function startTagInColumnGroup(p, token) {
    switch (token.tagID) {
        case TAG_ID.COL:
            p._appendElement(token, NS.HTML);
            token.ackSelfClosing = true;
            return;
        case TAG_ID.HTML:
            startTagInBody(p, token);
            return;
        case TAG_ID.TEMPLATE:
            startTagInHead(p, token);
            return;
        default:
            tokenInColumnGroup(p, token);
    }
}
|
||
|
/**
 * Handles an end tag in the "in column group" insertion mode.
 *
 * `colgroup` pops the current element and returns to "in table" only when the
 * current element is a `colgroup`; `template` is delegated to
 * `templateEndTagInHead`; `col` is ignored; everything else goes through
 * `tokenInColumnGroup`.
 *
 * @param p Parser state.
 * @param token End tag token.
 */
function endTagInColumnGroup(p, token) {
    switch (token.tagID) {
        case TAG_ID.COLGROUP:
            if (p.openElements.currentTagId === TAG_ID.COLGROUP) {
                p.openElements.pop();
                p.insertionMode = InsertionMode.IN_TABLE;
            }
            return;
        case TAG_ID.COL:
            // Ignore token
            return;
        case TAG_ID.TEMPLATE:
            templateEndTagInHead(p, token);
            return;
        default:
            tokenInColumnGroup(p, token);
    }
}
|
||
|
/**
 * Fallback handler for the "in column group" insertion mode.
 *
 * When the current element is a `colgroup`, it is popped, the parser switches
 * to "in table" and the token is reprocessed. Otherwise the token is ignored.
 *
 * @param p Parser state.
 * @param token Token to process.
 */
function tokenInColumnGroup(p, token) {
    const { openElements } = p;
    if (openElements.currentTagId !== TAG_ID.COLGROUP) {
        return;
    }
    openElements.pop();
    p.insertionMode = InsertionMode.IN_TABLE;
    p._processToken(token);
}
|
||
|
// The "in table body" insertion mode
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Handles a start tag in the "in table body" insertion mode.
 *
 * - `tr`: inserted after clearing back to a table body context; mode becomes
 *   "in row".
 * - `th` / `td`: a synthetic `tr` is inserted and the token is reprocessed in
 *   "in row".
 * - `caption`, `col`, `colgroup`, `tbody`, `tfoot`, `thead`: the open section
 *   is closed (if one is in table scope) and the token is reprocessed in
 *   "in table"; otherwise ignored.
 * - anything else: handled with the "in table" start-tag rules.
 *
 * @param p Parser state.
 * @param token Start tag token.
 */
function startTagInTableBody(p, token) {
    const stack = p.openElements;
    switch (token.tagID) {
        case TAG_ID.TR:
            stack.clearBackToTableBodyContext();
            p._insertElement(token, NS.HTML);
            p.insertionMode = InsertionMode.IN_ROW;
            return;
        case TAG_ID.TH:
        case TAG_ID.TD:
            stack.clearBackToTableBodyContext();
            // Imply the missing <tr> wrapper.
            p._insertFakeElement(TAG_NAMES.TR, TAG_ID.TR);
            p.insertionMode = InsertionMode.IN_ROW;
            startTagInRow(p, token);
            return;
        case TAG_ID.CAPTION:
        case TAG_ID.COL:
        case TAG_ID.COLGROUP:
        case TAG_ID.TBODY:
        case TAG_ID.TFOOT:
        case TAG_ID.THEAD:
            if (!stack.hasTableBodyContextInTableScope()) {
                return;
            }
            stack.clearBackToTableBodyContext();
            stack.pop();
            p.insertionMode = InsertionMode.IN_TABLE;
            startTagInTable(p, token);
            return;
        default:
            startTagInTable(p, token);
    }
}
|
||
|
/**
 * Handles an end tag in the "in table body" insertion mode.
 *
 * - `tbody`, `tfoot`, `thead`: closes the section when a matching element is
 *   in table scope and switches to "in table".
 * - `table`: closes the open section (if one is in table scope) and
 *   reprocesses the token in "in table".
 * - `body`, `caption`, `col`, `colgroup`, `html`, `td`, `th`, `tr`: ignored.
 * - anything else: handled with the "in table" end-tag rules.
 *
 * @param p Parser state.
 * @param token End tag token.
 */
function endTagInTableBody(p, token) {
    const tagId = token.tagID;
    const stack = p.openElements;
    switch (tagId) {
        case TAG_ID.TBODY:
        case TAG_ID.TFOOT:
        case TAG_ID.THEAD:
            if (stack.hasInTableScope(tagId)) {
                stack.clearBackToTableBodyContext();
                stack.pop();
                p.insertionMode = InsertionMode.IN_TABLE;
            }
            return;
        case TAG_ID.TABLE:
            if (!stack.hasTableBodyContextInTableScope()) {
                return;
            }
            stack.clearBackToTableBodyContext();
            stack.pop();
            p.insertionMode = InsertionMode.IN_TABLE;
            endTagInTable(p, token);
            return;
        case TAG_ID.BODY:
        case TAG_ID.CAPTION:
        case TAG_ID.COL:
        case TAG_ID.COLGROUP:
        case TAG_ID.HTML:
        case TAG_ID.TD:
        case TAG_ID.TH:
        case TAG_ID.TR:
            // Ignore token
            return;
        default:
            endTagInTable(p, token);
    }
}
|
||
|
// The "in row" insertion mode
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Handles a start tag in the "in row" insertion mode.
 *
 * - `th` / `td`: inserted after clearing back to a table row context; mode
 *   becomes "in cell" and a formatting marker is pushed.
 * - `caption`, `col`, `colgroup`, `tbody`, `tfoot`, `thead`, `tr`: the open
 *   row is closed (if a `tr` is in table scope) and the token is reprocessed
 *   in "in table body"; otherwise ignored.
 * - anything else: handled with the "in table" start-tag rules.
 *
 * @param p Parser state.
 * @param token Start tag token.
 */
function startTagInRow(p, token) {
    const stack = p.openElements;
    switch (token.tagID) {
        case TAG_ID.TH:
        case TAG_ID.TD:
            stack.clearBackToTableRowContext();
            p._insertElement(token, NS.HTML);
            p.insertionMode = InsertionMode.IN_CELL;
            p.activeFormattingElements.insertMarker();
            return;
        case TAG_ID.CAPTION:
        case TAG_ID.COL:
        case TAG_ID.COLGROUP:
        case TAG_ID.TBODY:
        case TAG_ID.TFOOT:
        case TAG_ID.THEAD:
        case TAG_ID.TR:
            if (!stack.hasInTableScope(TAG_ID.TR)) {
                return;
            }
            stack.clearBackToTableRowContext();
            stack.pop();
            p.insertionMode = InsertionMode.IN_TABLE_BODY;
            startTagInTableBody(p, token);
            return;
        default:
            startTagInTable(p, token);
    }
}
|
||
|
/**
 * Handles an end tag in the "in row" insertion mode.
 *
 * - `tr`: closes the row when a `tr` is in table scope and switches to
 *   "in table body".
 * - `table`: closes the row (if a `tr` is in table scope) and reprocesses the
 *   token in "in table body".
 * - `tbody`, `tfoot`, `thead`: the token is ignored unless BOTH an element
 *   with the same tag name and a `tr` are in table scope; only then is the
 *   row closed and the token reprocessed in "in table body".
 * - `body`, `caption`, `col`, `colgroup`, `html`, `td`, `th`: ignored.
 * - anything else: handled with the "in table" end-tag rules.
 *
 * @param p Parser state.
 * @param token End tag token.
 */
function endTagInRow(p, token) {
    switch (token.tagID) {
        case TAG_ID.TR: {
            if (p.openElements.hasInTableScope(TAG_ID.TR)) {
                p.openElements.clearBackToTableRowContext();
                p.openElements.pop();
                p.insertionMode = InsertionMode.IN_TABLE_BODY;
            }
            break;
        }
        case TAG_ID.TABLE: {
            if (p.openElements.hasInTableScope(TAG_ID.TR)) {
                p.openElements.clearBackToTableRowContext();
                p.openElements.pop();
                p.insertionMode = InsertionMode.IN_TABLE_BODY;
                endTagInTableBody(p, token);
            }
            break;
        }
        case TAG_ID.TBODY:
        case TAG_ID.TFOOT:
        case TAG_ID.THEAD: {
            // Both conditions are required: a mismatched section end tag
            // (e.g. </tbody> inside <thead><tr>) must not close the open row.
            if (p.openElements.hasInTableScope(token.tagID) && p.openElements.hasInTableScope(TAG_ID.TR)) {
                p.openElements.clearBackToTableRowContext();
                p.openElements.pop();
                p.insertionMode = InsertionMode.IN_TABLE_BODY;
                endTagInTableBody(p, token);
            }
            break;
        }
        case TAG_ID.BODY:
        case TAG_ID.CAPTION:
        case TAG_ID.COL:
        case TAG_ID.COLGROUP:
        case TAG_ID.HTML:
        case TAG_ID.TD:
        case TAG_ID.TH: {
            // Ignore end tag
            break;
        }
        default: {
            endTagInTable(p, token);
        }
    }
}
|
||
|
// The "in cell" insertion mode
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Handles a start tag in the "in cell" insertion mode.
 *
 * Tags outside `TABLE_VOID_ELEMENTS` are handled with the "in body" rules.
 * For tags in that set, the open cell is closed (when a `td` or `th` is in
 * table scope) and the token is reprocessed in "in row"; otherwise ignored.
 *
 * @param p Parser state.
 * @param token Start tag token.
 */
function startTagInCell(p, token) {
    if (!TABLE_VOID_ELEMENTS.has(token.tagID)) {
        startTagInBody(p, token);
        return;
    }
    const cellInScope = p.openElements.hasInTableScope(TAG_ID.TD) || p.openElements.hasInTableScope(TAG_ID.TH);
    if (cellInScope) {
        p._closeTableCell();
        startTagInRow(p, token);
    }
}
|
||
|
/**
 * Handles an end tag in the "in cell" insertion mode.
 *
 * - `td` / `th`: closes the cell when a matching element is in table scope
 *   and switches to "in row".
 * - `table`, `tbody`, `tfoot`, `thead`, `tr`: when a matching element is in
 *   table scope, the cell is closed and the token is reprocessed in "in row".
 * - `body`, `caption`, `col`, `colgroup`, `html`: ignored.
 * - anything else: handled with the "in body" end-tag rules.
 *
 * @param p Parser state.
 * @param token End tag token.
 */
function endTagInCell(p, token) {
    const tagId = token.tagID;
    switch (tagId) {
        case TAG_ID.TD:
        case TAG_ID.TH:
            if (!p.openElements.hasInTableScope(tagId)) {
                return;
            }
            p.openElements.generateImpliedEndTags();
            p.openElements.popUntilTagNamePopped(tagId);
            p.activeFormattingElements.clearToLastMarker();
            p.insertionMode = InsertionMode.IN_ROW;
            return;
        case TAG_ID.TABLE:
        case TAG_ID.TBODY:
        case TAG_ID.TFOOT:
        case TAG_ID.THEAD:
        case TAG_ID.TR:
            if (!p.openElements.hasInTableScope(tagId)) {
                return;
            }
            p._closeTableCell();
            endTagInRow(p, token);
            return;
        case TAG_ID.BODY:
        case TAG_ID.CAPTION:
        case TAG_ID.COL:
        case TAG_ID.COLGROUP:
        case TAG_ID.HTML:
            // Ignore token
            return;
        default:
            endTagInBody(p, token);
    }
}
|
||
|
// The "in select" insertion mode
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Handles a start tag in the "in select" insertion mode.
 *
 * - `html`: handled with the "in body" rules.
 * - `option`: closes an open `option`, then inserts the element.
 * - `optgroup` / `hr`: closes an open `option` and then an open `optgroup`;
 *   `optgroup` is inserted, `hr` is appended with its self-closing flag
 *   acknowledged.
 * - `input`, `keygen`, `textarea`, `select`: when a `select` is in select
 *   scope, it is closed and the insertion mode reset; every tag except
 *   `select` is then reprocessed.
 * - `script` / `template`: handled with the "in head" rules.
 * - anything else: ignored.
 *
 * @param p Parser state.
 * @param token Start tag token.
 */
function startTagInSelect(p, token) {
    const stack = p.openElements;
    const tagId = token.tagID;
    switch (tagId) {
        case TAG_ID.HTML:
            startTagInBody(p, token);
            return;
        case TAG_ID.OPTION:
            if (stack.currentTagId === TAG_ID.OPTION) {
                stack.pop();
            }
            p._insertElement(token, NS.HTML);
            return;
        case TAG_ID.OPTGROUP:
        case TAG_ID.HR:
            if (stack.currentTagId === TAG_ID.OPTION) {
                stack.pop();
            }
            if (stack.currentTagId === TAG_ID.OPTGROUP) {
                stack.pop();
            }
            if (tagId === TAG_ID.HR) {
                p._appendElement(token, NS.HTML);
                token.ackSelfClosing = true;
            } else {
                p._insertElement(token, NS.HTML);
            }
            return;
        case TAG_ID.INPUT:
        case TAG_ID.KEYGEN:
        case TAG_ID.TEXTAREA:
        case TAG_ID.SELECT:
            if (!stack.hasInSelectScope(TAG_ID.SELECT)) {
                return;
            }
            stack.popUntilTagNamePopped(TAG_ID.SELECT);
            p._resetInsertionMode();
            if (tagId !== TAG_ID.SELECT) {
                p._processStartTag(token);
            }
            return;
        case TAG_ID.SCRIPT:
        case TAG_ID.TEMPLATE:
            startTagInHead(p, token);
            return;
        default:
            // Do nothing
            return;
    }
}
|
||
|
/**
 * Handles an end tag in the "in select" insertion mode.
 *
 * - `optgroup`: pops an open `option` that sits directly above an `optgroup`,
 *   then pops the `optgroup` if it is the current element.
 * - `option`: pops the current element if it is an `option`.
 * - `select`: closes the `select` (if one is in select scope) and resets the
 *   insertion mode.
 * - `template`: delegated to `templateEndTagInHead`.
 * - anything else: ignored.
 *
 * @param p Parser state.
 * @param token End tag token.
 */
function endTagInSelect(p, token) {
    const stack = p.openElements;
    switch (token.tagID) {
        case TAG_ID.OPTION:
            if (stack.currentTagId === TAG_ID.OPTION) {
                stack.pop();
            }
            return;
        case TAG_ID.OPTGROUP: {
            const optionInsideOptgroup = stack.stackTop > 0 &&
                stack.currentTagId === TAG_ID.OPTION &&
                stack.tagIDs[stack.stackTop - 1] === TAG_ID.OPTGROUP;
            if (optionInsideOptgroup) {
                stack.pop();
            }
            if (stack.currentTagId === TAG_ID.OPTGROUP) {
                stack.pop();
            }
            return;
        }
        case TAG_ID.SELECT:
            if (stack.hasInSelectScope(TAG_ID.SELECT)) {
                stack.popUntilTagNamePopped(TAG_ID.SELECT);
                p._resetInsertionMode();
            }
            return;
        case TAG_ID.TEMPLATE:
            templateEndTagInHead(p, token);
            return;
        default:
            // Do nothing
            return;
    }
}
|
||
|
// The "in select in table" insertion mode
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Handles a start tag in the "in select in table" insertion mode.
 *
 * For `caption`, `table`, `tbody`, `tfoot`, `thead`, `tr`, `td` and `th` the
 * open `select` is closed, the insertion mode reset and the tag reprocessed.
 * Every other tag uses the "in select" start-tag rules.
 *
 * @param p Parser state.
 * @param token Start tag token.
 */
function startTagInSelectInTable(p, token) {
    switch (token.tagID) {
        case TAG_ID.CAPTION:
        case TAG_ID.TABLE:
        case TAG_ID.TBODY:
        case TAG_ID.TFOOT:
        case TAG_ID.THEAD:
        case TAG_ID.TR:
        case TAG_ID.TD:
        case TAG_ID.TH:
            p.openElements.popUntilTagNamePopped(TAG_ID.SELECT);
            p._resetInsertionMode();
            p._processStartTag(token);
            return;
        default:
            startTagInSelect(p, token);
    }
}
|
||
|
/**
 * Handles an end tag in the "in select in table" insertion mode.
 *
 * For `caption`, `table`, `tbody`, `tfoot`, `thead`, `tr`, `td` and `th`:
 * when a matching element is in table scope, the open `select` is closed, the
 * insertion mode reset and the end tag re-dispatched via `p.onEndTag`;
 * otherwise the token is ignored. Every other tag uses the "in select"
 * end-tag rules.
 *
 * @param p Parser state.
 * @param token End tag token.
 */
function endTagInSelectInTable(p, token) {
    const tagId = token.tagID;
    switch (tagId) {
        case TAG_ID.CAPTION:
        case TAG_ID.TABLE:
        case TAG_ID.TBODY:
        case TAG_ID.TFOOT:
        case TAG_ID.THEAD:
        case TAG_ID.TR:
        case TAG_ID.TD:
        case TAG_ID.TH:
            if (!p.openElements.hasInTableScope(tagId)) {
                return;
            }
            p.openElements.popUntilTagNamePopped(TAG_ID.SELECT);
            p._resetInsertionMode();
            p.onEndTag(token);
            return;
        default:
            endTagInSelect(p, token);
    }
}
|
||
|
// The "in template" insertion mode
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Handles a start tag in the "in template" insertion mode.
 *
 * Head-level tags (`base`, `basefont`, `bgsound`, `link`, `meta`, `noframes`,
 * `script`, `style`, `template`, `title`) are handled with the "in head"
 * rules without changing mode. For every other tag, the first entry of the
 * template insertion-mode stack and the current insertion mode are both set
 * to the mode matching the tag, and the token is reprocessed by that mode's
 * start-tag handler.
 *
 * @param p Parser state.
 * @param token Start tag token.
 */
function startTagInTemplate(p, token) {
    let nextMode;
    let reprocess;
    switch (token.tagID) {
        case TAG_ID.BASE:
        case TAG_ID.BASEFONT:
        case TAG_ID.BGSOUND:
        case TAG_ID.LINK:
        case TAG_ID.META:
        case TAG_ID.NOFRAMES:
        case TAG_ID.SCRIPT:
        case TAG_ID.STYLE:
        case TAG_ID.TEMPLATE:
        case TAG_ID.TITLE:
            // No mode change for these tags.
            startTagInHead(p, token);
            return;
        case TAG_ID.CAPTION:
        case TAG_ID.COLGROUP:
        case TAG_ID.TBODY:
        case TAG_ID.TFOOT:
        case TAG_ID.THEAD:
            nextMode = InsertionMode.IN_TABLE;
            reprocess = startTagInTable;
            break;
        case TAG_ID.COL:
            nextMode = InsertionMode.IN_COLUMN_GROUP;
            reprocess = startTagInColumnGroup;
            break;
        case TAG_ID.TR:
            nextMode = InsertionMode.IN_TABLE_BODY;
            reprocess = startTagInTableBody;
            break;
        case TAG_ID.TD:
        case TAG_ID.TH:
            nextMode = InsertionMode.IN_ROW;
            reprocess = startTagInRow;
            break;
        default:
            nextMode = InsertionMode.IN_BODY;
            reprocess = startTagInBody;
    }
    // Switch modes, then re-process the token in the chosen mode.
    p.tmplInsertionModeStack[0] = nextMode;
    p.insertionMode = nextMode;
    reprocess(p, token);
}
|
||
|
/**
 * Handles an end tag in the "in template" insertion mode. Only a `template`
 * end tag is acted on (via `templateEndTagInHead`); all others are ignored.
 *
 * @param p Parser state.
 * @param token End tag token.
 */
function endTagInTemplate(p, token) {
    if (token.tagID !== TAG_ID.TEMPLATE) {
        return;
    }
    templateEndTagInHead(p, token);
}
|
||
|
/**
 * Handles end-of-file in the "in template" insertion mode.
 *
 * With no template on the stack, parsing stops. Otherwise the innermost
 * template is closed, formatting elements are cleared to the last marker, the
 * first template insertion mode is dropped, the insertion mode is reset and
 * the EOF token is re-dispatched.
 *
 * @param p Parser state.
 * @param token EOF token.
 */
function eofInTemplate(p, token) {
    if (!(p.openElements.tmplCount > 0)) {
        stopParsing(p, token);
        return;
    }
    p.openElements.popUntilTagNamePopped(TAG_ID.TEMPLATE);
    p.activeFormattingElements.clearToLastMarker();
    p.tmplInsertionModeStack.shift();
    p._resetInsertionMode();
    p.onEof(token);
}
|
||
|
// The "after body" insertion mode
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Handles a start tag in the "after body" insertion mode: `html` uses the
 * "in body" start-tag rules, everything else goes through `tokenAfterBody`.
 *
 * @param p Parser state.
 * @param token Start tag token.
 */
function startTagAfterBody(p, token) {
    const handler = token.tagID === TAG_ID.HTML ? startTagInBody : tokenAfterBody;
    handler(p, token);
}
|
||
|
/**
 * Handles an end tag in the "after body" insertion mode.
 *
 * Any tag other than `html` goes through `tokenAfterBody`. For `html`, the
 * parser switches to "after after body" unless it is parsing a fragment, and
 * (when source locations are enabled) end locations are recorded.
 *
 * @param p Parser state.
 * @param token End tag token.
 */
function endTagAfterBody(p, token) {
    if (token.tagID !== TAG_ID.HTML) {
        tokenAfterBody(p, token);
        return;
    }
    if (!p.fragmentContext) {
        p.insertionMode = InsertionMode.AFTER_AFTER_BODY;
    }
    // NOTE: <html> is never popped from the stack, so its end location has
    // to be updated explicitly.
    if (!p.options.sourceCodeLocationInfo || p.openElements.tagIDs[0] !== TAG_ID.HTML) {
        return;
    }
    p._setEndLocation(p.openElements.items[0], token);
    // Also update the body element if it has no end tag location yet.
    const body = p.openElements.items[1];
    if (!body) {
        return;
    }
    const bodyLocation = p.treeAdapter.getNodeSourceCodeLocation(body);
    if (bodyLocation == null || !bodyLocation.endTag) {
        p._setEndLocation(body, token);
    }
}
|
||
|
/**
 * Fallback for the "after body" insertion mode: switches back to "in body"
 * and reprocesses the token there.
 *
 * @param p Parser state.
 * @param token Token to reprocess.
 */
function tokenAfterBody(p, token) {
    const bodyMode = InsertionMode.IN_BODY;
    p.insertionMode = bodyMode;
    modeInBody(p, token);
}
|
||
|
// The "in frameset" insertion mode
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Handles a start tag in the "in frameset" insertion mode.
 *
 * `html` uses the "in body" rules, `frameset` is inserted, `frame` is
 * appended with its self-closing flag acknowledged, `noframes` uses the
 * "in head" rules, and every other tag is ignored.
 *
 * @param p Parser state.
 * @param token Start tag token.
 */
function startTagInFrameset(p, token) {
    switch (token.tagID) {
        case TAG_ID.FRAMESET:
            p._insertElement(token, NS.HTML);
            return;
        case TAG_ID.FRAME:
            p._appendElement(token, NS.HTML);
            token.ackSelfClosing = true;
            return;
        case TAG_ID.NOFRAMES:
            startTagInHead(p, token);
            return;
        case TAG_ID.HTML:
            startTagInBody(p, token);
            return;
        default:
            // Do nothing
            return;
    }
}
|
||
|
/**
 * Handles an end tag in the "in frameset" insertion mode.
 *
 * Only a `frameset` end tag is acted on, and only when the current element is
 * not the root `html` element: the current element is popped and, when not
 * parsing a fragment and the new current element is not a `frameset`, the
 * parser switches to "after frameset".
 *
 * @param p Parser state.
 * @param token End tag token.
 */
function endTagInFrameset(p, token) {
    if (token.tagID !== TAG_ID.FRAMESET || p.openElements.isRootHtmlElementCurrent()) {
        return;
    }
    p.openElements.pop();
    const leftOutermostFrameset = !p.fragmentContext && p.openElements.currentTagId !== TAG_ID.FRAMESET;
    if (leftOutermostFrameset) {
        p.insertionMode = InsertionMode.AFTER_FRAMESET;
    }
}
|
||
|
// The "after frameset" insertion mode
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Handles a start tag in the "after frameset" insertion mode: `html` uses the
 * "in body" rules, `noframes` uses the "in head" rules, and every other tag
 * is ignored.
 *
 * @param p Parser state.
 * @param token Start tag token.
 */
function startTagAfterFrameset(p, token) {
    const tagId = token.tagID;
    if (tagId === TAG_ID.HTML) {
        startTagInBody(p, token);
    } else if (tagId === TAG_ID.NOFRAMES) {
        startTagInHead(p, token);
    }
    // Any other tag: do nothing.
}
|
||
|
/**
 * Handles an end tag in the "after frameset" insertion mode: an `html` end
 * tag switches the parser to "after after frameset"; all others are ignored.
 *
 * @param p Parser state.
 * @param token End tag token.
 */
function endTagAfterFrameset(p, token) {
    if (token.tagID !== TAG_ID.HTML) {
        return;
    }
    p.insertionMode = InsertionMode.AFTER_AFTER_FRAMESET;
}
|
||
|
// The "after after body" insertion mode
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Handles a start tag in the "after after body" insertion mode: `html` uses
 * the "in body" start-tag rules, everything else goes through
 * `tokenAfterAfterBody`.
 *
 * @param p Parser state.
 * @param token Start tag token.
 */
function startTagAfterAfterBody(p, token) {
    const handler = token.tagID === TAG_ID.HTML ? startTagInBody : tokenAfterAfterBody;
    handler(p, token);
}
|
||
|
/**
 * Fallback for the "after after body" insertion mode: switches back to
 * "in body" and reprocesses the token there.
 *
 * @param p Parser state.
 * @param token Token to reprocess.
 */
function tokenAfterAfterBody(p, token) {
    const bodyMode = InsertionMode.IN_BODY;
    p.insertionMode = bodyMode;
    modeInBody(p, token);
}
|
||
|
// The "after after frameset" insertion mode
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Handles a start tag in the "after after frameset" insertion mode: `html`
 * uses the "in body" rules, `noframes` uses the "in head" rules, and every
 * other tag is ignored.
 *
 * @param p Parser state.
 * @param token Start tag token.
 */
function startTagAfterAfterFrameset(p, token) {
    const tagId = token.tagID;
    if (tagId === TAG_ID.HTML) {
        startTagInBody(p, token);
    } else if (tagId === TAG_ID.NOFRAMES) {
        startTagInHead(p, token);
    }
    // Any other tag: do nothing.
}
|
||
|
// The rules for parsing tokens in foreign content
|
||
|
//------------------------------------------------------------------
|
||
|
/**
 * Handles a null character token in foreign content: the token's characters
 * are overwritten with `REPLACEMENT_CHARACTER` and then inserted.
 *
 * @param p Parser state.
 * @param token Null character token; mutated in place.
 */
function nullCharacterInForeignContent(p, token) {
    Object.assign(token, { chars: REPLACEMENT_CHARACTER });
    p._insertCharacters(token);
}
|
||
|
/**
 * Handles a character token in foreign content: inserts the characters and
 * then clears the parser's `framesetOk` flag.
 *
 * @param p Parser state.
 * @param token Character token.
 */
function characterInForeignContent(p, token) {
    p._insertCharacters(token);
    // Cleared only after the insertion, matching the original ordering.
    Object.assign(p, { framesetOk: false });
}
|
||
|
/**
 * Pops open elements until the current element is either in the HTML
 * namespace or accepted by `p._isIntegrationPoint`.
 *
 * @param p Parser state.
 */
function popUntilHtmlOrIntegrationPoint(p) {
    for (;;) {
        const node = p.openElements.current;
        if (p.treeAdapter.getNamespaceURI(node) === NS.HTML) {
            return;
        }
        if (p._isIntegrationPoint(p.openElements.currentTagId, node)) {
            return;
        }
        p.openElements.pop();
    }
}
|
||
|
/**
 * Handles a start tag while the parser is in foreign content.
 *
 * When `causesExit(token)` is true, elements are popped back to an HTML
 * element or integration point and the tag is processed outside foreign
 * content. Otherwise the token is adjusted for the namespace of the adjusted
 * current element and added in that namespace: appended when self-closing,
 * inserted otherwise. The self-closing flag is acknowledged in both cases.
 *
 * @param p Parser state.
 * @param token Start tag token; may be mutated by the adjustment helpers.
 */
function startTagInForeignContent(p, token) {
    if (causesExit(token)) {
        popUntilHtmlOrIntegrationPoint(p);
        p._startTagOutsideForeignContent(token);
        return;
    }
    const adjustedCurrent = p._getAdjustedCurrentElement();
    const namespace = p.treeAdapter.getNamespaceURI(adjustedCurrent);
    switch (namespace) {
        case NS.MATHML:
            adjustTokenMathMLAttrs(token);
            break;
        case NS.SVG:
            adjustTokenSVGTagName(token);
            adjustTokenSVGAttrs(token);
            break;
        default:
            break;
    }
    adjustTokenXMLAttrs(token);
    if (!token.selfClosing) {
        p._insertElement(token, namespace);
    } else {
        p._appendElement(token, namespace);
    }
    token.ackSelfClosing = true;
}
|
||
|
/**
 * Handles an end tag while the parser is in foreign content.
 *
 * `p` and `br` end tags pop back to an HTML element or integration point and
 * are processed outside foreign content. For any other tag the open-element
 * stack is walked from the top down to (but not including) index 0:
 * - on reaching an HTML-namespace element, the tag is processed outside
 *   foreign content;
 * - on reaching an element whose lower-cased tag name equals the token's tag
 *   name, the stack is shortened to that element's index.
 *
 * @param p Parser state.
 * @param token End tag token; `tagName` may be rewritten to the element's
 *     original casing.
 */
function endTagInForeignContent(p, token) {
    const exitsForeignContent = token.tagID === TAG_ID.P || token.tagID === TAG_ID.BR;
    if (exitsForeignContent) {
        popUntilHtmlOrIntegrationPoint(p);
        p._endTagOutsideForeignContent(token);
        return;
    }
    let depth = p.openElements.stackTop;
    while (depth > 0) {
        const node = p.openElements.items[depth];
        if (p.treeAdapter.getNamespaceURI(node) === NS.HTML) {
            p._endTagOutsideForeignContent(token);
            return;
        }
        const nodeName = p.treeAdapter.getTagName(node);
        if (nodeName.toLowerCase() === token.tagName) {
            // NOTE: update the token tag name for `_setEndLocation`.
            token.tagName = nodeName;
            p.openElements.shortenToLength(depth);
            return;
        }
        depth -= 1;
    }
}
|
||
|
|
||
|
// Shorthands
|
||
|
/**
 * Parses a complete HTML document from a string.
 *
 * Thin convenience wrapper around `Parser.parse`.
 *
 * @param html Input HTML string.
 * @param options Parsing options, passed through unchanged.
 * @returns Document
 *
 * @example
 *
 * ```js
 * const parse5 = require('parse5');
 *
 * const document = parse5.parse('<!DOCTYPE html><html><head></head><body>Hi there!</body></html>');
 *
 * console.log(document.childNodes[1].tagName); //> 'html'
 * ```
 */
function parse(html, options) {
    const document = Parser.parse(html, options);
    return document;
}
|
||
|
|
||
|
// Names exported from this module; `ERR` is exposed under the name `ErrorCodes`.
export { ERR as ErrorCodes, Parser, Tokenizer, TokenizerMode, defaultTreeAdapter, parse };
|