diff --git a/.eslintrc.json b/.eslintrc.json index 486d7bd1..4839447e 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -179,7 +179,7 @@ "ext/bg/js/options.js", "ext/bg/js/profile-conditions.js", "ext/bg/js/request-builder.js", - "ext/bg/js/native-simple-dom-parser.js", + "ext/bg/js/simple-dom-parser.js", "ext/bg/js/text-source-map.js", "ext/bg/js/translator.js", "ext/bg/js/backend.js", diff --git a/README.md b/README.md index d487ad07..205f7469 100644 --- a/README.md +++ b/README.md @@ -266,6 +266,7 @@ versions packaged. * jQuery: [homepage](https://blog.jquery.com/) - [snapshot](https://code.jquery.com/jquery-3.2.1.min.js) - [license](https://github.com/jquery/jquery/blob/3.2.1/LICENSE.txt) * JSZip: [homepage](https://stuk.github.io/jszip/) - [snapshot](https://raw.githubusercontent.com/Stuk/jszip/de7f52fbcba485737bef7923a83f0fad92d9f5bc/dist/jszip.min.js) - [license](https://github.com/Stuk/jszip/blob/v3.1.3/LICENSE.markdown) * WanaKana: [homepage](https://wanakana.com/) - [snapshot](https://unpkg.com/wanakana@4.0.2/umd/wanakana.min.js) - [license](https://github.com/WaniKani/WanaKana/blob/4.0.2/LICENSE) +* parse5: [homepage](https://github.com/inikulin/parse5) - [snapshot](https://github.com/inikulin/parse5/tree/v6.0.1/packages/parse5) - [license](https://github.com/inikulin/parse5/blob/v6.0.1/LICENSE) _(Only used in MV3 build)_ ## Frequently Asked Questions ## diff --git a/dev/build-libs.js b/dev/build-libs.js new file mode 100644 index 00000000..138fe089 --- /dev/null +++ b/dev/build-libs.js @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2020 Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +const fs = require('fs'); +const path = require('path'); +const browserify = require('browserify'); + +async function main() { + const extLibPath = path.join(__dirname, '..', 'ext', 'mixed', 'lib'); + const parse5Path = require.resolve('parse5'); + + const content = await new Promise((resolve, reject) => { + browserify([parse5Path], {standalone: 'parse5', debug: true}).bundle((error, result) => { + if (error) { + reject(error); + } else { + resolve(result); + } + }); + }); + + fs.writeFileSync(path.join(extLibPath, 'parse5.js'), content); +} + +main(); diff --git a/dev/data/manifest-variants.json b/dev/data/manifest-variants.json index 43d06032..75911771 100644 --- a/dev/data/manifest-variants.json +++ b/dev/data/manifest-variants.json @@ -110,7 +110,9 @@ "name": "chrome", "fileName": "yomichan-chrome.zip", "excludeFiles": [ - "sw.js" + "sw.js", + "bg/js/simple-dom-parser.js", + "mixed/lib/parse5.js" ] }, { @@ -133,7 +135,9 @@ } ], "excludeFiles": [ - "sw.js" + "sw.js", + "bg/js/simple-dom-parser.js", + "mixed/lib/parse5.js" ] }, { @@ -155,6 +159,10 @@ {"action": "move", "path": ["web_accessible_resources"], "newPath": ["web_accessible_resources_old"]}, {"action": "set", "path": ["web_accessible_resources"], "value": [{"resources": [], "matches": ["<all_urls>"]}], "after": "web_accessible_resources_old"}, {"action": "move", "path": ["web_accessible_resources_old"], "newPath": ["web_accessible_resources", 0, "resources"]} + ], + "excludeFiles": [ + "bg/background.html", + "bg/js/native-simple-dom-parser.js" ] }, { @@ -187,7 +195,9 @@ } ], "excludeFiles": [ - "sw.js" + "sw.js", + 
"bg/js/simple-dom-parser.js", + "mixed/lib/parse5.js" ] }, { @@ -216,7 +226,9 @@ } ], "excludeFiles": [ - "sw.js" + "sw.js", + "bg/js/simple-dom-parser.js", + "mixed/lib/parse5.js" ] } ] diff --git a/ext/bg/js/audio-downloader.js b/ext/bg/js/audio-downloader.js index 839eab7b..d1c4a02e 100644 --- a/ext/bg/js/audio-downloader.js +++ b/ext/bg/js/audio-downloader.js @@ -17,6 +17,7 @@ /* global * NativeSimpleDOMParser + * SimpleDOMParser */ class AudioDownloader { @@ -239,8 +240,10 @@ class AudioDownloader { } _createSimpleDOMParser(content) { - if (NativeSimpleDOMParser.isSupported()) { + if (typeof NativeSimpleDOMParser !== 'undefined' && NativeSimpleDOMParser.isSupported()) { return new NativeSimpleDOMParser(content); + } else if (typeof SimpleDOMParser !== 'undefined' && SimpleDOMParser.isSupported()) { + return new SimpleDOMParser(content); } else { throw new Error('DOM parsing not supported'); } diff --git a/ext/bg/js/native-simple-dom-parser.js b/ext/bg/js/native-simple-dom-parser.js index c1752bc4..4e0d89ea 100644 --- a/ext/bg/js/native-simple-dom-parser.js +++ b/ext/bg/js/native-simple-dom-parser.js @@ -17,8 +17,6 @@ class NativeSimpleDOMParser { constructor(content) { - // TODO : Remove - // eslint-disable-next-line no-undef this._document = new DOMParser().parseFromString(content, 'text/html'); } diff --git a/ext/bg/js/simple-dom-parser.js b/ext/bg/js/simple-dom-parser.js new file mode 100644 index 00000000..391ad3d3 --- /dev/null +++ b/ext/bg/js/simple-dom-parser.js @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2020 Yomichan Authors + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +/* globals + * parse5 + */ + +class SimpleDOMParser { + constructor(content) { + this._document = parse5.parse(content); + } + + getElementById(id, root=null) { + for (const node of this._allNodes(root)) { + if (typeof node.tagName === 'string' && this.getAttribute(node, 'id') === id) { + return node; + } + } + return null; + } + + getElementByTagName(tagName, root=null) { + for (const node of this._allNodes(root)) { + if (node.tagName === tagName) { + return node; + } + } + return null; + } + + getElementsByTagName(tagName, root=null) { + const results = []; + for (const node of this._allNodes(root)) { + if (node.tagName === tagName) { + results.push(node); + } + } + return results; + } + + getElementsByClassName(className, root=null) { + const results = []; + const classNamePattern = new RegExp(`(^|\\s)${escapeRegExp(className)}(\\s|$)`); + for (const node of this._allNodes(root)) { + if (typeof node.tagName === 'string') { + const nodeClassName = this.getAttribute(node, 'class'); + if (nodeClassName !== null && classNamePattern.test(nodeClassName)) { + results.push(node); + } + } + } + return results; + } + + getAttribute(element, attribute) { + for (const attr of element.attrs) { + if ( + attr.name === attribute && + typeof attr.namespace === 'undefined' + ) { + return attr.value; + } + } + return null; + } + + getTextContent(element) { + let source = ''; + for (const node of this._allNodes(element)) { + if (node.nodeName === '#text') { + source += node.value; + } + } + return source; + } + + static isSupported() { + return typeof parse5 !== 'undefined'; + } + + // Private + + 
*_allNodes(root) { + if (root === null) { + root = this._document; + } + + // Depth-first pre-order traversal + const nodeQueue = [root]; + while (nodeQueue.length > 0) { + const node = nodeQueue.pop(); + + yield node; + + const childNodes = node.childNodes; + if (typeof childNodes !== 'undefined') { + for (let i = childNodes.length - 1; i >= 0; --i) { + nodeQueue.push(childNodes[i]); + } + } + } + } +} diff --git a/ext/bg/legal.html b/ext/bg/legal.html index 80364943..cda3dcc7 100644 --- a/ext/bg/legal.html +++ b/ext/bg/legal.html @@ -191,6 +191,33 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +
+

parse5 v6.0.1

+
+
+Copyright (c) 2013-2019 Ivan Nikulin (ifaaan@gmail.com, https://github.com/inikulin)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+
+
+ diff --git a/ext/mixed/lib/parse5.js b/ext/mixed/lib/parse5.js new file mode 100644 index 00000000..1519dedc --- /dev/null +++ b/ext/mixed/lib/parse5.js @@ -0,0 +1,7985 @@ +(function(f){if(typeof exports==="object"&&typeof module!=="undefined"){module.exports=f()}else if(typeof define==="function"&&define.amd){define([],f)}else{var g;if(typeof window!=="undefined"){g=window}else if(typeof global!=="undefined"){g=global}else if(typeof self!=="undefined"){g=self}else{g=this}g.parse5 = f()}})(function(){var define,module,exports;return (function(){function r(e,n,t){function o(i,f){if(!n[i]){if(!e[i]){var c="function"==typeof require&&require;if(!f&&c)return c(i,!0);if(u)return u(i,!0);var a=new Error("Cannot find module '"+i+"'");throw a.code="MODULE_NOT_FOUND",a}var p=n[i]={exports:{}};e[i][0].call(p.exports,function(r){var n=e[i][1][r];return o(n||r)},p,p.exports,r,e,n,t)}return n[i].exports}for(var u="function"==typeof require&&require,i=0;i -1) { + return DOCUMENT_MODE.QUIRKS; + } + + let prefixes = systemId === null ? QUIRKS_MODE_NO_SYSTEM_ID_PUBLIC_ID_PREFIXES : QUIRKS_MODE_PUBLIC_ID_PREFIXES; + + if (hasPrefix(publicId, prefixes)) { + return DOCUMENT_MODE.QUIRKS; + } + + prefixes = + systemId === null ? 
LIMITED_QUIRKS_PUBLIC_ID_PREFIXES : LIMITED_QUIRKS_WITH_SYSTEM_ID_PUBLIC_ID_PREFIXES; + + if (hasPrefix(publicId, prefixes)) { + return DOCUMENT_MODE.LIMITED_QUIRKS; + } + } + + return DOCUMENT_MODE.NO_QUIRKS; +}; + +exports.serializeContent = function(name, publicId, systemId) { + let str = '!DOCTYPE '; + + if (name) { + str += name; + } + + if (publicId) { + str += ' PUBLIC ' + enquoteDoctypeId(publicId); + } else if (systemId) { + str += ' SYSTEM'; + } + + if (systemId !== null) { + str += ' ' + enquoteDoctypeId(systemId); + } + + return str; +}; + +},{"./html":4}],2:[function(require,module,exports){ +'use strict'; + +module.exports = { + controlCharacterInInputStream: 'control-character-in-input-stream', + noncharacterInInputStream: 'noncharacter-in-input-stream', + surrogateInInputStream: 'surrogate-in-input-stream', + nonVoidHtmlElementStartTagWithTrailingSolidus: 'non-void-html-element-start-tag-with-trailing-solidus', + endTagWithAttributes: 'end-tag-with-attributes', + endTagWithTrailingSolidus: 'end-tag-with-trailing-solidus', + unexpectedSolidusInTag: 'unexpected-solidus-in-tag', + unexpectedNullCharacter: 'unexpected-null-character', + unexpectedQuestionMarkInsteadOfTagName: 'unexpected-question-mark-instead-of-tag-name', + invalidFirstCharacterOfTagName: 'invalid-first-character-of-tag-name', + unexpectedEqualsSignBeforeAttributeName: 'unexpected-equals-sign-before-attribute-name', + missingEndTagName: 'missing-end-tag-name', + unexpectedCharacterInAttributeName: 'unexpected-character-in-attribute-name', + unknownNamedCharacterReference: 'unknown-named-character-reference', + missingSemicolonAfterCharacterReference: 'missing-semicolon-after-character-reference', + unexpectedCharacterAfterDoctypeSystemIdentifier: 'unexpected-character-after-doctype-system-identifier', + unexpectedCharacterInUnquotedAttributeValue: 'unexpected-character-in-unquoted-attribute-value', + eofBeforeTagName: 'eof-before-tag-name', + eofInTag: 'eof-in-tag', + 
missingAttributeValue: 'missing-attribute-value', + missingWhitespaceBetweenAttributes: 'missing-whitespace-between-attributes', + missingWhitespaceAfterDoctypePublicKeyword: 'missing-whitespace-after-doctype-public-keyword', + missingWhitespaceBetweenDoctypePublicAndSystemIdentifiers: + 'missing-whitespace-between-doctype-public-and-system-identifiers', + missingWhitespaceAfterDoctypeSystemKeyword: 'missing-whitespace-after-doctype-system-keyword', + missingQuoteBeforeDoctypePublicIdentifier: 'missing-quote-before-doctype-public-identifier', + missingQuoteBeforeDoctypeSystemIdentifier: 'missing-quote-before-doctype-system-identifier', + missingDoctypePublicIdentifier: 'missing-doctype-public-identifier', + missingDoctypeSystemIdentifier: 'missing-doctype-system-identifier', + abruptDoctypePublicIdentifier: 'abrupt-doctype-public-identifier', + abruptDoctypeSystemIdentifier: 'abrupt-doctype-system-identifier', + cdataInHtmlContent: 'cdata-in-html-content', + incorrectlyOpenedComment: 'incorrectly-opened-comment', + eofInScriptHtmlCommentLikeText: 'eof-in-script-html-comment-like-text', + eofInDoctype: 'eof-in-doctype', + nestedComment: 'nested-comment', + abruptClosingOfEmptyComment: 'abrupt-closing-of-empty-comment', + eofInComment: 'eof-in-comment', + incorrectlyClosedComment: 'incorrectly-closed-comment', + eofInCdata: 'eof-in-cdata', + absenceOfDigitsInNumericCharacterReference: 'absence-of-digits-in-numeric-character-reference', + nullCharacterReference: 'null-character-reference', + surrogateCharacterReference: 'surrogate-character-reference', + characterReferenceOutsideUnicodeRange: 'character-reference-outside-unicode-range', + controlCharacterReference: 'control-character-reference', + noncharacterCharacterReference: 'noncharacter-character-reference', + missingWhitespaceBeforeDoctypeName: 'missing-whitespace-before-doctype-name', + missingDoctypeName: 'missing-doctype-name', + invalidCharacterSequenceAfterDoctypeName: 
'invalid-character-sequence-after-doctype-name', + duplicateAttribute: 'duplicate-attribute', + nonConformingDoctype: 'non-conforming-doctype', + missingDoctype: 'missing-doctype', + misplacedDoctype: 'misplaced-doctype', + endTagWithoutMatchingOpenElement: 'end-tag-without-matching-open-element', + closingOfElementWithOpenChildElements: 'closing-of-element-with-open-child-elements', + disallowedContentInNoscriptInHead: 'disallowed-content-in-noscript-in-head', + openElementsLeftAfterEof: 'open-elements-left-after-eof', + abandonedHeadElementChild: 'abandoned-head-element-child', + misplacedStartTagForHeadElement: 'misplaced-start-tag-for-head-element', + nestedNoscriptInHead: 'nested-noscript-in-head', + eofInElementThatCanContainOnlyText: 'eof-in-element-that-can-contain-only-text' +}; + +},{}],3:[function(require,module,exports){ +'use strict'; + +const Tokenizer = require('../tokenizer'); +const HTML = require('./html'); + +//Aliases +const $ = HTML.TAG_NAMES; +const NS = HTML.NAMESPACES; +const ATTRS = HTML.ATTRS; + +//MIME types +const MIME_TYPES = { + TEXT_HTML: 'text/html', + APPLICATION_XML: 'application/xhtml+xml' +}; + +//Attributes +const DEFINITION_URL_ATTR = 'definitionurl'; +const ADJUSTED_DEFINITION_URL_ATTR = 'definitionURL'; +const SVG_ATTRS_ADJUSTMENT_MAP = { + attributename: 'attributeName', + attributetype: 'attributeType', + basefrequency: 'baseFrequency', + baseprofile: 'baseProfile', + calcmode: 'calcMode', + clippathunits: 'clipPathUnits', + diffuseconstant: 'diffuseConstant', + edgemode: 'edgeMode', + filterunits: 'filterUnits', + glyphref: 'glyphRef', + gradienttransform: 'gradientTransform', + gradientunits: 'gradientUnits', + kernelmatrix: 'kernelMatrix', + kernelunitlength: 'kernelUnitLength', + keypoints: 'keyPoints', + keysplines: 'keySplines', + keytimes: 'keyTimes', + lengthadjust: 'lengthAdjust', + limitingconeangle: 'limitingConeAngle', + markerheight: 'markerHeight', + markerunits: 'markerUnits', + markerwidth: 'markerWidth', + 
maskcontentunits: 'maskContentUnits', + maskunits: 'maskUnits', + numoctaves: 'numOctaves', + pathlength: 'pathLength', + patterncontentunits: 'patternContentUnits', + patterntransform: 'patternTransform', + patternunits: 'patternUnits', + pointsatx: 'pointsAtX', + pointsaty: 'pointsAtY', + pointsatz: 'pointsAtZ', + preservealpha: 'preserveAlpha', + preserveaspectratio: 'preserveAspectRatio', + primitiveunits: 'primitiveUnits', + refx: 'refX', + refy: 'refY', + repeatcount: 'repeatCount', + repeatdur: 'repeatDur', + requiredextensions: 'requiredExtensions', + requiredfeatures: 'requiredFeatures', + specularconstant: 'specularConstant', + specularexponent: 'specularExponent', + spreadmethod: 'spreadMethod', + startoffset: 'startOffset', + stddeviation: 'stdDeviation', + stitchtiles: 'stitchTiles', + surfacescale: 'surfaceScale', + systemlanguage: 'systemLanguage', + tablevalues: 'tableValues', + targetx: 'targetX', + targety: 'targetY', + textlength: 'textLength', + viewbox: 'viewBox', + viewtarget: 'viewTarget', + xchannelselector: 'xChannelSelector', + ychannelselector: 'yChannelSelector', + zoomandpan: 'zoomAndPan' +}; + +const XML_ATTRS_ADJUSTMENT_MAP = { + 'xlink:actuate': { prefix: 'xlink', name: 'actuate', namespace: NS.XLINK }, + 'xlink:arcrole': { prefix: 'xlink', name: 'arcrole', namespace: NS.XLINK }, + 'xlink:href': { prefix: 'xlink', name: 'href', namespace: NS.XLINK }, + 'xlink:role': { prefix: 'xlink', name: 'role', namespace: NS.XLINK }, + 'xlink:show': { prefix: 'xlink', name: 'show', namespace: NS.XLINK }, + 'xlink:title': { prefix: 'xlink', name: 'title', namespace: NS.XLINK }, + 'xlink:type': { prefix: 'xlink', name: 'type', namespace: NS.XLINK }, + 'xml:base': { prefix: 'xml', name: 'base', namespace: NS.XML }, + 'xml:lang': { prefix: 'xml', name: 'lang', namespace: NS.XML }, + 'xml:space': { prefix: 'xml', name: 'space', namespace: NS.XML }, + xmlns: { prefix: '', name: 'xmlns', namespace: NS.XMLNS }, + 'xmlns:xlink': { prefix: 'xmlns', name: 
'xlink', namespace: NS.XMLNS } +}; + +//SVG tag names adjustment map +const SVG_TAG_NAMES_ADJUSTMENT_MAP = (exports.SVG_TAG_NAMES_ADJUSTMENT_MAP = { + altglyph: 'altGlyph', + altglyphdef: 'altGlyphDef', + altglyphitem: 'altGlyphItem', + animatecolor: 'animateColor', + animatemotion: 'animateMotion', + animatetransform: 'animateTransform', + clippath: 'clipPath', + feblend: 'feBlend', + fecolormatrix: 'feColorMatrix', + fecomponenttransfer: 'feComponentTransfer', + fecomposite: 'feComposite', + feconvolvematrix: 'feConvolveMatrix', + fediffuselighting: 'feDiffuseLighting', + fedisplacementmap: 'feDisplacementMap', + fedistantlight: 'feDistantLight', + feflood: 'feFlood', + fefunca: 'feFuncA', + fefuncb: 'feFuncB', + fefuncg: 'feFuncG', + fefuncr: 'feFuncR', + fegaussianblur: 'feGaussianBlur', + feimage: 'feImage', + femerge: 'feMerge', + femergenode: 'feMergeNode', + femorphology: 'feMorphology', + feoffset: 'feOffset', + fepointlight: 'fePointLight', + fespecularlighting: 'feSpecularLighting', + fespotlight: 'feSpotLight', + fetile: 'feTile', + feturbulence: 'feTurbulence', + foreignobject: 'foreignObject', + glyphref: 'glyphRef', + lineargradient: 'linearGradient', + radialgradient: 'radialGradient', + textpath: 'textPath' +}); + +//Tags that causes exit from foreign content +const EXITS_FOREIGN_CONTENT = { + [$.B]: true, + [$.BIG]: true, + [$.BLOCKQUOTE]: true, + [$.BODY]: true, + [$.BR]: true, + [$.CENTER]: true, + [$.CODE]: true, + [$.DD]: true, + [$.DIV]: true, + [$.DL]: true, + [$.DT]: true, + [$.EM]: true, + [$.EMBED]: true, + [$.H1]: true, + [$.H2]: true, + [$.H3]: true, + [$.H4]: true, + [$.H5]: true, + [$.H6]: true, + [$.HEAD]: true, + [$.HR]: true, + [$.I]: true, + [$.IMG]: true, + [$.LI]: true, + [$.LISTING]: true, + [$.MENU]: true, + [$.META]: true, + [$.NOBR]: true, + [$.OL]: true, + [$.P]: true, + [$.PRE]: true, + [$.RUBY]: true, + [$.S]: true, + [$.SMALL]: true, + [$.SPAN]: true, + [$.STRONG]: true, + [$.STRIKE]: true, + [$.SUB]: true, + [$.SUP]: 
true, + [$.TABLE]: true, + [$.TT]: true, + [$.U]: true, + [$.UL]: true, + [$.VAR]: true +}; + +//Check exit from foreign content +exports.causesExit = function(startTagToken) { + const tn = startTagToken.tagName; + const isFontWithAttrs = + tn === $.FONT && + (Tokenizer.getTokenAttr(startTagToken, ATTRS.COLOR) !== null || + Tokenizer.getTokenAttr(startTagToken, ATTRS.SIZE) !== null || + Tokenizer.getTokenAttr(startTagToken, ATTRS.FACE) !== null); + + return isFontWithAttrs ? true : EXITS_FOREIGN_CONTENT[tn]; +}; + +//Token adjustments +exports.adjustTokenMathMLAttrs = function(token) { + for (let i = 0; i < token.attrs.length; i++) { + if (token.attrs[i].name === DEFINITION_URL_ATTR) { + token.attrs[i].name = ADJUSTED_DEFINITION_URL_ATTR; + break; + } + } +}; + +exports.adjustTokenSVGAttrs = function(token) { + for (let i = 0; i < token.attrs.length; i++) { + const adjustedAttrName = SVG_ATTRS_ADJUSTMENT_MAP[token.attrs[i].name]; + + if (adjustedAttrName) { + token.attrs[i].name = adjustedAttrName; + } + } +}; + +exports.adjustTokenXMLAttrs = function(token) { + for (let i = 0; i < token.attrs.length; i++) { + const adjustedAttrEntry = XML_ATTRS_ADJUSTMENT_MAP[token.attrs[i].name]; + + if (adjustedAttrEntry) { + token.attrs[i].prefix = adjustedAttrEntry.prefix; + token.attrs[i].name = adjustedAttrEntry.name; + token.attrs[i].namespace = adjustedAttrEntry.namespace; + } + } +}; + +exports.adjustTokenSVGTagName = function(token) { + const adjustedTagName = SVG_TAG_NAMES_ADJUSTMENT_MAP[token.tagName]; + + if (adjustedTagName) { + token.tagName = adjustedTagName; + } +}; + +//Integration points +function isMathMLTextIntegrationPoint(tn, ns) { + return ns === NS.MATHML && (tn === $.MI || tn === $.MO || tn === $.MN || tn === $.MS || tn === $.MTEXT); +} + +function isHtmlIntegrationPoint(tn, ns, attrs) { + if (ns === NS.MATHML && tn === $.ANNOTATION_XML) { + for (let i = 0; i < attrs.length; i++) { + if (attrs[i].name === ATTRS.ENCODING) { + const value = 
attrs[i].value.toLowerCase(); + + return value === MIME_TYPES.TEXT_HTML || value === MIME_TYPES.APPLICATION_XML; + } + } + } + + return ns === NS.SVG && (tn === $.FOREIGN_OBJECT || tn === $.DESC || tn === $.TITLE); +} + +exports.isIntegrationPoint = function(tn, ns, attrs, foreignNS) { + if ((!foreignNS || foreignNS === NS.HTML) && isHtmlIntegrationPoint(tn, ns, attrs)) { + return true; + } + + if ((!foreignNS || foreignNS === NS.MATHML) && isMathMLTextIntegrationPoint(tn, ns)) { + return true; + } + + return false; +}; + +},{"../tokenizer":19,"./html":4}],4:[function(require,module,exports){ +'use strict'; + +const NS = (exports.NAMESPACES = { + HTML: 'http://www.w3.org/1999/xhtml', + MATHML: 'http://www.w3.org/1998/Math/MathML', + SVG: 'http://www.w3.org/2000/svg', + XLINK: 'http://www.w3.org/1999/xlink', + XML: 'http://www.w3.org/XML/1998/namespace', + XMLNS: 'http://www.w3.org/2000/xmlns/' +}); + +exports.ATTRS = { + TYPE: 'type', + ACTION: 'action', + ENCODING: 'encoding', + PROMPT: 'prompt', + NAME: 'name', + COLOR: 'color', + FACE: 'face', + SIZE: 'size' +}; + +exports.DOCUMENT_MODE = { + NO_QUIRKS: 'no-quirks', + QUIRKS: 'quirks', + LIMITED_QUIRKS: 'limited-quirks' +}; + +const $ = (exports.TAG_NAMES = { + A: 'a', + ADDRESS: 'address', + ANNOTATION_XML: 'annotation-xml', + APPLET: 'applet', + AREA: 'area', + ARTICLE: 'article', + ASIDE: 'aside', + + B: 'b', + BASE: 'base', + BASEFONT: 'basefont', + BGSOUND: 'bgsound', + BIG: 'big', + BLOCKQUOTE: 'blockquote', + BODY: 'body', + BR: 'br', + BUTTON: 'button', + + CAPTION: 'caption', + CENTER: 'center', + CODE: 'code', + COL: 'col', + COLGROUP: 'colgroup', + + DD: 'dd', + DESC: 'desc', + DETAILS: 'details', + DIALOG: 'dialog', + DIR: 'dir', + DIV: 'div', + DL: 'dl', + DT: 'dt', + + EM: 'em', + EMBED: 'embed', + + FIELDSET: 'fieldset', + FIGCAPTION: 'figcaption', + FIGURE: 'figure', + FONT: 'font', + FOOTER: 'footer', + FOREIGN_OBJECT: 'foreignObject', + FORM: 'form', + FRAME: 'frame', + FRAMESET: 'frameset', + 
+ H1: 'h1', + H2: 'h2', + H3: 'h3', + H4: 'h4', + H5: 'h5', + H6: 'h6', + HEAD: 'head', + HEADER: 'header', + HGROUP: 'hgroup', + HR: 'hr', + HTML: 'html', + + I: 'i', + IMG: 'img', + IMAGE: 'image', + INPUT: 'input', + IFRAME: 'iframe', + + KEYGEN: 'keygen', + + LABEL: 'label', + LI: 'li', + LINK: 'link', + LISTING: 'listing', + + MAIN: 'main', + MALIGNMARK: 'malignmark', + MARQUEE: 'marquee', + MATH: 'math', + MENU: 'menu', + META: 'meta', + MGLYPH: 'mglyph', + MI: 'mi', + MO: 'mo', + MN: 'mn', + MS: 'ms', + MTEXT: 'mtext', + + NAV: 'nav', + NOBR: 'nobr', + NOFRAMES: 'noframes', + NOEMBED: 'noembed', + NOSCRIPT: 'noscript', + + OBJECT: 'object', + OL: 'ol', + OPTGROUP: 'optgroup', + OPTION: 'option', + + P: 'p', + PARAM: 'param', + PLAINTEXT: 'plaintext', + PRE: 'pre', + + RB: 'rb', + RP: 'rp', + RT: 'rt', + RTC: 'rtc', + RUBY: 'ruby', + + S: 's', + SCRIPT: 'script', + SECTION: 'section', + SELECT: 'select', + SOURCE: 'source', + SMALL: 'small', + SPAN: 'span', + STRIKE: 'strike', + STRONG: 'strong', + STYLE: 'style', + SUB: 'sub', + SUMMARY: 'summary', + SUP: 'sup', + + TABLE: 'table', + TBODY: 'tbody', + TEMPLATE: 'template', + TEXTAREA: 'textarea', + TFOOT: 'tfoot', + TD: 'td', + TH: 'th', + THEAD: 'thead', + TITLE: 'title', + TR: 'tr', + TRACK: 'track', + TT: 'tt', + + U: 'u', + UL: 'ul', + + SVG: 'svg', + + VAR: 'var', + + WBR: 'wbr', + + XMP: 'xmp' +}); + +exports.SPECIAL_ELEMENTS = { + [NS.HTML]: { + [$.ADDRESS]: true, + [$.APPLET]: true, + [$.AREA]: true, + [$.ARTICLE]: true, + [$.ASIDE]: true, + [$.BASE]: true, + [$.BASEFONT]: true, + [$.BGSOUND]: true, + [$.BLOCKQUOTE]: true, + [$.BODY]: true, + [$.BR]: true, + [$.BUTTON]: true, + [$.CAPTION]: true, + [$.CENTER]: true, + [$.COL]: true, + [$.COLGROUP]: true, + [$.DD]: true, + [$.DETAILS]: true, + [$.DIR]: true, + [$.DIV]: true, + [$.DL]: true, + [$.DT]: true, + [$.EMBED]: true, + [$.FIELDSET]: true, + [$.FIGCAPTION]: true, + [$.FIGURE]: true, + [$.FOOTER]: true, + [$.FORM]: true, + [$.FRAME]: true, + 
[$.FRAMESET]: true, + [$.H1]: true, + [$.H2]: true, + [$.H3]: true, + [$.H4]: true, + [$.H5]: true, + [$.H6]: true, + [$.HEAD]: true, + [$.HEADER]: true, + [$.HGROUP]: true, + [$.HR]: true, + [$.HTML]: true, + [$.IFRAME]: true, + [$.IMG]: true, + [$.INPUT]: true, + [$.LI]: true, + [$.LINK]: true, + [$.LISTING]: true, + [$.MAIN]: true, + [$.MARQUEE]: true, + [$.MENU]: true, + [$.META]: true, + [$.NAV]: true, + [$.NOEMBED]: true, + [$.NOFRAMES]: true, + [$.NOSCRIPT]: true, + [$.OBJECT]: true, + [$.OL]: true, + [$.P]: true, + [$.PARAM]: true, + [$.PLAINTEXT]: true, + [$.PRE]: true, + [$.SCRIPT]: true, + [$.SECTION]: true, + [$.SELECT]: true, + [$.SOURCE]: true, + [$.STYLE]: true, + [$.SUMMARY]: true, + [$.TABLE]: true, + [$.TBODY]: true, + [$.TD]: true, + [$.TEMPLATE]: true, + [$.TEXTAREA]: true, + [$.TFOOT]: true, + [$.TH]: true, + [$.THEAD]: true, + [$.TITLE]: true, + [$.TR]: true, + [$.TRACK]: true, + [$.UL]: true, + [$.WBR]: true, + [$.XMP]: true + }, + [NS.MATHML]: { + [$.MI]: true, + [$.MO]: true, + [$.MN]: true, + [$.MS]: true, + [$.MTEXT]: true, + [$.ANNOTATION_XML]: true + }, + [NS.SVG]: { + [$.TITLE]: true, + [$.FOREIGN_OBJECT]: true, + [$.DESC]: true + } +}; + +},{}],5:[function(require,module,exports){ +'use strict'; + +const UNDEFINED_CODE_POINTS = [ + 0xfffe, + 0xffff, + 0x1fffe, + 0x1ffff, + 0x2fffe, + 0x2ffff, + 0x3fffe, + 0x3ffff, + 0x4fffe, + 0x4ffff, + 0x5fffe, + 0x5ffff, + 0x6fffe, + 0x6ffff, + 0x7fffe, + 0x7ffff, + 0x8fffe, + 0x8ffff, + 0x9fffe, + 0x9ffff, + 0xafffe, + 0xaffff, + 0xbfffe, + 0xbffff, + 0xcfffe, + 0xcffff, + 0xdfffe, + 0xdffff, + 0xefffe, + 0xeffff, + 0xffffe, + 0xfffff, + 0x10fffe, + 0x10ffff +]; + +exports.REPLACEMENT_CHARACTER = '\uFFFD'; + +exports.CODE_POINTS = { + EOF: -1, + NULL: 0x00, + TABULATION: 0x09, + CARRIAGE_RETURN: 0x0d, + LINE_FEED: 0x0a, + FORM_FEED: 0x0c, + SPACE: 0x20, + EXCLAMATION_MARK: 0x21, + QUOTATION_MARK: 0x22, + NUMBER_SIGN: 0x23, + AMPERSAND: 0x26, + APOSTROPHE: 0x27, + HYPHEN_MINUS: 0x2d, + SOLIDUS: 
0x2f, + DIGIT_0: 0x30, + DIGIT_9: 0x39, + SEMICOLON: 0x3b, + LESS_THAN_SIGN: 0x3c, + EQUALS_SIGN: 0x3d, + GREATER_THAN_SIGN: 0x3e, + QUESTION_MARK: 0x3f, + LATIN_CAPITAL_A: 0x41, + LATIN_CAPITAL_F: 0x46, + LATIN_CAPITAL_X: 0x58, + LATIN_CAPITAL_Z: 0x5a, + RIGHT_SQUARE_BRACKET: 0x5d, + GRAVE_ACCENT: 0x60, + LATIN_SMALL_A: 0x61, + LATIN_SMALL_F: 0x66, + LATIN_SMALL_X: 0x78, + LATIN_SMALL_Z: 0x7a, + REPLACEMENT_CHARACTER: 0xfffd +}; + +exports.CODE_POINT_SEQUENCES = { + DASH_DASH_STRING: [0x2d, 0x2d], //-- + DOCTYPE_STRING: [0x44, 0x4f, 0x43, 0x54, 0x59, 0x50, 0x45], //DOCTYPE + CDATA_START_STRING: [0x5b, 0x43, 0x44, 0x41, 0x54, 0x41, 0x5b], //[CDATA[ + SCRIPT_STRING: [0x73, 0x63, 0x72, 0x69, 0x70, 0x74], //script + PUBLIC_STRING: [0x50, 0x55, 0x42, 0x4c, 0x49, 0x43], //PUBLIC + SYSTEM_STRING: [0x53, 0x59, 0x53, 0x54, 0x45, 0x4d] //SYSTEM +}; + +//Surrogates +exports.isSurrogate = function(cp) { + return cp >= 0xd800 && cp <= 0xdfff; +}; + +exports.isSurrogatePair = function(cp) { + return cp >= 0xdc00 && cp <= 0xdfff; +}; + +exports.getSurrogatePairCodePoint = function(cp1, cp2) { + return (cp1 - 0xd800) * 0x400 + 0x2400 + cp2; +}; + +//NOTE: excluding NULL and ASCII whitespace +exports.isControlCodePoint = function(cp) { + return ( + (cp !== 0x20 && cp !== 0x0a && cp !== 0x0d && cp !== 0x09 && cp !== 0x0c && cp >= 0x01 && cp <= 0x1f) || + (cp >= 0x7f && cp <= 0x9f) + ); +}; + +exports.isUndefinedCodePoint = function(cp) { + return (cp >= 0xfdd0 && cp <= 0xfdef) || UNDEFINED_CODE_POINTS.indexOf(cp) > -1; +}; + +},{}],6:[function(require,module,exports){ +'use strict'; + +const Mixin = require('../../utils/mixin'); + +class ErrorReportingMixinBase extends Mixin { + constructor(host, opts) { + super(host); + + this.posTracker = null; + this.onParseError = opts.onParseError; + } + + _setErrorLocation(err) { + err.startLine = err.endLine = this.posTracker.line; + err.startCol = err.endCol = this.posTracker.col; + err.startOffset = err.endOffset = this.posTracker.offset; 
+ } + + _reportError(code) { + const err = { + code: code, + startLine: -1, + startCol: -1, + startOffset: -1, + endLine: -1, + endCol: -1, + endOffset: -1 + }; + + this._setErrorLocation(err); + this.onParseError(err); + } + + _getOverriddenMethods(mxn) { + return { + _err(code) { + mxn._reportError(code); + } + }; + } +} + +module.exports = ErrorReportingMixinBase; + +},{"../../utils/mixin":24}],7:[function(require,module,exports){ +'use strict'; + +const ErrorReportingMixinBase = require('./mixin-base'); +const ErrorReportingTokenizerMixin = require('./tokenizer-mixin'); +const LocationInfoTokenizerMixin = require('../location-info/tokenizer-mixin'); +const Mixin = require('../../utils/mixin'); + +class ErrorReportingParserMixin extends ErrorReportingMixinBase { + constructor(parser, opts) { + super(parser, opts); + + this.opts = opts; + this.ctLoc = null; + this.locBeforeToken = false; + } + + _setErrorLocation(err) { + if (this.ctLoc) { + err.startLine = this.ctLoc.startLine; + err.startCol = this.ctLoc.startCol; + err.startOffset = this.ctLoc.startOffset; + + err.endLine = this.locBeforeToken ? this.ctLoc.startLine : this.ctLoc.endLine; + err.endCol = this.locBeforeToken ? this.ctLoc.startCol : this.ctLoc.endCol; + err.endOffset = this.locBeforeToken ? 
this.ctLoc.startOffset : this.ctLoc.endOffset; + } + } + + _getOverriddenMethods(mxn, orig) { + return { + _bootstrap(document, fragmentContext) { + orig._bootstrap.call(this, document, fragmentContext); + + Mixin.install(this.tokenizer, ErrorReportingTokenizerMixin, mxn.opts); + Mixin.install(this.tokenizer, LocationInfoTokenizerMixin); + }, + + _processInputToken(token) { + mxn.ctLoc = token.location; + + orig._processInputToken.call(this, token); + }, + + _err(code, options) { + mxn.locBeforeToken = options && options.beforeToken; + mxn._reportError(code); + } + }; + } +} + +module.exports = ErrorReportingParserMixin; + +},{"../../utils/mixin":24,"../location-info/tokenizer-mixin":12,"./mixin-base":6,"./tokenizer-mixin":9}],8:[function(require,module,exports){ +'use strict'; + +const ErrorReportingMixinBase = require('./mixin-base'); +const PositionTrackingPreprocessorMixin = require('../position-tracking/preprocessor-mixin'); +const Mixin = require('../../utils/mixin'); + +class ErrorReportingPreprocessorMixin extends ErrorReportingMixinBase { + constructor(preprocessor, opts) { + super(preprocessor, opts); + + this.posTracker = Mixin.install(preprocessor, PositionTrackingPreprocessorMixin); + this.lastErrOffset = -1; + } + + _reportError(code) { + //NOTE: avoid reporting error twice on advance/retreat + if (this.lastErrOffset !== this.posTracker.offset) { + this.lastErrOffset = this.posTracker.offset; + super._reportError(code); + } + } +} + +module.exports = ErrorReportingPreprocessorMixin; + +},{"../../utils/mixin":24,"../position-tracking/preprocessor-mixin":13,"./mixin-base":6}],9:[function(require,module,exports){ +'use strict'; + +const ErrorReportingMixinBase = require('./mixin-base'); +const ErrorReportingPreprocessorMixin = require('./preprocessor-mixin'); +const Mixin = require('../../utils/mixin'); + +class ErrorReportingTokenizerMixin extends ErrorReportingMixinBase { + constructor(tokenizer, opts) { + super(tokenizer, opts); + + const 
/**
 * Parser mixin that attaches source code location info to the nodes
 * created by the parser.
 *
 * The location data itself comes from the `location` property of tokens;
 * this mixin stores it on nodes through the tree adapter's
 * `setNodeSourceCodeLocation` / `updateNodeSourceCodeLocation` methods.
 */
class LocationInfoParserMixin extends Mixin {
    /**
     * @param {Object} parser - Host parser; its `treeAdapter` is cached on the mixin.
     */
    constructor(parser) {
        super(parser);

        this.parser = parser;
        this.treeAdapter = this.parser.treeAdapter;
        // Filled in by the `_bootstrap` override from the tokenizer mixin.
        this.posTracker = null;
        // Start tag token remembered by `_appendElement` / `_insertElement` / `_insertTemplate`.
        this.lastStartTagToken = null;
        // Last value returned by `_findFosterParentingLocation`.
        this.lastFosterParentingLocation = null;
        // Token most recently passed to `_processToken` / `_processTokenInForeignContent`.
        this.currentToken = null;
    }

    /**
     * Stores the start location of `element`. The location is a copy of the
     * last start tag token's location with an extra `startTag` property, or
     * `null` when no start tag token is pending.
     *
     * @param {Object} element - Node that is being attached to the tree.
     */
    _setStartLocation(element) {
        let loc = null;

        if (this.lastStartTagToken) {
            loc = Object.assign({}, this.lastStartTagToken.location);
            loc.startTag = this.lastStartTagToken.location;
        }

        this.treeAdapter.setNodeSourceCodeLocation(element, loc);
    }

    /**
     * Updates the end location of `element` from the token that closed it.
     * Does nothing when the element has no location or the token carries none.
     *
     * @param {Object} element - Node whose end position is being recorded.
     * @param {Object} closingToken - Token that caused the element to be closed.
     */
    _setEndLocation(element, closingToken) {
        const loc = this.treeAdapter.getNodeSourceCodeLocation(element);

        if (loc) {
            if (closingToken.location) {
                const ctLoc = closingToken.location;
                const tn = this.treeAdapter.getTagName(element);

                // NOTE: For cases like <p> <p> </p> - First 'p' closes without a closing
                // tag and for cases like <td> <p> </td> - 'p' closes without a closing tag.
                const isClosingEndTag = closingToken.type === Tokenizer.END_TAG_TOKEN && tn === closingToken.tagName;
                const endLoc = {};
                if (isClosingEndTag) {
                    // The element's own end tag: it ends where the end tag ends.
                    endLoc.endTag = Object.assign({}, ctLoc);
                    endLoc.endLine = ctLoc.endLine;
                    endLoc.endCol = ctLoc.endCol;
                    endLoc.endOffset = ctLoc.endOffset;
                } else {
                    // Closed by some other token: it ends where that token starts.
                    endLoc.endLine = ctLoc.startLine;
                    endLoc.endCol = ctLoc.startCol;
                    endLoc.endOffset = ctLoc.startOffset;
                }

                this.treeAdapter.updateNodeSourceCodeLocation(element, endLoc);
            }
        }
    }

    /**
     * @param {LocationInfoParserMixin} mxn - This mixin instance.
     * @param {Object} orig - Original host methods.
     * @returns {Object} Replacement methods; they run with the host parser as `this`.
     */
    _getOverriddenMethods(mxn, orig) {
        return {
            _bootstrap(document, fragmentContext) {
                orig._bootstrap.call(this, document, fragmentContext);

                // Reset the per-parse state of the mixin.
                mxn.lastStartTagToken = null;
                mxn.lastFosterParentingLocation = null;
                mxn.currentToken = null;

                const tokenizerMixin = Mixin.install(this.tokenizer, LocationInfoTokenizerMixin);

                mxn.posTracker = tokenizerMixin.posTracker;

                // Record an end location whenever the open element stack reports a popped item.
                Mixin.install(this.openElements, LocationInfoOpenElementStackMixin, {
                    onItemPop: function(element) {
                        mxn._setEndLocation(element, mxn.currentToken);
                    }
                });
            },

            _runParsingLoop(scriptHandler) {
                orig._runParsingLoop.call(this, scriptHandler);

                // NOTE: generate location info for elements
                // that remain on the open element stack
                for (let i = this.openElements.stackTop; i >= 0; i--) {
                    mxn._setEndLocation(this.openElements.items[i], mxn.currentToken);
                }
            },

            //Token processing
            _processTokenInForeignContent(token) {
                mxn.currentToken = token;
                orig._processTokenInForeignContent.call(this, token);
            },

            _processToken(token) {
                mxn.currentToken = token;
                orig._processToken.call(this, token);

                //NOTE: <body> and <html> are never popped from the stack, so we need to update
                //their end location explicitly.
                const requireExplicitUpdate =
                    token.type === Tokenizer.END_TAG_TOKEN &&
                    (token.tagName === $.HTML || (token.tagName === $.BODY && this.openElements.hasInScope($.BODY)));

                if (requireExplicitUpdate) {
                    // Walk the stack from the top and update the first element named like the end tag.
                    for (let i = this.openElements.stackTop; i >= 0; i--) {
                        const element = this.openElements.items[i];

                        if (this.treeAdapter.getTagName(element) === token.tagName) {
                            mxn._setEndLocation(element, token);
                            break;
                        }
                    }
                }
            },

            //Doctype
            _setDocumentType(token) {
                orig._setDocumentType.call(this, token);

                const documentChildren = this.treeAdapter.getChildNodes(this.document);
                const cnLength = documentChildren.length;

                // Attach the token location to the first document type node of the document.
                for (let i = 0; i < cnLength; i++) {
                    const node = documentChildren[i];

                    if (this.treeAdapter.isDocumentTypeNode(node)) {
                        this.treeAdapter.setNodeSourceCodeLocation(node, token.location);
                        break;
                    }
                }
            },

            //Elements
            _attachElementToTree(element) {
                //NOTE: _attachElementToTree is called from _appendElement, _insertElement and _insertTemplate methods.
                //So we will use the token location stored in these methods for the element.
                mxn._setStartLocation(element);
                mxn.lastStartTagToken = null;
                orig._attachElementToTree.call(this, element);
            },

            _appendElement(token, namespaceURI) {
                mxn.lastStartTagToken = token;
                orig._appendElement.call(this, token, namespaceURI);
            },

            _insertElement(token, namespaceURI) {
                mxn.lastStartTagToken = token;
                orig._insertElement.call(this, token, namespaceURI);
            },

            _insertTemplate(token) {
                mxn.lastStartTagToken = token;
                orig._insertTemplate.call(this, token);

                const tmplContent = this.treeAdapter.getTemplateContent(this.openElements.current);

                // The template content node gets an explicit `null` location.
                this.treeAdapter.setNodeSourceCodeLocation(tmplContent, null);
            },

            _insertFakeRootElement() {
                orig._insertFakeRootElement.call(this);
                // The fake root element gets an explicit `null` location.
                this.treeAdapter.setNodeSourceCodeLocation(this.openElements.current, null);
            },

            //Comments
            _appendCommentNode(token, parent) {
                orig._appendCommentNode.call(this, token, parent);

                // The comment is taken to be the last child of `parent` after the original call.
                const children = this.treeAdapter.getChildNodes(parent);
                const commentNode = children[children.length - 1];

                this.treeAdapter.setNodeSourceCodeLocation(commentNode, token.location);
            },

            //Text
            _findFosterParentingLocation() {
                //NOTE: store last foster parenting location, so we will be able to find inserted text
                //in case of foster parenting
                mxn.lastFosterParentingLocation = orig._findFosterParentingLocation.call(this);

                return mxn.lastFosterParentingLocation;
            },

            _insertCharacters(token) {
                orig._insertCharacters.call(this, token);

                const hasFosterParent = this._shouldFosterParentOnInsertion();

                // Parent that received the text: foster parent, then template content, then current element.
                const parent =
                    (hasFosterParent && mxn.lastFosterParentingLocation.parent) ||
                    this.openElements.currentTmplContent ||
                    this.openElements.current;

                const siblings = this.treeAdapter.getChildNodes(parent);

                // With a foster parenting `beforeElement` the text node sits right before that element,
                // otherwise it is the last child.
                const textNodeIdx =
                    hasFosterParent && mxn.lastFosterParentingLocation.beforeElement
                        ? siblings.indexOf(mxn.lastFosterParentingLocation.beforeElement) - 1
                        : siblings.length - 1;

                const textNode = siblings[textNodeIdx];

                //NOTE: if we have location assigned by another token, then just update end position
                const tnLoc = this.treeAdapter.getNodeSourceCodeLocation(textNode);

                if (tnLoc) {
                    const { endLine, endCol, endOffset } = token.location;
                    this.treeAdapter.updateNodeSourceCodeLocation(textNode, { endLine, endCol, endOffset });
                } else {
                    this.treeAdapter.setNodeSourceCodeLocation(textNode, token.location);
                }
            }
        };
    }
}
orig) { + const methods = { + _createStartTagToken() { + orig._createStartTagToken.call(this); + this.currentToken.location = mxn.ctLoc; + }, + + _createEndTagToken() { + orig._createEndTagToken.call(this); + this.currentToken.location = mxn.ctLoc; + }, + + _createCommentToken() { + orig._createCommentToken.call(this); + this.currentToken.location = mxn.ctLoc; + }, + + _createDoctypeToken(initialName) { + orig._createDoctypeToken.call(this, initialName); + this.currentToken.location = mxn.ctLoc; + }, + + _createCharacterToken(type, ch) { + orig._createCharacterToken.call(this, type, ch); + this.currentCharacterToken.location = mxn.ctLoc; + }, + + _createEOFToken() { + orig._createEOFToken.call(this); + this.currentToken.location = mxn._getCurrentLocation(); + }, + + _createAttr(attrNameFirstCh) { + orig._createAttr.call(this, attrNameFirstCh); + mxn.currentAttrLocation = mxn._getCurrentLocation(); + }, + + _leaveAttrName(toState) { + orig._leaveAttrName.call(this, toState); + mxn._attachCurrentAttrLocationInfo(); + }, + + _leaveAttrValue(toState) { + orig._leaveAttrValue.call(this, toState); + mxn._attachCurrentAttrLocationInfo(); + }, + + _emitCurrentToken() { + const ctLoc = this.currentToken.location; + + //NOTE: if we have pending character token make it's end location equal to the + //current token's start location. 
+ if (this.currentCharacterToken) { + this.currentCharacterToken.location.endLine = ctLoc.startLine; + this.currentCharacterToken.location.endCol = ctLoc.startCol; + this.currentCharacterToken.location.endOffset = ctLoc.startOffset; + } + + if (this.currentToken.type === Tokenizer.EOF_TOKEN) { + ctLoc.endLine = ctLoc.startLine; + ctLoc.endCol = ctLoc.startCol; + ctLoc.endOffset = ctLoc.startOffset; + } else { + ctLoc.endLine = mxn.posTracker.line; + ctLoc.endCol = mxn.posTracker.col + 1; + ctLoc.endOffset = mxn.posTracker.offset + 1; + } + + orig._emitCurrentToken.call(this); + }, + + _emitCurrentCharacterToken() { + const ctLoc = this.currentCharacterToken && this.currentCharacterToken.location; + + //NOTE: if we have character token and it's location wasn't set in the _emitCurrentToken(), + //then set it's location at the current preprocessor position. + //We don't need to increment preprocessor position, since character token + //emission is always forced by the start of the next character token here. + //So, we already have advanced position. 
/**
 * Preprocessor mixin that keeps track of the current line, column and
 * absolute offset while the host preprocessor moves through its buffer.
 */
class PositionTrackingPreprocessorMixin extends Mixin {
    /**
     * @param {Object} preprocessor - Host preprocessor the mixin is installed on.
     */
    constructor(preprocessor) {
        super(preprocessor);

        Object.assign(this, {
            preprocessor,
            isEol: false,
            lineStartPos: 0,
            droppedBufferSize: 0,

            offset: 0,
            col: 0,
            line: 1
        });
    }

    /**
     * @param {PositionTrackingPreprocessorMixin} mxn - This mixin instance.
     * @param {Object} orig - Original host methods.
     * @returns {Object} Replacement methods; they run with the host preprocessor as `this`.
     */
    _getOverriddenMethods(mxn, orig) {
        return {
            advance() {
                const nextPos = this.pos + 1;
                const nextCh = this.html[nextPos];

                //NOTE: LF should be in the last column of the line
                if (mxn.isEol) {
                    mxn.line += 1;
                    mxn.lineStartPos = nextPos;
                }

                //NOTE: a CR that is followed by LF does not end the line, the LF does
                const isLoneCr = nextCh === '\r' && this.html[nextPos + 1] !== '\n';

                mxn.isEol = nextCh === '\n' || isLoneCr;
                mxn.col = nextPos - mxn.lineStartPos + 1;
                mxn.offset = mxn.droppedBufferSize + nextPos;

                return orig.advance.call(this);
            },

            retreat() {
                orig.retreat.call(this);

                mxn.isEol = false;
                mxn.col = this.pos - mxn.lineStartPos + 1;
            },

            dropParsedChunk() {
                const posBeforeDrop = this.pos;

                orig.dropParsedChunk.call(this);

                //NOTE: number of positions the buffer was shortened by
                const dropped = posBeforeDrop - this.pos;

                mxn.lineStartPos -= dropped;
                mxn.droppedBufferSize += dropped;
                mxn.offset = mxn.droppedBufferSize + this.pos;
            }
        };
    }
}
//Const
//Maximum number of identical formatting elements kept after the last marker
const NOAH_ARK_CAPACITY = 3;

/**
 * List of formatting elements.
 *
 * Entries are either markers (`{ type: MARKER_ENTRY }`) or element entries
 * (`{ type: ELEMENT_ENTRY, element, token }`). `length` mirrors
 * `entries.length` and is maintained by every mutation method.
 */
class FormattingElementList {
    /**
     * @param {Object} treeAdapter - Adapter providing `getAttrList`, `getTagName` and `getNamespaceURI`.
     */
    constructor(treeAdapter) {
        this.length = 0;
        this.entries = [];
        this.treeAdapter = treeAdapter;
        this.bookmark = null;
    }

    //Noah Ark's condition
    //OPTIMIZATION: at first we try to find possible candidates for exclusion using
    //lightweight heuristics without thorough attributes check.
    /**
     * Collects entries after the last marker that have the same tag name,
     * namespace and attribute count as `newElement`.
     *
     * @param {Object} newElement - Element about to be pushed.
     * @returns {Array<{idx: number, attrs: Array}>} Candidates ordered from the newest entry to the
     *     oldest one; an empty array when there are fewer than NOAH_ARK_CAPACITY of them.
     */
    _getNoahArkConditionCandidates(newElement) {
        const candidates = [];

        if (this.length >= NOAH_ARK_CAPACITY) {
            const neAttrsLength = this.treeAdapter.getAttrList(newElement).length;
            const neTagName = this.treeAdapter.getTagName(newElement);
            const neNamespaceURI = this.treeAdapter.getNamespaceURI(newElement);

            for (let i = this.length - 1; i >= 0; i--) {
                const entry = this.entries[i];

                if (entry.type === FormattingElementList.MARKER_ENTRY) {
                    break;
                }

                const element = entry.element;
                const elementAttrs = this.treeAdapter.getAttrList(element);

                const isCandidate =
                    this.treeAdapter.getTagName(element) === neTagName &&
                    this.treeAdapter.getNamespaceURI(element) === neNamespaceURI &&
                    elementAttrs.length === neAttrsLength;

                if (isCandidate) {
                    candidates.push({ idx: i, attrs: elementAttrs });
                }
            }
        }

        return candidates.length < NOAH_ARK_CAPACITY ? [] : candidates;
    }

    /**
     * Makes room for `newElement`: when NOAH_ARK_CAPACITY or more entries after
     * the last marker are identical to it (same tag name, namespace and
     * attributes), the oldest of them are removed so that only
     * NOAH_ARK_CAPACITY - 1 remain.
     *
     * Every candidate is checked against every attribute. Filtering while
     * splicing inside an index loop would skip the candidate that follows a
     * removed one and could evict an entry although fewer than
     * NOAH_ARK_CAPACITY real matches exist.
     *
     * @param {Object} newElement - Element about to be pushed.
     */
    _ensureNoahArkCondition(newElement) {
        const candidates = this._getNoahArkConditionCandidates(newElement);

        if (candidates.length === 0) {
            return;
        }

        //NOTE: build attrs map for the new element so we can perform fast lookups
        const neAttrsMap = Object.create(null);

        for (const neAttr of this.treeAdapter.getAttrList(newElement)) {
            neAttrsMap[neAttr.name] = neAttr.value;
        }

        //NOTE: candidates already have the same number of attributes as the new element,
        //so it is enough to check that each candidate attribute has an equal counterpart.
        const matches = candidates.filter(candidate =>
            candidate.attrs.every(cAttr => neAttrsMap[cAttr.name] === cAttr.value)
        );

        if (matches.length < NOAH_ARK_CAPACITY) {
            return;
        }

        //NOTE: remove bottommost matches until Noah's Ark condition is no longer met.
        //Matches are ordered by descending entry index, so walking them forward removes
        //higher indices first and keeps the remaining stored indices valid.
        for (let i = NOAH_ARK_CAPACITY - 1; i < matches.length; i++) {
            this.entries.splice(matches[i].idx, 1);
            this.length--;
        }
    }

    //Mutations
    /** Appends a marker entry. */
    insertMarker() {
        this.entries.push({ type: FormattingElementList.MARKER_ENTRY });
        this.length++;
    }

    /**
     * Appends an element entry after enforcing the Noah's Ark condition.
     *
     * @param {Object} element - Formatting element.
     * @param {Object} token - Token stored alongside the element.
     */
    pushElement(element, token) {
        this._ensureNoahArkCondition(element);

        this.entries.push({
            type: FormattingElementList.ELEMENT_ENTRY,
            element: element,
            token: token
        });

        this.length++;
    }

    /**
     * Inserts an element entry right after the entry referenced by `this.bookmark`.
     * When the bookmark entry is not in the list the new entry goes to the front.
     *
     * @param {Object} element - Formatting element.
     * @param {Object} token - Token stored alongside the element.
     */
    insertElementAfterBookmark(element, token) {
        let bookmarkIdx = this.length - 1;

        for (; bookmarkIdx >= 0; bookmarkIdx--) {
            if (this.entries[bookmarkIdx] === this.bookmark) {
                break;
            }
        }

        this.entries.splice(bookmarkIdx + 1, 0, {
            type: FormattingElementList.ELEMENT_ENTRY,
            element: element,
            token: token
        });

        this.length++;
    }

    /**
     * Removes `entry` (compared by identity); does nothing when it is not in the list.
     *
     * @param {Object} entry - Entry object previously obtained from this list.
     */
    removeEntry(entry) {
        for (let i = this.length - 1; i >= 0; i--) {
            if (this.entries[i] === entry) {
                this.entries.splice(i, 1);
                this.length--;
                break;
            }
        }
    }

    /** Pops entries from the end up to and including the last marker (or until the list is empty). */
    clearToLastMarker() {
        while (this.length) {
            const entry = this.entries.pop();

            this.length--;

            if (entry.type === FormattingElementList.MARKER_ENTRY) {
                break;
            }
        }
    }

    //Search
    /**
     * Finds the newest element entry with the given tag name, searching back
     * only as far as the last marker.
     *
     * @param {string} tagName - Tag name to look for.
     * @returns {Object|null} The entry, or `null` when a marker or the list start is reached first.
     */
    getElementEntryInScopeWithTagName(tagName) {
        for (let i = this.length - 1; i >= 0; i--) {
            const entry = this.entries[i];

            if (entry.type === FormattingElementList.MARKER_ENTRY) {
                return null;
            }

            if (this.treeAdapter.getTagName(entry.element) === tagName) {
                return entry;
            }
        }

        return null;
    }

    /**
     * Finds the element entry that holds `element` (compared by identity).
     *
     * @param {Object} element - Element to look for.
     * @returns {Object|null} The entry, or `null` when there is none.
     */
    getElementEntry(element) {
        for (let i = this.length - 1; i >= 0; i--) {
            const entry = this.entries[i];

            if (entry.type === FormattingElementList.ELEMENT_ENTRY && entry.element === element) {
                return entry;
            }
        }

        return null;
    }
}

//Entry types
FormattingElementList.MARKER_ENTRY = 'MARKER_ENTRY';
FormattingElementList.ELEMENT_ENTRY = 'ELEMENT_ENTRY';
//Aliases
const $ = HTML.TAG_NAMES;
const NS = HTML.NAMESPACES;
const ATTRS = HTML.ATTRS;

//Default parser options; user supplied options are merged over these in the Parser constructor.
const DEFAULT_OPTIONS = {
    scriptingEnabled: true,
    //When truthy the Parser constructor installs LocationInfoParserMixin
    sourceCodeLocationInfo: false,
    //When truthy the Parser constructor installs ErrorReportingParserMixin with this callback
    onParseError: null,
    treeAdapter: defaultTreeAdapter
};

//Misc constants
const HIDDEN_INPUT_TYPE = 'hidden';

//Adoption agency loops iteration count
const AA_OUTER_LOOP_ITER = 8;
const AA_INNER_LOOP_ITER = 3;

//Insertion modes
//NOTE: each value equals its own name; the values are used as keys of TOKEN_HANDLERS below.
const INITIAL_MODE = 'INITIAL_MODE';
const BEFORE_HTML_MODE = 'BEFORE_HTML_MODE';
const BEFORE_HEAD_MODE = 'BEFORE_HEAD_MODE';
const IN_HEAD_MODE = 'IN_HEAD_MODE';
const IN_HEAD_NO_SCRIPT_MODE = 'IN_HEAD_NO_SCRIPT_MODE';
const AFTER_HEAD_MODE = 'AFTER_HEAD_MODE';
const IN_BODY_MODE = 'IN_BODY_MODE';
const TEXT_MODE = 'TEXT_MODE';
const IN_TABLE_MODE = 'IN_TABLE_MODE';
const IN_TABLE_TEXT_MODE = 'IN_TABLE_TEXT_MODE';
const IN_CAPTION_MODE = 'IN_CAPTION_MODE';
const IN_COLUMN_GROUP_MODE = 'IN_COLUMN_GROUP_MODE';
const IN_TABLE_BODY_MODE = 'IN_TABLE_BODY_MODE';
const IN_ROW_MODE = 'IN_ROW_MODE';
const IN_CELL_MODE = 'IN_CELL_MODE';
const IN_SELECT_MODE = 'IN_SELECT_MODE';
const IN_SELECT_IN_TABLE_MODE = 'IN_SELECT_IN_TABLE_MODE';
const IN_TEMPLATE_MODE = 'IN_TEMPLATE_MODE';
const AFTER_BODY_MODE = 'AFTER_BODY_MODE';
const IN_FRAMESET_MODE = 'IN_FRAMESET_MODE';
const AFTER_FRAMESET_MODE = 'AFTER_FRAMESET_MODE';
const AFTER_AFTER_BODY_MODE = 'AFTER_AFTER_BODY_MODE';
const AFTER_AFTER_FRAMESET_MODE = 'AFTER_AFTER_FRAMESET_MODE';

//Insertion mode reset map
//Maps a tag name to an insertion mode.
//NOTE(review): presumably consulted when the insertion mode is reset; the lookup is outside this part of the file.
const INSERTION_MODE_RESET_MAP = {
    [$.TR]: IN_ROW_MODE,
    [$.TBODY]: IN_TABLE_BODY_MODE,
    [$.THEAD]: IN_TABLE_BODY_MODE,
    [$.TFOOT]: IN_TABLE_BODY_MODE,
    [$.CAPTION]: IN_CAPTION_MODE,
    [$.COLGROUP]: IN_COLUMN_GROUP_MODE,
    [$.TABLE]: IN_TABLE_MODE,
    [$.BODY]: IN_BODY_MODE,
    [$.FRAMESET]: IN_FRAMESET_MODE
};

//Template insertion mode switch map
//Maps a tag name to an insertion mode.
//NOTE(review): presumably used for start tags seen in IN_TEMPLATE_MODE; the lookup is outside this part of the file.
const TEMPLATE_INSERTION_MODE_SWITCH_MAP = {
    [$.CAPTION]: IN_TABLE_MODE,
    [$.COLGROUP]: IN_TABLE_MODE,
    [$.TBODY]: IN_TABLE_MODE,
    [$.TFOOT]: IN_TABLE_MODE,
    [$.THEAD]: IN_TABLE_MODE,
    [$.COL]: IN_COLUMN_GROUP_MODE,
    [$.TR]: IN_TABLE_BODY_MODE,
    [$.TD]: IN_ROW_MODE,
    [$.TH]: IN_ROW_MODE
};

//Token handlers map for insertion modes
//Two-level table: insertion mode -> Tokenizer token type -> handler function.
//Every mode lists all eight token types. The handlers are function declarations
//located elsewhere in this module.
const TOKEN_HANDLERS = {
    [INITIAL_MODE]: {
        [Tokenizer.CHARACTER_TOKEN]: tokenInInitialMode,
        [Tokenizer.NULL_CHARACTER_TOKEN]: tokenInInitialMode,
        [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: ignoreToken,
        [Tokenizer.COMMENT_TOKEN]: appendComment,
        [Tokenizer.DOCTYPE_TOKEN]: doctypeInInitialMode,
        [Tokenizer.START_TAG_TOKEN]: tokenInInitialMode,
        [Tokenizer.END_TAG_TOKEN]: tokenInInitialMode,
        [Tokenizer.EOF_TOKEN]: tokenInInitialMode
    },
    [BEFORE_HTML_MODE]: {
        [Tokenizer.CHARACTER_TOKEN]: tokenBeforeHtml,
        [Tokenizer.NULL_CHARACTER_TOKEN]: tokenBeforeHtml,
        [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: ignoreToken,
        [Tokenizer.COMMENT_TOKEN]: appendComment,
        [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
        [Tokenizer.START_TAG_TOKEN]: startTagBeforeHtml,
        [Tokenizer.END_TAG_TOKEN]: endTagBeforeHtml,
        [Tokenizer.EOF_TOKEN]: tokenBeforeHtml
    },
    [BEFORE_HEAD_MODE]: {
        [Tokenizer.CHARACTER_TOKEN]: tokenBeforeHead,
        [Tokenizer.NULL_CHARACTER_TOKEN]: tokenBeforeHead,
        [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: ignoreToken,
        [Tokenizer.COMMENT_TOKEN]: appendComment,
        [Tokenizer.DOCTYPE_TOKEN]: misplacedDoctype,
        [Tokenizer.START_TAG_TOKEN]: startTagBeforeHead,
        [Tokenizer.END_TAG_TOKEN]: endTagBeforeHead,
        [Tokenizer.EOF_TOKEN]: tokenBeforeHead
    },
    [IN_HEAD_MODE]: {
        [Tokenizer.CHARACTER_TOKEN]: tokenInHead,
        [Tokenizer.NULL_CHARACTER_TOKEN]: tokenInHead,
        [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters,
        [Tokenizer.COMMENT_TOKEN]: appendComment,
        [Tokenizer.DOCTYPE_TOKEN]: misplacedDoctype,
        [Tokenizer.START_TAG_TOKEN]: startTagInHead,
        [Tokenizer.END_TAG_TOKEN]: endTagInHead,
        [Tokenizer.EOF_TOKEN]: tokenInHead
    },
    [IN_HEAD_NO_SCRIPT_MODE]: {
        [Tokenizer.CHARACTER_TOKEN]: tokenInHeadNoScript,
        [Tokenizer.NULL_CHARACTER_TOKEN]: tokenInHeadNoScript,
        [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters,
        [Tokenizer.COMMENT_TOKEN]: appendComment,
        [Tokenizer.DOCTYPE_TOKEN]: misplacedDoctype,
        [Tokenizer.START_TAG_TOKEN]: startTagInHeadNoScript,
        [Tokenizer.END_TAG_TOKEN]: endTagInHeadNoScript,
        [Tokenizer.EOF_TOKEN]: tokenInHeadNoScript
    },
    [AFTER_HEAD_MODE]: {
        [Tokenizer.CHARACTER_TOKEN]: tokenAfterHead,
        [Tokenizer.NULL_CHARACTER_TOKEN]: tokenAfterHead,
        [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters,
        [Tokenizer.COMMENT_TOKEN]: appendComment,
        [Tokenizer.DOCTYPE_TOKEN]: misplacedDoctype,
        [Tokenizer.START_TAG_TOKEN]: startTagAfterHead,
        [Tokenizer.END_TAG_TOKEN]: endTagAfterHead,
        [Tokenizer.EOF_TOKEN]: tokenAfterHead
    },
    [IN_BODY_MODE]: {
        [Tokenizer.CHARACTER_TOKEN]: characterInBody,
        [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken,
        [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInBody,
        [Tokenizer.COMMENT_TOKEN]: appendComment,
        [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
        [Tokenizer.START_TAG_TOKEN]: startTagInBody,
        [Tokenizer.END_TAG_TOKEN]: endTagInBody,
        [Tokenizer.EOF_TOKEN]: eofInBody
    },
    [TEXT_MODE]: {
        [Tokenizer.CHARACTER_TOKEN]: insertCharacters,
        [Tokenizer.NULL_CHARACTER_TOKEN]: insertCharacters,
        [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters,
        [Tokenizer.COMMENT_TOKEN]: ignoreToken,
        [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
        [Tokenizer.START_TAG_TOKEN]: ignoreToken,
        [Tokenizer.END_TAG_TOKEN]: endTagInText,
        [Tokenizer.EOF_TOKEN]: eofInText
    },
    [IN_TABLE_MODE]: {
        [Tokenizer.CHARACTER_TOKEN]: characterInTable,
        [Tokenizer.NULL_CHARACTER_TOKEN]: characterInTable,
        [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: characterInTable,
        [Tokenizer.COMMENT_TOKEN]: appendComment,
        [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
        [Tokenizer.START_TAG_TOKEN]: startTagInTable,
        [Tokenizer.END_TAG_TOKEN]: endTagInTable,
        [Tokenizer.EOF_TOKEN]: eofInBody
    },
    [IN_TABLE_TEXT_MODE]: {
        [Tokenizer.CHARACTER_TOKEN]: characterInTableText,
        [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken,
        [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInTableText,
        [Tokenizer.COMMENT_TOKEN]: tokenInTableText,
        [Tokenizer.DOCTYPE_TOKEN]: tokenInTableText,
        [Tokenizer.START_TAG_TOKEN]: tokenInTableText,
        [Tokenizer.END_TAG_TOKEN]: tokenInTableText,
        [Tokenizer.EOF_TOKEN]: tokenInTableText
    },
    [IN_CAPTION_MODE]: {
        [Tokenizer.CHARACTER_TOKEN]: characterInBody,
        [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken,
        [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInBody,
        [Tokenizer.COMMENT_TOKEN]: appendComment,
        [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
        [Tokenizer.START_TAG_TOKEN]: startTagInCaption,
        [Tokenizer.END_TAG_TOKEN]: endTagInCaption,
        [Tokenizer.EOF_TOKEN]: eofInBody
    },
    [IN_COLUMN_GROUP_MODE]: {
        [Tokenizer.CHARACTER_TOKEN]: tokenInColumnGroup,
        [Tokenizer.NULL_CHARACTER_TOKEN]: tokenInColumnGroup,
        [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters,
        [Tokenizer.COMMENT_TOKEN]: appendComment,
        [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
        [Tokenizer.START_TAG_TOKEN]: startTagInColumnGroup,
        [Tokenizer.END_TAG_TOKEN]: endTagInColumnGroup,
        [Tokenizer.EOF_TOKEN]: eofInBody
    },
    [IN_TABLE_BODY_MODE]: {
        [Tokenizer.CHARACTER_TOKEN]: characterInTable,
        [Tokenizer.NULL_CHARACTER_TOKEN]: characterInTable,
        [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: characterInTable,
        [Tokenizer.COMMENT_TOKEN]: appendComment,
        [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
        [Tokenizer.START_TAG_TOKEN]: startTagInTableBody,
        [Tokenizer.END_TAG_TOKEN]: endTagInTableBody,
        [Tokenizer.EOF_TOKEN]: eofInBody
    },
    [IN_ROW_MODE]: {
        [Tokenizer.CHARACTER_TOKEN]: characterInTable,
        [Tokenizer.NULL_CHARACTER_TOKEN]: characterInTable,
        [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: characterInTable,
        [Tokenizer.COMMENT_TOKEN]: appendComment,
        [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
        [Tokenizer.START_TAG_TOKEN]: startTagInRow,
        [Tokenizer.END_TAG_TOKEN]: endTagInRow,
        [Tokenizer.EOF_TOKEN]: eofInBody
    },
    [IN_CELL_MODE]: {
        [Tokenizer.CHARACTER_TOKEN]: characterInBody,
        [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken,
        [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInBody,
        [Tokenizer.COMMENT_TOKEN]: appendComment,
        [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
        [Tokenizer.START_TAG_TOKEN]: startTagInCell,
        [Tokenizer.END_TAG_TOKEN]: endTagInCell,
        [Tokenizer.EOF_TOKEN]: eofInBody
    },
    [IN_SELECT_MODE]: {
        [Tokenizer.CHARACTER_TOKEN]: insertCharacters,
        [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken,
        [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters,
        [Tokenizer.COMMENT_TOKEN]: appendComment,
        [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
        [Tokenizer.START_TAG_TOKEN]: startTagInSelect,
        [Tokenizer.END_TAG_TOKEN]: endTagInSelect,
        [Tokenizer.EOF_TOKEN]: eofInBody
    },
    [IN_SELECT_IN_TABLE_MODE]: {
        [Tokenizer.CHARACTER_TOKEN]: insertCharacters,
        [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken,
        [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters,
        [Tokenizer.COMMENT_TOKEN]: appendComment,
        [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
        [Tokenizer.START_TAG_TOKEN]: startTagInSelectInTable,
        [Tokenizer.END_TAG_TOKEN]: endTagInSelectInTable,
        [Tokenizer.EOF_TOKEN]: eofInBody
    },
    [IN_TEMPLATE_MODE]: {
        [Tokenizer.CHARACTER_TOKEN]: characterInBody,
        [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken,
        [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInBody,
        [Tokenizer.COMMENT_TOKEN]: appendComment,
        [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
        [Tokenizer.START_TAG_TOKEN]: startTagInTemplate,
        [Tokenizer.END_TAG_TOKEN]: endTagInTemplate,
        [Tokenizer.EOF_TOKEN]: eofInTemplate
    },
    [AFTER_BODY_MODE]: {
        [Tokenizer.CHARACTER_TOKEN]: tokenAfterBody,
        [Tokenizer.NULL_CHARACTER_TOKEN]: tokenAfterBody,
        [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInBody,
        [Tokenizer.COMMENT_TOKEN]: appendCommentToRootHtmlElement,
        [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
        [Tokenizer.START_TAG_TOKEN]: startTagAfterBody,
        [Tokenizer.END_TAG_TOKEN]: endTagAfterBody,
        [Tokenizer.EOF_TOKEN]: stopParsing
    },
    [IN_FRAMESET_MODE]: {
        [Tokenizer.CHARACTER_TOKEN]: ignoreToken,
        [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken,
        [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters,
        [Tokenizer.COMMENT_TOKEN]: appendComment,
        [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
        [Tokenizer.START_TAG_TOKEN]: startTagInFrameset,
        [Tokenizer.END_TAG_TOKEN]: endTagInFrameset,
        [Tokenizer.EOF_TOKEN]: stopParsing
    },
    [AFTER_FRAMESET_MODE]: {
        [Tokenizer.CHARACTER_TOKEN]: ignoreToken,
        [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken,
        [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: insertCharacters,
        [Tokenizer.COMMENT_TOKEN]: appendComment,
        [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
        [Tokenizer.START_TAG_TOKEN]: startTagAfterFrameset,
        [Tokenizer.END_TAG_TOKEN]: endTagAfterFrameset,
        [Tokenizer.EOF_TOKEN]: stopParsing
    },
    [AFTER_AFTER_BODY_MODE]: {
        [Tokenizer.CHARACTER_TOKEN]: tokenAfterAfterBody,
        [Tokenizer.NULL_CHARACTER_TOKEN]: tokenAfterAfterBody,
        [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInBody,
        [Tokenizer.COMMENT_TOKEN]: appendCommentToDocument,
        [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
        [Tokenizer.START_TAG_TOKEN]: startTagAfterAfterBody,
        [Tokenizer.END_TAG_TOKEN]: tokenAfterAfterBody,
        [Tokenizer.EOF_TOKEN]: stopParsing
    },
    [AFTER_AFTER_FRAMESET_MODE]: {
        [Tokenizer.CHARACTER_TOKEN]: ignoreToken,
        [Tokenizer.NULL_CHARACTER_TOKEN]: ignoreToken,
        [Tokenizer.WHITESPACE_CHARACTER_TOKEN]: whitespaceCharacterInBody,
        [Tokenizer.COMMENT_TOKEN]: appendCommentToDocument,
        [Tokenizer.DOCTYPE_TOKEN]: ignoreToken,
        [Tokenizer.START_TAG_TOKEN]: startTagAfterAfterFrameset,
        [Tokenizer.END_TAG_TOKEN]: ignoreToken,
        [Tokenizer.EOF_TOKEN]: stopParsing
    }
};