Regex optimizations (#2132)

* Remove regex construction for SimpleDOMParser.getElementsByClassName

* Remove regex construction for CssStyleApplier._getRulesForClass

* Rename, add jsdoc for clarity
This commit is contained in:
toasted-nutbread 2022-05-14 18:13:04 -04:00 committed by GitHub
parent 5dcc2315d2
commit 8b6f526dc6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 65 additions and 6 deletions

View File

@ -20,6 +20,14 @@
* that is the same across different browsers.
*/
class CssStyleApplier {
/**
* @typedef {object} CssRule
* @property {string} selectors A CSS selector string representing one or more selectors.
* @property {[string, string][]} styles A list of CSS property and value pairs.
* @property {string} styles[][0] The CSS property.
* @property {string} styles[][1] The CSS value.
*/
/**
* Creates a new instance of the class.
* @param styleDataUrl The local URL to the JSON file containing the style rules.
@ -37,6 +45,9 @@ class CssStyleApplier {
this._styleDataUrl = styleDataUrl;
this._styleData = [];
this._cachedRules = new Map();
// eslint-disable-next-line no-control-regex
this._patternHtmlWhitespace = /[\t\r\n\x0C ]+/g;
this._patternClassNameCharacter = /[0-9a-zA-Z-_]/;
}
/**
@ -65,7 +76,7 @@ class CssStyleApplier {
const className = element.getAttribute('class');
if (className.length === 0) { continue; }
let cssTextNew = '';
for (const {selectorText, styles} of this._getRulesForClass(className)) {
for (const {selectorText, styles} of this._getCandidateCssRulesForClass(className)) {
if (!element.matches(selectorText)) { continue; }
cssTextNew += this._getCssText(styles);
}
@ -99,17 +110,22 @@ class CssStyleApplier {
return await response.json();
}
_getRulesForClass(className) {
/**
* Gets an array of candidate CSS rules which might match a specific class.
* @param {string} className A whitespace-separated list of classes.
* @returns {CssRule[]} An array of candidate CSS rules.
*/
_getCandidateCssRulesForClass(className) {
let rules = this._cachedRules.get(className);
if (typeof rules !== 'undefined') { return rules; }
rules = [];
this._cachedRules.set(className, rules);
const classNamePattern = new RegExp(`.${className}(?![0-9a-zA-Z-])`, '');
const classList = this._getTokens(className);
for (const {selectors, styles} of this._styleData) {
const selectorText = selectors.join(',');
if (!classNamePattern.test(selectorText)) { continue; }
if (!this._selectorMatches(selectorText, classList)) { continue; }
rules.push({selectorText, styles});
}
@ -123,4 +139,33 @@ class CssStyleApplier {
}
return cssText;
}
_selectorMatches(selectorText, classList) {
const pattern = this._patternClassNameCharacter;
for (const item of classList) {
const prefixedItem = `.${item}`;
let start = 0;
while (true) {
const index = selectorText.indexOf(prefixedItem, start);
if (index < 0) { break; }
start = index + prefixedItem.length;
if (start >= selectorText.length || !pattern.test(selectorText[start])) { return true; }
}
}
return false;
}
_getTokens(tokenListString) {
let start = 0;
const pattern = this._patternHtmlWhitespace;
pattern.lastIndex = 0;
const result = [];
while (true) {
const match = pattern.exec(tokenListString);
const end = match === null ? tokenListString.length : match.index;
if (end > start) { result.push(tokenListString.substring(start, end)); }
if (match === null) { return result; }
start = end + match[0].length;
}
}
}

View File

@ -22,6 +22,8 @@
class SimpleDOMParser {
constructor(content) {
this._document = parse5.parse(content);
// eslint-disable-next-line no-control-regex
this._patternHtmlWhitespace = /[\t\r\n\x0C ]+/g;
}
getElementById(id, root=null) {
@ -54,11 +56,10 @@ class SimpleDOMParser {
getElementsByClassName(className, root=null) {
const results = [];
const classNamePattern = new RegExp(`(^|\\s)${escapeRegExp(className)}(\\s|$)`);
for (const node of this._allNodes(root)) {
if (typeof node.tagName === 'string') {
const nodeClassName = this.getAttribute(node, 'class');
if (nodeClassName !== null && classNamePattern.test(nodeClassName)) {
if (nodeClassName !== null && this._hasToken(nodeClassName, className)) {
results.push(node);
}
}
@ -114,4 +115,17 @@ class SimpleDOMParser {
}
}
}
_hasToken(tokenListString, token) {
let start = 0;
const pattern = this._patternHtmlWhitespace;
pattern.lastIndex = 0;
while (true) {
const match = pattern.exec(tokenListString);
const end = match === null ? tokenListString.length : match.index;
if (end > start && tokenListString.substring(start, end) === token) { return true; }
if (match === null) { return false; }
start = end + match[0].length;
}
}
}