Create abstraction class for parsing DOM (#798)

This commit is contained in:
toasted-nutbread 2020-09-10 11:30:01 -04:00 committed by GitHub
parent 58e5ddfde0
commit 17ebe6a754
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 70 additions and 8 deletions

View File

@ -37,6 +37,7 @@
<script src="/bg/js/profile-conditions.js"></script> <script src="/bg/js/profile-conditions.js"></script>
<script src="/bg/js/request-builder.js"></script> <script src="/bg/js/request-builder.js"></script>
<script src="/bg/js/template-renderer.js"></script> <script src="/bg/js/template-renderer.js"></script>
<script src="/bg/js/simple-dom-parser.js"></script>
<script src="/bg/js/text-source-map.js"></script> <script src="/bg/js/text-source-map.js"></script>
<script src="/bg/js/translator.js"></script> <script src="/bg/js/translator.js"></script>
<script src="/bg/js/util.js"></script> <script src="/bg/js/util.js"></script>

View File

@ -16,6 +16,7 @@
*/ */
/* global /* global
* SimpleDOMParser
* jp * jp
*/ */
@ -99,12 +100,23 @@ class AudioUriBuilder {
}); });
const responseText = await response.text(); const responseText = await response.text();
const dom = new DOMParser().parseFromString(responseText, 'text/html'); const dom = new SimpleDOMParser(responseText);
for (const row of dom.getElementsByClassName('dc-result-row')) { for (const row of dom.getElementsByClassName('dc-result-row')) {
try { try {
const url = row.querySelector('audio>source[src]').getAttribute('src'); const audio = dom.getElementByTagName('audio', row);
const reading = row.getElementsByClassName('dc-vocab_kana').item(0).textContent; if (audio === null) { continue; }
if (url && reading && (!definition.reading || definition.reading === reading)) {
const source = dom.getElementByTagName('source', audio);
if (source === null) { continue; }
const url = dom.getAttribute(source, 'src');
if (url === null) { continue; }
const readings = dom.getElementsByClassName('dc-vocab_kana');
if (readings.length === 0) { continue; }
const reading = dom.getTextContent(readings[0]);
if (reading && (!definition.reading || definition.reading === reading)) {
return this.normalizeUrl(url, 'https://www.japanesepod101.com', '/learningcenter/reference/'); return this.normalizeUrl(url, 'https://www.japanesepod101.com', '/learningcenter/reference/');
} }
} catch (e) { } catch (e) {
@ -127,15 +139,18 @@ class AudioUriBuilder {
}); });
const responseText = await response.text(); const responseText = await response.text();
const dom = new DOMParser().parseFromString(responseText, 'text/html'); const dom = new SimpleDOMParser(responseText);
try { try {
const audio = dom.getElementById(`audio_${definition.expression}:${definition.reading}`); const audio = dom.getElementById(`audio_${definition.expression}:${definition.reading}`);
if (audio !== null) { if (audio !== null) {
const url = audio.getElementsByTagName('source').item(0).getAttribute('src'); const source = dom.getElementByTagName('source', audio);
if (url) { if (source !== null) {
const url = dom.getAttribute(source, 'src');
if (url !== null) {
return this.normalizeUrl(url, 'https://jisho.org', '/search/'); return this.normalizeUrl(url, 'https://jisho.org', '/search/');
} }
} }
}
} catch (e) { } catch (e) {
// NOP // NOP
} }

View File

@ -0,0 +1,46 @@
/*
* Copyright (C) 2020 Yomichan Authors
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/**
 * Thin wrapper around a parsed HTML document which exposes a small,
 * element-handle based query API. Callers pass element handles back into
 * the wrapper (e.g. `getAttribute(element, name)`) instead of calling DOM
 * methods on the elements directly.
 */
class SimpleDOMParser {
    /**
     * Parses the given markup as an HTML document.
     * @param {string} content The HTML source text to parse.
     */
    constructor(content) {
        this._document = new DOMParser().parseFromString(content, 'text/html');
    }

    /**
     * Finds the first element whose `id` attribute equals the given value.
     * @param {string} id The id to look for; it is matched literally.
     * @param {?Element} [root] Element to search within; the whole document when omitted.
     * @returns {?Element} The matching element, or `null` if there is none.
     */
    getElementById(id, root=null) {
        // The id is embedded in a quoted attribute selector, so characters which
        // would terminate or corrupt the quoted string must be escaped first.
        const escapedId = SimpleDOMParser._escapeSelectorString(id);
        return (root || this._document).querySelector(`[id='${escapedId}']`);
    }

    /**
     * Finds the first element matching the given tag name.
     * @param {string} tagName The tag name; it is used directly as a selector.
     * @param {?Element} [root] Element to search within; the whole document when omitted.
     * @returns {?Element} The first matching element, or `null` if there is none.
     */
    getElementByTagName(tagName, root=null) {
        return (root || this._document).querySelector(tagName);
    }

    /**
     * Finds all elements matching the given tag name.
     * @param {string} tagName The tag name; it is used directly as a selector.
     * @param {?Element} [root] Element to search within; the whole document when omitted.
     * @returns {Element[]} A new array of the matching elements.
     */
    getElementsByTagName(tagName, root=null) {
        return [...(root || this._document).querySelectorAll(tagName)];
    }

    /**
     * Finds all elements carrying the given class.
     * @param {string} className A single class name; it is inserted after a `.` in the selector.
     * @param {?Element} [root] Element to search within; the whole document when omitted.
     * @returns {Element[]} A new array of the matching elements.
     */
    getElementsByClassName(className, root=null) {
        return [...(root || this._document).querySelectorAll(`.${className}`)];
    }

    /**
     * Reads an attribute from an element.
     * @param {Element} element An element previously returned by this parser.
     * @param {string} attribute The attribute name.
     * @returns {?string} The attribute value, or `null` when the attribute is not present.
     */
    getAttribute(element, attribute) {
        return element.hasAttribute(attribute) ? element.getAttribute(attribute) : null;
    }

    /**
     * Reads the text content of an element.
     * @param {Element} element An element previously returned by this parser.
     * @returns {string} The element's `textContent`.
     */
    getTextContent(element) {
        return element.textContent;
    }

    /**
     * Escapes a value for use inside a single-quoted CSS selector string.
     * Backslashes, single quotes and line breaks are replaced by CSS hex
     * escapes (e.g. `'` becomes `\27 `); all other characters are kept as-is.
     * @param {string} value The raw value.
     * @returns {string} The escaped value.
     */
    static _escapeSelectorString(value) {
        return String(value).replace(/[\\'\n\r\f]/g, (c) => `\\${c.charCodeAt(0).toString(16)} `);
    }
}