yomichan/ext/bg/js/audio-downloader.js

/*
 * Copyright (C) 2017-2021  Yomichan Authors
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

/* global
 * JsonSchemaValidator
 * NativeSimpleDOMParser
 * SimpleDOMParser
 */

class AudioDownloader {
    constructor({japaneseUtil, requestBuilder}) {
        this._japaneseUtil = japaneseUtil;
        this._requestBuilder = requestBuilder;
        this._customAudioListSchema = null;
        this._schemaValidator = null;
        this._getInfoHandlers = new Map([
            ['jpod101', this._getInfoJpod101.bind(this)],
            ['jpod101-alternate', this._getInfoJpod101Alternate.bind(this)],
            ['jisho', this._getInfoJisho.bind(this)],
            ['text-to-speech', this._getInfoTextToSpeech.bind(this)],
            ['text-to-speech-reading', this._getInfoTextToSpeechReading.bind(this)],
            ['custom', this._getInfoCustom.bind(this)]
        ]);
    }

    async getExpressionAudioInfoList(source, expression, reading, details) {
        const handler = this._getInfoHandlers.get(source);
        if (typeof handler === 'function') {
            try {
                return await handler(expression, reading, details);
            } catch (e) {
                // NOP
            }
        }
        return [];
    }

    async downloadExpressionAudio(sources, expression, reading, details) {
        for (const source of sources) {
            const infoList = await this.getExpressionAudioInfoList(source, expression, reading, details);
            for (const info of infoList) {
                switch (info.type) {
                    case 'url':
                        try {
                            return await this._downloadAudioFromUrl(info.url, source);
                        } catch (e) {
                            // NOP
                        }
                        break;
                }
            }
        }

        throw new Error('Could not download audio');
    }

    // Private

    _normalizeUrl(url, base) {
        return new URL(url, base).href;
    }

    async _getInfoJpod101(expression, reading) {
        let kana = reading;
        let kanji = expression;

        if (!kana && this._japaneseUtil.isStringEntirelyKana(kanji)) {
            kana = kanji;
            kanji = null;
        }

        const params = [];
        if (kanji) {
            params.push(`kanji=${encodeURIComponent(kanji)}`);
        }
        if (kana) {
            params.push(`kana=${encodeURIComponent(kana)}`);
        }

        const url = `https://assets.languagepod101.com/dictionary/japanese/audiomp3.php?${params.join('&')}`;
        return [{type: 'url', url}];
    }

    async _getInfoJpod101Alternate(expression, reading) {
        const fetchUrl = 'https://www.japanesepod101.com/learningcenter/reference/dictionary_post';
        const data = `post=dictionary_reference&match_type=exact&search_query=${encodeURIComponent(expression)}&vulgar=true`;
        const response = await this._requestBuilder.fetchAnonymous(fetchUrl, {
            method: 'POST',
            mode: 'cors',
            cache: 'default',
            credentials: 'omit',
            redirect: 'follow',
            referrerPolicy: 'no-referrer',
            headers: {
                'Content-Type': 'application/x-www-form-urlencoded'
            },
            body: data
        });
        const responseText = await response.text();

        const dom = this._createSimpleDOMParser(responseText);
        for (const row of dom.getElementsByClassName('dc-result-row')) {
            try {
                const audio = dom.getElementByTagName('audio', row);
                if (audio === null) { continue; }

                const source = dom.getElementByTagName('source', audio);
                if (source === null) { continue; }

                let url = dom.getAttribute(source, 'src');
                if (url === null) { continue; }

                const htmlReadings = dom.getElementsByClassName('dc-vocab_kana');
                if (htmlReadings.length === 0) { continue; }

                const htmlReading = dom.getTextContent(htmlReadings[0]);
                if (htmlReading && (!reading || reading === htmlReading)) {
                    url = this._normalizeUrl(url, response.url);
                    return [{type: 'url', url}];
                }
            } catch (e) {
                // NOP
            }
        }

        throw new Error('Failed to find audio URL');
    }

    async _getInfoJisho(expression, reading) {
        const fetchUrl = `https://jisho.org/search/${expression}`;
        const response = await this._requestBuilder.fetchAnonymous(fetchUrl, {
            method: 'GET',
            mode: 'cors',
            cache: 'default',
            credentials: 'omit',
            redirect: 'follow',
            referrerPolicy: 'no-referrer'
        });
        const responseText = await response.text();

        const dom = this._createSimpleDOMParser(responseText);
        try {
            const audio = dom.getElementById(`audio_${expression}:${reading}`);
            if (audio !== null) {
                const source = dom.getElementByTagName('source', audio);
                if (source !== null) {
                    let url = dom.getAttribute(source, 'src');
                    if (url !== null) {
                        url = this._normalizeUrl(url, response.url);
                        return [{type: 'url', url}];
                    }
                }
            }
        } catch (e) {
            // NOP
        }

        throw new Error('Failed to find audio URL');
    }

    async _getInfoTextToSpeech(expression, reading, {textToSpeechVoice}) {
        if (!textToSpeechVoice) {
            throw new Error('No voice');
        }
        return [{type: 'tts', text: expression, voice: textToSpeechVoice}];
    }

    async _getInfoTextToSpeechReading(expression, reading, {textToSpeechVoice}) {
        if (!textToSpeechVoice) {
            throw new Error('No voice');
        }
        return [{type: 'tts', text: reading || expression, voice: textToSpeechVoice}];
    }

    async _getInfoCustom(expression, reading, {customSourceUrl, customSourceType}) {
        if (typeof customSourceUrl !== 'string') {
            throw new Error('No custom URL defined');
        }
        const data = {expression, reading};
        const url = customSourceUrl.replace(/\{([^}]*)\}/g, (m0, m1) => (Object.prototype.hasOwnProperty.call(data, m1) ? `${data[m1]}` : m0));

        switch (customSourceType) {
            case 'json':
                return await this._getInfoCustomJson(url);
            default:
                return [{type: 'url', url}];
        }
    }

    async _getInfoCustomJson(url) {
        const response = await this._requestBuilder.fetchAnonymous(url, {
            method: 'GET',
            mode: 'cors',
            cache: 'default',
            credentials: 'omit',
            redirect: 'follow',
            referrerPolicy: 'no-referrer'
        });

        if (!response.ok) {
            throw new Error(`Invalid response: ${response.status}`);
        }

        const responseJson = await response.json();

        const schema = await this._getCustomAudioListSchema();
        if (this._schemaValidator === null) {
            this._schemaValidator = new JsonSchemaValidator();
        }
        this._schemaValidator.validate(responseJson, schema);

        const results = [];
        for (const {url: url2, name} of responseJson.audioSources) {
            const info = {type: 'url', url: url2};
            if (typeof name === 'string') { info.name = name; }
            results.push(info);
        }
        return results;
    }

    async _downloadAudioFromUrl(url, source) {
        const response = await this._requestBuilder.fetchAnonymous(url, {
            method: 'GET',
            mode: 'cors',
            cache: 'default',
            credentials: 'omit',
            redirect: 'follow',
            referrerPolicy: 'no-referrer'
        });

        if (!response.ok) {
            throw new Error(`Invalid response: ${response.status}`);
        }

        const arrayBuffer = await response.arrayBuffer();

        if (!await this._isAudioBinaryValid(arrayBuffer, source)) {
            throw new Error('Could not retrieve audio');
        }

        return this._arrayBufferToBase64(arrayBuffer);
    }

    async _isAudioBinaryValid(arrayBuffer, source) {
        switch (source) {
            case 'jpod101':
            {
                const digest = await this._arrayBufferDigest(arrayBuffer);
                switch (digest) {
                    case 'ae6398b5a27bc8c0a771df6c907ade794be15518174773c58c7c7ddd17098906': // Invalid audio
                        return false;
                    default:
                        return true;
                }
            }
            default:
                return true;
        }
    }

    async _arrayBufferDigest(arrayBuffer) {
        const hash = new Uint8Array(await crypto.subtle.digest('SHA-256', new Uint8Array(arrayBuffer)));
        let digest = '';
        for (const byte of hash) {
            digest += byte.toString(16).padStart(2, '0');
        }
        return digest;
    }

    _arrayBufferToBase64(arrayBuffer) {
        return btoa(String.fromCharCode(...new Uint8Array(arrayBuffer)));
    }

    _createSimpleDOMParser(content) {
        if (typeof NativeSimpleDOMParser !== 'undefined' && NativeSimpleDOMParser.isSupported()) {
            return new NativeSimpleDOMParser(content);
        } else if (typeof SimpleDOMParser !== 'undefined' && SimpleDOMParser.isSupported()) {
            return new SimpleDOMParser(content);
        } else {
            throw new Error('DOM parsing not supported');
        }
    }

    async _getCustomAudioListSchema() {
        let schema = this._customAudioListSchema;
        if (schema === null) {
            const url = chrome.runtime.getURL('/bg/data/custom-audio-list-schema.json');
            const response = await fetch(url, {
                method: 'GET',
                mode: 'no-cors',
                cache: 'default',
                credentials: 'omit',
                redirect: 'follow',
                referrerPolicy: 'no-referrer'
            });
            schema = await response.json();
            this._customAudioListSchema = schema;
        }
        return schema;
    }
}