yomichan/ext/mixed/js/audio.js

/*
 * Copyright (C) 2019-2020  Alex Yatskov <alex@foosoft.net>
 * Author: Alex Yatskov <alex@foosoft.net>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

/*global apiAudioGetUrl*/

/**
 * Audio-like wrapper around the Web Speech API. It exposes the subset of the
 * HTMLAudioElement surface used by this file's callers (currentTime, volume,
 * play, pause) so that synthesized speech can stand in for a real audio clip.
 */
class TextToSpeechAudio {
    /**
     * @param text The text to be spoken.
     * @param voice The voice object assigned to the utterance when it is created.
     */
    constructor(text, voice) {
        this.text = text;
        this.voice = voice;
        this._utterance = null;
        this._volume = 1;
    }

    /** Always 0; synthesized speech has no seekable position. */
    get currentTime() {
        return 0;
    }
    set currentTime(value) {
        // Intentionally ignored.
    }

    /** The volume applied to the utterance. */
    get volume() {
        return this._volume;
    }
    set volume(value) {
        this._volume = value;
        const utterance = this._utterance;
        if (utterance === null) { return; }
        // Keep an already-created utterance in sync with the new volume.
        utterance.volume = value;
    }

    /**
     * Speaks the text, cancelling any speech currently queued or in progress.
     * The utterance is created on first use and reused afterwards.
     * Any error (e.g. speech synthesis being unavailable) is ignored.
     */
    play() {
        try {
            if (this._utterance === null) {
                const utterance = new SpeechSynthesisUtterance(this.text || '');
                this._utterance = utterance;
                utterance.lang = 'ja-JP';
                utterance.volume = this._volume;
                utterance.voice = this.voice;
            }

            speechSynthesis.cancel();
            speechSynthesis.speak(this._utterance);
        } catch (e) {
            // Best effort only.
        }
    }

    /**
     * Stops speech by cancelling the synthesis queue. Errors are ignored.
     */
    pause() {
        try {
            speechSynthesis.cancel();
        } catch (e) {
            // Best effort only.
        }
    }

    /**
     * Builds an instance from a URI of the form "tts:...?text=...&voice=...".
     * @param ttsUri The URI to parse.
     * @returns A TextToSpeechAudio, or null if the URI does not match, lacks
     *   the text or voice parameter, or names a voice that cannot be resolved.
     */
    static createFromUri(ttsUri) {
        const match = /^tts:[^#?]*\?([^#]*)/.exec(ttsUri);
        if (match === null) { return null; }

        const [, query] = match;
        const parameters = new URLSearchParams(query);
        const text = parameters.get('text');
        const voiceUri = parameters.get('voice');
        if (text === null || voiceUri === null) { return null; }

        const voice = audioGetTextToSpeechVoice(voiceUri);
        return (voice !== null ? new TextToSpeechAudio(text, voice) : null);
    }
}

/**
 * Resolves audio for expressions by trying a list of sources in order, and
 * keeps a bounded cache of the results keyed by "expression:reading".
 */
class AudioSystem {
    constructor() {
        this._cache = new Map();
        this._cacheSizeMaximum = 32;

        if (typeof speechSynthesis !== 'undefined') {
            // speechSynthesis.getVoices() will not be populated unless some API call is made.
            speechSynthesis.addEventListener('voiceschanged', this._onVoicesChanged.bind(this));
        }
    }

    /**
     * Finds audio for an expression, trying each source in order.
     * @param expression Object with `expression` and `reading` properties.
     * @param sources Iterable of source identifiers passed to apiAudioGetUrl.
     * @param optionsContext Passed through to apiAudioGetUrl.
     * @param details Optional object; `details.tts === false` forbids
     *   text-to-speech results.
     * @returns {Promise<{audio, uri, source}>} The first audio that could be created.
     * @throws {Error} 'Could not create audio' if no source yields audio.
     */
    async getExpressionAudio(expression, sources, optionsContext, details) {
        const key = `${expression.expression}:${expression.reading}`;
        // Entries are stored under the string key, so they must be looked up with it;
        // looking up the expression object itself never matches.
        const cacheValue = this._cache.get(key);
        if (typeof cacheValue !== 'undefined') {
            const {audio, uri, source} = cacheValue;
            // A cached text-to-speech result must not bypass a caller that forbids
            // text-to-speech; in that case fall through and query the sources again.
            if (this._isTextToSpeechPermitted(details) || this._getTextToSpeechParameters(uri) === null) {
                return {audio, uri, source};
            }
        }

        for (const source of sources) {
            const uri = await apiAudioGetUrl(expression, source, optionsContext);
            if (uri === null) { continue; }

            try {
                const audio = await this._createAudio(uri, details);
                // Insert first, then trim, so the cache never exceeds its maximum size.
                this._cache.set(key, {audio, uri, source});
                this._cacheCheck();
                return {audio, uri, source};
            } catch (e) {
                // This source could not produce audio; try the next one.
            }
        }

        throw new Error('Could not create audio');
    }

    /**
     * Creates text-to-speech audio for the given text and voice URI.
     * @throws {Error} 'Invalid text-to-speech voice' if no voice has that URI.
     */
    createTextToSpeechAudio({text, voiceUri}) {
        const voice = this._getTextToSpeechVoiceFromVoiceUri(voiceUri);
        if (voice === null) {
            throw new Error('Invalid text-to-speech voice');
        }
        return new TextToSpeechAudio(text, voice);
    }

    _onVoicesChanged() {
        // NOP
    }

    /**
     * Returns false only when details is an object whose `tts` property is
     * exactly false; any other value (including null/undefined) permits TTS.
     */
    _isTextToSpeechPermitted(details) {
        return !(typeof details === 'object' && details !== null && details.tts === false);
    }

    /**
     * Creates audio for a URI: text-to-speech audio for "tts:" URIs,
     * otherwise an Audio element loaded from the URL.
     * @throws {Error} 'Text-to-speech not permitted' for a "tts:" URI when
     *   details.tts === false.
     */
    async _createAudio(uri, details) {
        const ttsParameters = this._getTextToSpeechParameters(uri);
        if (ttsParameters !== null) {
            if (!this._isTextToSpeechPermitted(details)) {
                throw new Error('Text-to-speech not permitted');
            }
            return this.createTextToSpeechAudio(ttsParameters);
        }

        return await this._createAudioFromUrl(uri);
    }

    /**
     * Loads an Audio element from a URL. Resolves with the element once its
     * data has loaded; rejects on a load error or when the clip's duration
     * matches one of the hardcoded values identifying invalid audio.
     */
    _createAudioFromUrl(url) {
        return new Promise((resolve, reject) => {
            const audio = new Audio(url);
            audio.addEventListener('loadeddata', () => {
                const duration = audio.duration;
                if (duration === 5.694694 || duration === 5.720718) {
                    // Hardcoded values for invalid audio
                    reject(new Error('Could not retrieve audio'));
                } else {
                    resolve(audio);
                }
            });
            audio.addEventListener('error', () => reject(audio.error));
        });
    }

    /**
     * Returns the speech synthesis voice whose voiceURI matches, or null if
     * there is none or speech synthesis cannot be queried.
     */
    _getTextToSpeechVoiceFromVoiceUri(voiceUri) {
        try {
            for (const voice of speechSynthesis.getVoices()) {
                if (voice.voiceURI === voiceUri) {
                    return voice;
                }
            }
        } catch (e) {
            // NOP
        }
        return null;
    }

    /**
     * Parses a "tts:...?text=...&voice=..." URI.
     * @returns {{text, voiceUri}|null} null if the URI does not match or
     *   either parameter is missing.
     */
    _getTextToSpeechParameters(uri) {
        const m = /^tts:[^#?]*\?([^#]*)/.exec(uri);
        if (m === null) { return null; }

        const searchParameters = new URLSearchParams(m[1]);
        const text = searchParameters.get('text');
        const voiceUri = searchParameters.get('voice');
        return (text !== null && voiceUri !== null ? {text, voiceUri} : null);
    }

    /**
     * Trims the cache down to its maximum size, removing entries in the
     * Map's iteration (insertion) order.
     */
    _cacheCheck() {
        const removeCount = this._cache.size - this._cacheSizeMaximum;
        if (removeCount <= 0) { return; }

        const removeKeys = [];
        for (const key of this._cache.keys()) {
            removeKeys.push(key);
            if (removeKeys.length >= removeCount) { break; }
        }

        for (const key of removeKeys) {
            this._cache.delete(key);
        }
    }
}


/**
 * Creates audio for a URL.
 *
 * "tts:" URIs that TextToSpeechAudio.createFromUri can resolve yield a
 * text-to-speech object; any other URL is loaded into an Audio element.
 *
 * @param url The audio URL or "tts:" URI.
 * @param willDownload Whether the URL is going to be downloaded (by AnkiConnect).
 * @returns {Promise} Resolves with the audio object; rejects on a load error or
 *   when the clip's duration matches a hardcoded value identifying invalid audio.
 * @throws {Error} Synchronously, when willDownload is set for a text-to-speech URI.
 */
function audioGetFromUrl(url, willDownload) {
    const ttsAudio = TextToSpeechAudio.createFromUri(url);
    if (ttsAudio === null) {
        return new Promise((resolve, reject) => {
            const element = new Audio(url);
            const onLoaded = () => {
                // Hardcoded values for invalid audio
                const isInvalid = [5.694694, 5.720718].includes(element.duration);
                if (isInvalid) {
                    reject(new Error('Could not retrieve audio'));
                    return;
                }
                resolve(element);
            };
            const onError = () => reject(element.error);
            element.addEventListener('loadeddata', onLoaded);
            element.addEventListener('error', onError);
        });
    }

    if (willDownload) {
        throw new Error('AnkiConnect does not support downloading text-to-speech audio.');
    }
    return Promise.resolve(ttsAudio);
}

/**
 * Finds audio for an expression by trying each source in order.
 *
 * @param expression Object with `expression` and `reading` properties.
 * @param sources Iterable of source identifiers passed to apiAudioGetUrl.
 * @param optionsContext Passed through to apiAudioGetUrl.
 * @param willDownload When truthy, the URL is still validated by loading it,
 *   but the returned `audio` is null (AnkiConnect downloads the URL itself).
 * @param cache Optional Map of previous results keyed by "expression:reading".
 *   A cached result is returned as-is, including `audio: null` for results
 *   that were stored by a willDownload call.
 * @returns {Promise<{audio, url, source}>} The first usable result, or an
 *   object with all three properties null if no source produced audio.
 */
async function audioGetFromSources(expression, sources, optionsContext, willDownload, cache=null) {
    const key = `${expression.expression}:${expression.reading}`;
    if (cache !== null) {
        // Results are stored under the string key, so they must be looked up with it;
        // looking up the expression object itself never matches.
        const cacheValue = cache.get(key);
        if (typeof cacheValue !== 'undefined') {
            return cacheValue;
        }
    }

    for (const source of sources) {
        const url = await apiAudioGetUrl(expression, source, optionsContext);
        if (url === null) {
            continue;
        }

        try {
            let audio = await audioGetFromUrl(url, willDownload);
            if (willDownload) {
                // AnkiConnect handles downloading URLs into cards
                audio = null;
            }
            const result = {audio, url, source};
            if (cache !== null) {
                cache.set(key, result);
            }
            return result;
        } catch (e) {
            // This source could not produce audio; try the next one.
        }
    }
    return {audio: null, url: null, source: null};
}

/**
 * Looks up a speech synthesis voice by its voiceURI.
 *
 * @param voiceURI The URI to match against each voice's `voiceURI` property.
 * @returns The first matching voice, or null if there is no match or the
 *   voice list cannot be queried (any error is ignored).
 */
function audioGetTextToSpeechVoice(voiceURI) {
    let match = null;
    try {
        const candidates = [...speechSynthesis.getVoices()];
        const index = candidates.findIndex((candidate) => candidate.voiceURI === voiceURI);
        if (index >= 0) {
            match = candidates[index];
        }
    } catch (e) {
        // Speech synthesis is unavailable or misbehaving; report no voice.
    }
    return match;
}

/**
 * Primes the speech synthesis voice list, at most once, and only when the
 * options show text-to-speech is in use: a voice is configured and one of the
 * text-to-speech sources is listed.
 *
 * @param options Options object; reads `options.audio.textToSpeechVoice` and
 *   `options.audio.sources`. Not accessed once priming has already happened.
 */
function audioPrepareTextToSpeech(options) {
    if (audioPrepareTextToSpeech.state) {
        // Already primed.
        return;
    }

    const audioOptions = options.audio;
    if (!audioOptions.textToSpeechVoice) {
        // Text-to-speech not in use.
        return;
    }

    const textToSpeechSources = ['text-to-speech', 'text-to-speech-reading'];
    const usesTextToSpeech = textToSpeechSources.some((name) => audioOptions.sources.includes(name));
    if (!usesTextToSpeech) {
        // Text-to-speech not in use.
        return;
    }

    // Chrome needs this value called once before it will become populated.
    // The first call will return an empty list.
    audioPrepareTextToSpeech.state = true;
    try {
        speechSynthesis.getVoices();
    } catch (e) {
        // Best effort only.
    }
}
audioPrepareTextToSpeech.state = false;
Implement audio fallbacks 2019-10-10 23:58:06 +00:00			`/*`
Update copyright 2020-01-01 17:00:00 +00:00			`* Copyright (C) 2019-2020 Alex Yatskov <alex@foosoft.net>`
Implement audio fallbacks 2019-10-10 23:58:06 +00:00			`* Author: Alex Yatskov <alex@foosoft.net>`
			`*`
			`* This program is free software: you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License as published by`
			`* the Free Software Foundation, either version 3 of the License, or`
			`* (at your option) any later version.`
			`*`
			`* This program is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`* GNU General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU General Public License`
Update license info URL 2020-01-01 17:00:31 +00:00			`* along with this program. If not, see <https://www.gnu.org/licenses/>.`
Implement audio fallbacks 2019-10-10 23:58:06 +00:00			`*/`

Change no-undef from off to error 2020-02-01 20:00:34 +00:00			`/global apiAudioGetUrl/`
Implement audio fallbacks 2019-10-10 23:58:06 +00:00
Add support for text-to-speech playback 2019-10-13 03:59:21 +00:00			`class TextToSpeechAudio {`
			`constructor(text, voice) {`
			`this.text = text;`
			`this.voice = voice;`
			`this._utterance = null;`
			`this._volume = 1;`
			`}`

			`get currentTime() {`
			`return 0;`
			`}`
			`set currentTime(value) {`
			`// NOP`
			`}`

			`get volume() {`
			`return this._volume;`
			`}`
			`set volume(value) {`
			`this._volume = value;`
			`if (this._utterance !== null) {`
			`this._utterance.volume = value;`
			`}`
			`}`

			`play() {`
			`try {`
			`if (this._utterance === null) {`
			`this._utterance = new SpeechSynthesisUtterance(this.text \|\| '');`
			`this._utterance.lang = 'ja-JP';`
			`this._utterance.volume = this._volume;`
			`this._utterance.voice = this.voice;`
			`}`

			`speechSynthesis.cancel();`
			`speechSynthesis.speak(this._utterance);`
			`} catch (e) {`
			`// NOP`
			`}`
			`}`

			`pause() {`
			`try {`
			`speechSynthesis.cancel();`
			`} catch (e) {`
			`// NOP`
			`}`
			`}`

			`static createFromUri(ttsUri) {`
Remove unnecessary escapes from regex literals 2019-11-25 19:25:11 +00:00			`const m = /^tts:[^#?]\?([^#])/.exec(ttsUri);`
Add support for text-to-speech playback 2019-10-13 03:59:21 +00:00			`if (m === null) { return null; }`

Use URLSearchParams 2020-02-15 18:36:32 +00:00			`const searchParameters = new URLSearchParams(m[1]);`
			`const text = searchParameters.get('text');`
			`let voice = searchParameters.get('voice');`
			`if (text === null \|\| voice === null) { return null; }`
Add support for text-to-speech playback 2019-10-13 03:59:21 +00:00
Use URLSearchParams 2020-02-15 18:36:32 +00:00			`voice = audioGetTextToSpeechVoice(voice);`
Add support for text-to-speech playback 2019-10-13 03:59:21 +00:00			`if (voice === null) { return null; }`

Use URLSearchParams 2020-02-15 18:36:32 +00:00			`return new TextToSpeechAudio(text, voice);`
Add support for text-to-speech playback 2019-10-13 03:59:21 +00:00			`}`
			`}`

Create new AudioSystem class 2020-03-07 17:44:14 +00:00			`class AudioSystem {`
			`constructor() {`
			`this._cache = new Map();`
			`this._cacheSizeMaximum = 32;`

			`if (typeof speechSynthesis !== 'undefined') {`
			`// speechSynthesis.getVoices() will not be populated unless some API call is made.`
			`speechSynthesis.addEventListener('voiceschanged', this._onVoicesChanged.bind(this));`
			`}`
			`}`

			`async getExpressionAudio(expression, sources, optionsContext, details) {`
			const key = `${expression.expression}:${expression.reading}`;
			`const cacheValue = this._cache.get(expression);`
			`if (typeof cacheValue !== 'undefined') {`
			`const {audio, uri, source} = cacheValue;`
			`return {audio, uri, source};`
			`}`

			`for (const source of sources) {`
			`const uri = await apiAudioGetUrl(expression, source, optionsContext);`
			`if (uri === null) { continue; }`

			`try {`
			`const audio = await this._createAudio(uri, details);`
			`this._cacheCheck();`
			`this._cache.set(key, {audio, uri, source});`
			`return {audio, uri, source};`
			`} catch (e) {`
			`// NOP`
			`}`
			`}`

			`throw new Error('Could not create audio');`
			`}`

			`createTextToSpeechAudio({text, voiceUri}) {`
			`const voice = this._getTextToSpeechVoiceFromVoiceUri(voiceUri);`
			`if (voice === null) {`
			`throw new Error('Invalid text-to-speech voice');`
			`}`
			`return new TextToSpeechAudio(text, voice);`
			`}`

			`_onVoicesChanged() {`
			`// NOP`
			`}`

			`async _createAudio(uri, details) {`
			`const ttsParameters = this._getTextToSpeechParameters(uri);`
			`if (ttsParameters !== null) {`
			`if (typeof details === 'object' && details !== null) {`
			`if (details.tts === false) {`
			`throw new Error('Text-to-speech not permitted');`
			`}`
			`}`
			`return this.createTextToSpeechAudio(ttsParameters);`
			`}`

			`return await this._createAudioFromUrl(uri);`
			`}`

			`_createAudioFromUrl(url) {`
			`return new Promise((resolve, reject) => {`
			`const audio = new Audio(url);`
			`audio.addEventListener('loadeddata', () => {`
			`const duration = audio.duration;`
			`if (duration === 5.694694 \|\| duration === 5.720718) {`
			`// Hardcoded values for invalid audio`
			`reject(new Error('Could not retrieve audio'));`
			`} else {`
			`resolve(audio);`
			`}`
			`});`
			`audio.addEventListener('error', () => reject(audio.error));`
			`});`
			`}`

			`_getTextToSpeechVoiceFromVoiceUri(voiceUri) {`
			`try {`
			`for (const voice of speechSynthesis.getVoices()) {`
			`if (voice.voiceURI === voiceUri) {`
			`return voice;`
			`}`
			`}`
			`} catch (e) {`
			`// NOP`
			`}`
			`return null;`
			`}`

			`_getTextToSpeechParameters(uri) {`
			`const m = /^tts:[^#?]\?([^#])/.exec(uri);`
			`if (m === null) { return null; }`

			`const searchParameters = new URLSearchParams(m[1]);`
			`const text = searchParameters.get('text');`
			`const voiceUri = searchParameters.get('voice');`
			`return (text !== null && voiceUri !== null ? {text, voiceUri} : null);`
			`}`

			`_cacheCheck() {`
			`const removeCount = this._cache.size - this._cacheSizeMaximum;`
			`if (removeCount <= 0) { return; }`

			`const removeKeys = [];`
			`for (const key of this._cache.keys()) {`
			`removeKeys.push(key);`
			`if (removeKeys.length >= removeCount) { break; }`
			`}`

			`for (const key of removeKeys) {`
			`this._cache.delete(key);`
			`}`
			`}`
			`}`


Remove Download check when resolving Audio data There is a bug (seemingly unreported) in Yomichan where an Anki card will not contain any audio if the JapanesePod101 audio source trumps a secondary audio source (e.g. JapanesePod101-alternate) where the jpod101 source can't find the word requested. For example, そして has an audio entry in the alternate source but not the standard source. (Alternatively, there may be a bug in the jpod101 audioUrlBuilder, because I've only noticed this problem with hiragana-only expressions. JPod101 may not host those on the same url scheme any more. I'm not sure how to fix that, though, and the bug I'm addressing here does still persist). The reason this happens is that audioGetFromUrl uses downloaded audio to effectively check for a 404 (by examining the audio duration), but that check doesn't happen when an Anki card is being created (i.e. "download" is set, which I've changed to "willDownload" here). This change removes that check, but retains the will-download intent information to prevent attempts to download tts data, which AnkiConnect cannot do. I've also added a short explanation as to why the download check happens where it does. I think the unused audio object will get garbage collected since it's not referenced again, but I've explicitly unset it as well. 2019-11-09 21:51:53 +00:00			`function audioGetFromUrl(url, willDownload) {`
Add support for text-to-speech playback 2019-10-13 03:59:21 +00:00			`const tts = TextToSpeechAudio.createFromUri(url);`
			`if (tts !== null) {`
Remove Download check when resolving Audio data There is a bug (seemingly unreported) in Yomichan where an Anki card will not contain any audio if the JapanesePod101 audio source trumps a secondary audio source (e.g. JapanesePod101-alternate) where the jpod101 source can't find the word requested. For example, そして has an audio entry in the alternate source but not the standard source. (Alternatively, there may be a bug in the jpod101 audioUrlBuilder, because I've only noticed this problem with hiragana-only expressions. JPod101 may not host those on the same url scheme any more. I'm not sure how to fix that, though, and the bug I'm addressing here does still persist). The reason this happens is that audioGetFromUrl uses downloaded audio to effectively check for a 404 (by examining the audio duration), but that check doesn't happen when an Anki card is being created (i.e. "download" is set, which I've changed to "willDownload" here). This change removes that check, but retains the will-download intent information to prevent attempts to download tts data, which AnkiConnect cannot do. I've also added a short explanation as to why the download check happens where it does. I think the unused audio object will get garbage collected since it's not referenced again, but I've explicitly unset it as well. 2019-11-09 21:51:53 +00:00			`if (willDownload) {`
			`throw new Error('AnkiConnect does not support downloading text-to-speech audio.');`
Add support for text-to-speech playback 2019-10-13 03:59:21 +00:00			`}`
			`return Promise.resolve(tts);`
			`}`

Implement audio fallbacks 2019-10-10 23:58:06 +00:00			`return new Promise((resolve, reject) => {`
			`const audio = new Audio(url);`
			`audio.addEventListener('loadeddata', () => {`
			`if (audio.duration === 5.694694 \|\| audio.duration === 5.720718) {`
			`// Hardcoded values for invalid audio`
			`reject(new Error('Could not retrieve audio'));`
			`} else {`
			`resolve(audio);`
			`}`
			`});`
			`audio.addEventListener('error', () => reject(audio.error));`
			`});`
			`}`

Remove Download check when resolving Audio data There is a bug (seemingly unreported) in Yomichan where an Anki card will not contain any audio if the JapanesePod101 audio source trumps a secondary audio source (e.g. JapanesePod101-alternate) where the jpod101 source can't find the word requested. For example, そして has an audio entry in the alternate source but not the standard source. (Alternatively, there may be a bug in the jpod101 audioUrlBuilder, because I've only noticed this problem with hiragana-only expressions. JPod101 may not host those on the same url scheme any more. I'm not sure how to fix that, though, and the bug I'm addressing here does still persist). The reason this happens is that audioGetFromUrl uses downloaded audio to effectively check for a 404 (by examining the audio duration), but that check doesn't happen when an Anki card is being created (i.e. "download" is set, which I've changed to "willDownload" here). This change removes that check, but retains the will-download intent information to prevent attempts to download tts data, which AnkiConnect cannot do. I've also added a short explanation as to why the download check happens where it does. I think the unused audio object will get garbage collected since it's not referenced again, but I've explicitly unset it as well. 2019-11-09 21:51:53 +00:00			`async function audioGetFromSources(expression, sources, optionsContext, willDownload, cache=null) {`
Implement audio fallbacks 2019-10-10 23:58:06 +00:00			const key = `${expression.expression}:${expression.reading}`;
Use Map for audioGetFromSources's cache parameter 2020-02-14 01:26:48 +00:00			`if (cache !== null) {`
			`const cacheValue = cache.get(expression);`
			`if (typeof cacheValue !== 'undefined') {`
			`return cacheValue;`
			`}`
Implement audio fallbacks 2019-10-10 23:58:06 +00:00			`}`

			`for (let i = 0, ii = sources.length; i < ii; ++i) {`
			`const source = sources[i];`
			`const url = await apiAudioGetUrl(expression, source, optionsContext);`
			`if (url === null) {`
			`continue;`
			`}`

			`try {`
Fix overwriting const value 2019-11-10 18:56:06 +00:00			`let audio = await audioGetFromUrl(url, willDownload);`
Remove Download check when resolving Audio data There is a bug (seemingly unreported) in Yomichan where an Anki card will not contain any audio if the JapanesePod101 audio source trumps a secondary audio source (e.g. JapanesePod101-alternate) where the jpod101 source can't find the word requested. For example, そして has an audio entry in the alternate source but not the standard source. (Alternatively, there may be a bug in the jpod101 audioUrlBuilder, because I've only noticed this problem with hiragana-only expressions. JPod101 may not host those on the same url scheme any more. I'm not sure how to fix that, though, and the bug I'm addressing here does still persist). The reason this happens is that audioGetFromUrl uses downloaded audio to effectively check for a 404 (by examining the audio duration), but that check doesn't happen when an Anki card is being created (i.e. "download" is set, which I've changed to "willDownload" here). This change removes that check, but retains the will-download intent information to prevent attempts to download tts data, which AnkiConnect cannot do. I've also added a short explanation as to why the download check happens where it does. I think the unused audio object will get garbage collected since it's not referenced again, but I've explicitly unset it as well. 2019-11-09 21:51:53 +00:00			`if (willDownload) {`
			`// AnkiConnect handles downloading URLs into cards`
Fix overwriting const value 2019-11-10 18:56:06 +00:00			`audio = null;`
Remove Download check when resolving Audio data There is a bug (seemingly unreported) in Yomichan where an Anki card will not contain any audio if the JapanesePod101 audio source trumps a secondary audio source (e.g. JapanesePod101-alternate) where the jpod101 source can't find the word requested. For example, そして has an audio entry in the alternate source but not the standard source. (Alternatively, there may be a bug in the jpod101 audioUrlBuilder, because I've only noticed this problem with hiragana-only expressions. JPod101 may not host those on the same url scheme any more. I'm not sure how to fix that, though, and the bug I'm addressing here does still persist). The reason this happens is that audioGetFromUrl uses downloaded audio to effectively check for a 404 (by examining the audio duration), but that check doesn't happen when an Anki card is being created (i.e. "download" is set, which I've changed to "willDownload" here). This change removes that check, but retains the will-download intent information to prevent attempts to download tts data, which AnkiConnect cannot do. I've also added a short explanation as to why the download check happens where it does. I think the unused audio object will get garbage collected since it's not referenced again, but I've explicitly unset it as well. 2019-11-09 21:51:53 +00:00			`}`
Implement audio fallbacks 2019-10-10 23:58:06 +00:00			`const result = {audio, url, source};`
			`if (cache !== null) {`
Use Map for audioGetFromSources's cache parameter 2020-02-14 01:26:48 +00:00			`cache.set(key, result);`
Implement audio fallbacks 2019-10-10 23:58:06 +00:00			`}`
			`return result;`
			`} catch (e) {`
			`// NOP`
			`}`
			`}`
Add support for text-to-speech playback 2019-10-13 03:59:21 +00:00			`return {audio: null, url: null, source: null};`
Implement audio fallbacks 2019-10-10 23:58:06 +00:00			`}`
Add option for text-to-speech 2019-10-13 02:50:22 +00:00
			`function audioGetTextToSpeechVoice(voiceURI) {`
			`try {`
			`for (const voice of speechSynthesis.getVoices()) {`
			`if (voice.voiceURI === voiceURI) {`
			`return voice;`
			`}`
			`}`
			`} catch (e) {`
			`// NOP`
			`}`
			`return null;`
			`}`
Add support for text-to-speech playback 2019-10-13 03:59:21 +00:00
			`function audioPrepareTextToSpeech(options) {`
			`if (`
			`audioPrepareTextToSpeech.state \|\|`
			`!options.audio.textToSpeechVoice \|\|`
			`!(`
			`options.audio.sources.includes('text-to-speech') \|\|`
			`options.audio.sources.includes('text-to-speech-reading')`
			`)`
			`) {`
			`// Text-to-speech not in use.`
			`return;`
			`}`

			`// Chrome needs this value called once before it will become populated.`
			`// The first call will return an empty list.`
			`audioPrepareTextToSpeech.state = true;`
			`try {`
			`speechSynthesis.getVoices();`
			`} catch (e) {`
			`// NOP`
			`}`
			`}`
			`audioPrepareTextToSpeech.state = false;`