diff --git a/ext/bg/background.html b/ext/bg/background.html index 7fd1c477..f2f70d4d 100644 --- a/ext/bg/background.html +++ b/ext/bg/background.html @@ -39,7 +39,7 @@ - + diff --git a/ext/bg/js/api.js b/ext/bg/js/api.js index 93e43a7d..4e5d81db 100644 --- a/ext/bg/js/api.js +++ b/ext/bg/js/api.js @@ -21,10 +21,6 @@ function apiTemplateRender(template, data) { return _apiInvoke('templateRender', {data, template}); } -function apiAudioGetUrl(definition, source, optionsContext) { - return _apiInvoke('audioGetUrl', {definition, source, optionsContext}); -} - function _apiInvoke(action, params={}) { const data = {action, params}; return new Promise((resolve, reject) => { diff --git a/ext/bg/js/audio.js b/ext/bg/js/audio.js index 972e2b8b..c94121ae 100644 --- a/ext/bg/js/audio.js +++ b/ext/bg/js/audio.js @@ -16,7 +16,7 @@ * along with this program. If not, see . */ -/*global jpIsStringEntirelyKana, audioGetFromSources*/ +/*global jpIsStringEntirelyKana*/ const audioUrlBuilders = new Map([ ['jpod101', async (definition) => { @@ -154,7 +154,7 @@ function audioBuildFilename(definition) { return null; } -async function audioInject(definition, fields, sources, optionsContext) { +async function audioInject(definition, fields, sources, optionsContext, audioSystem) { let usesAudio = false; for (const fieldValue of Object.values(fields)) { if (fieldValue.includes('{audio}')) { @@ -171,12 +171,10 @@ async function audioInject(definition, fields, sources, optionsContext) { const expressions = definition.expressions; const audioSourceDefinition = Array.isArray(expressions) ? expressions[0] : definition; - const {url} = await audioGetFromSources(audioSourceDefinition, sources, optionsContext, true); - if (url !== null) { - const filename = audioBuildFilename(audioSourceDefinition); - if (filename !== null) { - definition.audio = {url, filename}; - } + const {uri} = await audioSystem.getDefinitionAudio(audioSourceDefinition, sources, {tts: false, optionsContext}); + const filename = audioBuildFilename(audioSourceDefinition); + if (filename !== null) { + definition.audio = {url: uri, filename}; } return true; diff --git a/ext/bg/js/backend.js b/ext/bg/js/backend.js index 04bf240d..60a87916 100644 --- a/ext/bg/js/backend.js +++ b/ext/bg/js/backend.js @@ -23,7 +23,7 @@ requestText, requestJson, optionsLoad dictConfigured, dictTermsSort, dictEnabledSet, dictNoteFormat audioGetUrl, audioInject jpConvertReading, jpDistributeFuriganaInflected, jpKatakanaToHiragana -Translator, AnkiConnect, AnkiNull, Mecab, BackendApiForwarder, JsonSchema, ClipboardMonitor*/ +AudioSystem, Translator, AnkiConnect, AnkiNull, Mecab, BackendApiForwarder, JsonSchema, ClipboardMonitor*/ class Backend { constructor() { @@ -34,6 +34,7 @@ class Backend { this.options = null; this.optionsSchema = null; this.defaultAnkiFieldTemplates = null; + this.audioSystem = new AudioSystem({getAudioUri: this._getAudioUri.bind(this)}); this.optionsContext = { depth: 0, url: window.location.href @@ -436,7 +437,8 @@ class Backend { definition, options.anki.terms.fields, options.audio.sources, - optionsContext + optionsContext, + this.audioSystem ); } @@ -762,6 +764,16 @@ class Backend { // Utilities + async _getAudioUri(definition, source, details) { + let optionsContext = (typeof details === 'object' && details !== null ? details.optionsContext : null); + if (!(typeof optionsContext === 'object' && optionsContext !== null)) { + optionsContext = this.optionsContext; + } + + const options = this.getOptions(optionsContext); + return await audioGetUrl(definition, source, options); + } + async _injectScreenshot(definition, fields, screenshot) { let usesScreenshot = false; for (const fieldValue of Object.values(fields)) { diff --git a/ext/bg/js/settings/audio.js b/ext/bg/js/settings/audio.js index 6d183a43..6f581d9b 100644 --- a/ext/bg/js/settings/audio.js +++ b/ext/bg/js/settings/audio.js @@ -16,12 +16,20 @@ * along with this program. If not, see . */ -/*global getOptionsContext, getOptionsMutable, settingsSaveOptions -AudioSourceUI, audioGetTextToSpeechVoice*/ +/*global getOptionsContext, getOptionsMutable, settingsSaveOptions, apiAudioGetUrl +AudioSystem, AudioSourceUI*/ let audioSourceUI = null; +let audioSystem = null; async function audioSettingsInitialize() { + audioSystem = new AudioSystem({ + getAudioUri: async (definition, source) => { + const optionsContext = getOptionsContext(); + return await apiAudioGetUrl(definition, source, optionsContext); + } + }); + const optionsContext = getOptionsContext(); const options = await getOptionsMutable(optionsContext); audioSourceUI = new AudioSourceUI.Container( @@ -100,16 +108,11 @@ function textToSpeechVoiceCompare(a, b) { function textToSpeechTest() { try { const text = document.querySelector('#text-to-speech-voice-test').dataset.speechText || ''; - const voiceURI = document.querySelector('#text-to-speech-voice').value; - const voice = audioGetTextToSpeechVoice(voiceURI); - if (voice === null) { return; } + const voiceUri = document.querySelector('#text-to-speech-voice').value; - const utterance = new SpeechSynthesisUtterance(text); - utterance.lang = 'ja-JP'; - utterance.voice = voice; - utterance.volume = 1.0; - - speechSynthesis.speak(utterance); + const audio = audioSystem.createTextToSpeechAudio({text, voiceUri}); + audio.volume = 1.0; + audio.play(); } catch (e) { // NOP } diff --git a/ext/bg/search.html b/ext/bg/search.html index d6336826..f4c1a737 100644 --- a/ext/bg/search.html +++ b/ext/bg/search.html @@ -80,7 +80,7 @@ - + diff --git a/ext/bg/settings.html b/ext/bg/settings.html index b048a36c..e9fc6be5 100644 --- a/ext/bg/settings.html +++ b/ext/bg/settings.html @@ -1098,7 +1098,7 @@ - + diff --git a/ext/fg/float.html b/ext/fg/float.html index 352a866a..7bbed565 100644 --- a/ext/fg/float.html +++ b/ext/fg/float.html @@ -46,7 +46,7 @@ - + diff --git a/ext/mixed/js/audio-system.js b/ext/mixed/js/audio-system.js new file mode 100644 index 00000000..31c476b1 --- /dev/null +++ b/ext/mixed/js/audio-system.js @@ -0,0 +1,185 @@ +/* + * Copyright (C) 2019-2020 Alex Yatskov + * Author: Alex Yatskov + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +class TextToSpeechAudio { + constructor(text, voice) { + this.text = text; + this.voice = voice; + this._utterance = null; + this._volume = 1; + } + + get currentTime() { + return 0; + } + set currentTime(value) { + // NOP + } + + get volume() { + return this._volume; + } + set volume(value) { + this._volume = value; + if (this._utterance !== null) { + this._utterance.volume = value; + } + } + + play() { + try { + if (this._utterance === null) { + this._utterance = new SpeechSynthesisUtterance(this.text || ''); + this._utterance.lang = 'ja-JP'; + this._utterance.volume = this._volume; + this._utterance.voice = this.voice; + } + + speechSynthesis.cancel(); + speechSynthesis.speak(this._utterance); + } catch (e) { + // NOP + } + } + + pause() { + try { + speechSynthesis.cancel(); + } catch (e) { + // NOP + } + } +} + +class AudioSystem { + constructor({getAudioUri}) { + this._cache = new Map(); + this._cacheSizeMaximum = 32; + this._getAudioUri = getAudioUri; + + if (typeof speechSynthesis !== 'undefined') { + // speechSynthesis.getVoices() will not be populated unless some API call is made. + speechSynthesis.addEventListener('voiceschanged', this._onVoicesChanged.bind(this)); + } + } + + async getDefinitionAudio(definition, sources, details) { + const key = `${definition.expression}:${definition.reading}`; + const cacheValue = this._cache.get(definition); + if (typeof cacheValue !== 'undefined') { + const {audio, uri, source} = cacheValue; + return {audio, uri, source}; + } + + for (const source of sources) { + const uri = await this._getAudioUri(definition, source, details); + if (uri === null) { continue; } + + try { + const audio = await this._createAudio(uri, details); + this._cacheCheck(); + this._cache.set(key, {audio, uri, source}); + return {audio, uri, source}; + } catch (e) { + // NOP + } + } + + throw new Error('Could not create audio'); + } + + createTextToSpeechAudio({text, voiceUri}) { + const voice = this._getTextToSpeechVoiceFromVoiceUri(voiceUri); + if (voice === null) { + throw new Error('Invalid text-to-speech voice'); + } + return new TextToSpeechAudio(text, voice); + } + + _onVoicesChanged() { + // NOP + } + + async _createAudio(uri, details) { + const ttsParameters = this._getTextToSpeechParameters(uri); + if (ttsParameters !== null) { + if (typeof details === 'object' && details !== null) { + if (details.tts === false) { + throw new Error('Text-to-speech not permitted'); + } + } + return this.createTextToSpeechAudio(ttsParameters); + } + + return await this._createAudioFromUrl(uri); + } + + _createAudioFromUrl(url) { + return new Promise((resolve, reject) => { + const audio = new Audio(url); + audio.addEventListener('loadeddata', () => { + const duration = audio.duration; + if (duration === 5.694694 || duration === 5.720718) { + // Hardcoded values for invalid audio + reject(new Error('Could not retrieve audio')); + } else { + resolve(audio); + } + }); + audio.addEventListener('error', () => reject(audio.error)); + }); + } + + _getTextToSpeechVoiceFromVoiceUri(voiceUri) { + try { + for (const voice of speechSynthesis.getVoices()) { + if (voice.voiceURI === voiceUri) { + return voice; + } + } + } catch (e) { + // NOP + } + return null; + } + + _getTextToSpeechParameters(uri) { + const m = /^tts:[^#?]*\?([^#]*)/.exec(uri); + if (m === null) { return null; } + + const searchParameters = new URLSearchParams(m[1]); + const text = searchParameters.get('text'); + const voiceUri = searchParameters.get('voice'); + return (text !== null && voiceUri !== null ? {text, voiceUri} : null); + } + + _cacheCheck() { + const removeCount = this._cache.size - this._cacheSizeMaximum; + if (removeCount <= 0) { return; } + + const removeKeys = []; + for (const key of this._cache.keys()) { + removeKeys.push(key); + if (removeKeys.length >= removeCount) { break; } + } + + for (const key of removeKeys) { + this._cache.delete(key); + } + } +} diff --git a/ext/mixed/js/audio.js b/ext/mixed/js/audio.js deleted file mode 100644 index b5a025be..00000000 --- a/ext/mixed/js/audio.js +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright (C) 2019-2020 Alex Yatskov - * Author: Alex Yatskov - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -/*global apiAudioGetUrl*/ - -class TextToSpeechAudio { - constructor(text, voice) { - this.text = text; - this.voice = voice; - this._utterance = null; - this._volume = 1; - } - - get currentTime() { - return 0; - } - set currentTime(value) { - // NOP - } - - get volume() { - return this._volume; - } - set volume(value) { - this._volume = value; - if (this._utterance !== null) { - this._utterance.volume = value; - } - } - - play() { - try { - if (this._utterance === null) { - this._utterance = new SpeechSynthesisUtterance(this.text || ''); - this._utterance.lang = 'ja-JP'; - this._utterance.volume = this._volume; - this._utterance.voice = this.voice; - } - - speechSynthesis.cancel(); - speechSynthesis.speak(this._utterance); - } catch (e) { - // NOP - } - } - - pause() { - try { - speechSynthesis.cancel(); - } catch (e) { - // NOP - } - } - - static createFromUri(ttsUri) { - const m = /^tts:[^#?]*\?([^#]*)/.exec(ttsUri); - if (m === null) { return null; } - - const searchParameters = new URLSearchParams(m[1]); - const text = searchParameters.get('text'); - let voice = searchParameters.get('voice'); - if (text === null || voice === null) { return null; } - - voice = audioGetTextToSpeechVoice(voice); - if (voice === null) { return null; } - - return new TextToSpeechAudio(text, voice); - } -} - -function audioGetFromUrl(url, willDownload) { - const tts = TextToSpeechAudio.createFromUri(url); - if (tts !== null) { - if (willDownload) { - throw new Error('AnkiConnect does not support downloading text-to-speech audio.'); - } - return Promise.resolve(tts); - } - - return new Promise((resolve, reject) => { - const audio = new Audio(url); - audio.addEventListener('loadeddata', () => { - if (audio.duration === 5.694694 || audio.duration === 5.720718) { - // Hardcoded values for invalid audio - reject(new Error('Could not retrieve audio')); - } else { - resolve(audio); - } - }); - audio.addEventListener('error', () => reject(audio.error)); - }); -} - -async function audioGetFromSources(expression, sources, optionsContext, willDownload, cache=null) { - const key = `${expression.expression}:${expression.reading}`; - if (cache !== null) { - const cacheValue = cache.get(expression); - if (typeof cacheValue !== 'undefined') { - return cacheValue; - } - } - - for (let i = 0, ii = sources.length; i < ii; ++i) { - const source = sources[i]; - const url = await apiAudioGetUrl(expression, source, optionsContext); - if (url === null) { - continue; - } - - try { - let audio = await audioGetFromUrl(url, willDownload); - if (willDownload) { - // AnkiConnect handles downloading URLs into cards - audio = null; - } - const result = {audio, url, source}; - if (cache !== null) { - cache.set(key, result); - } - return result; - } catch (e) { - // NOP - } - } - return {audio: null, url: null, source: null}; -} - -function audioGetTextToSpeechVoice(voiceURI) { - try { - for (const voice of speechSynthesis.getVoices()) { - if (voice.voiceURI === voiceURI) { - return voice; - } - } - } catch (e) { - // NOP - } - return null; -} - -function audioPrepareTextToSpeech(options) { - if ( - audioPrepareTextToSpeech.state || - !options.audio.textToSpeechVoice || - !( - options.audio.sources.includes('text-to-speech') || - options.audio.sources.includes('text-to-speech-reading') - ) - ) { - // Text-to-speech not in use. - return; - } - - // Chrome needs this value called once before it will become populated. - // The first call will return an empty list. - audioPrepareTextToSpeech.state = true; - try { - speechSynthesis.getVoices(); - } catch (e) { - // NOP - } -} -audioPrepareTextToSpeech.state = false; diff --git a/ext/mixed/js/display.js b/ext/mixed/js/display.js index 6a762a65..3fe8e684 100644 --- a/ext/mixed/js/display.js +++ b/ext/mixed/js/display.js @@ -18,9 +18,8 @@ /*global docRangeFromPoint, docSentenceExtract apiKanjiFind, apiTermsFind, apiNoteView, apiOptionsGet, apiDefinitionsAddable, apiDefinitionAdd -apiScreenshotGet, apiForward -audioPrepareTextToSpeech, audioGetFromSources -DisplayGenerator, WindowScroll, DisplayContext, DOM*/ +apiScreenshotGet, apiForward, apiAudioGetUrl +AudioSystem, DisplayGenerator, WindowScroll, DisplayContext, DOM*/ class Display { constructor(spinner, container) { @@ -32,7 +31,7 @@ class Display { this.index = 0; this.audioPlaying = null; this.audioFallback = null; - this.audioCache = new Map(); + this.audioSystem = new AudioSystem({getAudioUri: this._getAudioUri.bind(this)}); this.styleNode = null; this.eventListeners = new EventListenerCollection(); @@ -364,7 +363,6 @@ class Display { this.updateDocumentOptions(this.options); this.updateTheme(this.options.general.popupTheme); this.setCustomCss(this.options.general.customPopupCss); - audioPrepareTextToSpeech(this.options); } updateDocumentOptions(options) { @@ -775,16 +773,16 @@ class Display { } const sources = this.options.audio.sources; - let {audio, source} = await audioGetFromSources(expression, sources, this.getOptionsContext(), false, this.audioCache); - let info; - if (audio === null) { + let audio, source, info; + try { + ({audio, source} = await this.audioSystem.getDefinitionAudio(expression, sources)); + info = `From source ${1 + sources.indexOf(source)}: ${source}`; + } catch (e) { if (this.audioFallback === null) { this.audioFallback = new Audio('/mixed/mp3/button.mp3'); } audio = this.audioFallback; info = 'Could not find audio'; - } else { - info = `From source ${1 + sources.indexOf(source)}: ${source}`; } const button = this.audioButtonFindImage(entryIndex); @@ -918,4 +916,9 @@ class Display { const key = event.key; return (typeof key === 'string' ? (key.length === 1 ? key.toUpperCase() : key) : ''); } + + async _getAudioUri(definition, source) { + const optionsContext = this.getOptionsContext(); + return await apiAudioGetUrl(definition, source, optionsContext); + } }