Merge pull request #399 from toasted-nutbread/audio-system-refactor

Audio system refactor
This commit is contained in:
toasted-nutbread 2020-03-07 21:44:51 -05:00 committed by GitHub
commit d022d61b1a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 236 additions and 217 deletions

View File

@ -39,7 +39,7 @@
<script src="/bg/js/request.js"></script> <script src="/bg/js/request.js"></script>
<script src="/bg/js/translator.js"></script> <script src="/bg/js/translator.js"></script>
<script src="/bg/js/util.js"></script> <script src="/bg/js/util.js"></script>
<script src="/mixed/js/audio.js"></script> <script src="/mixed/js/audio-system.js"></script>
<script src="/bg/js/backend.js"></script> <script src="/bg/js/backend.js"></script>
</body> </body>

View File

@ -21,10 +21,6 @@ function apiTemplateRender(template, data) {
return _apiInvoke('templateRender', {data, template}); return _apiInvoke('templateRender', {data, template});
} }
function apiAudioGetUrl(definition, source, optionsContext) {
return _apiInvoke('audioGetUrl', {definition, source, optionsContext});
}
function _apiInvoke(action, params={}) { function _apiInvoke(action, params={}) {
const data = {action, params}; const data = {action, params};
return new Promise((resolve, reject) => { return new Promise((resolve, reject) => {

View File

@ -16,7 +16,7 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>. * along with this program. If not, see <https://www.gnu.org/licenses/>.
*/ */
/*global jpIsStringEntirelyKana, audioGetFromSources*/ /*global jpIsStringEntirelyKana*/
const audioUrlBuilders = new Map([ const audioUrlBuilders = new Map([
['jpod101', async (definition) => { ['jpod101', async (definition) => {
@ -154,7 +154,7 @@ function audioBuildFilename(definition) {
return null; return null;
} }
async function audioInject(definition, fields, sources, optionsContext) { async function audioInject(definition, fields, sources, optionsContext, audioSystem) {
let usesAudio = false; let usesAudio = false;
for (const fieldValue of Object.values(fields)) { for (const fieldValue of Object.values(fields)) {
if (fieldValue.includes('{audio}')) { if (fieldValue.includes('{audio}')) {
@ -171,12 +171,10 @@ async function audioInject(definition, fields, sources, optionsContext) {
const expressions = definition.expressions; const expressions = definition.expressions;
const audioSourceDefinition = Array.isArray(expressions) ? expressions[0] : definition; const audioSourceDefinition = Array.isArray(expressions) ? expressions[0] : definition;
const {url} = await audioGetFromSources(audioSourceDefinition, sources, optionsContext, true); const {uri} = await audioSystem.getDefinitionAudio(audioSourceDefinition, sources, {tts: false, optionsContext});
if (url !== null) { const filename = audioBuildFilename(audioSourceDefinition);
const filename = audioBuildFilename(audioSourceDefinition); if (filename !== null) {
if (filename !== null) { definition.audio = {url: uri, filename};
definition.audio = {url, filename};
}
} }
return true; return true;

View File

@ -23,7 +23,7 @@ requestText, requestJson, optionsLoad
dictConfigured, dictTermsSort, dictEnabledSet, dictNoteFormat dictConfigured, dictTermsSort, dictEnabledSet, dictNoteFormat
audioGetUrl, audioInject audioGetUrl, audioInject
jpConvertReading, jpDistributeFuriganaInflected, jpKatakanaToHiragana jpConvertReading, jpDistributeFuriganaInflected, jpKatakanaToHiragana
Translator, AnkiConnect, AnkiNull, Mecab, BackendApiForwarder, JsonSchema, ClipboardMonitor*/ AudioSystem, Translator, AnkiConnect, AnkiNull, Mecab, BackendApiForwarder, JsonSchema, ClipboardMonitor*/
class Backend { class Backend {
constructor() { constructor() {
@ -34,6 +34,7 @@ class Backend {
this.options = null; this.options = null;
this.optionsSchema = null; this.optionsSchema = null;
this.defaultAnkiFieldTemplates = null; this.defaultAnkiFieldTemplates = null;
this.audioSystem = new AudioSystem({getAudioUri: this._getAudioUri.bind(this)});
this.optionsContext = { this.optionsContext = {
depth: 0, depth: 0,
url: window.location.href url: window.location.href
@ -436,7 +437,8 @@ class Backend {
definition, definition,
options.anki.terms.fields, options.anki.terms.fields,
options.audio.sources, options.audio.sources,
optionsContext optionsContext,
this.audioSystem
); );
} }
@ -762,6 +764,16 @@ class Backend {
// Utilities // Utilities
async _getAudioUri(definition, source, details) {
let optionsContext = (typeof details === 'object' && details !== null ? details.optionsContext : null);
if (!(typeof optionsContext === 'object' && optionsContext !== null)) {
optionsContext = this.optionsContext;
}
const options = this.getOptions(optionsContext);
return await audioGetUrl(definition, source, options);
}
async _injectScreenshot(definition, fields, screenshot) { async _injectScreenshot(definition, fields, screenshot) {
let usesScreenshot = false; let usesScreenshot = false;
for (const fieldValue of Object.values(fields)) { for (const fieldValue of Object.values(fields)) {

View File

@ -16,12 +16,20 @@
* along with this program. If not, see <https://www.gnu.org/licenses/>. * along with this program. If not, see <https://www.gnu.org/licenses/>.
*/ */
/*global getOptionsContext, getOptionsMutable, settingsSaveOptions /*global getOptionsContext, getOptionsMutable, settingsSaveOptions, apiAudioGetUrl
AudioSourceUI, audioGetTextToSpeechVoice*/ AudioSystem, AudioSourceUI*/
let audioSourceUI = null; let audioSourceUI = null;
let audioSystem = null;
async function audioSettingsInitialize() { async function audioSettingsInitialize() {
audioSystem = new AudioSystem({
getAudioUri: async (definition, source) => {
const optionsContext = getOptionsContext();
return await apiAudioGetUrl(definition, source, optionsContext);
}
});
const optionsContext = getOptionsContext(); const optionsContext = getOptionsContext();
const options = await getOptionsMutable(optionsContext); const options = await getOptionsMutable(optionsContext);
audioSourceUI = new AudioSourceUI.Container( audioSourceUI = new AudioSourceUI.Container(
@ -100,16 +108,11 @@ function textToSpeechVoiceCompare(a, b) {
function textToSpeechTest() { function textToSpeechTest() {
try { try {
const text = document.querySelector('#text-to-speech-voice-test').dataset.speechText || ''; const text = document.querySelector('#text-to-speech-voice-test').dataset.speechText || '';
const voiceURI = document.querySelector('#text-to-speech-voice').value; const voiceUri = document.querySelector('#text-to-speech-voice').value;
const voice = audioGetTextToSpeechVoice(voiceURI);
if (voice === null) { return; }
const utterance = new SpeechSynthesisUtterance(text); const audio = audioSystem.createTextToSpeechAudio({text, voiceUri});
utterance.lang = 'ja-JP'; audio.volume = 1.0;
utterance.voice = voice; audio.play();
utterance.volume = 1.0;
speechSynthesis.speak(utterance);
} catch (e) { } catch (e) {
// NOP // NOP
} }

View File

@ -80,7 +80,7 @@
<script src="/bg/js/japanese.js"></script> <script src="/bg/js/japanese.js"></script>
<script src="/fg/js/document.js"></script> <script src="/fg/js/document.js"></script>
<script src="/fg/js/source.js"></script> <script src="/fg/js/source.js"></script>
<script src="/mixed/js/audio.js"></script> <script src="/mixed/js/audio-system.js"></script>
<script src="/mixed/js/display-context.js"></script> <script src="/mixed/js/display-context.js"></script>
<script src="/mixed/js/display.js"></script> <script src="/mixed/js/display.js"></script>
<script src="/mixed/js/display-generator.js"></script> <script src="/mixed/js/display-generator.js"></script>

View File

@ -1098,7 +1098,7 @@
<script src="/bg/js/page-exit-prevention.js"></script> <script src="/bg/js/page-exit-prevention.js"></script>
<script src="/bg/js/profile-conditions.js"></script> <script src="/bg/js/profile-conditions.js"></script>
<script src="/bg/js/util.js"></script> <script src="/bg/js/util.js"></script>
<script src="/mixed/js/audio.js"></script> <script src="/mixed/js/audio-system.js"></script>
<script src="/bg/js/settings/anki.js"></script> <script src="/bg/js/settings/anki.js"></script>
<script src="/bg/js/settings/anki-templates.js"></script> <script src="/bg/js/settings/anki-templates.js"></script>

View File

@ -46,7 +46,7 @@
<script src="/fg/js/document.js"></script> <script src="/fg/js/document.js"></script>
<script src="/fg/js/source.js"></script> <script src="/fg/js/source.js"></script>
<script src="/mixed/js/audio.js"></script> <script src="/mixed/js/audio-system.js"></script>
<script src="/mixed/js/display-context.js"></script> <script src="/mixed/js/display-context.js"></script>
<script src="/mixed/js/display.js"></script> <script src="/mixed/js/display.js"></script>
<script src="/mixed/js/display-generator.js"></script> <script src="/mixed/js/display-generator.js"></script>

View File

@ -0,0 +1,185 @@
/*
* Copyright (C) 2019-2020 Alex Yatskov <alex@foosoft.net>
* Author: Alex Yatskov <alex@foosoft.net>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/**
 * Wraps the speechSynthesis API behind a small audio-like surface
 * (currentTime, volume, play, pause).
 */
class TextToSpeechAudio {
    /**
     * @param {string} text Text to speak; a falsy value is spoken as ''.
     * @param {*} voice Value assigned to the utterance's `voice` property.
     */
    constructor(text, voice) {
        this.text = text;
        this.voice = voice;
        // The utterance is created on the first play() call and then reused.
        this._utterance = null;
        this._volume = 1;
    }

    /** Always reports 0. */
    get currentTime() {
        return 0;
    }

    /** Assignments are accepted and ignored. */
    set currentTime(value) {
        // NOP
    }

    /** Most recently assigned volume (initially 1). */
    get volume() {
        return this._volume;
    }

    /** Stores the volume and forwards it to the utterance if one exists. */
    set volume(value) {
        this._volume = value;
        const utterance = this._utterance;
        if (utterance === null) { return; }
        utterance.volume = value;
    }

    /**
     * Cancels any current speech and speaks the text.
     * Any error raised along the way is ignored.
     */
    play() {
        try {
            let utterance = this._utterance;
            if (utterance === null) {
                utterance = new SpeechSynthesisUtterance(this.text || '');
                this._utterance = utterance;
                utterance.lang = 'ja-JP';
                utterance.volume = this._volume;
                utterance.voice = this.voice;
            }

            speechSynthesis.cancel();
            speechSynthesis.speak(utterance);
        } catch (e) {
            // NOP
        }
    }

    /** Cancels speech; any error raised is ignored. */
    pause() {
        try {
            speechSynthesis.cancel();
        } catch (e) {
            // NOP
        }
    }
}
/**
 * Finds playable audio for a definition by asking an injected resolver for a
 * URI per source, and remembers successful results in a bounded cache.
 */
class AudioSystem {
    /**
     * @param {object} details
     * @param {function} details.getAudioUri Invoked as
     *   `getAudioUri(definition, source, details)`. Its awaited result is the
     *   URI to load, or null when the source should be skipped.
     */
    constructor({getAudioUri}) {
        this._cache = new Map();
        this._cacheSizeMaximum = 32;
        this._getAudioUri = getAudioUri;

        if (typeof speechSynthesis !== 'undefined') {
            // speechSynthesis.getVoices() will not be populated unless some API call is made.
            speechSynthesis.addEventListener('voiceschanged', this._onVoicesChanged.bind(this));
        }
    }

    /**
     * Returns audio for a definition, trying each source in order.
     *
     * @param {object} definition Object with `expression` and `reading`,
     *   which together form the cache key.
     * @param {Iterable} sources Sources to try, in order.
     * @param {object} [details] Forwarded to the URI resolver. When
     *   `details.tts === false`, text-to-speech URIs are rejected.
     * @returns {Promise<{audio: *, uri: *, source: *}>}
     * @throws {Error} 'Could not create audio' when no source yields audio.
     */
    async getDefinitionAudio(definition, sources, details) {
        const key = `${definition.expression}:${definition.reading}`;
        // Entries are stored under the string key, so the lookup must use the
        // same key; looking up by the definition object can never match.
        const cacheValue = this._cache.get(key);
        if (typeof cacheValue !== 'undefined') {
            const {audio, uri, source} = cacheValue;
            return {audio, uri, source};
        }

        for (const source of sources) {
            const uri = await this._getAudioUri(definition, source, details);
            if (uri === null) { continue; }

            try {
                const audio = await this._createAudio(uri, details);
                this._cache.set(key, {audio, uri, source});
                // Trim after inserting so the cache never exceeds its maximum.
                this._cacheCheck();
                return {audio, uri, source};
            } catch (e) {
                // This source failed to produce audio; fall through to the next one.
            }
        }

        throw new Error('Could not create audio');
    }

    /**
     * Creates a text-to-speech audio object.
     *
     * @param {object} details
     * @param {string} details.text Text to speak.
     * @param {string} details.voiceUri Compared against each voice's `voiceURI`.
     * @returns {TextToSpeechAudio}
     * @throws {Error} 'Invalid text-to-speech voice' when no voice matches.
     */
    createTextToSpeechAudio({text, voiceUri}) {
        const voice = this._getTextToSpeechVoiceFromVoiceUri(voiceUri);
        if (voice === null) {
            throw new Error('Invalid text-to-speech voice');
        }
        return new TextToSpeechAudio(text, voice);
    }

    /** Listener for the 'voiceschanged' event; intentionally does nothing. */
    _onVoicesChanged() {
        // NOP
    }

    /**
     * Creates audio for a URI: a text-to-speech object for `tts:` URIs that
     * carry both parameters, otherwise an Audio element loaded from the URI.
     *
     * @throws {Error} 'Text-to-speech not permitted' when the URI is a
     *   text-to-speech URI and `details.tts === false`.
     */
    async _createAudio(uri, details) {
        const ttsParameters = this._getTextToSpeechParameters(uri);
        if (ttsParameters === null) {
            return await this._createAudioFromUrl(uri);
        }

        if (typeof details === 'object' && details !== null && details.tts === false) {
            throw new Error('Text-to-speech not permitted');
        }
        return this.createTextToSpeechAudio(ttsParameters);
    }

    /**
     * Loads an Audio element, resolving on 'loadeddata' and rejecting with
     * `audio.error` on 'error'.
     */
    _createAudioFromUrl(url) {
        return new Promise((resolve, reject) => {
            const audio = new Audio(url);
            audio.addEventListener('loadeddata', () => {
                const duration = audio.duration;
                if (duration === 5.694694 || duration === 5.720718) {
                    // Hardcoded values for invalid audio
                    reject(new Error('Could not retrieve audio'));
                } else {
                    resolve(audio);
                }
            });
            audio.addEventListener('error', () => reject(audio.error));
        });
    }

    /**
     * Returns the first voice from speechSynthesis.getVoices() whose
     * `voiceURI` equals the argument, or null when there is none or the
     * lookup throws.
     */
    _getTextToSpeechVoiceFromVoiceUri(voiceUri) {
        try {
            for (const voice of speechSynthesis.getVoices()) {
                if (voice.voiceURI === voiceUri) {
                    return voice;
                }
            }
        } catch (e) {
            // NOP
        }
        return null;
    }

    /**
     * Parses the `text` and `voice` query parameters of a `tts:` URI.
     *
     * @returns {?{text: string, voiceUri: string}} null when the URI does not
     *   match the `tts:...?query` form or either parameter is missing.
     */
    _getTextToSpeechParameters(uri) {
        const m = /^tts:[^#?]*\?([^#]*)/.exec(uri);
        if (m === null) { return null; }

        const searchParameters = new URLSearchParams(m[1]);
        const text = searchParameters.get('text');
        const voiceUri = searchParameters.get('voice');
        return (text !== null && voiceUri !== null ? {text, voiceUri} : null);
    }

    /**
     * Removes the oldest entries (Map insertion order) until the cache holds
     * at most `_cacheSizeMaximum` entries.
     */
    _cacheCheck() {
        const removeCount = this._cache.size - this._cacheSizeMaximum;
        if (removeCount <= 0) { return; }

        const removeKeys = [];
        for (const key of this._cache.keys()) {
            removeKeys.push(key);
            if (removeKeys.length >= removeCount) { break; }
        }

        for (const key of removeKeys) {
            this._cache.delete(key);
        }
    }
}

View File

@ -1,178 +0,0 @@
/*
* Copyright (C) 2019-2020 Alex Yatskov <alex@foosoft.net>
* Author: Alex Yatskov <alex@foosoft.net>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/*global apiAudioGetUrl*/
/**
 * Audio-like wrapper (currentTime, volume, play, pause) around the
 * speechSynthesis API, with a factory that parses `tts:` URIs.
 */
class TextToSpeechAudio {
    /**
     * @param {string} text Text to speak; a falsy value is spoken as ''.
     * @param {*} voice Value assigned to the utterance's `voice` property.
     */
    constructor(text, voice) {
        this.text = text;
        this.voice = voice;
        // Created on the first play() call and reused afterwards.
        this._utterance = null;
        this._volume = 1;
    }

    /** Always reports 0. */
    get currentTime() {
        return 0;
    }

    /** Assignments are accepted and ignored. */
    set currentTime(value) {
        // NOP
    }

    /** Most recently assigned volume (initially 1). */
    get volume() {
        return this._volume;
    }

    /** Stores the volume and forwards it to the utterance if one exists. */
    set volume(value) {
        this._volume = value;
        const utterance = this._utterance;
        if (utterance === null) { return; }
        utterance.volume = value;
    }

    /**
     * Cancels any current speech and speaks the text.
     * Any error raised along the way is ignored.
     */
    play() {
        try {
            let utterance = this._utterance;
            if (utterance === null) {
                utterance = new SpeechSynthesisUtterance(this.text || '');
                this._utterance = utterance;
                utterance.lang = 'ja-JP';
                utterance.volume = this._volume;
                utterance.voice = this.voice;
            }

            speechSynthesis.cancel();
            speechSynthesis.speak(utterance);
        } catch (e) {
            // NOP
        }
    }

    /** Cancels speech; any error raised is ignored. */
    pause() {
        try {
            speechSynthesis.cancel();
        } catch (e) {
            // NOP
        }
    }

    /**
     * Builds an instance from a `tts:...?text=...&voice=...` URI.
     *
     * @param {string} ttsUri
     * @returns {?TextToSpeechAudio} null when the URI does not match, either
     *   parameter is missing, or audioGetTextToSpeechVoice returns null.
     */
    static createFromUri(ttsUri) {
        const match = /^tts:[^#?]*\?([^#]*)/.exec(ttsUri);
        if (match === null) { return null; }

        const query = new URLSearchParams(match[1]);
        const text = query.get('text');
        const voiceUri = query.get('voice');
        if (text === null || voiceUri === null) { return null; }

        const voice = audioGetTextToSpeechVoice(voiceUri);
        return (voice === null ? null : new TextToSpeechAudio(text, voice));
    }
}
/**
 * Produces a promise for audio at the given URL.
 *
 * When TextToSpeechAudio.createFromUri recognizes the URL, the promise
 * resolves to that object, unless `willDownload` is truthy, in which case an
 * Error is thrown synchronously. Otherwise an Audio element is created; the
 * promise resolves on 'loadeddata' and rejects with `audio.error` on 'error'.
 *
 * @param {string} url
 * @param {boolean} willDownload
 * @returns {Promise}
 */
function audioGetFromUrl(url, willDownload) {
    const ttsAudio = TextToSpeechAudio.createFromUri(url);
    if (ttsAudio === null) {
        return new Promise((resolve, reject) => {
            const element = new Audio(url);
            const onLoaded = () => {
                const isPlaceholder = (element.duration === 5.694694 || element.duration === 5.720718);
                if (!isPlaceholder) {
                    resolve(element);
                    return;
                }
                // Hardcoded values for invalid audio
                reject(new Error('Could not retrieve audio'));
            };
            element.addEventListener('loadeddata', onLoaded);
            element.addEventListener('error', () => reject(element.error));
        });
    }

    if (willDownload) {
        throw new Error('AnkiConnect does not support downloading text-to-speech audio.');
    }
    return Promise.resolve(ttsAudio);
}
/**
 * Tries each source in order and returns the first one that yields audio.
 *
 * @param {object} expression Object with `expression` and `reading`, which
 *   together form the cache key.
 * @param {Array} sources Sources to try, in order.
 * @param {*} optionsContext Forwarded to apiAudioGetUrl.
 * @param {boolean} willDownload Forwarded to audioGetFromUrl. When truthy,
 *   the returned `audio` is null and only the URL is reported.
 * @param {?Map} [cache=null] Optional cache of previous results.
 * @returns {Promise<{audio: *, url: *, source: *}>} All three fields are null
 *   when no source produced audio.
 */
async function audioGetFromSources(expression, sources, optionsContext, willDownload, cache=null) {
    const key = `${expression.expression}:${expression.reading}`;
    if (cache !== null) {
        // Results are stored under the string key, so the lookup must use it
        // too; looking up by the expression object can never match.
        const cacheValue = cache.get(key);
        if (typeof cacheValue !== 'undefined') {
            return cacheValue;
        }
    }

    for (const source of sources) {
        const url = await apiAudioGetUrl(expression, source, optionsContext);
        if (url === null) {
            continue;
        }

        try {
            let audio = await audioGetFromUrl(url, willDownload);
            if (willDownload) {
                // AnkiConnect handles downloading URLs into cards
                audio = null;
            }
            const result = {audio, url, source};
            if (cache !== null) {
                cache.set(key, result);
            }
            return result;
        } catch (e) {
            // This source failed to produce audio; fall through to the next one.
        }
    }
    return {audio: null, url: null, source: null};
}
/**
 * Finds the first voice reported by speechSynthesis.getVoices() whose
 * `voiceURI` equals the argument.
 *
 * @param {string} voiceURI
 * @returns {*} The matching voice, or null when none matches or the lookup throws.
 */
function audioGetTextToSpeechVoice(voiceURI) {
    try {
        for (const candidate of speechSynthesis.getVoices()) {
            if (candidate.voiceURI !== voiceURI) { continue; }
            return candidate;
        }
    } catch (e) {
        // NOP
    }

    return null;
}
/**
 * Calls speechSynthesis.getVoices() once, the first time the options show
 * text-to-speech in use: a truthy `audio.textToSpeechVoice` and an
 * `audio.sources` list containing 'text-to-speech' or
 * 'text-to-speech-reading'. The `state` property on this function records
 * that the call has been made, making later calls no-ops.
 *
 * @param {object} options Options object; only `options.audio` is read.
 */
function audioPrepareTextToSpeech(options) {
    // Already prepared; the options are not inspected at all in this case.
    if (audioPrepareTextToSpeech.state) { return; }

    const audioOptions = options.audio;
    if (!audioOptions.textToSpeechVoice) { return; }

    const sources = audioOptions.sources;
    const usesTextToSpeech = (
        sources.includes('text-to-speech') ||
        sources.includes('text-to-speech-reading')
    );
    if (!usesTextToSpeech) {
        // Text-to-speech not in use.
        return;
    }

    // Chrome needs this value called once before it will become populated.
    // The first call will return an empty list.
    audioPrepareTextToSpeech.state = true;
    try {
        speechSynthesis.getVoices();
    } catch (e) {
        // NOP
    }
}
audioPrepareTextToSpeech.state = false;

View File

@ -18,9 +18,8 @@
/*global docRangeFromPoint, docSentenceExtract /*global docRangeFromPoint, docSentenceExtract
apiKanjiFind, apiTermsFind, apiNoteView, apiOptionsGet, apiDefinitionsAddable, apiDefinitionAdd apiKanjiFind, apiTermsFind, apiNoteView, apiOptionsGet, apiDefinitionsAddable, apiDefinitionAdd
apiScreenshotGet, apiForward apiScreenshotGet, apiForward, apiAudioGetUrl
audioPrepareTextToSpeech, audioGetFromSources AudioSystem, DisplayGenerator, WindowScroll, DisplayContext, DOM*/
DisplayGenerator, WindowScroll, DisplayContext, DOM*/
class Display { class Display {
constructor(spinner, container) { constructor(spinner, container) {
@ -32,7 +31,7 @@ class Display {
this.index = 0; this.index = 0;
this.audioPlaying = null; this.audioPlaying = null;
this.audioFallback = null; this.audioFallback = null;
this.audioCache = new Map(); this.audioSystem = new AudioSystem({getAudioUri: this._getAudioUri.bind(this)});
this.styleNode = null; this.styleNode = null;
this.eventListeners = new EventListenerCollection(); this.eventListeners = new EventListenerCollection();
@ -364,7 +363,6 @@ class Display {
this.updateDocumentOptions(this.options); this.updateDocumentOptions(this.options);
this.updateTheme(this.options.general.popupTheme); this.updateTheme(this.options.general.popupTheme);
this.setCustomCss(this.options.general.customPopupCss); this.setCustomCss(this.options.general.customPopupCss);
audioPrepareTextToSpeech(this.options);
} }
updateDocumentOptions(options) { updateDocumentOptions(options) {
@ -775,16 +773,16 @@ class Display {
} }
const sources = this.options.audio.sources; const sources = this.options.audio.sources;
let {audio, source} = await audioGetFromSources(expression, sources, this.getOptionsContext(), false, this.audioCache); let audio, source, info;
let info; try {
if (audio === null) { ({audio, source} = await this.audioSystem.getDefinitionAudio(expression, sources));
info = `From source ${1 + sources.indexOf(source)}: ${source}`;
} catch (e) {
if (this.audioFallback === null) { if (this.audioFallback === null) {
this.audioFallback = new Audio('/mixed/mp3/button.mp3'); this.audioFallback = new Audio('/mixed/mp3/button.mp3');
} }
audio = this.audioFallback; audio = this.audioFallback;
info = 'Could not find audio'; info = 'Could not find audio';
} else {
info = `From source ${1 + sources.indexOf(source)}: ${source}`;
} }
const button = this.audioButtonFindImage(entryIndex); const button = this.audioButtonFindImage(entryIndex);
@ -918,4 +916,9 @@ class Display {
const key = event.key; const key = event.key;
return (typeof key === 'string' ? (key.length === 1 ? key.toUpperCase() : key) : ''); return (typeof key === 'string' ? (key.length === 1 ? key.toUpperCase() : key) : '');
} }
async _getAudioUri(definition, source) {
const optionsContext = this.getOptionsContext();
return await apiAudioGetUrl(definition, source, optionsContext);
}
} }