1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
6 * @fileoverview Sends Text-To-Speech commands to Chrome's native TTS
11 goog.provide('cvox.TtsBackground');
13 goog.require('cvox.AbstractTts');
14 goog.require('cvox.ChromeTtsBase');
15 goog.require('cvox.ChromeVox');
16 goog.require('cvox.MathMap');
17 goog.require('goog.i18n.MessageFormat');
/**
 * A single speech request: the text to speak, the speech properties to speak
 * it with, and a numeric id taken from a shared, ever-increasing counter.
 * @constructor
 * @param {string} textString The string of text to be spoken.
 * @param {Object} properties Speech properties to use for this utterance.
 */
cvox.Utterance = function(textString, properties) {
  // Claim the current counter value as this utterance's id, then advance the
  // shared counter so the next utterance receives a different one.
  var assignedId = cvox.Utterance.nextUtteranceId_;
  cvox.Utterance.nextUtteranceId_ = assignedId + 1;

  this.textString = textString;
  this.properties = properties;
  this.id = assignedId;
};

/**
 * The next utterance id to use.
 * @type {number}
 * @private
 */
cvox.Utterance.nextUtteranceId_ = 1;
// Constructor for the background TTS object. It restores saved speech
// settings, builds the punctuation tables, creates the utterance queue and
// registers listeners that re-resolve the voice when it changes.
40 * @param {boolean=} opt_enableMath Whether to process math. Used when running
41 * on forge. Defaults to true.
42 * @extends {cvox.ChromeTtsBase}
44 cvox.TtsBackground = function(opt_enableMath) {
45 opt_enableMath = opt_enableMath == undefined ? true : opt_enableMath;
// Restore rate/pitch/volume from localStorage. Because of the || operator a
// missing, unparsable or zero stored value falls back to propertyDefault.
48 this.ttsProperties['rate'] = (parseFloat(localStorage['rate']) ||
49 this.propertyDefault['rate']);
50 this.ttsProperties['pitch'] = (parseFloat(localStorage['pitch']) ||
51 this.propertyDefault['pitch']);
52 this.ttsProperties['volume'] = (parseFloat(localStorage['volume']) ||
53 this.propertyDefault['volume']);
55 // Use the current locale as the speech language if not otherwise
57 if (this.ttsProperties['lang'] == undefined) {
58 this.ttsProperties['lang'] =
59 chrome.i18n.getMessage('@@ui_locale').replace('_', '-');
// Starting at 'end' makes isSpeaking() report false until an event arrives.
62 this.lastEventType = 'end';
// Index into punctuationEchoes_; index 1 is used when nothing is stored.
64 /** @private {number} */
65 this.currentPunctuationEcho_ =
66 parseInt(localStorage[cvox.AbstractTts.PUNCTUATION_ECHO] || 1, 10);
69 * @type {!Array.<{name:(string),
// One entry per punctuation level; preprocess() reads each entry's name,
// regexp and clear fields, and cyclePunctuationEcho() returns its msg.
75 this.punctuationEchoes_ = [
77 * Punctuation echoed for the 'none' option.
81 msg: 'no_punctuation',
82 regexp: /[-$#"()*;:<>\n\\\/+='~`@_]/g,
87 * Punctuation echoed for the 'some' option.
91 msg: 'some_punctuation',
92 regexp: /[$#"*<>\\\/\{\}+=~`%]/g,
97 * Punctuation echoed for the 'all' option.
101 msg: 'all_punctuation',
102 regexp: /[-$#"()*;:<>\n\\\/\{\}\[\]+='~`!@_.,?%]/g,
108 * A list of punctuation characters that should always be spliced into output
109 * even with literal word substitutions.
110 * This is important for tts prosity.
111 * @type {!Array.<string>}
114 this.retainPunctuation_ =
115 [';', '?', '!', '\''];
118 * Mapping for math elements.
119 * @type {cvox.MathMap}
// Null when math processing is disabled via opt_enableMath.
121 this.mathmap = opt_enableMath ? new cvox.MathMap() : null;
124 * The id of a callback returned from setTimeout.
125 * @type {number|undefined}
131 * @type {Object.<string, string>}
// The phonetic map is stored as a JSON string in the 'phonetic_map' message.
// NOTE(review): the log call below presumably sits in a catch block that is
// not visible in this listing - confirm.
135 this.PHONETIC_MAP_ = /** @type {Object.<string, string>} */(
136 JSON.parse(cvox.ChromeVox.msgs.getMsg('phonetic_map')));
138 console.log('Error; unable to parse phonetic map msg.');
142 * Capturing tts event listeners.
143 * @type {Array.<cvox.TtsCapturingEventListener>}
146 this.capturingTtsEventListeners_ = [];
149 * The current utterance.
150 * @type {cvox.Utterance}
153 this.currentUtterance_ = null;
156 * The utterance queue.
157 * @type {Array.<cvox.Utterance>}
160 this.utteranceQueue_ = [];
// One-time migration: copy a voiceName kept in localStorage into
// chrome.storage.local, then remove the old entry.
162 // TODO(dtseng): Done while migrating away from using localStorage.
163 if (localStorage['voiceName']) {
164 chrome.storage.local.set({voiceName: localStorage['voiceName']});
165 delete localStorage['voiceName'];
// Re-resolve the voice when the voice list changes and when the stored
// voiceName changes.
// NOTE(review): these callbacks use |this|; their binding is not visible in
// this listing - confirm they are bound to the TtsBackground instance.
168 window.speechSynthesis.onvoiceschanged = function() {
169 chrome.storage.local.get({voiceName: ''}, function(items) {
170 this.updateVoice_(items.voiceName);
174 chrome.storage.onChanged.addListener(function(changes, namespace) {
175 if (changes.voiceName) {
176 this.updateVoice_(changes.voiceName.newValue);
180 goog.inherits(cvox.TtsBackground, cvox.ChromeTtsBase);
184 * The amount of time to wait before speaking a phonetic word for a
// Used as the setTimeout delay in pronouncePhonetically_.
190 cvox.TtsBackground.PHONETIC_DELAY_MS_ = 1000;
193 * The list of properties allowed to be passed to the chrome.tts.speak API.
194 * Anything outside this list will be stripped.
195 * @type {Array.<string>}
// startSpeakingNextItemInQueue_ copies only these keys into the options
// object it hands to chrome.tts.speak.
// NOTE(review): only one entry of the list is visible in this listing.
199 cvox.TtsBackground.ALLOWED_PROPERTIES_ = [
208 'requiredEventTypes',
// Speaks |textString|. Text that splits into more than two chunks is
// re-submitted chunk by chunk through speak(). Otherwise the text is
// preprocessed, wrapped in a cvox.Utterance together with the merged
// properties, and handed to speakUsingQueue_.
214 cvox.TtsBackground.prototype.speak = function(
215 textString, queueMode, properties) {
216 goog.base(this, 'speak', textString, queueMode, properties);
// A missing queue mode means "append to the queue".
221 if (queueMode === undefined) {
222 queueMode = cvox.AbstractTts.QUEUE_MODE_QUEUE;
225 // Chunk to improve responsiveness. Use a replace/split pattern in order to
226 // retain the original punctuation.
227 var splitTextString = textString.replace(/([-\n\r.,!?;])(\s)/g, '$1$2|');
228 splitTextString = splitTextString.split('|');
229 // Since we are substituting the chunk delimiters back into the string, only
230 // recurse when there are more than 2 split items. This should result in only
231 // one recursive call.
232 if (splitTextString.length > 2) {
233 var startCallback = properties['startCallback'];
234 var endCallback = properties['endCallback'];
235 for (var i = 0; i < splitTextString.length; i++) {
// Each chunk gets its own shallow copy of the caller's properties.
236 var propertiesCopy = {};
237 for (var p in properties) {
238 propertiesCopy[p] = properties[p];
// Only the first chunk keeps startCallback and only the last chunk keeps
// endCallback; every other chunk gets null for both.
240 propertiesCopy['startCallback'] = i == 0 ? startCallback : null;
241 propertiesCopy['endCallback'] =
242 i == (splitTextString.length - 1) ? endCallback : null;
243 this.speak(splitTextString[i], queueMode, propertiesCopy);
// The caller's queue mode applies to the first chunk only; later chunks are
// appended.
244 queueMode = cvox.AbstractTts.QUEUE_MODE_QUEUE;
249 textString = this.preprocess(textString, properties);
251 // TODO(dtseng): Google TTS has bad performance when speaking numbers. This
252 // pattern causes ChromeVox to read numbers as digits rather than words.
253 textString = this.getNumberAsDigits_(textString);
255 // TODO(dtseng): Google TTS flushes the queue when encountering strings of
256 // this pattern which stops ChromeVox speech.
// NOTE(review): text with no word characters runs its callbacks right here.
// When the last chunk of a split string is empty, the endCallback may run
// before the earlier chunks have finished speaking - confirm intended.
257 if (!textString || !textString.match(/\w+/g)) {
258 // We still want to callback for listeners in our content script.
259 if (properties['startCallback']) {
261 properties['startCallback']();
265 if (properties['endCallback']) {
267 properties['endCallback']();
271 if (queueMode === cvox.AbstractTts.QUEUE_MODE_FLUSH) {
277 var mergedProperties = this.mergeProperties(properties);
// The currently selected voice, when there is one, overrides any voiceName
// in the merged properties.
279 if (this.currentVoice) {
280 mergedProperties['voiceName'] = this.currentVoice;
// A category flush without a category is treated as a plain flush.
283 if (queueMode == cvox.AbstractTts.QUEUE_MODE_CATEGORY_FLUSH &&
284 !mergedProperties['category']) {
285 queueMode = cvox.AbstractTts.QUEUE_MODE_FLUSH;
288 var utterance = new cvox.Utterance(textString, mergedProperties);
289 this.speakUsingQueue_(utterance, queueMode);
/**
 * Use the speech queue to handle the given speech request. For the two
 * flushing queue modes, the current utterance and every pending utterance
 * that shouldCancel_ approves are cancelled first. The new utterance is then
 * appended and the queue is asked to start speaking.
 * @param {cvox.Utterance} utterance The utterance to speak.
 * @param {number} queueMode The queue mode.
 * @private
 */
cvox.TtsBackground.prototype.speakUsingQueue_ = function(utterance, queueMode) {
  var isFlushing = queueMode == cvox.AbstractTts.QUEUE_MODE_FLUSH ||
      queueMode == cvox.AbstractTts.QUEUE_MODE_CATEGORY_FLUSH;

  if (isFlushing) {
    // Drop whatever is being spoken right now, if it may be interrupted.
    var speaking = this.currentUtterance_;
    if (this.shouldCancel_(speaking, utterance, queueMode)) {
      this.cancelUtterance_(speaking);
      this.currentUtterance_ = null;
    }

    // Sweep the pending queue front to back. The index only advances when the
    // entry is kept, because splice() shifts the following entries down.
    for (var index = 0; index < this.utteranceQueue_.length;) {
      var pending = this.utteranceQueue_[index];
      if (this.shouldCancel_(pending, utterance, queueMode)) {
        this.cancelUtterance_(pending);
        this.utteranceQueue_.splice(index, 1);
      } else {
        index++;
      }
    }
  }

  // Enqueue the new request and kick the queue.
  this.utteranceQueue_.push(utterance);
  this.startSpeakingNextItemInQueue_();
};
/**
 * If nothing is speaking, pop the first item off the speech queue and
 * start speaking it. This is called when a speech request is made and
 * when the current utterance finishes speaking.
 *
 * Only properties named in cvox.TtsBackground.ALLOWED_PROPERTIES_ are passed
 * to chrome.tts.speak. Unset values (undefined, null, '', NaN) are omitted,
 * while a numeric 0 or boolean false is forwarded so that settings such as
 * a volume of 0 are honored rather than silently replaced by the engine
 * default.
 * @private
 */
cvox.TtsBackground.prototype.startSpeakingNextItemInQueue_ = function() {
  // Something is already being spoken; its end event will call us again.
  if (this.currentUtterance_) {
    return;
  }

  if (this.utteranceQueue_.length == 0) {
    return;
  }

  // There is no voice to speak with yet; updateVoice_ calls back into this
  // method once it has picked one.
  if (!this.currentVoice) {
    return;
  }

  this.currentUtterance_ = this.utteranceQueue_.shift();
  var utteranceId = this.currentUtterance_.id;
  var properties = this.currentUtterance_.properties;

  // Tag every TTS event with the id of the utterance it belongs to so that
  // onTtsEvent_ can ignore events from utterances that are no longer current.
  properties['onEvent'] = goog.bind(function(event) {
    this.onTtsEvent_(event, utteranceId);
  }, this);

  var validatedProperties = {};
  for (var i = 0; i < cvox.TtsBackground.ALLOWED_PROPERTIES_.length; i++) {
    var p = cvox.TtsBackground.ALLOWED_PROPERTIES_[i];
    var value = properties[p];
    // Keep legitimate falsy settings (0, false); drop only unset values.
    if (value || value === 0 || value === false) {
      validatedProperties[p] = value;
    }
  }

  chrome.tts.speak(this.currentUtterance_.textString,
                   validatedProperties);
};
/**
 * Called when we get a speech event from Chrome. We ignore any event
 * that doesn't pertain to the current utterance, but when speech starts
 * or ends we optionally call callback functions, and start speaking the
 * next utterance if there's another one enqueued.
 *
 * On an error event the failed utterance is released before the queue is
 * restarted. Without that, startSpeakingNextItemInQueue_ would see a current
 * utterance, return immediately, and queued speech would stall.
 * @param {Object} event The TTS event from chrome.
 * @param {number} utteranceId The id of the associated utterance.
 * @private
 */
cvox.TtsBackground.prototype.onTtsEvent_ = function(event, utteranceId) {
  // Recorded for every event, even stale ones; isSpeaking() reads it.
  this.lastEventType = event['type'];

  // Ignore events sent on utterances other than the current one.
  if (!this.currentUtterance_ ||
      utteranceId != this.currentUtterance_.id) {
    return;
  }

  var utterance = this.currentUtterance_;

  switch (event.type) {
    case 'start':
      this.capturingTtsEventListeners_.forEach(function(listener) {
        listener.onTtsStart();
      });
      if (utterance.properties['startCallback']) {
        try {
          utterance.properties['startCallback']();
        } catch (e) {
          // A failing caller callback must not break event handling.
        }
      }
      break;
    case 'end':
      this.capturingTtsEventListeners_.forEach(function(listener) {
        listener.onTtsEnd();
      });
      // Intentionally falls through.
    case 'interrupted':
      this.cancelUtterance_(utterance);
      this.currentUtterance_ = null;
      this.startSpeakingNextItemInQueue_();
      break;
    case 'error':
      this.onError_(event['errorMessage']);
      // Release the failed utterance so the queue can move on.
      this.currentUtterance_ = null;
      this.startSpeakingNextItemInQueue_();
      break;
  }
};
/**
 * Determines if |utteranceToCancel| should be canceled (interrupted if
 * currently speaking, or removed from the queue if not), given the new
 * utterance we want to speak and the queue mode. A missing utterance, or one
 * flagged 'doNotInterrupt', is never canceled. Otherwise QUEUE never cancels,
 * FLUSH always cancels, and CATEGORY_FLUSH cancels only utterances with the
 * same category as the new one.
 *
 * @param {cvox.Utterance} utteranceToCancel The utterance in question.
 * @param {cvox.Utterance} newUtterance The new utterance we're enqueueing.
 * @param {number} queueMode The queue mode.
 * @return {boolean} True if this utterance should be canceled.
 * @private
 */
cvox.TtsBackground.prototype.shouldCancel_ =
    function(utteranceToCancel, newUtterance, queueMode) {
  var cancellable = !!utteranceToCancel &&
      !utteranceToCancel.properties['doNotInterrupt'];
  if (!cancellable) {
    return false;
  }

  if (queueMode === cvox.AbstractTts.QUEUE_MODE_QUEUE) {
    return false;
  }
  if (queueMode === cvox.AbstractTts.QUEUE_MODE_FLUSH) {
    return true;
  }
  if (queueMode === cvox.AbstractTts.QUEUE_MODE_CATEGORY_FLUSH) {
    var oldCategory = utteranceToCancel.properties['category'];
    var newCategory = newUtterance.properties['category'];
    return oldCategory == newCategory;
  }
  // Any other queue mode produces no explicit result.
};
/**
 * Do any cleanup necessary to cancel an utterance, like calling its
 * end callback function if it has one. Does nothing for a missing utterance.
 * @param {cvox.Utterance} utterance The utterance to cancel.
 * @private
 */
cvox.TtsBackground.prototype.cancelUtterance_ = function(utterance) {
  if (!utterance || !utterance.properties['endCallback']) {
    return;
  }

  try {
    utterance.properties['endCallback']();
  } catch (e) {
    // Errors thrown by the caller's callback are deliberately ignored.
  }
};
/**
 * Adjusts a speech property through the base class, then persists the
 * resulting value in localStorage under the property's name.
 * @override
 */
cvox.TtsBackground.prototype.increaseOrDecreaseProperty =
    function(propertyName, increase) {
  goog.base(this, 'increaseOrDecreaseProperty', propertyName, increase);

  // Read the value back after the base class has updated it.
  var updatedValue = this.ttsProperties[propertyName];
  localStorage[propertyName] = updatedValue;
};
/**
 * Reports whether speech is in progress, judged by the type of the most
 * recent TTS event: anything other than 'end' counts as speaking.
 * @override
 */
cvox.TtsBackground.prototype.isSpeaking = function() {
  goog.base(this, 'isSpeaking');

  var finished = this.lastEventType == 'end';
  return !finished;
};
// Stops speech bookkeeping: cancels the current utterance and every queued
// utterance (running their end callbacks via cancelUtterance_), then empties
// the queue.
477 cvox.TtsBackground.prototype.stop = function() {
478 goog.base(this, 'stop');
// cancelUtterance_ tolerates a null current utterance.
480 this.cancelUtterance_(this.currentUtterance_);
481 this.currentUtterance_ = null;
483 for (var i = 0; i < this.utteranceQueue_.length; i++) {
484 this.cancelUtterance_(this.utteranceQueue_[i]);
// Truncate in place so the same array object stays in use.
486 this.utteranceQueue_.length = 0;
/**
 * Registers a listener whose onTtsStart handler is invoked from onTtsEvent_
 * when the current utterance starts.
 * @override
 */
cvox.TtsBackground.prototype.addCapturingEventListener = function(listener) {
  var listeners = this.capturingTtsEventListeners_;
  listeners.push(listener);
};
/**
 * An error handler invoked when a TTS error event arrives. It re-resolves
 * the current voice through updateVoice_; the error message itself is not
 * used.
 * @param {string} errorMessage Describes the error (set by onEvent).
 * @private
 */
cvox.TtsBackground.prototype.onError_ = function(errorMessage) {
  var voiceToRestore = this.currentVoice;
  this.updateVoice_(voiceToRestore);
};
/**
 * Converts an engine property value to a percentage from 0.00 to 1.00,
 * measured as the distance of the current value from the property's minimum
 * divided by the width of its [min, max] range.
 * @param {string} property The property to convert.
 * @return {?number} The percentage of the property, or null when it cannot
 *     be computed (unknown property, or a zero-width range).
 */
cvox.TtsBackground.prototype.propertyToPercentage = function(property) {
  var value = this.ttsProperties[property];
  var min = this.propertyMin[property];
  var max = this.propertyMax[property];

  var percentage = (value - min) / Math.abs(max - min);

  // An unknown property yields NaN and a zero-width range yields
  // NaN/Infinity; report both as null rather than a non-finite number.
  return isFinite(percentage) ? percentage : null;
};
/**
 * Prepares text for the speech engine: math expansion (when requested),
 * the base class's generic processing, punctuation expansion, an optional
 * delayed phonetic pronunciation, a single-character description lookup and
 * whitespace normalization.
 *
 * The punctuation level is the one named by
 * properties[cvox.AbstractTts.PUNCTUATION_ECHO] when that names a known
 * level, otherwise the current level. If neither resolves to a table entry,
 * punctuation expansion is skipped instead of throwing.
 * @override
 */
cvox.TtsBackground.prototype.preprocess = function(text, properties) {
  properties = properties ? properties : {};

  // Perform specialized processing, such as mathematics.
  if (properties.math) {
    text = this.preprocessMath_(text, properties.math);
  }

  // Perform generic processing.
  text = goog.base(this, 'preprocess', text, properties);

  // Perform any remaining processing such as punctuation expansion.
  var pE = null;
  var requestedEcho = properties[cvox.AbstractTts.PUNCTUATION_ECHO];
  if (requestedEcho) {
    for (var i = 0; i < this.punctuationEchoes_.length; i++) {
      if (this.punctuationEchoes_[i].name == requestedEcho) {
        pE = this.punctuationEchoes_[i];
        break;
      }
    }
  }
  if (!pE) {
    // No level requested, or the requested name is unknown: use the current
    // level.
    pE = this.punctuationEchoes_[this.currentPunctuationEcho_];
  }
  if (pE) {
    text = text.replace(pE.regexp, this.createPunctuationReplace_(pE.clear));
  }

  // Try pronouncing phonetically for single characters. Cancel previous calls
  // to pronouncePhonetically_ if we fail to pronounce on this invocation or if
  // this text is math which should never be pronounced phonetically.
  if (properties.math ||
      !properties['phoneticCharacters'] ||
      !this.pronouncePhonetically_(text)) {
    this.clearTimeout_();
  }

  // Try looking up in our unicode tables for a short description.
  if (!properties.math && text.length == 1 && this.mathmap) {
    text = this.mathmap.store.lookupString(
        text,
        cvox.MathStore.createDynamicConstraint('default', 'short')) || text;
  }

  // Remove all whitespace from the beginning and end, and collapse all
  // inner strings of whitespace to a single space.
  text = text.replace(/\s+/g, ' ').replace(/^\s+|\s+$/g, '');

  return text;
};
/**
 * Method that cycles among the available punctuation echo levels. It
 * advances to the next level (wrapping to the first after the last), stores
 * the new level index in localStorage and returns that level's message id.
 * @return {string} The resulting punctuation level message id.
 */
cvox.TtsBackground.prototype.cyclePunctuationEcho = function() {
  var levelCount = this.punctuationEchoes_.length;
  var nextLevel = (this.currentPunctuationEcho_ + 1) % levelCount;

  this.currentPunctuationEcho_ = nextLevel;
  localStorage[cvox.AbstractTts.PUNCTUATION_ECHO] = nextLevel;

  return this.punctuationEchoes_[nextLevel].msg;
};
582 * Process a math expression into a string suitable for a speech engine.
583 * @param {string} text Text representing a math expression.
584 * @param {Object= } math Parameter containing information how to
585 * process the math expression.
586 * @return {string} The string with a spoken version of the math expression.
// Looks |text| up in the math store under a constraint built from
// math['domain'] and math['style'].
// NOTE(review): what is returned when the lookup finds nothing is not
// visible in this listing - confirm.
589 cvox.TtsBackground.prototype.preprocessMath_ = function(text, math) {
594 var dynamicCstr = cvox.MathStore.createDynamicConstraint(
595 math['domain'], math['style']);
596 result = this.mathmap.store.lookupString(text, dynamicCstr);
/**
 * Converts a number into space-separated digits.
 * Each run of digits longer than four characters is rewritten with a space
 * between every digit; runs of four or fewer digits (such as 2011, or either
 * half of 123,456) are left as they are.
 * @param {string} text The text to process.
 * @return {string} A string with all numbers converted.
 * @private
 */
cvox.TtsBackground.prototype.getNumberAsDigits_ = function(text) {
  return text.replace(/\d+/g, function(digits) {
    return digits.length > 4 ? digits.split('').join(' ') : digits;
  });
};
624 * Constructs a function for string.replace that handles description of a
625 * punctuation character.
626 * @param {boolean} clear Whether we want to use whitespace in place of match.
627 * @return {function(string): string} The replacement function.
// The returned function is created with goog.bind and reads
// this.retainPunctuation_.
630 cvox.TtsBackground.prototype.createPunctuationReplace_ = function(clear) {
631 return goog.bind(function(match) {
// |retain| depends on whether the matched character is listed in
// retainPunctuation_.
// NOTE(review): the two alternatives of this conditional are not visible in
// this listing - confirm.
632 var retain = this.retainPunctuation_.indexOf(match) != -1 ?
// With |clear| set the result is just |retain|. Otherwise it is the message
// for the character (looked up via CHARACTER_DICTIONARY and formatted with
// COUNT 1), padded with spaces and followed by |retain|.
634 return clear ? retain :
635 ' ' + (new goog.i18n.MessageFormat(cvox.ChromeVox.msgs.getMsg(
636 cvox.AbstractTts.CHARACTER_DICTIONARY[match])))
637 .format({'COUNT': 1}) + retain + ' ';
643 * Pronounces single letters phonetically after some timeout.
644 * @param {string} text The text.
645 * @return {boolean} True if the text resulted in speech.
648 cvox.TtsBackground.prototype.pronouncePhonetically_ = function(text) {
// PHONETIC_MAP_ comes from the 'phonetic_map' message parsed in the
// constructor.
649 if (!this.PHONETIC_MAP_) {
// The lookup uses the lower-cased text as the key; from here on |text| holds
// the mapped value.
652 text = text.toLowerCase();
653 text = this.PHONETIC_MAP_[text];
// Cancel any earlier pending timer before scheduling a new one.
655 this.clearTimeout_();
// The timer id is kept so that clearTimeout_ can cancel it.
// NOTE(review): the timer callback's body is not visible in this listing.
657 this.timeoutId_ = setTimeout(function() {
659 }, cvox.TtsBackground.PHONETIC_DELAY_MS_);
/**
 * Clears the last timeout set via setTimeout, if one is pending, and forgets
 * its id.
 * @private
 */
cvox.TtsBackground.prototype.clearTimeout_ = function() {
  // Nothing scheduled: nothing to cancel.
  if (!goog.isDef(this.timeoutId_)) {
    return;
  }

  clearTimeout(this.timeoutId_);
  this.timeoutId_ = undefined;
};
679 * Update the current voice used to speak based upon values in storage. If that
680 * does not succeed, fallback to use system locale when picking a voice.
681 * @param {string} voiceName Voice name to set.
// Asks chrome.tts for the available voices. The callback is bound to this
// object with goog.bind.
684 cvox.TtsBackground.prototype.updateVoice_ = function(voiceName) {
685 chrome.tts.getVoices(
686 goog.bind(function(voices) {
// First pass: look for a voice whose name equals |voiceName|. If found, it
// becomes the current voice and the queue is restarted.
687 for (var i = 0, v; v = voices[i]; i++) {
688 if (v['voiceName'] == voiceName) {
689 this.currentVoice = v['voiceName'];
690 this.startSpeakingNextItemInQueue_();
696 chrome.i18n.getMessage('@@ui_locale').replace('_', '-');
// Fallback: order the voices by their 'remote' flag and by whether their
// 'lang' equals the UI locale, then take the first voice.
// NOTE(review): the comparator's return values are not visible in this
// listing - confirm which side of each test sorts first.
697 voices.sort(function(v1, v2) {
698 if (v1['remote'] && !v2['remote']) {
701 if (!v1['remote'] && v2['remote']) {
704 if (v1['lang'] == currentLocale && v2['lang'] != currentLocale) {
707 if (v1['lang'] != currentLocale && v2['lang'] == currentLocale) {
// NOTE(review): voices[0] is read without checking for an empty list.
713 this.currentVoice = voices[0].voiceName;
714 this.startSpeakingNextItemInQueue_();