From 05b602d0ce62ca340b8d4754f99cbc83f438dac3 Mon Sep 17 00:00:00 2001 From: Cheng Zhao Date: Tue, 7 Oct 2014 21:18:44 +0800 Subject: [PATCH] Import Chrome's tts code --- atom.gyp | 14 + atom/common/common_message_generator.h | 1 + atom/renderer/atom_renderer_client.cc | 5 + atom/renderer/atom_renderer_client.h | 2 + .../chrome/browser/speech/tts_controller.h | 343 +++++++++++++ .../browser/speech/tts_controller_impl.cc | 464 ++++++++++++++++++ .../browser/speech/tts_controller_impl.h | 104 ++++ chromium_src/chrome/browser/speech/tts_mac.mm | 352 +++++++++++++ .../browser/speech/tts_message_filter.cc | 176 +++++++ .../browser/speech/tts_message_filter.h | 64 +++ .../chrome/browser/speech/tts_platform.cc | 28 ++ .../chrome/browser/speech/tts_platform.h | 81 +++ chromium_src/chrome/browser/speech/tts_win.cc | 257 ++++++++++ chromium_src/chrome/common/tts_messages.h | 69 +++ .../chrome/common/tts_utterance_request.cc | 30 ++ .../chrome/common/tts_utterance_request.h | 44 ++ .../chrome/renderer/tts_dispatcher.cc | 200 ++++++++ chromium_src/chrome/renderer/tts_dispatcher.h | 78 +++ 18 files changed, 2312 insertions(+) create mode 100644 chromium_src/chrome/browser/speech/tts_controller.h create mode 100644 chromium_src/chrome/browser/speech/tts_controller_impl.cc create mode 100644 chromium_src/chrome/browser/speech/tts_controller_impl.h create mode 100644 chromium_src/chrome/browser/speech/tts_mac.mm create mode 100644 chromium_src/chrome/browser/speech/tts_message_filter.cc create mode 100644 chromium_src/chrome/browser/speech/tts_message_filter.h create mode 100644 chromium_src/chrome/browser/speech/tts_platform.cc create mode 100644 chromium_src/chrome/browser/speech/tts_platform.h create mode 100644 chromium_src/chrome/browser/speech/tts_win.cc create mode 100644 chromium_src/chrome/common/tts_messages.h create mode 100644 chromium_src/chrome/common/tts_utterance_request.cc create mode 100644 chromium_src/chrome/common/tts_utterance_request.h create mode 100644 
chromium_src/chrome/renderer/tts_dispatcher.cc create mode 100644 chromium_src/chrome/renderer/tts_dispatcher.h diff --git a/atom.gyp b/atom.gyp index b98b5e2b64..14ad0a8e82 100644 --- a/atom.gyp +++ b/atom.gyp @@ -279,6 +279,15 @@ 'chromium_src/chrome/browser/printing/printing_message_filter.h', 'chromium_src/chrome/browser/printing/printing_ui_web_contents_observer.cc', 'chromium_src/chrome/browser/printing/printing_ui_web_contents_observer.h', + 'chromium_src/chrome/browser/speech/tts_controller.h', + 'chromium_src/chrome/browser/speech/tts_controller_impl.cc', + 'chromium_src/chrome/browser/speech/tts_controller_impl.h', + 'chromium_src/chrome/browser/speech/tts_mac.mm', + 'chromium_src/chrome/browser/speech/tts_message_filter.cc', + 'chromium_src/chrome/browser/speech/tts_message_filter.h', + 'chromium_src/chrome/browser/speech/tts_platform.cc', + 'chromium_src/chrome/browser/speech/tts_platform.h', + 'chromium_src/chrome/browser/speech/tts_win.cc', 'chromium_src/chrome/browser/ui/libgtk2ui/app_indicator_icon_menu.cc', 'chromium_src/chrome/browser/ui/libgtk2ui/app_indicator_icon_menu.h', 'chromium_src/chrome/browser/ui/libgtk2ui/gtk2_status_icon.cc', @@ -289,11 +298,16 @@ 'chromium_src/chrome/browser/ui/views/status_icons/status_tray_state_changer_win.h', 'chromium_src/chrome/common/print_messages.cc', 'chromium_src/chrome/common/print_messages.h', + 'chromium_src/chrome/common/tts_messages.h', + 'chromium_src/chrome/common/tts_utterance_request.cc', + 'chromium_src/chrome/common/tts_utterance_request.h', 'chromium_src/chrome/renderer/printing/print_web_view_helper.cc', 'chromium_src/chrome/renderer/printing/print_web_view_helper_linux.cc', 'chromium_src/chrome/renderer/printing/print_web_view_helper_mac.mm', 'chromium_src/chrome/renderer/printing/print_web_view_helper_win.cc', 'chromium_src/chrome/renderer/printing/print_web_view_helper.h', + 'chromium_src/chrome/renderer/tts_dispatcher.cc', + 'chromium_src/chrome/renderer/tts_dispatcher.h', 
'<@(native_mate_files)', ], 'framework_sources': [ diff --git a/atom/common/common_message_generator.h b/atom/common/common_message_generator.h index b6e8240b3b..2f80cfcd49 100644 --- a/atom/common/common_message_generator.h +++ b/atom/common/common_message_generator.h @@ -6,3 +6,4 @@ #include "atom/common/api/api_messages.h" #include "chrome/common/print_messages.h" +#include "chrome/common/tts_messages.h" diff --git a/atom/renderer/atom_renderer_client.cc b/atom/renderer/atom_renderer_client.cc index e344cf11c9..e32eef7964 100644 --- a/atom/renderer/atom_renderer_client.cc +++ b/atom/renderer/atom_renderer_client.cc @@ -128,6 +128,11 @@ void AtomRendererClient::RenderViewCreated(content::RenderView* render_view) { new AtomRenderViewObserver(render_view, this); } +blink::WebSpeechSynthesizer* AtomRendererClient::OverrideSpeechSynthesizer( + blink::WebSpeechSynthesizerClient* client) { + return NULL; +} + void AtomRendererClient::DidCreateScriptContext(blink::WebFrame* frame, v8::Handle<v8::Context> context, int extension_group, diff --git a/atom/renderer/atom_renderer_client.h b/atom/renderer/atom_renderer_client.h index ca76147ddf..f1c3b79a02 100644 --- a/atom/renderer/atom_renderer_client.h +++ b/atom/renderer/atom_renderer_client.h @@ -50,6 +50,8 @@ class AtomRendererClient : public content::ContentRendererClient, virtual void RenderThreadStarted() OVERRIDE; virtual void RenderFrameCreated(content::RenderFrame* render_frame) OVERRIDE; virtual void RenderViewCreated(content::RenderView*) OVERRIDE; + virtual blink::WebSpeechSynthesizer* OverrideSpeechSynthesizer( + blink::WebSpeechSynthesizerClient* client) OVERRIDE; virtual void DidCreateScriptContext(blink::WebFrame* frame, v8::Handle<v8::Context> context, int extension_group, diff --git a/chromium_src/chrome/browser/speech/tts_controller.h b/chromium_src/chrome/browser/speech/tts_controller.h new file mode 100644 index 0000000000..3c40b9e547 --- /dev/null +++ b/chromium_src/chrome/browser/speech/tts_controller.h @@ -0,0 +1,343 @@ +// Copyright 
(c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
+#define CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
+
+#include <queue>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "base/memory/scoped_ptr.h"
+#include "base/memory/singleton.h"
+#include "base/memory/weak_ptr.h"
+#include "url/gurl.h"
+
+class Utterance;
+class TtsPlatformImpl;
+
+namespace base {
+class Value;
+}
+
+namespace content {
+class BrowserContext;
+}
+
+// Events sent back from the TTS engine indicating the progress.
+enum TtsEventType {
+  TTS_EVENT_START,
+  TTS_EVENT_END,
+  TTS_EVENT_WORD,
+  TTS_EVENT_SENTENCE,
+  TTS_EVENT_MARKER,
+  TTS_EVENT_INTERRUPTED,
+  TTS_EVENT_CANCELLED,
+  TTS_EVENT_ERROR,
+  TTS_EVENT_PAUSE,
+  TTS_EVENT_RESUME
+};
+
+enum TtsGenderType {
+  TTS_GENDER_NONE,
+  TTS_GENDER_MALE,
+  TTS_GENDER_FEMALE
+};
+
+// Returns true if this event type is one that indicates an utterance
+// is finished and can be destroyed.
+bool IsFinalTtsEventType(TtsEventType event_type);
+
+// The continuous parameters that apply to a given utterance.
+struct UtteranceContinuousParameters {
+  UtteranceContinuousParameters();
+
+  double rate;
+  double pitch;
+  double volume;
+};
+
+// Information about one voice.
+struct VoiceData {
+  VoiceData();
+  ~VoiceData();
+
+  std::string name;
+  std::string lang;
+  TtsGenderType gender;
+  std::string extension_id;
+  std::set<TtsEventType> events;
+
+  // If true, the synthesis engine is a remote network resource.
+  // It may be higher latency and may incur bandwidth costs.
+  bool remote;
+
+  // If true, this is implemented by this platform's subclass of
+  // TtsPlatformImpl. If false, this is implemented by an extension.
+  bool native;
+  std::string native_voice_identifier;
+};
+
+// Interface that delegates TTS requests to user-installed extensions.
+class TtsEngineDelegate {
+ public:
+  virtual ~TtsEngineDelegate() {}
+
+  // Return a list of all available voices registered.
+  virtual void GetVoices(content::BrowserContext* browser_context,
+                         std::vector<VoiceData>* out_voices) = 0;
+
+  // Speak the given utterance by sending an event to the given TTS engine.
+  virtual void Speak(Utterance* utterance, const VoiceData& voice) = 0;
+
+  // Stop speaking the given utterance by sending an event to the target
+  // associated with this utterance.
+  virtual void Stop(Utterance* utterance) = 0;
+
+  // Pause in the middle of speaking this utterance.
+  virtual void Pause(Utterance* utterance) = 0;
+
+  // Resume speaking this utterance.
+  virtual void Resume(Utterance* utterance) = 0;
+
+  // Load the built-in component extension for ChromeOS.
+  virtual bool LoadBuiltInTtsExtension(
+      content::BrowserContext* browser_context) = 0;
+};
+
+// Class that wants to receive events on utterances.
+class UtteranceEventDelegate {
+ public:
+  virtual ~UtteranceEventDelegate() {}
+  virtual void OnTtsEvent(Utterance* utterance,
+                          TtsEventType event_type,
+                          int char_index,
+                          const std::string& error_message) = 0;
+};
+
+// Class that wants to be notified when the set of
+// voices has changed.
+class VoicesChangedDelegate {
+ public:
+  virtual ~VoicesChangedDelegate() {}
+  virtual void OnVoicesChanged() = 0;
+};
+
+// One speech utterance.
+class Utterance {
+ public:
+  // Construct an utterance for the given browser context; no completion
+  // callback is taken here. Before speaking this utterance, its other
+  // parameters like text, rate, pitch, etc. should all be set.
+  explicit Utterance(content::BrowserContext* browser_context);
+  ~Utterance();
+
+  // Sends an event to the delegate. A final event type (see
+  // IsFinalTtsEventType) marks the utterance finished and drops the delegate;
+  // it does not delete the utterance. Non-negative |char_index| is recorded.
+  void OnTtsEvent(TtsEventType event_type,
+                  int char_index,
+                  const std::string& error_message);
+
+  // Finish an utterance without sending an event to the delegate.
+  void Finish();
+
+  // Getters and setters for the text to speak and other speech options.
+  void set_text(const std::string& text) { text_ = text; }
+  const std::string& text() const { return text_; }
+
+  void set_options(const base::Value* options);
+  const base::Value* options() const { return options_.get(); }
+
+  void set_src_extension_id(const std::string& src_extension_id) {
+    src_extension_id_ = src_extension_id;
+  }
+  const std::string& src_extension_id() const { return src_extension_id_; }
+
+  void set_src_id(int src_id) { src_id_ = src_id; }
+  int src_id() const { return src_id_; }
+
+  void set_src_url(const GURL& src_url) { src_url_ = src_url; }
+  const GURL& src_url() const { return src_url_; }
+
+  void set_voice_name(const std::string& voice_name) {
+    voice_name_ = voice_name;
+  }
+  const std::string& voice_name() const { return voice_name_; }
+
+  void set_lang(const std::string& lang) {
+    lang_ = lang;
+  }
+  const std::string& lang() const { return lang_; }
+
+  void set_gender(TtsGenderType gender) {
+    gender_ = gender;
+  }
+  TtsGenderType gender() const { return gender_; }
+
+  void set_continuous_parameters(const UtteranceContinuousParameters& params) {
+    continuous_parameters_ = params;
+  }
+  const UtteranceContinuousParameters& continuous_parameters() const {
+    return continuous_parameters_;
+  }
+
+  void set_can_enqueue(bool can_enqueue) { can_enqueue_ = can_enqueue; }
+  bool can_enqueue() const { return can_enqueue_; }
+
+  void set_required_event_types(const std::set<TtsEventType>& types) {
+    required_event_types_ = types;
+  }
+  const std::set<TtsEventType>& required_event_types() const {
+    return required_event_types_;
+  }
+
+  void set_desired_event_types(const std::set<TtsEventType>& types) {
+    desired_event_types_ = types;
+  }
+  const std::set<TtsEventType>& desired_event_types() const {
+    return desired_event_types_;
+  }
+
+  const std::string& extension_id() const { return extension_id_; }
+  void set_extension_id(const std::string& extension_id) {
+    extension_id_ = extension_id;
+  }
+
+  UtteranceEventDelegate* event_delegate() const {
+    return event_delegate_.get();
+  }
+  void set_event_delegate(
+      base::WeakPtr<UtteranceEventDelegate> event_delegate) {
+    event_delegate_ = event_delegate;
+  }
+
+  // Getters and setters for internal state.
+  content::BrowserContext* browser_context() const { return browser_context_; }
+  int id() const { return id_; }
+  bool finished() const { return finished_; }
+
+ private:
+  // The BrowserContext that initiated this utterance.
+  content::BrowserContext* browser_context_;
+
+  // The extension ID of the extension providing TTS for this utterance, or
+  // empty if native TTS is being used.
+  std::string extension_id_;
+
+  // The unique ID of this utterance, used to associate callback functions
+  // with utterances.
+  int id_;
+
+  // The id of the next utterance, so we can associate requests with
+  // responses.
+  static int next_utterance_id_;
+
+  // The text to speak.
+  std::string text_;
+
+  // The full options arg passed to tts.speak, which may include fields
+  // other than the ones we explicitly parse, below.
+  scoped_ptr<base::Value> options_;
+
+  // The extension ID of the extension that called speak() and should
+  // receive events.
+  std::string src_extension_id_;
+
+  // The source extension's ID of this utterance, so that it can associate
+  // events with the appropriate callback.
+  int src_id_;
+
+  // The URL of the page where the source extension called speak.
+  GURL src_url_;
+
+  // The delegate to be called when an utterance event is fired.
+  base::WeakPtr<UtteranceEventDelegate> event_delegate_;
+
+  // The parsed options.
+  std::string voice_name_;
+  std::string lang_;
+  TtsGenderType gender_;
+  UtteranceContinuousParameters continuous_parameters_;
+  bool can_enqueue_;
+  std::set<TtsEventType> required_event_types_;
+  std::set<TtsEventType> desired_event_types_;
+
+  // The index of the current char being spoken.
+  int char_index_;
+
+  // True if this utterance received an event indicating it's done.
+  bool finished_;
+};
+
+// Singleton class that manages text-to-speech for the TTS and TTS engine
+// extension APIs, maintaining a queue of pending utterances and keeping
+// track of all state.
+class TtsController {
+ public:
+  // Get the single instance of this class.
+  static TtsController* GetInstance();
+
+  // Returns true if we're currently speaking an utterance.
+  virtual bool IsSpeaking() = 0;
+
+  // Speak the given utterance. If the utterance's can_enqueue flag is true
+  // and another utterance is in progress, adds it to the end of the queue.
+  // Otherwise, interrupts any current utterance and speaks this one
+  // immediately.
+  virtual void SpeakOrEnqueue(Utterance* utterance) = 0;
+
+  // Stop all utterances and flush the queue. Implies leaving pause mode
+  // as well.
+  virtual void Stop() = 0;
+
+  // Pause the speech queue. Some engines may support pausing in the middle
+  // of an utterance.
+  virtual void Pause() = 0;
+
+  // Resume speaking.
+  virtual void Resume() = 0;
+
+  // Handle events received from the speech engine. Events are forwarded to
+  // the callback function, and in addition, completion and error events
+  // trigger finishing the current utterance and starting the next one, if
+  // any.
+  virtual void OnTtsEvent(int utterance_id,
+                          TtsEventType event_type,
+                          int char_index,
+                          const std::string& error_message) = 0;
+
+  // Return a list of all available voices, including the native voice,
+  // if supported, and all voices registered by extensions.
+  virtual void GetVoices(content::BrowserContext* browser_context,
+                         std::vector<VoiceData>* out_voices) = 0;
+
+  // Called by the extension system or platform implementation when the
+  // list of voices may have changed and should be re-queried.
+  virtual void VoicesChanged() = 0;
+
+  // Add a delegate that wants to be notified when the set of voices changes.
+  virtual void AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) = 0;
+
+  // Remove delegate that wants to be notified when the set of voices changes.
+  virtual void RemoveVoicesChangedDelegate(VoicesChangedDelegate* delegate) = 0;
+
+  // Set the delegate that processes TTS requests with user-installed
+  // extensions.
+  virtual void SetTtsEngineDelegate(TtsEngineDelegate* delegate) = 0;
+
+  // Get the delegate that processes TTS requests with user-installed
+  // extensions.
+  virtual TtsEngineDelegate* GetTtsEngineDelegate() = 0;
+
+  // For unit testing.
+  virtual void SetPlatformImpl(TtsPlatformImpl* platform_impl) = 0;
+  virtual int QueueSize() = 0;
+
+ protected:
+  virtual ~TtsController() {}
+};
+
+#endif  // CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
\ No newline at end of file
diff --git a/chromium_src/chrome/browser/speech/tts_controller_impl.cc b/chromium_src/chrome/browser/speech/tts_controller_impl.cc
new file mode 100644
index 0000000000..272cafddb9
--- /dev/null
+++ b/chromium_src/chrome/browser/speech/tts_controller_impl.cc
@@ -0,0 +1,464 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/speech/tts_controller_impl.h"
+
+#include <string>
+#include <vector>
+
+#include "base/float_util.h"
+#include "base/values.h"
+#include "chrome/browser/browser_process.h"
+#include "chrome/browser/speech/tts_platform.h"
+
+namespace {
+// A value to be used to indicate that there is no char index available.
+const int kInvalidCharIndex = -1;
+
+// Given a language/region code of the form 'fr-FR', returns just the basic
+// language portion, e.g. 'fr'.
+std::string TrimLanguageCode(std::string lang) {
+  if (lang.size() >= 5 && lang[2] == '-')
+    return lang.substr(0, 2);
+  else
+    return lang;
+}
+
+}  // namespace
+
+bool IsFinalTtsEventType(TtsEventType event_type) {
+  return (event_type == TTS_EVENT_END ||
+          event_type == TTS_EVENT_INTERRUPTED ||
+          event_type == TTS_EVENT_CANCELLED ||
+          event_type == TTS_EVENT_ERROR);
+}
+
+//
+// UtteranceContinuousParameters
+//
+
+
+UtteranceContinuousParameters::UtteranceContinuousParameters()
+    : rate(-1),
+      pitch(-1),
+      volume(-1) {}
+
+
+//
+// VoiceData
+//
+
+
+VoiceData::VoiceData()
+    : gender(TTS_GENDER_NONE),
+      remote(false),
+      native(false) {}
+
+VoiceData::~VoiceData() {}
+
+
+//
+// Utterance
+//
+
+// static
+int Utterance::next_utterance_id_ = 0;
+
+Utterance::Utterance(content::BrowserContext* browser_context)
+    : browser_context_(browser_context),
+      id_(next_utterance_id_++),
+      src_id_(-1),
+      gender_(TTS_GENDER_NONE),
+      can_enqueue_(false),
+      char_index_(0),
+      finished_(false) {
+  options_.reset(new base::DictionaryValue());
+}
+
+Utterance::~Utterance() {
+  DCHECK(finished_);
+}
+
+void Utterance::OnTtsEvent(TtsEventType event_type,
+                           int char_index,
+                           const std::string& error_message) {
+  if (char_index >= 0)
+    char_index_ = char_index;
+  if (IsFinalTtsEventType(event_type))
+    finished_ = true;
+
+  if (event_delegate_)
+    event_delegate_->OnTtsEvent(this, event_type, char_index, error_message);
+  if (finished_)
+    event_delegate_.reset();
+}
+
+void Utterance::Finish() {
+  finished_ = true;
+}
+
+void Utterance::set_options(const base::Value* options) {
+  options_.reset(options->DeepCopy());
+}
+
+TtsController* TtsController::GetInstance() {
+  return TtsControllerImpl::GetInstance();
+}
+
+//
+// TtsControllerImpl
+//
+
+// static
+TtsControllerImpl* TtsControllerImpl::GetInstance() {
+  return Singleton<TtsControllerImpl>::get();
+}
+
+TtsControllerImpl::TtsControllerImpl()
+    : current_utterance_(NULL),
+      paused_(false),
+      platform_impl_(NULL),
+      tts_engine_delegate_(NULL) {
+}
+
+TtsControllerImpl::~TtsControllerImpl() {
+  if (current_utterance_) {
+    current_utterance_->Finish();
+    delete current_utterance_;
+  }
+
+  // Clear any queued utterances too.
+  ClearUtteranceQueue(false);  // Don't send events.
+}
+
+void TtsControllerImpl::SpeakOrEnqueue(Utterance* utterance) {
+  // If we're paused and we get an utterance that can't be queued,
+  // flush the queue but stay in the paused state.
+  if (paused_ && !utterance->can_enqueue()) {
+    Stop();
+    paused_ = true;
+    delete utterance;
+    return;
+  }
+
+  if (paused_ || (IsSpeaking() && utterance->can_enqueue())) {
+    utterance_queue_.push(utterance);
+  } else {
+    Stop();
+    SpeakNow(utterance);
+  }
+}
+
+void TtsControllerImpl::SpeakNow(Utterance* utterance) {
+  // Ensure we have all built-in voices loaded. This is a no-op if already
+  // loaded.
+  bool loaded_built_in =
+      GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->browser_context());
+
+  // Get all available voices and try to find a matching voice.
+  std::vector<VoiceData> voices;
+  GetVoices(utterance->browser_context(), &voices);
+  int index = GetMatchingVoice(utterance, voices);
+
+  VoiceData voice;
+  if (index != -1) {
+    // Select the matching voice.
+    voice = voices[index];
+  } else {
+    // However, if no match was found on a platform without native tts voices,
+    // attempt to get a voice based only on the current locale without respect
+    // to any supplied voice names.
+    std::vector<VoiceData> native_voices;
+
+    if (GetPlatformImpl()->PlatformImplAvailable())
+      GetPlatformImpl()->GetVoices(&native_voices);
+
+    if (native_voices.empty() && !voices.empty()) {
+      // TODO(dtseng): Notify extension caller of an error.
+      utterance->set_voice_name("");
+      // TODO(gaochun): Replace the global variable g_browser_process with
+      // GetContentClient()->browser() to eliminate the dependency of browser
+      // once TTS implementation was moved to content.
+      utterance->set_lang(g_browser_process->GetApplicationLocale());
+      index = GetMatchingVoice(utterance, voices);
+
+      // If even that fails, just take the first available voice.
+      if (index == -1)
+        index = 0;
+      voice = voices[index];
+    } else {
+      // Otherwise, simply give native voices a chance to handle this utterance.
+      voice.native = true;
+    }
+  }
+
+  GetPlatformImpl()->WillSpeakUtteranceWithVoice(utterance, voice);
+
+  if (!voice.native) {
+#if !defined(OS_ANDROID)
+    DCHECK(!voice.extension_id.empty());
+    current_utterance_ = utterance;
+    utterance->set_extension_id(voice.extension_id);
+    if (tts_engine_delegate_)
+      tts_engine_delegate_->Speak(utterance, voice);
+    bool sends_end_event =
+        voice.events.find(TTS_EVENT_END) != voice.events.end();
+    if (!sends_end_event) {
+      utterance->Finish();
+      delete utterance;
+      current_utterance_ = NULL;
+      SpeakNextUtterance();
+    }
+#endif
+  } else {
+    // It's possible for certain platforms to send start events immediately
+    // during |speak|.
+    current_utterance_ = utterance;
+    GetPlatformImpl()->clear_error();
+    bool success = GetPlatformImpl()->Speak(
+        utterance->id(),
+        utterance->text(),
+        utterance->lang(),
+        voice,
+        utterance->continuous_parameters());
+    if (!success)
+      current_utterance_ = NULL;
+
+    // If the native voice wasn't able to process this speech, see if
+    // the browser has built-in TTS that isn't loaded yet.
+    if (!success && loaded_built_in) {
+      utterance_queue_.push(utterance);
+      return;
+    }
+
+    if (!success) {
+      utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex,
+                            GetPlatformImpl()->error());
+      delete utterance;
+      return;
+    }
+  }
+}
+
+void TtsControllerImpl::Stop() {
+  paused_ = false;
+  if (current_utterance_ && !current_utterance_->extension_id().empty()) {
+#if !defined(OS_ANDROID)
+    if (tts_engine_delegate_)
+      tts_engine_delegate_->Stop(current_utterance_);
+#endif
+  } else {
+    GetPlatformImpl()->clear_error();
+    GetPlatformImpl()->StopSpeaking();
+  }
+
+  if (current_utterance_)
+    current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
+                                   std::string());
+  FinishCurrentUtterance();
+  ClearUtteranceQueue(true);  // Send events.
+}
+
+void TtsControllerImpl::Pause() {
+  paused_ = true;
+  if (current_utterance_ && !current_utterance_->extension_id().empty()) {
+#if !defined(OS_ANDROID)
+    if (tts_engine_delegate_)
+      tts_engine_delegate_->Pause(current_utterance_);
+#endif
+  } else if (current_utterance_) {
+    GetPlatformImpl()->clear_error();
+    GetPlatformImpl()->Pause();
+  }
+}
+
+void TtsControllerImpl::Resume() {
+  paused_ = false;
+  if (current_utterance_ && !current_utterance_->extension_id().empty()) {
+#if !defined(OS_ANDROID)
+    if (tts_engine_delegate_)
+      tts_engine_delegate_->Resume(current_utterance_);
+#endif
+  } else if (current_utterance_) {
+    GetPlatformImpl()->clear_error();
+    GetPlatformImpl()->Resume();
+  } else {
+    SpeakNextUtterance();
+  }
+}
+
+void TtsControllerImpl::OnTtsEvent(int utterance_id,
+                                        TtsEventType event_type,
+                                        int char_index,
+                                        const std::string& error_message) {
+  // We may sometimes receive completion callbacks "late", after we've
+  // already finished the utterance (for example because another utterance
+  // interrupted or we got a call to Stop). This is normal and we can
+  // safely just ignore these events.
+  if (!current_utterance_ || utterance_id != current_utterance_->id()) {
+    return;
+  }
+  current_utterance_->OnTtsEvent(event_type, char_index, error_message);
+  if (current_utterance_->finished()) {
+    FinishCurrentUtterance();
+    SpeakNextUtterance();
+  }
+}
+
+void TtsControllerImpl::GetVoices(content::BrowserContext* browser_context,
+                              std::vector<VoiceData>* out_voices) {
+#if !defined(OS_ANDROID)
+  if (browser_context && tts_engine_delegate_)
+    tts_engine_delegate_->GetVoices(browser_context, out_voices);
+#endif
+
+  TtsPlatformImpl* platform_impl = GetPlatformImpl();
+  if (platform_impl) {
+    // Ensure we have all built-in voices loaded. This is a no-op if already
+    // loaded.
+    platform_impl->LoadBuiltInTtsExtension(browser_context);
+    if (platform_impl->PlatformImplAvailable())
+      platform_impl->GetVoices(out_voices);
+  }
+}
+
+bool TtsControllerImpl::IsSpeaking() {
+  return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking();
+}
+
+void TtsControllerImpl::FinishCurrentUtterance() {
+  if (current_utterance_) {
+    if (!current_utterance_->finished())
+      current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
+                                     std::string());
+    delete current_utterance_;
+    current_utterance_ = NULL;
+  }
+}
+
+void TtsControllerImpl::SpeakNextUtterance() {
+  if (paused_)
+    return;
+
+  // Start speaking the next utterance in the queue.  Keep trying in case
+  // one fails but there are still more in the queue to try.
+  while (!utterance_queue_.empty() && !current_utterance_) {
+    Utterance* utterance = utterance_queue_.front();
+    utterance_queue_.pop();
+    SpeakNow(utterance);
+  }
+}
+
+void TtsControllerImpl::ClearUtteranceQueue(bool send_events) {
+  while (!utterance_queue_.empty()) {
+    Utterance* utterance = utterance_queue_.front();
+    utterance_queue_.pop();
+    if (send_events)
+      utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex,
+                            std::string());
+    else
+      utterance->Finish();
+    delete utterance;
+  }
+}
+
+void TtsControllerImpl::SetPlatformImpl(
+    TtsPlatformImpl* platform_impl) {
+  platform_impl_ = platform_impl;
+}
+
+int TtsControllerImpl::QueueSize() {
+  return static_cast<int>(utterance_queue_.size());
+}
+
+TtsPlatformImpl* TtsControllerImpl::GetPlatformImpl() {
+  if (!platform_impl_)
+    platform_impl_ = TtsPlatformImpl::GetInstance();
+  return platform_impl_;
+}
+
+int TtsControllerImpl::GetMatchingVoice(
+    const Utterance* utterance, std::vector<VoiceData>& voices) {
+  // Make two passes: the first time, do strict language matching
+  // ('fr-FR' does not match 'fr-CA'). The second time, do prefix
+  // language matching ('fr-FR' matches 'fr' and 'fr-CA')
+  for (int pass = 0; pass < 2; ++pass) {
+    for (size_t i = 0; i < voices.size(); ++i) {
+      const VoiceData& voice = voices[i];
+
+      if (!utterance->extension_id().empty() &&
+          utterance->extension_id() != voice.extension_id) {
+        continue;
+      }
+
+      if (!voice.name.empty() &&
+          !utterance->voice_name().empty() &&
+          voice.name != utterance->voice_name()) {
+        continue;
+      }
+      if (!voice.lang.empty() && !utterance->lang().empty()) {
+        std::string voice_lang = voice.lang;
+        std::string utterance_lang = utterance->lang();
+        if (pass == 1) {
+          voice_lang = TrimLanguageCode(voice_lang);
+          utterance_lang = TrimLanguageCode(utterance_lang);
+        }
+        if (voice_lang != utterance_lang) {
+          continue;
+        }
+      }
+      if (voice.gender != TTS_GENDER_NONE &&
+          utterance->gender() != TTS_GENDER_NONE &&
+          voice.gender != utterance->gender()) {
+        continue;
+      }
+
+      if (utterance->required_event_types().size() > 0) {
+        bool has_all_required_event_types = true;
+        for (std::set<TtsEventType>::const_iterator iter =
+                 utterance->required_event_types().begin();
+             iter != utterance->required_event_types().end();
+             ++iter) {
+          if (voice.events.find(*iter) == voice.events.end()) {
+            has_all_required_event_types = false;
+            break;
+          }
+        }
+        if (!has_all_required_event_types)
+          continue;
+      }
+
+      return static_cast<int>(i);
+    }
+  }
+
+  return -1;
+}
+
+void TtsControllerImpl::VoicesChanged() {
+  for (std::set<VoicesChangedDelegate*>::iterator iter =
+           voices_changed_delegates_.begin();
+       iter != voices_changed_delegates_.end(); ++iter) {
+    (*iter)->OnVoicesChanged();
+  }
+}
+
+void TtsControllerImpl::AddVoicesChangedDelegate(
+    VoicesChangedDelegate* delegate) {
+  voices_changed_delegates_.insert(delegate);
+}
+
+void TtsControllerImpl::RemoveVoicesChangedDelegate(
+    VoicesChangedDelegate* delegate) {
+  voices_changed_delegates_.erase(delegate);
+}
+
+void TtsControllerImpl::SetTtsEngineDelegate(
+    TtsEngineDelegate* delegate) {
+  tts_engine_delegate_ = delegate;
+}
+
+TtsEngineDelegate* TtsControllerImpl::GetTtsEngineDelegate() {
+  return tts_engine_delegate_;
+}
\ No newline at end of file
diff --git a/chromium_src/chrome/browser/speech/tts_controller_impl.h b/chromium_src/chrome/browser/speech/tts_controller_impl.h
new file mode 100644
index 0000000000..f6ddf584cc
--- /dev/null
+++ b/chromium_src/chrome/browser/speech/tts_controller_impl.h
@@ -0,0 +1,104 @@
+// Copyright 2014 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_SPEECH_TTS_CONTROLLER_IMPL_H_
+#define CHROME_BROWSER_SPEECH_TTS_CONTROLLER_IMPL_H_
+
+#include <queue>
+#include <set>
+#include <string>
+#include <vector>
+
+#include "base/memory/scoped_ptr.h"
+#include "base/memory/singleton.h"
+#include "base/memory/weak_ptr.h"
+#include "chrome/browser/speech/tts_controller.h"
+#include "url/gurl.h"
+
+namespace content {
+class BrowserContext;
+}
+
+// Singleton class that manages text-to-speech for the TTS and TTS engine
+// extension APIs, maintaining a queue of pending utterances and keeping
+// track of all state.
+class TtsControllerImpl : public TtsController {
+ public:
+  // Get the single instance of this class.
+  static TtsControllerImpl* GetInstance();
+
+  // TtsController methods
+  virtual bool IsSpeaking() OVERRIDE;
+  virtual void SpeakOrEnqueue(Utterance* utterance) OVERRIDE;
+  virtual void Stop() OVERRIDE;
+  virtual void Pause() OVERRIDE;
+  virtual void Resume() OVERRIDE;
+  virtual void OnTtsEvent(int utterance_id,
+                          TtsEventType event_type,
+                          int char_index,
+                          const std::string& error_message) OVERRIDE;
+  virtual void GetVoices(content::BrowserContext* browser_context,
+                         std::vector<VoiceData>* out_voices) OVERRIDE;
+  virtual void VoicesChanged() OVERRIDE;
+  virtual void AddVoicesChangedDelegate(
+      VoicesChangedDelegate* delegate) OVERRIDE;
+  virtual void RemoveVoicesChangedDelegate(
+      VoicesChangedDelegate* delegate) OVERRIDE;
+  virtual void SetTtsEngineDelegate(TtsEngineDelegate* delegate) OVERRIDE;
+  virtual TtsEngineDelegate* GetTtsEngineDelegate() OVERRIDE;
+  virtual void SetPlatformImpl(TtsPlatformImpl* platform_impl) OVERRIDE;
+  virtual int QueueSize() OVERRIDE;
+
+ protected:
+  TtsControllerImpl();
+  virtual ~TtsControllerImpl();
+
+ private:
+  // Get the platform TTS implementation (or injected mock).
+  TtsPlatformImpl* GetPlatformImpl();
+
+  // Start speaking the given utterance. Will either take ownership of
+  // |utterance| or delete it if there's an error. Has no return value.
+  void SpeakNow(Utterance* utterance);
+
+  // Clear the utterance queue. If send_events is true, will send
+  // TTS_EVENT_CANCELLED events on each one.
+  void ClearUtteranceQueue(bool send_events);
+
+  // Finalize and delete the current utterance.
+  void FinishCurrentUtterance();
+
+  // Start speaking the next utterance in the queue.
+  void SpeakNextUtterance();
+
+  // Given an utterance and a vector of voices, return the
+  // index of the voice that best matches the utterance.
+  int GetMatchingVoice(const Utterance* utterance,
+                       std::vector<VoiceData>& voices);
+
+  friend struct DefaultSingletonTraits<TtsControllerImpl>;
+
+  // The current utterance being spoken.
+  Utterance* current_utterance_;
+
+  // Whether the queue is paused or not.
+  bool paused_;
+
+  // A queue of utterances to speak after the current one finishes.
+  std::queue<Utterance*> utterance_queue_;
+
+  // A set of delegates that want to be notified when the voices change.
+  std::set<VoicesChangedDelegate*> voices_changed_delegates_;
+
+  // A pointer to the platform implementation of text-to-speech, for
+  // dependency injection.
+  TtsPlatformImpl* platform_impl_;
+
+  // The delegate that processes TTS requests with user-installed extensions.
+  TtsEngineDelegate* tts_engine_delegate_;
+
+  DISALLOW_COPY_AND_ASSIGN(TtsControllerImpl);
+};
+
+#endif  // CHROME_BROWSER_SPEECH_TTS_CONTROLLER_IMPL_H_
\ No newline at end of file
diff --git a/chromium_src/chrome/browser/speech/tts_mac.mm b/chromium_src/chrome/browser/speech/tts_mac.mm
new file mode 100644
index 0000000000..08786fe1e3
--- /dev/null
+++ b/chromium_src/chrome/browser/speech/tts_mac.mm
@@ -0,0 +1,352 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <string>
+
+#include "base/mac/scoped_nsobject.h"
+#include "base/memory/singleton.h"
+#include "base/strings/sys_string_conversions.h"
+#include "base/values.h"
+#include "chrome/browser/speech/tts_controller.h"
+#include "chrome/browser/speech/tts_platform.h"
+
+#import <Cocoa/Cocoa.h>
+
+class TtsPlatformImplMac;
+
+@interface ChromeTtsDelegate : NSObject <NSSpeechSynthesizerDelegate> {
+ @private
+  TtsPlatformImplMac* ttsImplMac_;  // weak.
+}
+
+- (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac;
+
+@end
+
+// Subclass of NSSpeechSynthesizer that takes an utterance
+// string on initialization, retains it and only allows it
+// to be spoken once.
+//
+// We construct a new NSSpeechSynthesizer for each utterance, for
+// two reasons:
+// 1. To associate delegate callbacks with a particular utterance,
+//    without assuming anything undocumented about the protocol.
+// 2. 
To work around http://openradar.appspot.com/radar?id=2854403,
+//    where Nuance voices don't retain the utterance string and
+//    crash when trying to call willSpeakWord.
+@interface SingleUseSpeechSynthesizer : NSSpeechSynthesizer {
+ @private
+  base::scoped_nsobject<NSString> utterance_;
+  bool didSpeak_;
+}
+
+- (id)initWithUtterance:(NSString*)utterance;
+- (bool)startSpeakingRetainedUtterance;
+- (bool)startSpeakingString:(NSString*)utterance;
+
+@end
+
+class TtsPlatformImplMac : public TtsPlatformImpl {
+ public:
+  virtual bool PlatformImplAvailable() OVERRIDE {
+    return true;
+  }
+
+  virtual bool Speak(
+      int utterance_id,
+      const std::string& utterance,
+      const std::string& lang,
+      const VoiceData& voice,
+      const UtteranceContinuousParameters& params) OVERRIDE;
+
+  virtual bool StopSpeaking() OVERRIDE;
+
+  virtual void Pause() OVERRIDE;
+
+  virtual void Resume() OVERRIDE;
+
+  virtual bool IsSpeaking() OVERRIDE;
+
+  virtual void GetVoices(std::vector<VoiceData>* out_voices) OVERRIDE;
+
+  // Called by ChromeTtsDelegate when we get a callback from the
+  // native speech engine.
+  void OnSpeechEvent(NSSpeechSynthesizer* sender,
+                     TtsEventType event_type,
+                     int char_index,
+                     const std::string& error_message);
+
+  // Get the single instance of this class.
+  static TtsPlatformImplMac* GetInstance();
+
+ private:
+  TtsPlatformImplMac();
+  virtual ~TtsPlatformImplMac();
+
+  base::scoped_nsobject<SingleUseSpeechSynthesizer> speech_synthesizer_;
+  base::scoped_nsobject<ChromeTtsDelegate> delegate_;
+  int utterance_id_;
+  std::string utterance_;
+  int last_char_index_;
+  bool paused_;
+
+  friend struct DefaultSingletonTraits<TtsPlatformImplMac>;
+
+  DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplMac);
+};
+
+// static
+TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
+  return TtsPlatformImplMac::GetInstance();
+}
+
+bool TtsPlatformImplMac::Speak(
+    int utterance_id,
+    const std::string& utterance,
+    const std::string& lang,
+    const VoiceData& voice,
+    const UtteranceContinuousParameters& params) {
+  // TODO: convert SSML to SAPI xml. http://crbug.com/88072
+  utterance_ = utterance;
+  paused_ = false;
+
+  NSString* utterance_nsstring =
+      [NSString stringWithUTF8String:utterance_.c_str()];
+
+  // Deliberately construct a new speech synthesizer every time Speak is
+  // called, otherwise there's no way to know whether calls to the delegate
+  // apply to the current utterance or a previous utterance. In
+  // experimentation, the overhead of constructing and destructing a
+  // NSSpeechSynthesizer is minimal.
+  speech_synthesizer_.reset(
+      [[SingleUseSpeechSynthesizer alloc]
+        initWithUtterance:utterance_nsstring]);
+  [speech_synthesizer_ setDelegate:delegate_];
+
+  if (!voice.native_voice_identifier.empty()) {
+    NSString* native_voice_identifier =
+        [NSString stringWithUTF8String:voice.native_voice_identifier.c_str()];
+    [speech_synthesizer_ setVoice:native_voice_identifier];
+  }
+
+  utterance_id_ = utterance_id;
+  last_char_index_ = 0;  // No position has been reported for this utterance.
+  // TODO: support languages other than the default: crbug.com/88059
+
+  if (params.rate >= 0.0) {
+    // The TTS api defines rate via words per minute. Let 200 be the default.
+    [speech_synthesizer_
+        setObject:[NSNumber numberWithInt:params.rate * 200]
+        forProperty:NSSpeechRateProperty error:nil];
+  }
+
+  if (params.pitch >= 0.0) {
+    // The input is a float from 0.0 to 2.0, with 1.0 being the default.
+    // Get the default pitch for this voice and modulate it by 50% - 150%.
+    NSError* errorCode;
+    NSNumber* defaultPitchObj =
+        [speech_synthesizer_ objectForProperty:NSSpeechPitchBaseProperty
+                                         error:&errorCode];
+    int defaultPitch = defaultPitchObj ? [defaultPitchObj intValue] : 48;
+    int newPitch = static_cast<int>(defaultPitch * (0.5 * params.pitch + 0.5));
+    [speech_synthesizer_
+        setObject:[NSNumber numberWithInt:newPitch]
+        forProperty:NSSpeechPitchBaseProperty error:nil];
+  }
+
+  if (params.volume >= 0.0) {
+    [speech_synthesizer_
+        setObject:[NSNumber numberWithFloat:params.volume]
+        forProperty:NSSpeechVolumeProperty error:nil];
+  }
+
+  bool success = [speech_synthesizer_ startSpeakingRetainedUtterance];
+  if (success) {
+    TtsController* controller = TtsController::GetInstance();
+    controller->OnTtsEvent(utterance_id_, TTS_EVENT_START, 0, "");
+  }
+  return success;
+}
+
+bool TtsPlatformImplMac::StopSpeaking() {
+  if (speech_synthesizer_.get()) {
+    [speech_synthesizer_ stopSpeaking];
+    speech_synthesizer_.reset(nil);
+  }
+  paused_ = false;
+  return true;
+}
+
+void TtsPlatformImplMac::Pause() {
+  if (speech_synthesizer_.get() && utterance_id_ && !paused_) {
+    [speech_synthesizer_ pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
+    paused_ = true;
+    TtsController::GetInstance()->OnTtsEvent(
+        utterance_id_, TTS_EVENT_PAUSE, last_char_index_, "");
+  }
+}
+
+void TtsPlatformImplMac::Resume() {
+  if (speech_synthesizer_.get() && utterance_id_ && paused_) {
+    [speech_synthesizer_ continueSpeaking];
+    paused_ = false;
+    TtsController::GetInstance()->OnTtsEvent(
+        utterance_id_, TTS_EVENT_RESUME, last_char_index_, "");
+  }
+}
+
+bool TtsPlatformImplMac::IsSpeaking() {
+  if (speech_synthesizer_)
+    return [speech_synthesizer_ isSpeaking];
+  return false;
+}
+
+void TtsPlatformImplMac::GetVoices(std::vector<VoiceData>* outVoices) {
+  NSArray* voices = [NSSpeechSynthesizer availableVoices];
+
+  // Create a new temporary array of the available voices with
+  // the default voice first.
+  NSMutableArray* orderedVoices =
+      [NSMutableArray arrayWithCapacity:[voices count]];
+  NSString* defaultVoice = [NSSpeechSynthesizer defaultVoice];
+  if (defaultVoice) {
+    [orderedVoices addObject:defaultVoice];
+  }
+  for (NSString* voiceIdentifier in voices) {
+    if (![voiceIdentifier isEqualToString:defaultVoice])
+      [orderedVoices addObject:voiceIdentifier];
+  }
+
+  for (NSString* voiceIdentifier in orderedVoices) {
+    outVoices->push_back(VoiceData());
+    VoiceData& data = outVoices->back();
+
+    NSDictionary* attributes =
+        [NSSpeechSynthesizer attributesForVoice:voiceIdentifier];
+    NSString* name = [attributes objectForKey:NSVoiceName];
+    NSString* gender = [attributes objectForKey:NSVoiceGender];
+    NSString* localeIdentifier =
+        [attributes objectForKey:NSVoiceLocaleIdentifier];
+
+    data.native = true;
+    data.native_voice_identifier = base::SysNSStringToUTF8(voiceIdentifier);
+    data.name = base::SysNSStringToUTF8(name);
+
+    NSDictionary* localeComponents =
+        [NSLocale componentsFromLocaleIdentifier:localeIdentifier];
+    NSString* language = [localeComponents objectForKey:NSLocaleLanguageCode];
+    NSString* country = [localeComponents objectForKey:NSLocaleCountryCode];
+    if (language && country) {
+      data.lang =
+          [[NSString stringWithFormat:@"%@-%@", language, country] UTF8String];
+    } else {
+      data.lang = base::SysNSStringToUTF8(language);
+    }
+    if ([gender isEqualToString:NSVoiceGenderMale])
+      data.gender = TTS_GENDER_MALE;
+    else if ([gender isEqualToString:NSVoiceGenderFemale])
+      data.gender = TTS_GENDER_FEMALE;
+    else
+      data.gender = TTS_GENDER_NONE;
+    data.events.insert(TTS_EVENT_START);
+    data.events.insert(TTS_EVENT_END);
+    data.events.insert(TTS_EVENT_WORD);
+    data.events.insert(TTS_EVENT_ERROR);
+    data.events.insert(TTS_EVENT_CANCELLED);
+    data.events.insert(TTS_EVENT_INTERRUPTED);
+    data.events.insert(TTS_EVENT_PAUSE);
+    data.events.insert(TTS_EVENT_RESUME);
+  }
+}
+
+void TtsPlatformImplMac::OnSpeechEvent(
+    NSSpeechSynthesizer* sender,
+    TtsEventType event_type,
+    int char_index,
+    const std::string& error_message) {
+  // Don't send events from an utterance that's already completed.
+  // This depends on the fact that we construct a new NSSpeechSynthesizer
+  // each time we call Speak.
+  if (sender != speech_synthesizer_.get())
+    return;
+
+  if (event_type == TTS_EVENT_END)
+    char_index = utterance_.size();
+  // Record the position before notifying: the controller may call Speak().
+  last_char_index_ = char_index;
+  TtsController::GetInstance()->OnTtsEvent(
+      utterance_id_, event_type, char_index, error_message);
+}
+
+TtsPlatformImplMac::TtsPlatformImplMac() {
+  utterance_id_ = -1;
+  last_char_index_ = 0;
+  paused_ = false;
+  delegate_.reset([[ChromeTtsDelegate alloc] initWithPlatformImplMac:this]);
+}
+
+TtsPlatformImplMac::~TtsPlatformImplMac() {
+}
+
+// static
+TtsPlatformImplMac* TtsPlatformImplMac::GetInstance() {
+  return Singleton<TtsPlatformImplMac>::get();
+}
+
+@implementation ChromeTtsDelegate
+
+- (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac {
+  if ((self = [super init])) {
+    ttsImplMac_ = ttsImplMac;
+  }
+  return self;
+}
+
+- (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
+        didFinishSpeaking:(BOOL)finished_speaking {
+  ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_END, 0, "");
+}
+
+- (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
+            willSpeakWord:(NSRange)character_range
+                 ofString:(NSString*)string {
+  ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_WORD,
+      character_range.location, "");
+}
+
+- (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
+ didEncounterErrorAtIndex:(NSUInteger)character_index
+                 ofString:(NSString*)string
+                  message:(NSString*)message {
+  std::string message_utf8 = base::SysNSStringToUTF8(message);
+  ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_ERROR, character_index,
+      message_utf8);
+}
+
+@end
+
+@implementation SingleUseSpeechSynthesizer
+
+- (id)initWithUtterance:(NSString*)utterance {
+  self = [super init];
+  if (self) {
+    utterance_.reset([utterance retain]);
+    didSpeak_ = false;
+  }
+  return self;
+}
+
+- (bool)startSpeakingRetainedUtterance {
+  CHECK(!didSpeak_);
+  CHECK(utterance_);
+  didSpeak_ = true;
+  return [super startSpeakingString:utterance_];
+}
+
+- (bool)startSpeakingString:(NSString*)utterance {
+  CHECK(false);
+  return false;
+}
+
+@end
diff --git a/chromium_src/chrome/browser/speech/tts_message_filter.cc b/chromium_src/chrome/browser/speech/tts_message_filter.cc
new file mode 100644
index 0000000000..66e4c40bd5
--- /dev/null
+++ b/chromium_src/chrome/browser/speech/tts_message_filter.cc
@@ -0,0 +1,176 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/speech/tts_message_filter.h"
+
+#include "base/bind.h"
+#include "base/logging.h"
+#include "content/public/browser/browser_context.h"
+#include "content/public/browser/render_process_host.h"
+
+using content::BrowserThread;
+
+TtsMessageFilter::TtsMessageFilter(int render_process_id,
+    content::BrowserContext* browser_context)
+    : BrowserMessageFilter(TtsMsgStart),
+      render_process_id_(render_process_id),
+      browser_context_(browser_context),
+      weak_ptr_factory_(this) {
+  CHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
+  TtsController::GetInstance()->AddVoicesChangedDelegate(this);
+
+  // Balanced in OnChannelClosingInUIThread() to keep the ref-count be non-zero
+  // until all WeakPtr's are invalidated.
+  AddRef();
+}
+
+void TtsMessageFilter::OverrideThreadForMessage(
+    const IPC::Message& message, BrowserThread::ID* thread) {
+  switch (message.type()) {
+  case TtsHostMsg_InitializeVoiceList::ID:
+  case TtsHostMsg_Speak::ID:
+  case TtsHostMsg_Pause::ID:
+  case TtsHostMsg_Resume::ID:
+  case TtsHostMsg_Cancel::ID:
+    *thread = BrowserThread::UI;
+    break;
+  }
+}
+
+bool TtsMessageFilter::OnMessageReceived(const IPC::Message& message) {
+  bool handled = true;
+  IPC_BEGIN_MESSAGE_MAP(TtsMessageFilter, message)
+    IPC_MESSAGE_HANDLER(TtsHostMsg_InitializeVoiceList, OnInitializeVoiceList)
+    IPC_MESSAGE_HANDLER(TtsHostMsg_Speak, OnSpeak)
+    IPC_MESSAGE_HANDLER(TtsHostMsg_Pause, OnPause)
+    IPC_MESSAGE_HANDLER(TtsHostMsg_Resume, OnResume)
+    IPC_MESSAGE_HANDLER(TtsHostMsg_Cancel, OnCancel)
+    IPC_MESSAGE_UNHANDLED(handled = false)
+  IPC_END_MESSAGE_MAP()
+  return handled;
+}
+
+void TtsMessageFilter::OnChannelClosing() {
+  BrowserThread::PostTask(
+      BrowserThread::UI, FROM_HERE,
+      base::Bind(&TtsMessageFilter::OnChannelClosingInUIThread, this));
+}
+
+void TtsMessageFilter::OnDestruct() const {
+  BrowserThread::DeleteOnUIThread::Destruct(this);
+}
+
+void TtsMessageFilter::OnInitializeVoiceList() {
+  CHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
+  TtsController* tts_controller = TtsController::GetInstance();
+  std::vector<VoiceData> voices;
+  tts_controller->GetVoices(browser_context_, &voices);
+
+  std::vector<TtsVoice> out_voices;
+  out_voices.resize(voices.size());
+  for (size_t i = 0; i < voices.size(); ++i) {
+    TtsVoice& out_voice = out_voices[i];
+    out_voice.voice_uri = voices[i].name;
+    out_voice.name = voices[i].name;
+    out_voice.lang = voices[i].lang;
+    out_voice.local_service = !voices[i].remote;
+    out_voice.is_default = (i == 0);
+  }
+  Send(new TtsMsg_SetVoiceList(out_voices));
+}
+
+void TtsMessageFilter::OnSpeak(const TtsUtteranceRequest& request) {
+  CHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
+
+  scoped_ptr<Utterance> utterance(new Utterance(browser_context_));
+  utterance->set_src_id(request.id);
+  utterance->set_text(request.text);
+  utterance->set_lang(request.lang);
+  utterance->set_voice_name(request.voice);
+  utterance->set_can_enqueue(true);
+
+  UtteranceContinuousParameters params;
+  params.rate = request.rate;
+  params.pitch = request.pitch;
+  params.volume = request.volume;
+  utterance->set_continuous_parameters(params);
+
+  utterance->set_event_delegate(weak_ptr_factory_.GetWeakPtr());
+
+  TtsController::GetInstance()->SpeakOrEnqueue(utterance.release());
+}
+
+void TtsMessageFilter::OnPause() {
+  CHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
+  TtsController::GetInstance()->Pause();
+}
+
+void TtsMessageFilter::OnResume() {
+  CHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
+  TtsController::GetInstance()->Resume();
+}
+
+void TtsMessageFilter::OnCancel() {
+  CHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
+  TtsController::GetInstance()->Stop();
+}
+
+void TtsMessageFilter::OnTtsEvent(Utterance* utterance,
+                                  TtsEventType event_type,
+                                  int char_index,
+                                  const std::string& error_message) {
+  CHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
+  switch (event_type) {
+    case TTS_EVENT_START:
+      Send(new TtsMsg_DidStartSpeaking(utterance->src_id()));
+      break;
+    case TTS_EVENT_END:
+      Send(new TtsMsg_DidFinishSpeaking(utterance->src_id()));
+      break;
+    case TTS_EVENT_WORD:
+      Send(new TtsMsg_WordBoundary(utterance->src_id(), char_index));
+      break;
+    case TTS_EVENT_SENTENCE:
+      Send(new TtsMsg_SentenceBoundary(utterance->src_id(), char_index));
+      break;
+    case TTS_EVENT_MARKER:
+      Send(new TtsMsg_MarkerEvent(utterance->src_id(), char_index));
+      break;
+    case TTS_EVENT_INTERRUPTED:
+      Send(new TtsMsg_WasInterrupted(utterance->src_id()));
+      break;
+    case TTS_EVENT_CANCELLED:
+      Send(new TtsMsg_WasCancelled(utterance->src_id()));
+      break;
+    case TTS_EVENT_ERROR:
+      Send(new TtsMsg_SpeakingErrorOccurred(
+          utterance->src_id(), error_message));
+      break;
+    case TTS_EVENT_PAUSE:
+      Send(new TtsMsg_DidPauseSpeaking(utterance->src_id()));
+      break;
+    case TTS_EVENT_RESUME:
+      Send(new TtsMsg_DidResumeSpeaking(utterance->src_id()));
+      break;
+  }
+}
+
+void TtsMessageFilter::OnVoicesChanged() {
+  CHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
+  OnInitializeVoiceList();
+}
+
+void TtsMessageFilter::OnChannelClosingInUIThread() {
+  CHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
+  TtsController::GetInstance()->RemoveVoicesChangedDelegate(this);
+
+  weak_ptr_factory_.InvalidateWeakPtrs();
+  Release();  // Balanced in TtsMessageFilter().
+}
+
+TtsMessageFilter::~TtsMessageFilter() {
+  CHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
+  DCHECK(!weak_ptr_factory_.HasWeakPtrs());
+  TtsController::GetInstance()->RemoveVoicesChangedDelegate(this);
+}
\ No newline at end of file
diff --git a/chromium_src/chrome/browser/speech/tts_message_filter.h b/chromium_src/chrome/browser/speech/tts_message_filter.h
new file mode 100644
index 0000000000..ba3f98b331
--- /dev/null
+++ b/chromium_src/chrome/browser/speech/tts_message_filter.h
@@ -0,0 +1,64 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_SPEECH_TTS_MESSAGE_FILTER_H_
+#define CHROME_BROWSER_SPEECH_TTS_MESSAGE_FILTER_H_
+
+#include "base/memory/weak_ptr.h"
+#include "chrome/browser/speech/tts_controller.h"
+#include "chrome/common/tts_messages.h"
+#include "content/public/browser/browser_message_filter.h"
+
+namespace content {
+class BrowserContext;
+}
+
+class TtsMessageFilter
+    : public content::BrowserMessageFilter,
+      public UtteranceEventDelegate,
+      public VoicesChangedDelegate {
+ public:
+  explicit TtsMessageFilter(int render_process_id,
+      content::BrowserContext* browser_context);
+
+  // content::BrowserMessageFilter implementation.
+  virtual void OverrideThreadForMessage(
+      const IPC::Message& message,
+      content::BrowserThread::ID* thread) OVERRIDE;
+  virtual bool OnMessageReceived(const IPC::Message& message) OVERRIDE;
+  virtual void OnChannelClosing() OVERRIDE;
+  virtual void OnDestruct() const OVERRIDE;
+
+  // UtteranceEventDelegate implementation.
+  virtual void OnTtsEvent(Utterance* utterance,
+                          TtsEventType event_type,
+                          int char_index,
+                          const std::string& error_message) OVERRIDE;
+
+  // VoicesChangedDelegate implementation.
+  virtual void OnVoicesChanged() OVERRIDE;
+
+ private:
+  friend class content::BrowserThread;
+  friend class base::DeleteHelper<TtsMessageFilter>;
+
+  virtual ~TtsMessageFilter();
+
+  void OnInitializeVoiceList();
+  void OnSpeak(const TtsUtteranceRequest& utterance);
+  void OnPause();
+  void OnResume();
+  void OnCancel();
+
+  void OnChannelClosingInUIThread();
+
+  int render_process_id_;
+  content::BrowserContext* browser_context_;
+
+  base::WeakPtrFactory<TtsMessageFilter> weak_ptr_factory_;
+
+  DISALLOW_COPY_AND_ASSIGN(TtsMessageFilter);
+};
+
+#endif  // CHROME_BROWSER_SPEECH_TTS_MESSAGE_FILTER_H_
\ No newline at end of file
diff --git a/chromium_src/chrome/browser/speech/tts_platform.cc b/chromium_src/chrome/browser/speech/tts_platform.cc
new file mode 100644
index 0000000000..220e005f18
--- /dev/null
+++ b/chromium_src/chrome/browser/speech/tts_platform.cc
@@ -0,0 +1,28 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/speech/tts_platform.h"
+
+#include <string>
+
+bool TtsPlatformImpl::LoadBuiltInTtsExtension(
+    content::BrowserContext* browser_context) {
+  return false;
+}
+
+std::string TtsPlatformImpl::error() {
+  return error_;
+}
+
+void TtsPlatformImpl::clear_error() {
+  error_ = std::string();
+}
+
+void TtsPlatformImpl::set_error(const std::string& error) {
+  error_ = error;
+}
+
+void TtsPlatformImpl::WillSpeakUtteranceWithVoice(const Utterance* utterance,
+                                                  const VoiceData& voice_data) {
+}
\ No newline at end of file
diff --git a/chromium_src/chrome/browser/speech/tts_platform.h b/chromium_src/chrome/browser/speech/tts_platform.h
new file mode 100644
index 0000000000..f33eab1c18
--- /dev/null
+++ b/chromium_src/chrome/browser/speech/tts_platform.h
@@ -0,0 +1,81 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CHROME_BROWSER_SPEECH_TTS_PLATFORM_H_
+#define CHROME_BROWSER_SPEECH_TTS_PLATFORM_H_
+
+#include <string>
+
+#include "chrome/browser/speech/tts_controller.h"
+
+// Abstract class that defines the native platform TTS interface,
+// subclassed by specific implementations on Win, Mac, etc.
+class TtsPlatformImpl {
+ public:
+  static TtsPlatformImpl* GetInstance();
+
+  // Returns true if this platform implementation is supported and available.
+  virtual bool PlatformImplAvailable() = 0;
+
+  // Some platforms may provide a built-in TTS extension. Returns true
+  // if the extension was not previously loaded and is now loading, and
+  // false if it's already loaded or if there's no extension to load.
+  // Will call TtsController::RetrySpeakingQueuedUtterances when
+  // the extension finishes loading.
+  virtual bool LoadBuiltInTtsExtension(
+      content::BrowserContext* browser_context);
+
+  // Speak the given utterance with the given parameters if possible,
+  // and return true on success. Utterance will always be nonempty.
+  // If rate, pitch, or volume are -1.0, they will be ignored.
+  //
+  // The TtsController will only try to speak one utterance at a time.
+  // If it wants to interrupt speech, it will always call StopSpeaking
+  // before speaking again.
+  virtual bool Speak(
+      int utterance_id,
+      const std::string& utterance,
+      const std::string& lang,
+      const VoiceData& voice,
+      const UtteranceContinuousParameters& params) = 0;
+
+  // Stop speaking immediately and return true on success.
+  virtual bool StopSpeaking() = 0;
+
+  // Returns whether any speech is on going.
+  virtual bool IsSpeaking() = 0;
+
+  // Append information about voices provided by this platform implementation
+  // to |out_voices|.
+  virtual void GetVoices(std::vector<VoiceData>* out_voices) = 0;
+
+  // Pause the current utterance, if any, until a call to Resume,
+  // Speak, or StopSpeaking.
+  virtual void Pause() = 0;
+
+  // Resume speaking the current utterance, if it was paused.
+  virtual void Resume() = 0;
+
+  // Allows the platform to monitor speech commands and the voices used
+  // for each one.
+  virtual void WillSpeakUtteranceWithVoice(const Utterance* utterance,
+                                           const VoiceData& voice_data);
+
+  virtual std::string error();
+  virtual void clear_error();
+  virtual void set_error(const std::string& error);
+
+ protected:
+  TtsPlatformImpl() {}
+
+  // On some platforms this may be a leaky singleton - do not rely on the
+  // destructor being called!  http://crbug.com/122026
+  virtual ~TtsPlatformImpl() {}
+
+  std::string error_;
+
+  DISALLOW_COPY_AND_ASSIGN(TtsPlatformImpl);
+};
+
+#endif  // CHROME_BROWSER_SPEECH_TTS_PLATFORM_H_
\ No newline at end of file
diff --git a/chromium_src/chrome/browser/speech/tts_win.cc b/chromium_src/chrome/browser/speech/tts_win.cc
new file mode 100644
index 0000000000..9b9ce2584c
--- /dev/null
+++ b/chromium_src/chrome/browser/speech/tts_win.cc
@@ -0,0 +1,257 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <math.h>
+#include <sapi.h>
+
+#include "base/memory/singleton.h"
+#include "base/strings/string_number_conversions.h"
+#include "base/strings/utf_string_conversions.h"
+#include "base/values.h"
+#include "base/win/scoped_comptr.h"
+#include "chrome/browser/speech/tts_controller.h"
+#include "chrome/browser/speech/tts_platform.h"
+
+class TtsPlatformImplWin : public TtsPlatformImpl {
+ public:
+  virtual bool PlatformImplAvailable() OVERRIDE {
+    return true;
+  }
+
+  virtual bool Speak(
+      int utterance_id,
+      const std::string& utterance,
+      const std::string& lang,
+      const VoiceData& voice,
+      const UtteranceContinuousParameters& params) OVERRIDE;
+
+  virtual bool StopSpeaking() OVERRIDE;
+
+  virtual void Pause() OVERRIDE;
+
+  virtual void Resume() OVERRIDE;
+
+  virtual bool IsSpeaking() OVERRIDE;
+
+  virtual void GetVoices(std::vector<VoiceData>* out_voices) OVERRIDE;
+
+  // Get the single instance of this class.
+  static TtsPlatformImplWin* GetInstance();
+
+  static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param);
+
+ private:
+  TtsPlatformImplWin();
+  virtual ~TtsPlatformImplWin() {}
+
+  void OnSpeechEvent();
+
+  base::win::ScopedComPtr<ISpVoice> speech_synthesizer_;
+
+  // These apply to the current utterance only.
+  std::wstring utterance_;
+  int utterance_id_;
+  int prefix_len_;
+  ULONG stream_number_;
+  int char_position_;
+  bool paused_;
+
+  friend struct DefaultSingletonTraits<TtsPlatformImplWin>;
+
+  DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplWin);
+};
+
+// static
+TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
+  return TtsPlatformImplWin::GetInstance();
+}
+
+bool TtsPlatformImplWin::Speak(
+    int utterance_id,
+    const std::string& src_utterance,
+    const std::string& lang,
+    const VoiceData& voice,
+    const UtteranceContinuousParameters& params) {
+  std::wstring prefix;
+  std::wstring suffix;
+
+  if (!speech_synthesizer_.get())
+    return false;
+
+  // TODO(dmazzoni): support languages other than the default: crbug.com/88059
+
+  if (params.rate > 0.0) {  // log10() is undefined at 0; skip such rates.
+    // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's
+    // linear range of -10 to 10:
+    //   0.1 -> -10
+    //   1.0 -> 0
+    //  10.0 -> 10
+    speech_synthesizer_->SetRate(static_cast<int32>(10 * log10(params.rate)));
+  }
+
+  if (params.pitch >= 0.0) {
+    // The TTS api allows a range of -10 to 10 for speech pitch.
+    // TODO(dtseng): cleanup if we ever use any other properties that
+    // require xml.
+    std::wstring pitch_value =
+        base::IntToString16(static_cast<int>(params.pitch * 10 - 10));
+    prefix = L"<pitch absmiddle=\"" + pitch_value + L"\">";
+    suffix = L"</pitch>";
+  }
+
+  if (params.volume >= 0.0) {
+    // The TTS api allows a range of 0 to 100 for speech volume.
+    speech_synthesizer_->SetVolume(static_cast<uint16>(params.volume * 100));
+  }
+
+  // TODO(dmazzoni): convert SSML to SAPI xml. http://crbug.com/88072
+
+  utterance_ = base::UTF8ToWide(src_utterance);
+  utterance_id_ = utterance_id;
+  char_position_ = 0;
+  std::wstring merged_utterance = prefix + utterance_ + suffix;
+  prefix_len_ = prefix.size();
+
+  HRESULT result = speech_synthesizer_->Speak(
+      merged_utterance.c_str(),
+      SPF_ASYNC,
+      &stream_number_);
+  return (result == S_OK);
+}
+
+bool TtsPlatformImplWin::StopSpeaking() {
+  if (speech_synthesizer_.get()) {
+    // Clear the stream number so that any further events relating to this
+    // utterance are ignored.
+    stream_number_ = 0;
+
+    if (IsSpeaking()) {
+      // Stop speech by speaking the empty string with the purge flag.
+      speech_synthesizer_->Speak(L"", SPF_ASYNC | SPF_PURGEBEFORESPEAK, NULL);
+    }
+    if (paused_) {
+      speech_synthesizer_->Resume();
+      paused_ = false;
+    }
+  }
+  return true;
+}
+
+void TtsPlatformImplWin::Pause() {
+  if (speech_synthesizer_.get() && utterance_id_ && !paused_) {
+    speech_synthesizer_->Pause();
+    paused_ = true;
+    TtsController::GetInstance()->OnTtsEvent(
+        utterance_id_, TTS_EVENT_PAUSE, char_position_, "");
+  }
+}
+
+void TtsPlatformImplWin::Resume() {
+  if (speech_synthesizer_.get() && utterance_id_ && paused_) {
+    speech_synthesizer_->Resume();
+    paused_ = false;
+    TtsController::GetInstance()->OnTtsEvent(
+        utterance_id_, TTS_EVENT_RESUME, char_position_, "");
+  }
+}
+
+bool TtsPlatformImplWin::IsSpeaking() {
+  if (speech_synthesizer_.get()) {
+    SPVOICESTATUS status;
+    HRESULT result = speech_synthesizer_->GetStatus(&status, NULL);
+    if (result == S_OK) {
+      if (status.dwRunningState == 0 ||  // 0 == waiting to speak
+          status.dwRunningState == SPRS_IS_SPEAKING) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+void TtsPlatformImplWin::GetVoices(
+    std::vector<VoiceData>* out_voices) {
+  // TODO: get all voices, not just default voice.
+  // http://crbug.com/88059
+  out_voices->push_back(VoiceData());
+  VoiceData& voice = out_voices->back();
+  voice.native = true;
+  voice.name = "native";
+  voice.events.insert(TTS_EVENT_START);
+  voice.events.insert(TTS_EVENT_END);
+  voice.events.insert(TTS_EVENT_MARKER);
+  voice.events.insert(TTS_EVENT_WORD);
+  voice.events.insert(TTS_EVENT_SENTENCE);
+  voice.events.insert(TTS_EVENT_PAUSE);
+  voice.events.insert(TTS_EVENT_RESUME);
+}
+
+void TtsPlatformImplWin::OnSpeechEvent() {
+  TtsController* controller = TtsController::GetInstance();
+  SPEVENT event;
+  while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) {
+    if (event.ulStreamNum != stream_number_)
+      continue;
+
+    switch (event.eEventId) {
+    case SPEI_START_INPUT_STREAM:
+      controller->OnTtsEvent(
+          utterance_id_, TTS_EVENT_START, 0, std::string());
+      break;
+    case SPEI_END_INPUT_STREAM:
+      char_position_ = utterance_.size();
+      controller->OnTtsEvent(
+          utterance_id_, TTS_EVENT_END, char_position_, std::string());
+      break;
+    case SPEI_TTS_BOOKMARK:
+      controller->OnTtsEvent(
+          utterance_id_, TTS_EVENT_MARKER, char_position_, std::string());
+      break;
+    case SPEI_WORD_BOUNDARY:
+      char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;
+      controller->OnTtsEvent(
+          utterance_id_, TTS_EVENT_WORD, char_position_,
+          std::string());
+      break;
+    case SPEI_SENTENCE_BOUNDARY:
+      char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;
+      controller->OnTtsEvent(
+          utterance_id_, TTS_EVENT_SENTENCE, char_position_,
+          std::string());
+      break;
+    }
+  }
+}
+
+TtsPlatformImplWin::TtsPlatformImplWin()
+  : utterance_id_(0),
+    prefix_len_(0),
+    stream_number_(0),
+    char_position_(0),
+    paused_(false) {
+  speech_synthesizer_.CreateInstance(CLSID_SpVoice);
+  if (speech_synthesizer_.get()) {
+    ULONGLONG event_mask =
+        SPFEI(SPEI_START_INPUT_STREAM) |
+        SPFEI(SPEI_TTS_BOOKMARK) |
+        SPFEI(SPEI_WORD_BOUNDARY) |
+        SPFEI(SPEI_SENTENCE_BOUNDARY) |
+        SPFEI(SPEI_END_INPUT_STREAM);
+    speech_synthesizer_->SetInterest(event_mask, event_mask);
+    speech_synthesizer_->SetNotifyCallbackFunction(
+        TtsPlatformImplWin::SpeechEventCallback, 0, 0);
+  }
+}
+
+// static
+TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() {
+  return Singleton<TtsPlatformImplWin,
+                   LeakySingletonTraits<TtsPlatformImplWin> >::get();
+}
+
+// static
+void TtsPlatformImplWin::SpeechEventCallback(
+    WPARAM w_param, LPARAM l_param) {
+  GetInstance()->OnSpeechEvent();
+}
\ No newline at end of file
diff --git a/chromium_src/chrome/common/tts_messages.h b/chromium_src/chrome/common/tts_messages.h
new file mode 100644
index 0000000000..2132d80a07
--- /dev/null
+++ b/chromium_src/chrome/common/tts_messages.h
@@ -0,0 +1,69 @@
+// Copyright (c) 2013 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Multiply-included message file, hence no include guard.
+
+#include <vector>
+
+#include "chrome/common/tts_utterance_request.h"
+#include "ipc/ipc_message_macros.h"
+#include "ipc/ipc_param_traits.h"
+
+#define IPC_MESSAGE_START TtsMsgStart
+
+IPC_STRUCT_TRAITS_BEGIN(TtsUtteranceRequest)
+IPC_STRUCT_TRAITS_MEMBER(id)
+IPC_STRUCT_TRAITS_MEMBER(text)
+IPC_STRUCT_TRAITS_MEMBER(lang)
+IPC_STRUCT_TRAITS_MEMBER(voice)
+IPC_STRUCT_TRAITS_MEMBER(volume)
+IPC_STRUCT_TRAITS_MEMBER(rate)
+IPC_STRUCT_TRAITS_MEMBER(pitch)
+IPC_STRUCT_TRAITS_END()
+
+IPC_STRUCT_TRAITS_BEGIN(TtsVoice)
+IPC_STRUCT_TRAITS_MEMBER(voice_uri)
+IPC_STRUCT_TRAITS_MEMBER(name)
+IPC_STRUCT_TRAITS_MEMBER(lang)
+IPC_STRUCT_TRAITS_MEMBER(local_service)
+IPC_STRUCT_TRAITS_MEMBER(is_default)
+IPC_STRUCT_TRAITS_END()
+
+// Renderer -> Browser messages.
+
+IPC_MESSAGE_CONTROL0(TtsHostMsg_InitializeVoiceList)
+IPC_MESSAGE_CONTROL1(TtsHostMsg_Speak,
+                     TtsUtteranceRequest)
+IPC_MESSAGE_CONTROL0(TtsHostMsg_Pause)
+IPC_MESSAGE_CONTROL0(TtsHostMsg_Resume)
+IPC_MESSAGE_CONTROL0(TtsHostMsg_Cancel)
+
+// Browser -> Renderer messages.
+ +IPC_MESSAGE_CONTROL1(TtsMsg_SetVoiceList, + std::vector) +IPC_MESSAGE_CONTROL1(TtsMsg_DidStartSpeaking, + int /* utterance id */) +IPC_MESSAGE_CONTROL1(TtsMsg_DidFinishSpeaking, + int /* utterance id */) +IPC_MESSAGE_CONTROL1(TtsMsg_DidPauseSpeaking, + int /* utterance id */) +IPC_MESSAGE_CONTROL1(TtsMsg_DidResumeSpeaking, + int /* utterance id */) +IPC_MESSAGE_CONTROL2(TtsMsg_WordBoundary, + int /* utterance id */, + int /* char index */) +IPC_MESSAGE_CONTROL2(TtsMsg_SentenceBoundary, + int /* utterance id */, + int /* char index */) +IPC_MESSAGE_CONTROL2(TtsMsg_MarkerEvent, + int /* utterance id */, + int /* char index */) +IPC_MESSAGE_CONTROL1(TtsMsg_WasInterrupted, + int /* utterance id */) +IPC_MESSAGE_CONTROL1(TtsMsg_WasCancelled, + int /* utterance id */) +IPC_MESSAGE_CONTROL2(TtsMsg_SpeakingErrorOccurred, + int /* utterance id */, + std::string /* error message */) \ No newline at end of file diff --git a/chromium_src/chrome/common/tts_utterance_request.cc b/chromium_src/chrome/common/tts_utterance_request.cc new file mode 100644 index 0000000000..a2e3e7fcea --- /dev/null +++ b/chromium_src/chrome/common/tts_utterance_request.cc @@ -0,0 +1,30 @@ +// Copyright (c) 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "chrome/common/tts_utterance_request.h" + +TtsUtteranceRequest::TtsUtteranceRequest() + : id(0), + volume(1.0), + rate(1.0), + pitch(1.0) { +} + +TtsUtteranceRequest::~TtsUtteranceRequest() { +} + +TtsVoice::TtsVoice() + : local_service(true), + is_default(false) { +} + +TtsVoice::~TtsVoice() { +} + +TtsUtteranceResponse::TtsUtteranceResponse() + : id(0) { +} + +TtsUtteranceResponse::~TtsUtteranceResponse() { +} \ No newline at end of file diff --git a/chromium_src/chrome/common/tts_utterance_request.h b/chromium_src/chrome/common/tts_utterance_request.h new file mode 100644 index 0000000000..e0b7adfa4a --- /dev/null +++ b/chromium_src/chrome/common/tts_utterance_request.h @@ -0,0 +1,44 @@ +// Copyright (c) 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef CHROME_COMMON_TTS_UTTERANCE_REQUEST_H_ +#define CHROME_COMMON_TTS_UTTERANCE_REQUEST_H_ + +#include + +#include "base/basictypes.h" +#include "base/strings/string16.h" + +struct TtsUtteranceRequest { + TtsUtteranceRequest(); + ~TtsUtteranceRequest(); + + int id; + std::string text; + std::string lang; + std::string voice; + float volume; + float rate; + float pitch; +}; + +struct TtsVoice { + TtsVoice(); + ~TtsVoice(); + + std::string voice_uri; + std::string name; + std::string lang; + bool local_service; + bool is_default; +}; + +struct TtsUtteranceResponse { + TtsUtteranceResponse(); + ~TtsUtteranceResponse(); + + int id; +}; + +#endif // CHROME_COMMON_TTS_UTTERANCE_REQUEST_H_ \ No newline at end of file diff --git a/chromium_src/chrome/renderer/tts_dispatcher.cc b/chromium_src/chrome/renderer/tts_dispatcher.cc new file mode 100644 index 0000000000..91b67ba167 --- /dev/null +++ b/chromium_src/chrome/renderer/tts_dispatcher.cc @@ -0,0 +1,200 @@ +// Copyright (c) 2013 The Chromium Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "chrome/renderer/tts_dispatcher.h" + +#include "base/basictypes.h" +#include "base/strings/utf_string_conversions.h" +#include "chrome/common/tts_messages.h" +#include "chrome/common/tts_utterance_request.h" +#include "content/public/renderer/render_thread.h" +#include "third_party/WebKit/public/platform/WebCString.h" +#include "third_party/WebKit/public/platform/WebSpeechSynthesisUtterance.h" +#include "third_party/WebKit/public/platform/WebSpeechSynthesisVoice.h" +#include "third_party/WebKit/public/platform/WebString.h" +#include "third_party/WebKit/public/platform/WebVector.h" + +using content::RenderThread; +using blink::WebSpeechSynthesizerClient; +using blink::WebSpeechSynthesisUtterance; +using blink::WebSpeechSynthesisVoice; +using blink::WebString; +using blink::WebVector; + +int TtsDispatcher::next_utterance_id_ = 1; + +TtsDispatcher::TtsDispatcher(WebSpeechSynthesizerClient* client) + : synthesizer_client_(client) { + RenderThread::Get()->AddObserver(this); +} + +TtsDispatcher::~TtsDispatcher() { + RenderThread::Get()->RemoveObserver(this); +} + +bool TtsDispatcher::OnControlMessageReceived(const IPC::Message& message) { + IPC_BEGIN_MESSAGE_MAP(TtsDispatcher, message) + IPC_MESSAGE_HANDLER(TtsMsg_SetVoiceList, OnSetVoiceList) + IPC_MESSAGE_HANDLER(TtsMsg_DidStartSpeaking, OnDidStartSpeaking) + IPC_MESSAGE_HANDLER(TtsMsg_DidFinishSpeaking, OnDidFinishSpeaking) + IPC_MESSAGE_HANDLER(TtsMsg_DidPauseSpeaking, OnDidPauseSpeaking) + IPC_MESSAGE_HANDLER(TtsMsg_DidResumeSpeaking, OnDidResumeSpeaking) + IPC_MESSAGE_HANDLER(TtsMsg_WordBoundary, OnWordBoundary) + IPC_MESSAGE_HANDLER(TtsMsg_SentenceBoundary, OnSentenceBoundary) + IPC_MESSAGE_HANDLER(TtsMsg_MarkerEvent, OnMarkerEvent) + IPC_MESSAGE_HANDLER(TtsMsg_WasInterrupted, OnWasInterrupted) + IPC_MESSAGE_HANDLER(TtsMsg_WasCancelled, OnWasCancelled) + 
IPC_MESSAGE_HANDLER(TtsMsg_SpeakingErrorOccurred, OnSpeakingErrorOccurred) + IPC_END_MESSAGE_MAP() + + // Always return false because there may be multiple TtsDispatchers + // and we want them all to have a chance to handle this message. + return false; +} + +void TtsDispatcher::updateVoiceList() { + RenderThread::Get()->Send(new TtsHostMsg_InitializeVoiceList()); +} + +void TtsDispatcher::speak(const WebSpeechSynthesisUtterance& web_utterance) { + int id = next_utterance_id_++; + + utterance_id_map_[id] = web_utterance; + + TtsUtteranceRequest utterance; + utterance.id = id; + utterance.text = web_utterance.text().utf8(); + utterance.lang = web_utterance.lang().utf8(); + utterance.voice = web_utterance.voice().utf8(); + utterance.volume = web_utterance.volume(); + utterance.rate = web_utterance.rate(); + utterance.pitch = web_utterance.pitch(); + RenderThread::Get()->Send(new TtsHostMsg_Speak(utterance)); +} + +void TtsDispatcher::pause() { + RenderThread::Get()->Send(new TtsHostMsg_Pause()); +} + +void TtsDispatcher::resume() { + RenderThread::Get()->Send(new TtsHostMsg_Resume()); +} + +void TtsDispatcher::cancel() { + RenderThread::Get()->Send(new TtsHostMsg_Cancel()); +} + +WebSpeechSynthesisUtterance TtsDispatcher::FindUtterance(int utterance_id) { + base::hash_map::const_iterator iter = + utterance_id_map_.find(utterance_id); + if (iter == utterance_id_map_.end()) + return WebSpeechSynthesisUtterance(); + return iter->second; +} + +void TtsDispatcher::OnSetVoiceList(const std::vector& voices) { + WebVector out_voices(voices.size()); + for (size_t i = 0; i < voices.size(); ++i) { + out_voices[i] = WebSpeechSynthesisVoice(); + out_voices[i].setVoiceURI(WebString::fromUTF8(voices[i].voice_uri)); + out_voices[i].setName(WebString::fromUTF8(voices[i].name)); + out_voices[i].setLanguage(WebString::fromUTF8(voices[i].lang)); + out_voices[i].setIsLocalService(voices[i].local_service); + out_voices[i].setIsDefault(voices[i].is_default); + } + 
synthesizer_client_->setVoiceList(out_voices); +} + +void TtsDispatcher::OnDidStartSpeaking(int utterance_id) { + if (utterance_id_map_.find(utterance_id) == utterance_id_map_.end()) + return; + + WebSpeechSynthesisUtterance utterance = FindUtterance(utterance_id); + if (utterance.isNull()) + return; + + synthesizer_client_->didStartSpeaking(utterance); +} + +void TtsDispatcher::OnDidFinishSpeaking(int utterance_id) { + WebSpeechSynthesisUtterance utterance = FindUtterance(utterance_id); + if (utterance.isNull()) + return; + + synthesizer_client_->didFinishSpeaking(utterance); + utterance_id_map_.erase(utterance_id); +} + +void TtsDispatcher::OnDidPauseSpeaking(int utterance_id) { + WebSpeechSynthesisUtterance utterance = FindUtterance(utterance_id); + if (utterance.isNull()) + return; + + synthesizer_client_->didPauseSpeaking(utterance); +} + +void TtsDispatcher::OnDidResumeSpeaking(int utterance_id) { + WebSpeechSynthesisUtterance utterance = FindUtterance(utterance_id); + if (utterance.isNull()) + return; + + synthesizer_client_->didResumeSpeaking(utterance); +} + +void TtsDispatcher::OnWordBoundary(int utterance_id, int char_index) { + CHECK(char_index >= 0); + + WebSpeechSynthesisUtterance utterance = FindUtterance(utterance_id); + if (utterance.isNull()) + return; + + synthesizer_client_->wordBoundaryEventOccurred( + utterance, static_cast(char_index)); +} + +void TtsDispatcher::OnSentenceBoundary(int utterance_id, int char_index) { + CHECK(char_index >= 0); + + WebSpeechSynthesisUtterance utterance = FindUtterance(utterance_id); + if (utterance.isNull()) + return; + + synthesizer_client_->sentenceBoundaryEventOccurred( + utterance, static_cast(char_index)); +} + +void TtsDispatcher::OnMarkerEvent(int utterance_id, int char_index) { + // Not supported yet. 
+} + +void TtsDispatcher::OnWasInterrupted(int utterance_id) { + WebSpeechSynthesisUtterance utterance = FindUtterance(utterance_id); + if (utterance.isNull()) + return; + + // The web speech API doesn't support "interrupted". + synthesizer_client_->didFinishSpeaking(utterance); + utterance_id_map_.erase(utterance_id); +} + +void TtsDispatcher::OnWasCancelled(int utterance_id) { + WebSpeechSynthesisUtterance utterance = FindUtterance(utterance_id); + if (utterance.isNull()) + return; + + // The web speech API doesn't support "cancelled". + synthesizer_client_->didFinishSpeaking(utterance); + utterance_id_map_.erase(utterance_id); +} + +void TtsDispatcher::OnSpeakingErrorOccurred(int utterance_id, + const std::string& error_message) { + WebSpeechSynthesisUtterance utterance = FindUtterance(utterance_id); + if (utterance.isNull()) + return; + + // The web speech API doesn't support an error message. + synthesizer_client_->speakingErrorOccurred(utterance); + utterance_id_map_.erase(utterance_id); +} \ No newline at end of file diff --git a/chromium_src/chrome/renderer/tts_dispatcher.h b/chromium_src/chrome/renderer/tts_dispatcher.h new file mode 100644 index 0000000000..0b9bb1af00 --- /dev/null +++ b/chromium_src/chrome/renderer/tts_dispatcher.h @@ -0,0 +1,78 @@ +// Copyright (c) 2013 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef CHROME_RENDERER_TTS_DISPATCHER_H_ +#define CHROME_RENDERER_TTS_DISPATCHER_H_ + +#include + +#include "base/basictypes.h" +#include "base/compiler_specific.h" +#include "base/containers/hash_tables.h" +#include "content/public/renderer/render_process_observer.h" +#include "third_party/WebKit/public/platform/WebSpeechSynthesizer.h" +#include "third_party/WebKit/public/platform/WebSpeechSynthesizerClient.h" + +namespace IPC { +class Message; +} + +struct TtsVoice; + +// TtsDispatcher is a delegate for methods used by Blink for speech synthesis +// APIs. It's the complement of TtsDispatcherHost (owned by RenderViewHost). +// Each TtsDispatcher is owned by the WebSpeechSynthesizerClient in Blink; +// it registers itself to listen to IPC upon construction and unregisters +// itself when deleted. There can be multiple TtsDispatchers alive at once, +// so each one routes IPC messages to its WebSpeechSynthesizerClient only if +// the utterance id (which is globally unique) matches. +class TtsDispatcher + : public blink::WebSpeechSynthesizer, + public content::RenderProcessObserver { + public: + explicit TtsDispatcher(blink::WebSpeechSynthesizerClient* client); + + private: + virtual ~TtsDispatcher(); + + // RenderProcessObserver override. + virtual bool OnControlMessageReceived(const IPC::Message& message) OVERRIDE; + + // blink::WebSpeechSynthesizer implementation. 
+ virtual void updateVoiceList() OVERRIDE; + virtual void speak(const blink::WebSpeechSynthesisUtterance& utterance) + OVERRIDE; + virtual void pause() OVERRIDE; + virtual void resume() OVERRIDE; + virtual void cancel() OVERRIDE; + + blink::WebSpeechSynthesisUtterance FindUtterance(int utterance_id); + + void OnSetVoiceList(const std::vector& voices); + void OnDidStartSpeaking(int utterance_id); + void OnDidFinishSpeaking(int utterance_id); + void OnDidPauseSpeaking(int utterance_id); + void OnDidResumeSpeaking(int utterance_id); + void OnWordBoundary(int utterance_id, int char_index); + void OnSentenceBoundary(int utterance_id, int char_index); + void OnMarkerEvent(int utterance_id, int char_index); + void OnWasInterrupted(int utterance_id); + void OnWasCancelled(int utterance_id); + void OnSpeakingErrorOccurred(int utterance_id, + const std::string& error_message); + + // The WebKit client class that we use to send events back to the JS world. + // Weak reference, this will be valid as long as this object exists. + blink::WebSpeechSynthesizerClient* synthesizer_client_; + + // Next utterance id, used to map response IPCs to utterance objects. + static int next_utterance_id_; + + // Map from id to utterance objects. + base::hash_map utterance_id_map_; + + DISALLOW_COPY_AND_ASSIGN(TtsDispatcher); +}; + +#endif // CHROME_RENDERER_TTS_DISPATCHER_H_ \ No newline at end of file