From a0617660f5d7bfbe1f5a47eb017d5e07b60866bb Mon Sep 17 00:00:00 2001 From: Adam Reed Date: Mon, 28 Mar 2016 18:44:27 +0000 Subject: [PATCH] Add IBM Watson as a STT option --- client/stt.py | 145 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) diff --git a/client/stt.py b/client/stt.py index a48696099..eae42a372 100644 --- a/client/stt.py +++ b/client/stt.py @@ -427,6 +427,151 @@ def is_available(cls): return diagnose.check_network_connection() +class WatsonSTT(AbstractSTTEngine): + """ + Speech-To-Text implementation which relies on the IBM Watson Speech-To-Text + API. This requires an IBM Bluemix account, but the first 1000 minutes of + transcribing per month are free. + + To obtain a login: + 1. Register for IBM Bluemix here: + https://console.ng.bluemix.net/registration/ + 2. Once you've logged in, click the "Use Services & APIs" link on the + dashboard + 3. Click the "Speech To Text" icon + 4. In the form on the right, leave all options as defaults and click Create + 5. You'll now have a new service listed on your dashboard. If you click + that service there will be a navigation option for "Service Credentials" + in the left hand nav. Find your username and password there. + + Excerpt from sample profile.yml: + + ... + timezone: US/Pacific + stt_engine: watson + watson: + username: $YOUR_USERNAME_HERE + password: $YOUR_PASSWORD_HERE + + """ + + SLUG = 'watson' + + def __init__(self, username=None, password=None, language='en-us'): + # FIXME: get init args from config + """ + Arguments: + username - the watson api username credential + password - the watson api password credential + """ + self._logger = logging.getLogger(__name__) + self._username = None + self._password = None + self._http = requests.Session() + self.username = username + self.password = password + + @property + def request_url(self): + return self._request_url + + @property + def username(self): + return self._username + + @username.setter + def username(self, value): + self._username = value + + @property + def password(self): + return self._password + + @password.setter + def password(self, value): + self._password = value + + @classmethod + def get_config(cls): + # FIXME: Replace this as soon as we have a config module + config = {} + # HMM dir + # Try to get hmm_dir from config + profile_path = jasperpath.config('profile.yml') + if os.path.exists(profile_path): + with open(profile_path, 'r') as f: + profile = yaml.safe_load(f) + if 'watson' in profile: + if 'username' in profile['watson']: + config['username'] = profile['watson']['username'] + if 'password' in profile['watson']: + config['password'] = profile['watson']['password'] + return config + + def transcribe(self, fp): + """ + Performs STT via the Watson Speech-to-Text API, transcribing an audio + file and returning an English string. + + Arguments: + fp -- the path to the .wav file to be transcribed + """ + + if not self.username: + self._logger.critical('Username missing, transcription request ' + + 'aborted.') + return [] + elif not self.password: + self._logger.critical('Password missing, transcription ' + + 'request aborted.') + return [] + + wav = wave.open(fp, 'rb') + frame_rate = wav.getframerate() + wav.close() + data = fp.read() + + headers = {'content-type': + 'audio/l16; rate=%s; channels=1' % frame_rate} + r = self._http.post( + 'https://stream.watsonplatform.net/' + + 'speech-to-text/api/v1/recognize?continuous=true', + data=data, headers=headers, auth=(self.username, self.password) + ) + try: + r.raise_for_status() + except requests.exceptions.HTTPError as e: + self._logger.critical('Request failed with http status %d', + r.status_code) + if r.status_code == requests.codes['forbidden']: + self._logger.warning('Status 403 is probably caused by ' + + 'invalid credentials.') + return [] + r.encoding = 'utf-8' + try: + response = r.json() + if len(response['results']) == 0: + # Response result is empty + raise ValueError('Nothing has been transcribed.') + results = [alt['transcript'] for alt + in response['results'][0]['alternatives']] + except ValueError as e: + self._logger.warning('Empty response: %s', e.args[0]) + results = [] + except (KeyError, IndexError): + self._logger.warning('Cannot parse response.', exc_info=True) + results = [] + else: + # Convert all results to uppercase + results = tuple(result.strip().upper() for result in results) + self._logger.info('Transcribed: %r', results) + return results + + @classmethod + def is_available(cls): + return diagnose.check_network_connection() + + class AttSTT(AbstractSTTEngine): """ Speech-To-Text implementation which relies on the AT&T Speech API.