Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add IBM Watson as a STT option #477

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 145 additions & 0 deletions client/stt.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,151 @@ def is_available(cls):
return diagnose.check_network_connection()


class WatsonSTT(AbstractSTTEngine):
    """
    Speech-To-Text implementation which relies on the IBM Watson Speech-To-Text
    API. This requires an IBM Bluemix account, but the first 1000 minutes of
    transcribing per month are free.

    To obtain a login:
    1. Register for IBM Bluemix here:
       https://console.ng.bluemix.net/registration/
    2. Once you've logged in, click the "Use Services & APIs" link on the
       dashboard
    3. Click the "Speech To Text" icon
    4. In the form on the right, leave all options as defaults and click Create
    5. You'll now have a new service listed on your dashboard. If you click
       that service there will be a navigation option for "Service Credentials"
       in the left hand nav. Find your username and password there.

    Excerpt from sample profile.yml:

        ...
        timezone: US/Pacific
        stt_engine: watson
        watson:
            username: $YOUR_USERNAME_HERE
            password: $YOUR_PASSWORD_HERE

    """

    SLUG = 'watson'

    # Endpoint every recognition request is posted to. Defined at class level
    # so that the ``request_url`` property always has a value to return.
    _request_url = ('https://stream.watsonplatform.net/'
                    'speech-to-text/api/v1/recognize?continuous=true')

    def __init__(self, username=None, password=None, language='en-us'):
        """
        Arguments:
        username - the watson api username credential
        password - the watson api password credential
        language - language tag; stored on the instance but not currently
                   sent to the API
        """
        # FIXME: get init args from config
        self._logger = logging.getLogger(__name__)
        self._username = None
        self._password = None
        self._language = language
        # One session is reused for all requests made by this engine.
        self._http = requests.Session()
        self.username = username
        self.password = password

    @property
    def request_url(self):
        """The URL that transcription requests are posted to."""
        return self._request_url

    @property
    def username(self):
        """The Watson API username credential."""
        return self._username

    @username.setter
    def username(self, value):
        self._username = value

    @property
    def password(self):
        """The Watson API password credential."""
        return self._password

    @password.setter
    def password(self, value):
        self._password = value

    @classmethod
    def get_config(cls):
        """
        Reads the Watson credentials from the user's profile.yml.

        Returns a dict holding whichever of the keys 'username' and
        'password' are present under the 'watson' section of the profile.
        The dict is empty when the profile file does not exist, is empty,
        or has no usable 'watson' section.
        """
        # FIXME: Replace this as soon as we have a config module
        config = {}
        profile_path = jasperpath.config('profile.yml')
        if not os.path.exists(profile_path):
            return config

        with open(profile_path, 'r') as f:
            # safe_load returns None for an empty file
            profile = yaml.safe_load(f)

        watson = profile.get('watson') if isinstance(profile, dict) else None
        if isinstance(watson, dict):
            for key in ('username', 'password'):
                if key in watson:
                    config[key] = watson[key]
        return config

    def transcribe(self, fp):
        """
        Performs STT via the Watson Speech-to-Text API, transcribing an audio
        file and returning the transcription alternatives as upper-cased
        strings.

        Arguments:
        fp -- an open, readable binary file object containing the .wav audio
              to be transcribed (it is passed to wave.open() and then read)

        Returns:
        A tuple of upper-cased transcript strings on success, or an empty
        list when credentials are missing, the HTTP request fails, or the
        response contains no usable transcription.
        """

        if not self.username:
            self._logger.critical('Username missing, transcription request ' +
                                  'aborted.')
            return []
        elif not self.password:
            self._logger.critical('Password missing, transcription ' +
                                  'request aborted.')
            return []

        wav = wave.open(fp, 'rb')
        try:
            frame_rate = wav.getframerate()
        finally:
            wav.close()
        # wave.open() consumed the header from fp, so this reads what is left
        # of the stream, which is sent as the request body.
        data = fp.read()

        headers = {'content-type':
                   'audio/l16; rate=%s; channels=1' % frame_rate}
        r = self._http.post(self.request_url, data=data, headers=headers,
                            auth=(self.username, self.password))
        try:
            r.raise_for_status()
        except requests.exceptions.HTTPError:
            self._logger.critical('Request failed with http status %d',
                                  r.status_code)
            # Either status is the usual symptom of rejected credentials.
            if r.status_code in (requests.codes['unauthorized'],
                                 requests.codes['forbidden']):
                self._logger.warning('Status %d is probably caused by ' +
                                     'invalid credentials.', r.status_code)
            return []

        r.encoding = 'utf-8'
        try:
            response = r.json()
        except ValueError as e:
            # Body was not valid JSON
            self._logger.warning('Empty response: %s', e)
            return []

        try:
            segments = response['results']
            if not segments:
                # Response result is empty
                self._logger.warning('Empty response: %s',
                                     'Nothing has been transcribed.')
                return []
            # Only the first entry of 'results' is used.
            results = [alt['transcript'] for alt
                       in segments[0]['alternatives']]
        except (KeyError, IndexError, TypeError):
            self._logger.warning('Cannot parse response.', exc_info=True)
            return []

        # Convert all results to uppercase
        results = tuple(result.strip().upper() for result in results)
        self._logger.info('Transcribed: %r', results)
        return results

    @classmethod
    def is_available(cls):
        """Reports the result of diagnose.check_network_connection()."""
        return diagnose.check_network_connection()


class AttSTT(AbstractSTTEngine):
"""
Speech-To-Text implementation which relies on the AT&T Speech API.
Expand Down