Skip to content

Commit

Permalink
Finish the dictate tool. Integrated with the listening method
Browse files Browse the repository at this point in the history
  • Loading branch information
yorevs committed Dec 18, 2024
1 parent 7e15c22 commit 577205a
Show file tree
Hide file tree
Showing 5 changed files with 52 additions and 65 deletions.
2 changes: 0 additions & 2 deletions src/demo/components/recorder_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from clitt.core.term.cursor import cursor
from clitt.core.tui.line_input.line_input import line_input

from askai.core.askai_messages import msg
from askai.core.component.recorder import recorder
from utils import init_context

Expand All @@ -27,7 +26,6 @@
cursor.write()
match opt:
case "1":
cursor.write(msg.listening())
audio_path, stt_text = recorder.listen()
cursor.writeln(f"Audio path: {audio_path}")
cursor.writeln(f"Transcribed text: {stt_text}")
Expand Down
3 changes: 1 addition & 2 deletions src/main/askai/core/askai_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,7 @@ def _cb_mic_listening_event(self, ev: Event) -> None:
"""Callback to handle microphone listening events.
:param ev: The event object representing the microphone listening event.
"""
if ev.args.listening:
self._reply(AIReply.info(msg.listening()))
pass  # The CLI doesn't really care about this event

def _cb_device_changed_event(self, ev: Event) -> None:
"""Callback to handle audio input device change events.
Expand Down
4 changes: 2 additions & 2 deletions src/main/askai/core/askai_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,13 +81,13 @@ def welcome_back(self) -> str:
return "How may I further assist you ?"

def listening(self) -> str:
    """Return the status message shown while the microphone is actively listening.
    :return: The listening status message.
    """
    # The previous version had two consecutive returns (the second unreachable);
    # keep the intended post-change message only.
    return "Listening…"

def dictating(self) -> str:
    """Return the status message shown while dictation is in progress.
    :return: The dictating status message.
    """
    status_message: str = "Dictating…"
    return status_message

def transcribing(self) -> str:
    """Return the status message shown while recorded speech is being transcribed.
    :return: The transcribing status message.
    """
    # The previous version had two consecutive returns (the second unreachable);
    # keep the intended post-change message only.
    return "Processing your voice…"

def goodbye(self) -> str:
    """Return the farewell message displayed when the session ends.
    :return: The goodbye message.
    """
    farewell: str = "Goodbye, have a nice day !"
    return farewell
Expand Down
106 changes: 49 additions & 57 deletions src/main/askai/core/component/recorder.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,16 @@ def __device_watcher() -> None:
break
recorder.devices = new_list

@staticmethod
def countdown(sec: int, ev: threading.Event, counter_msg: str | None) -> None:
    """Emit a decreasing on-screen counter until it reaches zero or the event is set.
    :param sec: The number of seconds to count down from.
    :param ev: Event used to abort the countdown early.
    :param counter_msg: Optional message prefix shown next to the counter; when None, nothing is emitted.
    """
    remaining: int = sec
    if counter_msg:
        # Show the initial counter value immediately, before the first tick.
        events.reply.emit(reply=AIReply.mute(f"{counter_msg} {remaining}"))
    while not ev.is_set():
        remaining -= 1
        if remaining < 0:
            break
        # Slightly under one second per tick, matching the original cadence.
        pause.milliseconds(990)
        if counter_msg:
            events.reply.emit(reply=AIReply.mute(f"{counter_msg} {remaining}"), erase_last=True)

def __init__(self):
self._rec: Recognizer = Recognizer()
self._devices: list[InputDevice] = []
Expand Down Expand Up @@ -167,40 +177,59 @@ def listen(
self,
recognition_api: RecognitionApi = RecognitionApi.GOOGLE,
language: Language = Language.EN_US,
audio_path: Path | None = None,
raise_on_timeout: bool = True,
raise_on_intelligible: bool = True,
counter_msg: str = msg.listening(),
) -> tuple[Path, Optional[str]]:
"""Listen to the microphone, save the recorded audio as a WAV file, and transcribe the speech.
:param recognition_api: The API to use for recognizing the speech. Defaults to GOOGLE.
:param language: The spoken language. Defaults to EN_US.
:param audio_path: The audio file destination, generated by the listening process.
:param raise_on_timeout: If True, raises an error when no speech is detected within the timeout period.
:param raise_on_intelligible: If True, raises an error when the speech is unintelligible.
:param counter_msg: The message to display during the listening countdown.
:return: A tuple containing the path to the saved audio file and the transcribed text.
If transcription fails, the second element of the tuple will be None.
"""
stop_event: threading.Event = threading.Event()
timeout: float = seconds(configs.recorder_silence_timeout_millis)
limit: float = seconds(configs.recorder_phrase_limit_millis)

with Microphone(device_index=self._device_index) as mic_source:
try:
audio_path = Path(f"{REC_DIR}/askai-stt-{now_ms()}.wav")
stop_event.clear()
counter_thread = threading.Thread(target=self.countdown, args=(limit, stop_event, counter_msg))
audio_path = audio_path or Path(f"{REC_DIR}/askai-stt-{now_ms()}.wav")
self._detect_noise()
counter_thread.start()
events.listening.emit()
audio: AudioData = self._rec.listen(
mic_source,
phrase_time_limit=seconds(configs.recorder_phrase_limit_millis),
timeout=seconds(configs.recorder_silence_timeout_millis),
)
audio: AudioData = self._rec.listen(mic_source, phrase_time_limit=limit, timeout=timeout)
events.listening.emit(listening=False)
stop_event.set()
counter_thread.join()
events.reply.emit(reply=AIReply.mute(f"{counter_msg}  "), erase_last=True)
stt_text = self._write_audio_file(audio, audio_path, language, recognition_api)
except WaitTimeoutError as err:
err_msg: str = msg.timeout(f"waiting for a speech input => '{err}'")
log.warning("Timed out while waiting for a speech input!")
events.reply.emit(reply=AIReply.error(message=err_msg), erase_last=True)
if raise_on_timeout:
err_msg: str = msg.timeout(f"waiting for a speech input => '{err}'")
events.reply.emit(reply=AIReply.error(message=err_msg), erase_last=True)
stt_text = None
except UnknownValueError as err:
err_msg: str = msg.intelligible(err)
log.warning("Speech was not intelligible!")
events.reply.emit(reply=AIReply.error(message=err_msg), erase_last=True)
if raise_on_intelligible:
err_msg: str = msg.intelligible(err)
events.reply.emit(reply=AIReply.error(message=err_msg), erase_last=True)
stt_text = None
except AttributeError as err:
raise InvalidInputDevice(str(err)) from err
except RequestError as err:
raise InvalidRecognitionApiError(str(err)) from err
finally:
events.listening.emit(listening=False)
if not stop_event.is_set():
stop_event.set()
counter_thread.join()

return audio_path, stt_text

Expand All @@ -209,80 +238,43 @@ def dictate(
recognition_api: RecognitionApi = RecognitionApi.GOOGLE,
language: Language = Language.EN_US,
) -> Optional[str]:
"""Listen to the microphone, save the recorded audio as a WAV file, and transcribe the speech.
"""Captures dictated speech, processes it into text, and returns the transcribed text.
:param recognition_api: The API to use for recognizing the speech. Defaults to GOOGLE.
:param language: The spoken language. Defaults to EN_US.
:param language: The spoken language. Defaults to en_US.
:return: A string containing the dictated text. If transcription fails, None will be returned.
"""
phrase: str = ""
dictated_text: str = ""
limit: float = 10.0
noise_limit: float = 0.2
stop_event: threading.Event = threading.Event()
audio_path: Path = Path(f"{REC_DIR}/askai-dictate-{now_ms()}.wav")

def _countdown_(sec: int):
i = sec
sysout(msg.listening() + " ", end="")
sysout(f"{i}", end="")
while not stop_event.is_set() and (i := (i - 1)) >= 0:
pause.seconds(1)
sysout(f"%CUB({len(str(i + 1))})%{i}%EL0%", end="")
if not stop_event.is_set():
sysout(f"%CUB({len(str(i + 1))})%%EL0%", end="")

events.listening.emit()
while True:
with Microphone(device_index=self._device_index) as mic_source:
try:
stop_event.clear()
new_thread = threading.Thread(target=_countdown_, args=(limit,))
sysout(("…" if dictated_text else "") + phrase)
self._rec.adjust_for_ambient_noise(mic_source, duration=noise_limit)
new_thread.start()
audio: AudioData = self._rec.listen(mic_source, phrase_time_limit=limit)
sysout(f"%CUB({len(str(limit))})%%EL0%  ")
phrase = self._write_audio_file(audio, audio_path, language, recognition_api, True)
except (WaitTimeoutError, UnknownValueError):
phrase = ""
except AttributeError as err:
raise InvalidInputDevice(str(err)) from err
except RequestError as err:
raise InvalidRecognitionApiError(str(err)) from err
finally:
stop_event.set()
if not phrase or phrase in ["quit", "exit"]:
break
else:
dictated_text += (". " if dictated_text else "") + phrase.capitalize()

events.listening.emit(listening=False)
_, phrase = self.listen(recognition_api, language, audio_path, False, False, msg.dictating())
if not phrase or phrase in [msg.t("quit"), msg.t("exit"), msg.t("bye")]:
break
else:
sysout(f" {('…' if dictated_text else '') + phrase}  ")
dictated_text += (". " if dictated_text else "") + phrase.capitalize()

return ensure_endswith(dictated_text, "." + os.linesep)
return ensure_endswith(dictated_text, "." + os.linesep) if dictated_text else dictated_text

def _write_audio_file(
    self,
    audio: AudioData,
    audio_path: AnyPath,
    language: Language,
    recognition_api: RecognitionApi,
    mute: bool = False,
) -> Optional[str]:
    """Write the provided audio data to disk as a WAV file and transcribe the contents into text using the
    specified recognition API.
    :param audio: The audio data to be saved.
    :param audio_path: The path where the audio file will be saved.
    :param language: The language of the spoken content in the audio.
    :param recognition_api: The API used for speech recognition.
    :param mute: Whether or not to suppress the 'transcribing' reply; Defaults to False.
    :return: The transcribed text from the audio file. If transcription fails, returns None.
    """
    with open(str(audio_path), "wb") as f_rec:
        f_rec.write(audio.get_wav_data())
    log.debug("Voice recorded and saved as %s", audio_path)
    # Resolve the recognizer callable named by the API enum value on the Recognizer instance
    # (presumably a 'recognize_*' method — confirm against RecognitionApi definitions).
    if api := getattr(self._rec, recognition_api.value):
        if not mute:
            # Transient status reply; erase_last replaces the previous status line.
            events.reply.emit(reply=AIReply.debug(message=msg.transcribing()), erase_last=True)
        log.debug("Recognizing voice using %s", recognition_api)
        assert isinstance(api, Callable)
        return api(audio, language=language.language)
    # No matching recognizer attribute: falls through and returns None implicitly.
Expand Down
2 changes: 0 additions & 2 deletions src/main/askai/tui/askai_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,8 +390,6 @@ def _cb_mic_listening_event(self, ev: Event) -> None:
:param ev: The event object representing the microphone listening event.
"""
self.header.notifications.listening = ev.args.listening
if ev.args.listening:
self._reply(AIReply.info(msg.listening()))

def _cb_device_changed_event(self, ev: Event) -> None:
"""Callback to handle audio input device change events.
Expand Down

0 comments on commit 577205a

Please sign in to comment.