# appointment_tool/src/workers/SpeechToText.py

import logging

import speech_recognition as sr

# Lookup from a transcribed word to the digit it stands for. Besides the
# regular number words, several other words are mapped to the same digit
# (e.g. "juan"/"won" -> 1, "for" -> 4) -- presumably frequent
# mis-transcriptions of the spoken digit; confirm before extending.
number_text_dict = {
    "eight": 8, "take": 8, "eighty": 8,
    "zero": 0,
    "one": 1, "juan": 1, "wanna": 1, "won": 1,
    "to": 2, "two": 2,
    "free": 3, "three": 3,
    "four": 4, "for": 4,
    "five": 5, "by": 5,
    "six": 6,
    "seven": 7,
    "i": 9, "nine": 9,
}


class SpeechToText:
    """Turn a spoken digit code stored in an audio file into a list of ints.

    The audio is transcribed with the Sphinx recogniser exposed by the
    ``speech_recognition`` package, and the trailing words of the transcript
    are translated to digits through ``number_text_dict``.
    """

    # Number of trailing transcript words that must map to digits.
    CODE_LENGTH = 6

    def __init__(self):
        self.logger = logging.getLogger("SpeechToText")

    def to_text(self, file_name: str) -> list:
        """Return the digits spoken at the end of the audio file.

        Args:
            file_name: Path of the audio file handed to ``sr.AudioFile``.

        Returns:
            A list of ``CODE_LENGTH`` ints when the last ``CODE_LENGTH``
            words of the transcript are all present in
            ``number_text_dict``; otherwise an empty list. An empty list is
            returned on every failure path, including a transcript that is
            too short, so callers always get a list back.

        Raises:
            Whatever ``sr.AudioFile`` / ``Recognizer.record`` raise while
            opening or reading the file; those calls are intentionally left
            outside the error handling, as they were originally.
        """
        recognizer = sr.Recognizer()
        with sr.AudioFile(file_name) as source:
            audio = recognizer.record(source)  # read the entire audio file

        # Recognise speech using Sphinx; any recogniser failure yields [].
        try:
            text = recognizer.recognize_sphinx(audio)
        except sr.UnknownValueError:
            self.logger.warning("App could not understand audio")
            return []
        except sr.RequestError as error:
            self.logger.error("App error; %s", error)
            return []
        except Exception:
            # Best-effort boundary: log with traceback and report "no code".
            self.logger.exception("error on the file_name:%s", file_name)
            return []

        # split() without an argument ignores repeated/leading/trailing
        # whitespace, so no empty tokens reach the lookup below.
        words = text.split()[-self.CODE_LENGTH:]
        self.logger.info("%s", words)

        digits = []
        for word in words:
            # Compare against None explicitly: 0 is a valid digit.
            digit = number_text_dict.get(word)
            if digit is None:
                self.logger.info(
                    "unknown word %r; error on the file_name:%s",
                    word, file_name,
                )
                return []
            digits.append(digit)

        if len(digits) != self.CODE_LENGTH:
            # Previously this path fell through and returned None.
            self.logger.info(
                "expected %d digits but got %d; error on the file_name:%s",
                self.CODE_LENGTH, len(digits), file_name,
            )
            return []

        self.logger.info("App thinks you said %s", text)
        return digits