From 50a2db610de5a3084f2aaa13f81271addf6f7b30 Mon Sep 17 00:00:00 2001
From: PAN Lei
Date: Mon, 26 Sep 2022 18:06:34 +0200
Subject: [PATCH] can run speechtotext server

---
 requirements.txt            |  1 +
 server.py                   | 39 ++++++++++++++++++++++++
 src/workers/SpeechToText.py | 61 +++++++++++++++++++++++++++++++++++++
 3 files changed, 101 insertions(+)
 create mode 100644 server.py
 create mode 100644 src/workers/SpeechToText.py

diff --git a/requirements.txt b/requirements.txt
index f7517a9..c83e935 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 dataclasses_json==0.5.7
+flask-cors==3.0.10
 firebase_admin==5.2.0
 pandas~=1.3.5
 playwright==1.25.0
diff --git a/server.py b/server.py
new file mode 100644
index 0000000..2480b01
--- /dev/null
+++ b/server.py
@@ -0,0 +1,39 @@
+import os
+import uuid
+
+from flask import Flask
+from flask import jsonify
+from flask import request
+from flask_cors import CORS, cross_origin
+
+from src.workers.SpeechToText import SpeechToText
+
+app = Flask(__name__)
+cors = CORS(app)
+app.config['CORS_HEADERS'] = 'Content-Type'
+
+
+@app.route('/', methods=['POST'])
+@cross_origin()
+def post():
+    """Transcribe the WAV audio in the request body into a list of digits."""
+    # Random name so concurrent requests never share a temp file.
+    file_name = str(uuid.uuid4()) + ".wav"
+    try:
+        with open(file_name, "wb") as vid:
+            vid.write(request.data)
+        speech_to_text = SpeechToText()
+        result = speech_to_text.to_text(file_name)
+    finally:
+        # Remove the temp file even when writing or recognition raises.
+        try:
+            os.remove(file_name)
+        except OSError as error:
+            print(error)
+    return jsonify(result)
+
+
+if __name__ == '__main__':
+    # NOTE(review): debug=True on 0.0.0.0 exposes the Werkzeug debugger to
+    # the network; keep this entry point for local development only.
+    app.run(host='0.0.0.0', port=8000, debug=True)
diff --git a/src/workers/SpeechToText.py b/src/workers/SpeechToText.py
new file mode 100644
index 0000000..9c8d717
--- /dev/null
+++ b/src/workers/SpeechToText.py
@@ -0,0 +1,61 @@
+import logging
+
+import speech_recognition as sr
+
+# Maps recognizer output words to digits; entries such as 'juan' or 'free'
+# look like mis-hearings of 'one' / 'three'.
+number_text_dict = {'eight': 8,
+                    'take': 8,
+                    'eighty': 8,
+                    'zero': 0,
+                    'one': 1, 'juan': 1,
+                    'wanna': 1, 'won': 1, 'to': 2, 'two': 2,
+                    'free': 3,
+                    'three': 3,
+                    'four': 4,
+                    'for': 4,
+                    'five': 5,
+                    'by': 5,
+                    'six': 6,
+                    'seven': 7, 'i': 9, 'nine': 9}
+
+
+class SpeechToText:
+    """Converts speech in an audio file into a list of six digits."""
+
+    def __init__(self):
+        self.logger = logging.getLogger("SpeechToText")
+
+    def to_text(self, file_name: str):
+        """Return the last six recognized words of *file_name* as digits.
+
+        Returns an empty list when the audio cannot be recognized, a word has
+        no digit mapping, or fewer than six words were heard.
+        """
+        r = sr.Recognizer()
+        with sr.AudioFile(file_name) as source:
+            audio = r.record(source)  # read the entire audio file
+        # recognize speech using Sphinx
+        try:
+            text = r.recognize_sphinx(audio)
+            number_list = text.split(" ")[-6:]
+            self.logger.info(str(number_list))
+            number_to_type = []
+            for number in number_list:
+                number_to_type.append(number_text_dict[number])
+            print(number_to_type)
+            if len(number_to_type) == 6:
+                self.logger.info("App thinks you said " + text)
+                return number_to_type
+            # Too few words: fail the same way as the other error paths.
+            return []
+        except sr.UnknownValueError:
+            print("App could not understand audio")
+            return []
+        except sr.RequestError as e:
+            print("App error; {0}".format(e))
+            return []
+        except Exception as error:
+            print(error)
+            self.logger.info("error on the file_name:" + file_name)
+            return []