From c328ccfeac6f124906e2637a21ea18c2cb5b0e7d Mon Sep 17 00:00:00 2001 From: wassname Date: Sat, 29 Dec 2018 14:20:04 +0800 Subject: [PATCH] working and using google cloud speech --- .gitignore | 1 + BitLit_decoder.py | 31 +++----- BitLit_main.py | 178 ++++++++++++++++++++++++++++++---------------- README.md | 8 +-- logger.py | 37 ++++++++++ poem_generator.py | 27 ++++--- requirements.txt | 13 +++- snowboydecoder.py | 5 +- 8 files changed, 192 insertions(+), 108 deletions(-) create mode 100644 logger.py diff --git a/.gitignore b/.gitignore index 5615668..a0f52f7 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ /outputs/ /snowboy/ /secrets/ +.pytest_cache/ # Created by https://www.gitignore.io/api/c,c++,cmake,linux,macos,python # Edit at https://www.gitignore.io/?templates=c,c++,cmake,linux,macos,python diff --git a/BitLit_decoder.py b/BitLit_decoder.py index 10b2811..ae1d60e 100644 --- a/BitLit_decoder.py +++ b/BitLit_decoder.py @@ -10,10 +10,11 @@ import signal #### from gtts import gTTS ## Packages for Text to voice -import speech_recognition as sr ## Packages for voice recognizer -import tensorflow as tf -tf.enable_eager_execution() -from tensorflow.keras.layers import Embedding, GRU, Dense +import speech_recognition as sr ## Packages for voice recognizer +import BitLit_main +# import tensorflow as tf +# tf.enable_eager_execution() +# from tensorflow.keras.layers import Embedding, GRU, Dense import numpy as np import re from textblob import TextBlob @@ -21,11 +22,11 @@ import random import poem_generator ## POEM GENERATOR IMPORT from poem_generator import* import time -#### + + t0=time.time() ## Time counter interrupted = False - def signal_handler(signal, frame): global interrupted interrupted = True @@ -48,20 +49,10 @@ signal.signal(signal.SIGINT, signal_handler) detector = snowboydecoder.HotwordDetector(model, sensitivity=0.5) print('Listening... 
Press Ctrl+C to exit') -# main loop -detector.start(detected_callback=snowboydecoder.run, - interrupt_check=interrupt_callback, - sleep_time=0.01) -##detector.start(detected_callback=snowboydecoder.play_audio_file, -## interrupt_check=interrupt_callback, -## sleep_time=0.00) -##detector.start(detected_callback= os.system('python BitLit_main.py'), -## interrupt_check=interrupt_callback, -## sleep_time=0.00) ##0.03 -#import BitLit_main -##detector.start(detected_callback=BitLit_main, -## interrupt_check=interrupt_callback, -## sleep_time=0.03) + +detector.start(detected_callback=BitLit_main.generate_poem, + interrupt_check=interrupt_callback, + sleep_time=0.03) detector.terminate() t1 =time.time() diff --git a/BitLit_main.py b/BitLit_main.py index d2614d8..f8134ba 100644 --- a/BitLit_main.py +++ b/BitLit_main.py @@ -1,76 +1,128 @@ -''' +""" Voice to text to poem to speech Credits: Michel, Lauren, Thomas -''' +""" -#https://pythonprogramminglanguage.com/text-to-speech/ +# https://pythonprogramminglanguage.com/text-to-speech/ ## cmd 1:::: sudo pip install gTTS ## cmd 2:::: sudo pip install pyttsx import sys -from gtts import gTTS ## Packages for Text to voice +from gtts import gTTS ## Packages for Text to voice import os -import speech_recognition as sr ## Packages for voice recognizer -import tensorflow as tf -tf.enable_eager_execution() -from tensorflow.keras.layers import Embedding, GRU, Dense + + import numpy as np import re from textblob import TextBlob import random -from poem_generator import* +import pyglet +import json import time -##### KNOWN PARAMETERS -####################################################### -##sys.path -##sys.path.append('/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python') -##sys.path.append('/Users/ShebMichel/Library/Python/2.7/lib/python/site-packages' -################################################################################ -############ AUDIO CONVERSION TO TEST -t0=time.time() -r = sr.Recognizer() -with 
sr.Microphone() as source: - tts = gTTS(text='HELLO! My Name is BIT-LIT. PLEASE SPEAK IN ABOUT 3 SECONDS.', lang='en') - tts.save("outputs/BitLit.mp3") - os.system("afplay outputs/BitLit.mp3") -# ###### - - print("SPEAK NOW-SPEAK NOW-SPEAK NOW:") - audio = r.listen(source) - tts = gTTS(text='THANK YOU! GIVE ME A SECOND TO READ OUT YOUR POEM', lang='en') - tts.save("outputs/BitLit.mp3") - os.system("afplay outputs/BitLit.mp3") -try: - # for testing purposes, we're just using the default API key - # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")` - # instead of `r.recognize_google(audio) - AA0=r.recognize_google(audio) - USER_INPUT=AA0 - print("You said: " + r.recognize_google(audio)) -except sr.UnknownValueError: - print("Could not understand audio") -except sr.RequestError as e: - print("Could not request results; {0}".format(e)) -text_generated=poem(USER_INPUT) -#### END CODE -######################################################### -################# TEXT CONVERSION IN AUDIO -################# FEED POEM TO TRANSCRIBER -print('ML POEM is:', text_generated) -tts = gTTS(text=text_generated, lang='en') -tts.save("outputs/BitLit.mp3") -os.system("afplay outputs/BitLit.mp3") -######################################################### -#### -print("BIT-LIT ENDING STATEMENT:") -tts = gTTS(text='THANK YOU! 
CHECK ME OUT IN THE NEWS SOON.', lang='en') -tts.save("outputs/BitLit.mp3") -os.system("afplay outputs/BitLit.mp3") -###### -t1 =time.time() -total=t1-t0 -print 'Time spent is about:', np.round(total), 'seconds' +import datetime +import hashlib +import tempfile +from logger import logger -### USING JUPITER -# import IPython.display as ipd -# ipd.Audio(filename='path/to/file.mp3') -#tk.mainloop() +from snowboydecoder import play_audio_file + +import speech_recognition as sr ## Packages for voice recognizer +for index, name in enumerate(sr.Microphone.list_microphone_names()): + logger.debug("Microphone with name \"{1}\" found for `Microphone(device_index={0})`".format(index, name)) + +from poem_generator import poem + + +# Load credentials +try: + GOOGLE_CLOUD_SPEECH_CREDENTIALS = open("secrets/google_cloud_credentials.json").read() +except: + print('you should place google cloud json credentials at "secrets/google_cloud_credentials.json", make sure you enable the speech recognition api') + GOOGLE_CLOUD_SPEECH_CREDENTIALS = None + + +def play_mp3(mp3_file): + """Play mp3 file with pyglet.""" + source = pyglet.media.load(filename=mp3_file, streaming=False) + source.play() + time.sleep(source.duration + 2) # must be a better way to wait untill the media has played + + +def cache_gtts(text, lang="en", cache_file=None): + """ + Cache calls to gtts. 
+ + Saves each to a temporary file + """ + if not cache_file: + hash_filename = hashlib.md5(text.encode()).hexdigest() + '.mp3' + cache_file = os.path.join(tempfile.gettempdir(), hash_filename) + if not os.path.isfile(cache_file): + tts = gTTS(text=text, lang="en") + tts.save(cache_file) + assert os.path.isfile(cache_file) + return cache_file + + +def generate_poem(): + + # https://github.com/Uberi/speech_recognition/blob/master/reference/library-reference.rst + # snowboy_configuration = (SNOWBOY_LOCATION, LIST_OF_HOT_WORD_FILES) + snowboy_configuration = ('./snowboy', ['./HiBitLit.pmdl', './snowboy/resources/alexa.umdl', './snowboy/resources/snowboy.umdl']) + + ############ AUDIO CONVERSION TO TEST + play_audio_file() + t0 = time.time() + r = sr.Recognizer() + with sr.Microphone() as source: + outfile1 = cache_gtts(text="Hi! My Name is BIT-LIT. PLEASE SPEAK. You have 20 seconds before the beep.", lang="en") + play_mp3(outfile1) + + print("SPEAK NOW-SPEAK:", source) + audio = r.listen(source, timeout=5, phrase_time_limit=20)#, snowboy_configuration=snowboy_configuration) + # speech_recognition.WaitTimeoutError + print('Recorded audio', audio) + + outfile2 = cache_gtts(text="BEEP. THANK YOU! GIVE ME A SECOND TO READ OUT YOUR POEM", lang="en") + play_mp3(outfile2) + + t1 = time.time() + print('listen took', t1 - t0) + + try: + print("using google speech to text...") + USER_INPUT = r.recognize_google_cloud(audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS) + print("Google thinks you said: " + USER_INPUT) + except sr.UnknownValueError as e: + print("Could not understand audio. 
{}".format(e)) + except sr.RequestError as e: + print("Could not request results; {0}".format(e)) + # speech_recognition.WaitTimeoutError + + t1b = time.time() + print('transcribe took', t1b - t1) + + # Generate poem from user seed + text_generated = poem(USER_INPUT) + t2 = time.time() + print("ML POEM is:", text_generated, 'in', t2 - t1) + + # TEXT CONVERSION IN AUDIO + # FEED POEM TO TRANSCRIBER + tts = gTTS(text=text_generated, lang="en") + ts = datetime.datetime.utcnow().strftime('%Y%m%d_%H-%M-%S') + poem_mp3 = "outputs/BitLit_{}.mp3".format(ts) + tts.save(poem_mp3) + play_mp3(poem_mp3) + + print("BIT-LIT ENDING STATEMENT:") + outfile = cache_gtts(text="THANK YOU! CHECK ME OUT IN THE NEWS SOON.", lang="en") + play_mp3(outfile) + + ###### + t3 = time.time() + print('Poem to speech took', t3 - t2) + print("Time spent is about:", np.round(t3 - t0), "seconds") + + +if __name__ == "__main__": + generate_poem() diff --git a/README.md b/README.md index ae25f9b..dc73874 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ # Run To run the program once installed ->> python BitLit_decoder1.py HiBitLit.pmdl +>> python BitLit_decoder.py HiBitLit.pmdl After sometimes you should say " HI BitLit" then wait till the poetbot speak to you..and enjoy the rest.. At the end you just need to say again " Hi BitLit" to a different vocal input for so that the poetbot can generate a new poem and the cycle can go days and weeks.. WARNING::: Might be slow depending on your computer capabilities.. 
@@ -31,7 +31,7 @@ API Keys: - `cp secrets.template.json to secrets.json` - get an API key for google cloud - make a project or use an existing project - - download a credentials json for that project - - enable the speech recognition api https://console.cloud.google.com/apis/api/speech.googleapis.com/overview - - put the credentials in `./secrets/google_cloud_credintials.json` + - download a credentials json for that project (using edit, create key) https://console.cloud.google.com/apis/api/speech.googleapis.com/credentials + - enable the speech recognition api for that project (make sure you've selected your project) https://console.cloud.google.com/apis/api/speech.googleapis.com/overview + - place the credentials json at this path `./secrets/google_cloud_credentials.json` - never commit this file! diff --git a/logger.py b/logger.py new file mode 100644 index 0000000..05f054f --- /dev/null +++ b/logger.py @@ -0,0 +1,37 @@ +""" +from https://gist.github.com/wassname/d17325f36c36fa663dd7de3c09a55e74 +Setup simple logging in python. This logs info messages to stdout and debug messages to file. +Sure it's long but this is as simple as I could make it for this outcome. +Note: We must set the root logger at DEBUG level, since it must be higher than its children to pass them on. +Then set filehandler at debug and stream handler at info. 
+""" +import logging +import sys +import os +import datetime +import tempfile + +# To use differen't log level for file and console +timestamp = datetime.datetime.utcnow().strftime('%Y%m%d_%H-%M-%S') +filename = os.path.join(tempfile.gettempdir(), '/tmp/bitlit_log_{}.log'.format(timestamp)) +formatter = logging.Formatter('[%(asctime)s] %(name)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s') + +file_handler = logging.FileHandler(filename=filename) +file_handler.setLevel(logging.DEBUG) +file_handler.setFormatter(formatter) + +stream_handler = logging.StreamHandler(sys.stdout) +stream_handler.setLevel(logging.INFO) + +# The handlers have to be at a root level since they are the final output +logging.basicConfig( + level=logging.DEBUG, + format='[{%(filename)s:%(lineno)d} %(levelname)s - %(message)s', + handlers=[ + file_handler, + stream_handler + ] +) + +logger = logging.getLogger('bitlit') +logger.info('Logging to STDOUT and {}'.format(filename)) diff --git a/poem_generator.py b/poem_generator.py index 6483c5f..15dc880 100644 --- a/poem_generator.py +++ b/poem_generator.py @@ -7,14 +7,16 @@ import sys from gtts import gTTS ## Packages for Text to voice import os import numpy as np -import speech_recognition as sr ## Packages for voice recognizer +import speech_recognition as sr ## Packages for voice recognizer + +os.environ['CUDA_VISIBLE_DEVICES']="" import tensorflow as tf tf.enable_eager_execution() from tensorflow.keras.layers import Embedding, GRU, Dense import re from textblob import TextBlob import random -from BitLit_param import* +from BitLit_param import * # Architechture of the GRU @@ -103,10 +105,9 @@ def poem(USER_INPUT): ### ML POEM PREDICTOR - ########################### - # USER INPUT a line # - ########################### - + ########################### + # USER INPUT a line # + ########################### USER_INPUT = USER_INPUT.lower() USER_INPUT = re.sub('[^a-z\n]', ' ', USER_INPUT) text_generated = USER_INPUT[::-1] @@ -114,11 +115,9 @@ 
def poem(USER_INPUT): - ###################### - # RHYMES GENERATION # - ###################### - - + ###################### + # RHYMES GENERATION # + ###################### temperature = 0.09 num_generate = 5 # number of characters to generate @@ -145,9 +144,9 @@ def poem(USER_INPUT): print('rhymes:', rhymes) - #################### - # POEM GENERATION # - #################### + #################### + # POEM GENERATION # + #################### temperature = 0.8 diff --git a/requirements.txt b/requirements.txt index c5c5fc1..08d635d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,11 @@ PyAudio==0.2.9 - - - +gTTS==2.0.1 +SpeechRecognition==3.8.1 +tensorflow==1.10.1 +# or tensorflow-gpu==1.10.1 +textblob==0.15.2 +# for playing mp3s +pyglet==1.2.4 +# using google cloud speech recognition +google-api-python-client==1.7.7 +oauth2client==4.1.3 diff --git a/snowboydecoder.py b/snowboydecoder.py index 8221692..9d8d3c8 100644 --- a/snowboydecoder.py +++ b/snowboydecoder.py @@ -19,7 +19,6 @@ TOP_DIR = os.path.dirname(os.path.abspath(__file__)) RESOURCE_FILE = os.path.join(TOP_DIR, "snowboy/resources/common.res") DETECT_DING = os.path.join(TOP_DIR, "snowboy/resources/ding.wav") DETECT_DONG = os.path.join(TOP_DIR, "snowboy/resources/dong.wav") -DETECT_PY = os.path.join(TOP_DIR, "BitLit_main.py") ## ADDED class RingBuffer(object): @@ -37,9 +36,7 @@ class RingBuffer(object): self._buf.clear() return tmp -### -def run(fname=DETECT_PY): - Py_wav=os.system('python BitLit_main.py') + ### def play_audio_file(fname=DETECT_DING): """Simple callback function to play a wave file. By default it plays