working and using google cloud speech

This commit is contained in:
wassname
2018-12-29 14:20:04 +08:00
parent 76bee538ca
commit c328ccfeac
8 changed files with 192 additions and 108 deletions
+1
View File
@@ -2,6 +2,7 @@
/outputs/
/snowboy/
/secrets/
.pytest_cache/
# Created by https://www.gitignore.io/api/c,c++,cmake,linux,macos,python
# Edit at https://www.gitignore.io/?templates=c,c++,cmake,linux,macos,python
+11 -20
View File
@@ -10,10 +10,11 @@ import signal
####
from gtts import gTTS ## Packages for Text to voice
import speech_recognition as sr ## Packages for voice recognizer
import tensorflow as tf
tf.enable_eager_execution()
from tensorflow.keras.layers import Embedding, GRU, Dense
import speech_recognition as sr ## Packages for voice recognizer
import BitLit_main
# import tensorflow as tf
# tf.enable_eager_execution()
# from tensorflow.keras.layers import Embedding, GRU, Dense
import numpy as np
import re
from textblob import TextBlob
@@ -21,11 +22,11 @@ import random
import poem_generator ## POEM GENERATOR IMPORT
from poem_generator import*
import time
####
t0=time.time() ## Time counter
interrupted = False
def signal_handler(signal, frame):
global interrupted
interrupted = True
@@ -48,20 +49,10 @@ signal.signal(signal.SIGINT, signal_handler)
detector = snowboydecoder.HotwordDetector(model, sensitivity=0.5)
print('Listening... Press Ctrl+C to exit')
# main loop
detector.start(detected_callback=snowboydecoder.run,
interrupt_check=interrupt_callback,
sleep_time=0.01)
##detector.start(detected_callback=snowboydecoder.play_audio_file,
## interrupt_check=interrupt_callback,
## sleep_time=0.00)
##detector.start(detected_callback= os.system('python BitLit_main.py'),
## interrupt_check=interrupt_callback,
## sleep_time=0.00) ##0.03
#import BitLit_main
##detector.start(detected_callback=BitLit_main,
## interrupt_check=interrupt_callback,
## sleep_time=0.03)
detector.start(detected_callback=BitLit_main.generate_poem,
interrupt_check=interrupt_callback,
sleep_time=0.03)
detector.terminate()
t1 =time.time()
+115 -63
View File
@@ -1,76 +1,128 @@
'''
"""
Voice to text to poem to speech
Credits: Michel, Lauren, Thomas
'''
"""
#https://pythonprogramminglanguage.com/text-to-speech/
# https://pythonprogramminglanguage.com/text-to-speech/
## cmd 1:::: sudo pip install gTTS
## cmd 2:::: sudo pip install pyttsx
import sys
from gtts import gTTS ## Packages for Text to voice
from gtts import gTTS ## Packages for Text to voice
import os
import speech_recognition as sr ## Packages for voice recognizer
import tensorflow as tf
tf.enable_eager_execution()
from tensorflow.keras.layers import Embedding, GRU, Dense
import numpy as np
import re
from textblob import TextBlob
import random
from poem_generator import*
import pyglet
import json
import time
##### KNOWN PARAMETERS
#######################################################
##sys.path
##sys.path.append('/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python')
##sys.path.append('/Users/ShebMichel/Library/Python/2.7/lib/python/site-packages'
################################################################################
############ AUDIO CONVERSION TO TEST
t0=time.time()
r = sr.Recognizer()
with sr.Microphone() as source:
tts = gTTS(text='HELLO! My Name is BIT-LIT. PLEASE SPEAK IN ABOUT 3 SECONDS.', lang='en')
tts.save("outputs/BitLit.mp3")
os.system("afplay outputs/BitLit.mp3")
# ######
print("SPEAK NOW-SPEAK NOW-SPEAK NOW:")
audio = r.listen(source)
tts = gTTS(text='THANK YOU! GIVE ME A SECOND TO READ OUT YOUR POEM', lang='en')
tts.save("outputs/BitLit.mp3")
os.system("afplay outputs/BitLit.mp3")
try:
# for testing purposes, we're just using the default API key
# to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
# instead of `r.recognize_google(audio)
AA0=r.recognize_google(audio)
USER_INPUT=AA0
print("You said: " + r.recognize_google(audio))
except sr.UnknownValueError:
print("Could not understand audio")
except sr.RequestError as e:
print("Could not request results; {0}".format(e))
text_generated=poem(USER_INPUT)
#### END CODE
#########################################################
################# TEXT CONVERSION IN AUDIO
################# FEED POEM TO TRANSCRIBER
print('ML POEM is:', text_generated)
tts = gTTS(text=text_generated, lang='en')
tts.save("outputs/BitLit.mp3")
os.system("afplay outputs/BitLit.mp3")
#########################################################
####
print("BIT-LIT ENDING STATEMENT:")
tts = gTTS(text='THANK YOU! CHECK ME OUT IN THE NEWS SOON.', lang='en')
tts.save("outputs/BitLit.mp3")
os.system("afplay outputs/BitLit.mp3")
######
t1 =time.time()
total=t1-t0
print 'Time spent is about:', np.round(total), 'seconds'
import datetime
import hashlib
import tempfile
from logger import logger
### USING JUPITER
# import IPython.display as ipd
# ipd.Audio(filename='path/to/file.mp3')
#tk.mainloop()
from snowboydecoder import play_audio_file
import speech_recognition as sr ## Packages for voice recognizer
# Log the index and name of every microphone reported by speech_recognition.
microphone_names = sr.Microphone.list_microphone_names()
for device_index, device_name in enumerate(microphone_names):
    message = "Microphone with name \"{1}\" found for `Microphone(device_index={0})`".format(device_index, device_name)
    logger.debug(message)
from poem_generator import poem
# Load the Google Cloud service-account JSON used by `recognize_google_cloud`.
# The raw file content is kept as a string; it stays None when the file
# cannot be read.
try:
    # `with` closes the file handle even if reading fails part-way.
    with open("secrets/google_cloud_credentials.json") as credentials_file:
        GOOGLE_CLOUD_SPEECH_CREDENTIALS = credentials_file.read()
except (IOError, OSError, ValueError):
    # Missing/unreadable file (IOError/OSError) or undecodable content
    # (UnicodeDecodeError is a ValueError). Unlike a bare `except:`, this
    # no longer swallows KeyboardInterrupt or SystemExit.
    print('you should place google cloud json credentials at "secrets/google_cloud_credentials.json", make sure you enable the speech recognition api')
    GOOGLE_CLOUD_SPEECH_CREDENTIALS = None
def play_mp3(mp3_file):
    """Play an mp3 file with pyglet and block until it should have finished.

    Args:
        mp3_file: Path of the mp3 file to play.
    """
    sound = pyglet.media.load(filename=mp3_file, streaming=False)
    sound.play()
    # There is no completion callback here, so wait for the reported clip
    # length plus a two second margin before returning.
    wait_seconds = sound.duration + 2
    time.sleep(wait_seconds)
def cache_gtts(text, lang="en", cache_file=None):
    """Synthesize `text` with gTTS once and reuse the mp3 on later calls.

    Args:
        text: Text to convert to speech.
        lang: Language code handed to gTTS (previously ignored in favour of
            a hard-coded "en").
        cache_file: Optional explicit path of the mp3. When omitted, a path
            in the system temp directory is derived from the MD5 of `text`
            and from `lang`.

    Returns:
        Path of the mp3 file containing the spoken text.

    Raises:
        IOError: If the mp3 file does not exist after synthesis.
    """
    if not cache_file:
        text_hash = hashlib.md5(text.encode("utf-8")).hexdigest()
        # The language is part of the name so the same text requested in two
        # languages does not share one cached recording.
        hash_filename = "{}_{}.mp3".format(text_hash, lang)
        cache_file = os.path.join(tempfile.gettempdir(), hash_filename)
    if not os.path.isfile(cache_file):
        tts = gTTS(text=text, lang=lang)
        tts.save(cache_file)
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not os.path.isfile(cache_file):
        raise IOError("gTTS did not produce the expected file: {}".format(cache_file))
    return cache_file
def generate_poem():
    """Record a spoken seed phrase, turn it into a poem and read it aloud.

    Steps, in order: play the default notification sound, speak a prompt,
    record from the default microphone, transcribe the recording with
    Google Cloud Speech, generate a poem from the transcript with `poem`,
    synthesize it with gTTS into ``outputs/BitLit_<UTC timestamp>.mp3``,
    play it, and finish with a closing statement. Timings of each stage are
    printed along the way.

    Returns:
        None. If no speech starts before the listen timeout, or the
        recording cannot be transcribed, the reason is printed and the
        function returns early instead of raising, so a caller that invokes
        it repeatedly can continue.
    """
    # https://github.com/Uberi/speech_recognition/blob/master/reference/library-reference.rst
    # `Recognizer.listen` also accepts
    # snowboy_configuration=(SNOWBOY_LOCATION, LIST_OF_HOT_WORD_FILES), e.g.
    # ('./snowboy', ['./HiBitLit.pmdl', './snowboy/resources/alexa.umdl',
    #                './snowboy/resources/snowboy.umdl']); it is not used here.

    ############ AUDIO CONVERSION TO TEST
    play_audio_file()

    t0 = time.time()
    r = sr.Recognizer()
    with sr.Microphone() as source:
        outfile1 = cache_gtts(text="Hi! My Name is BIT-LIT. PLEASE SPEAK. You have 20 seconds before the beep.", lang="en")
        play_mp3(outfile1)

        print("SPEAK NOW-SPEAK:", source)
        try:
            audio = r.listen(source, timeout=5, phrase_time_limit=20)
        except sr.WaitTimeoutError as e:
            # Nobody started speaking within `timeout` seconds.
            print("No speech detected before the timeout. {}".format(e))
            return
        print('Recorded audio', audio)

    outfile2 = cache_gtts(text="BEEP. THANK YOU! GIVE ME A SECOND TO READ OUT YOUR POEM", lang="en")
    play_mp3(outfile2)
    t1 = time.time()
    print('listen took', t1 - t0)

    # Stays None when transcription fails, so the failure is detected below
    # instead of surfacing as a NameError at the `poem` call.
    USER_INPUT = None
    try:
        print("using google speech to text...")
        USER_INPUT = r.recognize_google_cloud(audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS)
        print("Google thinks you said: " + USER_INPUT)
    except sr.UnknownValueError as e:
        print("Could not understand audio. {}".format(e))
    except sr.RequestError as e:
        print("Could not request results; {0}".format(e))
    t1b = time.time()
    print('transcribe took', t1b - t1)

    if USER_INPUT is None:
        # Nothing to seed the poem with.
        return

    # Generate poem from user seed
    text_generated = poem(USER_INPUT)
    t2 = time.time()
    # Measured from the end of transcription so the figure covers poem
    # generation only.
    print("ML POEM is:", text_generated, 'in', t2 - t1b)

    # TEXT CONVERSION IN AUDIO
    # The poem is saved under a timestamped name rather than the gTTS cache.
    tts = gTTS(text=text_generated, lang="en")
    ts = datetime.datetime.utcnow().strftime('%Y%m%d_%H-%M-%S')
    poem_mp3 = "outputs/BitLit_{}.mp3".format(ts)
    tts.save(poem_mp3)
    play_mp3(poem_mp3)

    print("BIT-LIT ENDING STATEMENT:")
    outfile = cache_gtts(text="THANK YOU! CHECK ME OUT IN THE NEWS SOON.", lang="en")
    play_mp3(outfile)

    ######
    t3 = time.time()
    print('Poem to speech took', t3 - t2)
    print("Time spent is about:", np.round(t3 - t0), "seconds")


if __name__ == "__main__":
    generate_poem()
+4 -4
View File
@@ -3,7 +3,7 @@
# Run
To run the program once installed
>> python BitLit_decoder1.py HiBitLit.pmdl
>> python BitLit_decoder.py HiBitLit.pmdl
After some time, say "Hi BitLit", then wait until the poetbot speaks to you and enjoy the rest. When it has finished, say "Hi BitLit" again followed by a different spoken input so that the poetbot generates a new poem; the cycle can go on for days and weeks.
WARNING::: Might be slow depending on your computer capabilities..
@@ -31,7 +31,7 @@ API Keys:
- `cp secrets.template.json to secrets.json`
- get an API key for google cloud
- make a project or use an existing project
- download a credentials json for that project
- enable the speech recognition api https://console.cloud.google.com/apis/api/speech.googleapis.com/overview
- put the credentials in `./secrets/google_cloud_credintials.json`
- download a credentials json for that project (using edit, create key) https://console.cloud.google.com/apis/api/speech.googleapis.com/credentials
- enable the speech recognition api for that project (make sure you've selected your project) https://console.cloud.google.com/apis/api/speech.googleapis.com/overview
- place the credentials json at this path `./secrets/google_cloud_credentials.json`
- never commit this file!
+37
View File
@@ -0,0 +1,37 @@
"""
from https://gist.github.com/wassname/d17325f36c36fa663dd7de3c09a55e74
Setup simple logging in python. This logs info message to stdout and debug messages to file.
Sure it's long but this is as simple as I could make it for this outcome.
Note: The root logger must be set to the DEBUG level (the most permissive level used here) so that debug records from its children are passed on to the handlers.
Then set filehandler at debug and stream handler at info.
"""
import logging
import sys
import os
import datetime
import tempfile
# Use different log levels for the file (DEBUG) and the console (INFO).
timestamp = datetime.datetime.utcnow().strftime('%Y%m%d_%H-%M-%S')
# The file name must be relative: os.path.join discards every earlier
# component when a later one is absolute, so the previous '/tmp/...' value
# silently ignored tempfile.gettempdir().
filename = os.path.join(tempfile.gettempdir(), 'bitlit_log_{}.log'.format(timestamp))
formatter = logging.Formatter('[%(asctime)s] %(name)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s')

# Every record, including debug messages, goes to the log file.
file_handler = logging.FileHandler(filename=filename)
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(formatter)

# Only info and above is echoed to stdout.
stream_handler = logging.StreamHandler(sys.stdout)
stream_handler.setLevel(logging.INFO)

# The handlers have to be at a root level since they are the final output.
# basicConfig assigns `format` only to handlers without a formatter, so it
# applies to the stream handler while the file handler keeps `formatter`.
logging.basicConfig(
    level=logging.DEBUG,
    format='[{%(filename)s:%(lineno)d} %(levelname)s - %(message)s',
    handlers=[
        file_handler,
        stream_handler
    ]
)

logger = logging.getLogger('bitlit')
logger.info('Logging to STDOUT and {}'.format(filename))
+13 -14
View File
@@ -7,14 +7,16 @@ import sys
from gtts import gTTS ## Packages for Text to voice
import os
import numpy as np
import speech_recognition as sr ## Packages for voice recognizer
import speech_recognition as sr ## Packages for voice recognizer
os.environ['CUDA_VISIBLE_DEVICES']=""
import tensorflow as tf
tf.enable_eager_execution()
from tensorflow.keras.layers import Embedding, GRU, Dense
import re
from textblob import TextBlob
import random
from BitLit_param import*
from BitLit_param import *
# Architechture of the GRU
@@ -103,10 +105,9 @@ def poem(USER_INPUT):
### ML POEM PREDICTOR
###########################
# USER INPUT a line #
###########################
###########################
# USER INPUT a line #
###########################
USER_INPUT = USER_INPUT.lower()
USER_INPUT = re.sub('[^a-z\n]', ' ', USER_INPUT)
text_generated = USER_INPUT[::-1]
@@ -114,11 +115,9 @@ def poem(USER_INPUT):
######################
# RHYMES GENERATION #
######################
######################
# RHYMES GENERATION #
######################
temperature = 0.09
num_generate = 5 # number of characters to generate
@@ -145,9 +144,9 @@ def poem(USER_INPUT):
print('rhymes:', rhymes)
####################
# POEM GENERATION #
####################
####################
# POEM GENERATION #
####################
temperature = 0.8
+10 -3
View File
@@ -1,4 +1,11 @@
PyAudio==0.2.9
gTTS==2.0.1
SpeechRecognition==3.8.1
tensorflow==1.10.1
# or tensorflow-gpu==1.10.1
textblob==0.15.2
# for playing mp3s
pyglet==1.2.4
# using google cloud speech recognition
google-api-python-client==1.7.7
oauth2client==4.1.3
+1 -4
View File
@@ -19,7 +19,6 @@ TOP_DIR = os.path.dirname(os.path.abspath(__file__))
RESOURCE_FILE = os.path.join(TOP_DIR, "snowboy/resources/common.res")
DETECT_DING = os.path.join(TOP_DIR, "snowboy/resources/ding.wav")
DETECT_DONG = os.path.join(TOP_DIR, "snowboy/resources/dong.wav")
DETECT_PY = os.path.join(TOP_DIR, "BitLit_main.py") ## ADDED
class RingBuffer(object):
@@ -37,9 +36,7 @@ class RingBuffer(object):
self._buf.clear()
return tmp
###
def run(fname=DETECT_PY):
Py_wav=os.system('python BitLit_main.py')
###
def play_audio_file(fname=DETECT_DING):
"""Simple callback function to play a wave file. By default it plays