mirror of
https://github.com/wassname/BitLit_test1.git
synced 2026-07-03 14:49:40 +08:00
working and using google cloud speech
This commit is contained in:
@@ -2,6 +2,7 @@
|
||||
/outputs/
|
||||
/snowboy/
|
||||
/secrets/
|
||||
.pytest_cache/
|
||||
|
||||
# Created by https://www.gitignore.io/api/c,c++,cmake,linux,macos,python
|
||||
# Edit at https://www.gitignore.io/?templates=c,c++,cmake,linux,macos,python
|
||||
|
||||
+11
-20
@@ -10,10 +10,11 @@ import signal
|
||||
|
||||
####
|
||||
from gtts import gTTS ## Packages for Text to voice
|
||||
import speech_recognition as sr ## Packages for voice recognizer
|
||||
import tensorflow as tf
|
||||
tf.enable_eager_execution()
|
||||
from tensorflow.keras.layers import Embedding, GRU, Dense
|
||||
import speech_recognition as sr ## Packages for voice recognizer
|
||||
import BitLit_main
|
||||
# import tensorflow as tf
|
||||
# tf.enable_eager_execution()
|
||||
# from tensorflow.keras.layers import Embedding, GRU, Dense
|
||||
import numpy as np
|
||||
import re
|
||||
from textblob import TextBlob
|
||||
@@ -21,11 +22,11 @@ import random
|
||||
import poem_generator ## POEM GENERATOR IMPORT
|
||||
from poem_generator import*
|
||||
import time
|
||||
####
|
||||
|
||||
|
||||
t0=time.time() ## Time counter
|
||||
interrupted = False
|
||||
|
||||
|
||||
def signal_handler(signal, frame):
|
||||
global interrupted
|
||||
interrupted = True
|
||||
@@ -48,20 +49,10 @@ signal.signal(signal.SIGINT, signal_handler)
|
||||
detector = snowboydecoder.HotwordDetector(model, sensitivity=0.5)
|
||||
print('Listening... Press Ctrl+C to exit')
|
||||
|
||||
# main loop
|
||||
detector.start(detected_callback=snowboydecoder.run,
|
||||
interrupt_check=interrupt_callback,
|
||||
sleep_time=0.01)
|
||||
##detector.start(detected_callback=snowboydecoder.play_audio_file,
|
||||
## interrupt_check=interrupt_callback,
|
||||
## sleep_time=0.00)
|
||||
##detector.start(detected_callback= os.system('python BitLit_main.py'),
|
||||
## interrupt_check=interrupt_callback,
|
||||
## sleep_time=0.00) ##0.03
|
||||
#import BitLit_main
|
||||
##detector.start(detected_callback=BitLit_main,
|
||||
## interrupt_check=interrupt_callback,
|
||||
## sleep_time=0.03)
|
||||
|
||||
detector.start(detected_callback=BitLit_main.generate_poem,
|
||||
interrupt_check=interrupt_callback,
|
||||
sleep_time=0.03)
|
||||
|
||||
detector.terminate()
|
||||
t1 =time.time()
|
||||
|
||||
+115
-63
@@ -1,76 +1,128 @@
|
||||
'''
|
||||
"""
|
||||
Voice to text to poem to speech
|
||||
Credits: Michel, Lauren, Thomas
|
||||
'''
|
||||
"""
|
||||
|
||||
#https://pythonprogramminglanguage.com/text-to-speech/
|
||||
# https://pythonprogramminglanguage.com/text-to-speech/
|
||||
## cmd 1:::: sudo pip install gTTS
|
||||
## cmd 2:::: sudo pip install pyttsx
|
||||
import sys
|
||||
from gtts import gTTS ## Packages for Text to voice
|
||||
from gtts import gTTS ## Packages for Text to voice
|
||||
import os
|
||||
import speech_recognition as sr ## Packages for voice recognizer
|
||||
import tensorflow as tf
|
||||
tf.enable_eager_execution()
|
||||
from tensorflow.keras.layers import Embedding, GRU, Dense
|
||||
|
||||
|
||||
import numpy as np
|
||||
import re
|
||||
from textblob import TextBlob
|
||||
import random
|
||||
from poem_generator import*
|
||||
import pyglet
|
||||
import json
|
||||
import time
|
||||
##### KNOWN PARAMETERS
|
||||
#######################################################
|
||||
##sys.path
|
||||
##sys.path.append('/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python')
|
||||
##sys.path.append('/Users/ShebMichel/Library/Python/2.7/lib/python/site-packages'
|
||||
################################################################################
|
||||
############ AUDIO CONVERSION TO TEST
|
||||
t0=time.time()
|
||||
r = sr.Recognizer()
|
||||
with sr.Microphone() as source:
|
||||
tts = gTTS(text='HELLO! My Name is BIT-LIT. PLEASE SPEAK IN ABOUT 3 SECONDS.', lang='en')
|
||||
tts.save("outputs/BitLit.mp3")
|
||||
os.system("afplay outputs/BitLit.mp3")
|
||||
# ######
|
||||
|
||||
print("SPEAK NOW-SPEAK NOW-SPEAK NOW:")
|
||||
audio = r.listen(source)
|
||||
tts = gTTS(text='THANK YOU! GIVE ME A SECOND TO READ OUT YOUR POEM', lang='en')
|
||||
tts.save("outputs/BitLit.mp3")
|
||||
os.system("afplay outputs/BitLit.mp3")
|
||||
try:
|
||||
# for testing purposes, we're just using the default API key
|
||||
# to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
|
||||
# instead of `r.recognize_google(audio)
|
||||
AA0=r.recognize_google(audio)
|
||||
USER_INPUT=AA0
|
||||
print("You said: " + r.recognize_google(audio))
|
||||
except sr.UnknownValueError:
|
||||
print("Could not understand audio")
|
||||
except sr.RequestError as e:
|
||||
print("Could not request results; {0}".format(e))
|
||||
text_generated=poem(USER_INPUT)
|
||||
#### END CODE
|
||||
#########################################################
|
||||
################# TEXT CONVERSION IN AUDIO
|
||||
################# FEED POEM TO TRANSCRIBER
|
||||
print('ML POEM is:', text_generated)
|
||||
tts = gTTS(text=text_generated, lang='en')
|
||||
tts.save("outputs/BitLit.mp3")
|
||||
os.system("afplay outputs/BitLit.mp3")
|
||||
#########################################################
|
||||
####
|
||||
print("BIT-LIT ENDING STATEMENT:")
|
||||
tts = gTTS(text='THANK YOU! CHECK ME OUT IN THE NEWS SOON.', lang='en')
|
||||
tts.save("outputs/BitLit.mp3")
|
||||
os.system("afplay outputs/BitLit.mp3")
|
||||
######
|
||||
t1 =time.time()
|
||||
total=t1-t0
|
||||
print 'Time spent is about:', np.round(total), 'seconds'
|
||||
import datetime
|
||||
import hashlib
|
||||
import tempfile
|
||||
from logger import logger
|
||||
|
||||
### USING JUPITER
|
||||
# import IPython.display as ipd
|
||||
# ipd.Audio(filename='path/to/file.mp3')
|
||||
#tk.mainloop()
|
||||
from snowboydecoder import play_audio_file
|
||||
|
||||
import speech_recognition as sr ## Packages for voice recognizer
|
||||
# Log every audio input device reported by speech_recognition, together with
# the `device_index` value that selects it in `sr.Microphone(...)`.
microphone_names = sr.Microphone.list_microphone_names()
for index, name in enumerate(microphone_names):
    logger.debug(
        "Microphone with name \"{1}\" found for `Microphone(device_index={0})`".format(index, name)
    )
|
||||
|
||||
from poem_generator import poem
|
||||
|
||||
|
||||
# Load credentials
|
||||
# Read the Google Cloud service-account JSON as a raw string. When the file
# cannot be read, fall back to None and tell the user where to put it.
try:
    # `with` closes the file handle even if read() fails.
    with open("secrets/google_cloud_credentials.json") as credentials_file:
        GOOGLE_CLOUD_SPEECH_CREDENTIALS = credentials_file.read()
except (IOError, OSError):
    # Only file-access problems are expected here; anything else (e.g. Ctrl+C)
    # should propagate instead of being silently swallowed.
    print('you should place google cloud json credentials at "secrets/google_cloud_credentials.json", make sure you enable the speech recognition api')
    GOOGLE_CLOUD_SPEECH_CREDENTIALS = None
|
||||
|
||||
|
||||
def play_mp3(mp3_file):
    """
    Play an mp3 file with pyglet and block until it should have finished.

    The file is loaded with ``streaming=False`` and started, then this
    function sleeps for the clip's reported duration plus 2 seconds.
    """
    clip = pyglet.media.load(filename=mp3_file, streaming=False)
    clip.play()
    # Crude wait: there is probably a better way to detect the end of playback.
    wait_seconds = clip.duration + 2
    time.sleep(wait_seconds)
|
||||
|
||||
|
||||
def cache_gtts(text, lang="en", cache_file=None):
    """
    Return the path of an mp3 with `text` spoken by gTTS, synthesizing at most once.

    Args:
        text: The sentence to synthesize.
        lang: Language code passed to gTTS (default "en").
        cache_file: Optional explicit path for the mp3. When omitted, a file
            in the system temp directory named after the md5 of the language
            and text is used.

    Returns:
        The path of the mp3 file (existing or freshly written).
    """
    if not cache_file:
        # The language is part of the key so the same text requested in two
        # languages does not share (and wrongly reuse) one cached file.
        cache_key = "{}:{}".format(lang, text).encode("utf-8")
        hash_filename = hashlib.md5(cache_key).hexdigest() + '.mp3'
        cache_file = os.path.join(tempfile.gettempdir(), hash_filename)
    if not os.path.isfile(cache_file):
        # Honor the caller's language instead of always synthesizing English.
        tts = gTTS(text=text, lang=lang)
        tts.save(cache_file)
    # Sanity check that the save above actually produced the file.
    assert os.path.isfile(cache_file)
    return cache_file
|
||||
|
||||
|
||||
def generate_poem():
    """
    Run one voice -> text -> poem -> speech interaction.

    Steps: play the detection sound, prompt the user by voice, record from the
    default microphone, transcribe with Google Cloud Speech, generate a poem
    from the transcript with `poem`, speak the poem, then speak a closing
    statement. Timing for each stage is printed.

    Returns None. If nothing is heard before the listen timeout, or the audio
    cannot be transcribed, a message is printed and the function returns early
    instead of continuing without a transcript.
    """
    # https://github.com/Uberi/speech_recognition/blob/master/reference/library-reference.rst
    # r.listen() can also take snowboy_configuration=(SNOWBOY_LOCATION, LIST_OF_HOT_WORD_FILES);
    # it is not used here.

    ############ AUDIO CONVERSION TO TEST
    play_audio_file()
    t0 = time.time()
    r = sr.Recognizer()
    with sr.Microphone() as source:
        outfile1 = cache_gtts(text="Hi! My Name is BIT-LIT. PLEASE SPEAK. You have 20 seconds before the beep.", lang="en")
        play_mp3(outfile1)

        print("SPEAK NOW-SPEAK:", source)
        try:
            audio = r.listen(source, timeout=5, phrase_time_limit=20)
        except sr.WaitTimeoutError as e:
            # Raised when no phrase starts within `timeout` seconds.
            print("No speech detected before the timeout. {}".format(e))
            return
        print('Recorded audio', audio)

        outfile2 = cache_gtts(text="BEEP. THANK YOU! GIVE ME A SECOND TO READ OUT YOUR POEM", lang="en")
        play_mp3(outfile2)

    t1 = time.time()
    print('listen took', t1 - t0)

    try:
        print("using google speech to text...")
        USER_INPUT = r.recognize_google_cloud(audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS)
        print("Google thinks you said: " + USER_INPUT)
    except sr.UnknownValueError as e:
        print("Could not understand audio. {}".format(e))
        # Without a transcript there is no seed for the poem.
        return
    except sr.RequestError as e:
        print("Could not request results; {0}".format(e))
        return

    t1b = time.time()
    print('transcribe took', t1b - t1)

    # Generate poem from user seed
    text_generated = poem(USER_INPUT)
    t2 = time.time()
    # Measured from the end of transcription so it covers poem generation only.
    print("ML POEM is:", text_generated, 'in', t2 - t1b)

    # TEXT CONVERSION IN AUDIO
    # FEED POEM TO TRANSCRIBER
    tts = gTTS(text=text_generated, lang="en")
    ts = datetime.datetime.utcnow().strftime('%Y%m%d_%H-%M-%S')
    # The outputs directory is git-ignored, so it may not exist on a fresh checkout.
    if not os.path.isdir("outputs"):
        os.makedirs("outputs")
    poem_mp3 = "outputs/BitLit_{}.mp3".format(ts)
    tts.save(poem_mp3)
    play_mp3(poem_mp3)

    print("BIT-LIT ENDING STATEMENT:")
    outfile = cache_gtts(text="THANK YOU! CHECK ME OUT IN THE NEWS SOON.", lang="en")
    play_mp3(outfile)

    ######
    t3 = time.time()
    print('Poem to speech took', t3 - t2)
    print("Time spent is about:", np.round(t3 - t0), "seconds")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
generate_poem()
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
# Run
|
||||
To run the program once installed
|
||||
>> python BitLit_decoder1.py HiBitLit.pmdl
|
||||
>> python BitLit_decoder.py HiBitLit.pmdl
|
||||
After a moment, say "Hi BitLit", then wait until the poetbot speaks to you and enjoy the rest. When it has finished, say "Hi BitLit" again and give a different vocal input so that the poetbot generates a new poem; the cycle can go on for days and weeks.
|
||||
WARNING::: Might be slow depending on your computer capabilities..
|
||||
|
||||
@@ -31,7 +31,7 @@ API Keys:
|
||||
- `cp secrets.template.json to secrets.json`
|
||||
- get an API key for google cloud
|
||||
- make a project or use an existing project
|
||||
- download a credentials json for that project
|
||||
- enable the speech recognition api https://console.cloud.google.com/apis/api/speech.googleapis.com/overview
|
||||
- put the credentials in `./secrets/google_cloud_credintials.json`
|
||||
- download a credentials json for that project (using edit, create key) https://console.cloud.google.com/apis/api/speech.googleapis.com/credentials
|
||||
- enable the speech recognition api for that project (make sure you've selected your project) https://console.cloud.google.com/apis/api/speech.googleapis.com/overview
|
||||
- place the credentials json at this path `./secrets/google_cloud_credentials.json`
|
||||
- never commit this file!
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
"""
|
||||
from https://gist.github.com/wassname/d17325f36c36fa663dd7de3c09a55e74
|
||||
Setup simple logging in python. This logs info message to stdout and debug messages to file.
|
||||
Sure it's long but this is as simple as I could make it for this outcome.
|
||||
Note: We must set the root logger at DEBUG level, since it must be higher than its children to pass them on.
|
||||
Then set filehandler at debug and stream handler at info.
|
||||
"""
|
||||
import logging
|
||||
import sys
|
||||
import os
|
||||
import datetime
|
||||
import tempfile
|
||||
|
||||
# Use different log levels for the file and the console: the file handler
# records DEBUG and above, the console handler only INFO and above.
timestamp = datetime.datetime.utcnow().strftime('%Y%m%d_%H-%M-%S')
# Keep the second component relative: an absolute component such as '/tmp/...'
# makes os.path.join drop the temp dir, which hard-codes a Unix-only location.
filename = os.path.join(tempfile.gettempdir(), 'bitlit_log_{}.log'.format(timestamp))
formatter = logging.Formatter('[%(asctime)s] %(name)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s')

# Detailed, timestamped records go to the per-run log file.
file_handler = logging.FileHandler(filename=filename)
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(formatter)

# Console output; it has no formatter of its own, so basicConfig below
# assigns it the shorter `format` string.
stream_handler = logging.StreamHandler(sys.stdout)
stream_handler.setLevel(logging.INFO)

# The handlers have to be at a root level since they are the final output
logging.basicConfig(
    level=logging.DEBUG,
    format='[{%(filename)s:%(lineno)d} %(levelname)s - %(message)s',
    handlers=[
        file_handler,
        stream_handler,
    ],
)

logger = logging.getLogger('bitlit')
logger.info('Logging to STDOUT and {}'.format(filename))
|
||||
+13
-14
@@ -7,14 +7,16 @@ import sys
|
||||
from gtts import gTTS ## Packages for Text to voice
|
||||
import os
|
||||
import numpy as np
|
||||
import speech_recognition as sr ## Packages for voice recognizer
|
||||
import speech_recognition as sr ## Packages for voice recognizer
|
||||
|
||||
os.environ['CUDA_VISIBLE_DEVICES']=""
|
||||
import tensorflow as tf
|
||||
tf.enable_eager_execution()
|
||||
from tensorflow.keras.layers import Embedding, GRU, Dense
|
||||
import re
|
||||
from textblob import TextBlob
|
||||
import random
|
||||
from BitLit_param import*
|
||||
from BitLit_param import *
|
||||
|
||||
# Architechture of the GRU
|
||||
|
||||
@@ -103,10 +105,9 @@ def poem(USER_INPUT):
|
||||
### ML POEM PREDICTOR
|
||||
|
||||
|
||||
###########################
|
||||
# USER INPUT a line #
|
||||
###########################
|
||||
|
||||
###########################
|
||||
# USER INPUT a line #
|
||||
###########################
|
||||
USER_INPUT = USER_INPUT.lower()
|
||||
USER_INPUT = re.sub('[^a-z\n]', ' ', USER_INPUT)
|
||||
text_generated = USER_INPUT[::-1]
|
||||
@@ -114,11 +115,9 @@ def poem(USER_INPUT):
|
||||
|
||||
|
||||
|
||||
######################
|
||||
# RHYMES GENERATION #
|
||||
######################
|
||||
|
||||
|
||||
######################
|
||||
# RHYMES GENERATION #
|
||||
######################
|
||||
temperature = 0.09
|
||||
|
||||
num_generate = 5 # number of characters to generate
|
||||
@@ -145,9 +144,9 @@ def poem(USER_INPUT):
|
||||
print('rhymes:', rhymes)
|
||||
|
||||
|
||||
####################
|
||||
# POEM GENERATION #
|
||||
####################
|
||||
####################
|
||||
# POEM GENERATION #
|
||||
####################
|
||||
|
||||
|
||||
temperature = 0.8
|
||||
|
||||
+10
-3
@@ -1,4 +1,11 @@
|
||||
PyAudio==0.2.9
|
||||
|
||||
|
||||
|
||||
gTTS==2.0.1
|
||||
SpeechRecognition==3.8.1
|
||||
tensorflow==1.10.1
|
||||
# or tensorflow-gpu==1.10.1
|
||||
textblob==0.15.2
|
||||
# for playing mp3s
|
||||
pyglet==1.2.4
|
||||
# using google cloud speech recognition
|
||||
google-api-python-client==1.7.7
|
||||
oauth2client==4.1.3
|
||||
|
||||
+1
-4
@@ -19,7 +19,6 @@ TOP_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
RESOURCE_FILE = os.path.join(TOP_DIR, "snowboy/resources/common.res")
|
||||
DETECT_DING = os.path.join(TOP_DIR, "snowboy/resources/ding.wav")
|
||||
DETECT_DONG = os.path.join(TOP_DIR, "snowboy/resources/dong.wav")
|
||||
DETECT_PY = os.path.join(TOP_DIR, "BitLit_main.py") ## ADDED
|
||||
|
||||
|
||||
class RingBuffer(object):
|
||||
@@ -37,9 +36,7 @@ class RingBuffer(object):
|
||||
self._buf.clear()
|
||||
return tmp
|
||||
|
||||
###
|
||||
def run(fname=DETECT_PY):
|
||||
Py_wav=os.system('python BitLit_main.py')
|
||||
|
||||
###
|
||||
def play_audio_file(fname=DETECT_DING):
|
||||
"""Simple callback function to play a wave file. By default it plays
|
||||
|
||||
Reference in New Issue
Block a user