Use speechrecognition loop and calibrate mic

This commit is contained in:
wassname
2019-01-12 11:35:10 +08:00
parent a67bc9531d
commit ab1b07329c
6 changed files with 60 additions and 107 deletions
-56
View File
@@ -1,56 +0,0 @@
#### RUNNING THE MODEL
# cd documents/pmlg/wake/decoder
# python demo.py resources/HiBitLit.pmdl
from __future__ import print_function
import os
import sys
import snowboydecoder
from snowboydecoder import play_ding, play_dong
import signal
import time
import numpy as np
import BitLit_main
t0 = time.time()  # wall-clock start, used for the elapsed-time report at exit

# Flipped to True by the SIGINT handler; exposed to the detector through
# interrupt_callback (passed as ``interrupt_check`` below).
interrupted = False


def signal_handler(signal, frame):
    """Record that a stop was requested (e.g. Ctrl+C).

    Registered below as the SIGINT handler. Both arguments are supplied by
    the signal machinery and are not used here.
    """
    global interrupted
    interrupted = True


def interrupt_callback():
    """Return True once ``signal_handler`` has set the interrupted flag."""
    # Read-only access to the module-level flag, so no ``global`` is needed.
    return interrupted


if len(sys.argv) == 1:
    print("Error: need to specify model name")
    print("Usage: python demo.py your.model")
    sys.exit(-1)

# First command-line argument is the hotword model file handed to the detector.
model = sys.argv[1]

# capture SIGINT signal, e.g., Ctrl+C
signal.signal(signal.SIGINT, signal_handler)

play_ding()
detector = snowboydecoder.HotwordDetector(model, sensitivity=0.5)
print("Listening... Press Ctrl+C to exit")

# Run the detector with BitLit_main.generate_poem as the detection callback
# and interrupt_callback as the stop check.
detector.start(
    detected_callback=BitLit_main.generate_poem,
    interrupt_check=interrupt_callback,
    sleep_time=0.03,
)
detector.terminate()

t1 = time.time()
total = t1 - t0
# Pass the pieces as separate arguments: with print_function enabled, the
# previous doubled parentheses printed the repr of a tuple, not a sentence.
print("Time spent is about:", np.round(total), "seconds")
+59 -50
View File
@@ -32,6 +32,8 @@ for index, name in enumerate(sr.Microphone.list_microphone_names()):
from poem_generator import poem
snowboy_configuration = ('./snowboy', os.listdir('hotwords'))
# Load credentials
try:
GOOGLE_CLOUD_SPEECH_CREDENTIALS = open("secrets/google_cloud_credentials.json").read()
@@ -45,7 +47,7 @@ def play_mp3(mp3_file):
source = pyglet.media.load(filename=mp3_file, streaming=False)
source.play()
print(mp3_file, source.duration)
time.sleep(source.duration + 4) # must be a better way to wait untill the media has played
time.sleep(source.duration + 2) # must be a better way to wait untill the media has played
print(mp3_file, source.duration)
def cache_gtts(text, lang="en-nz", cache_file=None):
@@ -80,6 +82,10 @@ def cache_gtts(text, lang="en-nz", cache_file=None):
tts.save(cache_file)
return cache_file
def speak(text):
    """Say ``text`` aloud.

    The text is passed to ``cache_gtts``, and the mp3 path it returns is
    handed to ``play_mp3`` for playback.
    """
    play_mp3(cache_gtts(text))
def generate_poem():
@@ -89,67 +95,70 @@ def generate_poem():
t0 = time.time()
r = sr.Recognizer()
with sr.Microphone() as source:
# print(r.energy_threshold)
# r.adjust_for_ambient_noise(source)
# print('energy_threshold', r.energy_threshold)
r.energy_threshold=50
print('mic', source)
outfile1 = cache_gtts(text="Hi! My Name is BIT-LIT. PLEASE SPEAK SOME IDEAS FOR A POEM AFTER THE BEEP.")
play_mp3(outfile1)
speak('please be quiet while I calibrate the microphones. I will ding when I am finished')
time.sleep(5)
r.pause_threshold = 5
r.adjust_for_ambient_noise(source)
print('calibrate mic energy_threshold to', r.energy_threshold)
speak('Thanks. When you want to talk to me say "Hi BitLit"')
play_ding()
print('speak now', time.time())
audio = r.listen(source)
logger.debug('done recording %s', time.time())
logger.info('recorded %s s', len(audio.frame_data)/audio.sample_rate)
while True:
print('waiting for hotword "Hi BitLit" or Alexa or SnowBoy')
audio_null = r.listen(source, snowboy_configuration=snowboy_configuration)
play_dong()
play_dong()
outfile2 = cache_gtts(text="BEEP. THANK YOU! GIVE ME A MINUTE TO GENERATE AND READ YOUR POEM")
play_mp3(outfile2)
speak(text="Hi! My Name is BIT-LIT. PLEASE SPEAK SOME IDEAS FOR A POEM AFTER THE DING.")
play_ding()
t1 = time.time()
logger.debug('listen took %s', t1 - t0)
print('speak now', time.time())
try:
audio = r.listen(source, timeout=30, phrase_time_limit=30)
except sr.WaitTimeoutError as e:
print('WaitTimeoutError', e)
continue
logger.debug('done recording %s', time.time())
logger.info('recorded %s s', len(audio.frame_data)/audio.sample_rate)
try:
logger.debug("using google speech to text...")
USER_INPUT = r.recognize_google_cloud(audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS)
logger.info("Google thinks you said: " + USER_INPUT)
except sr.UnknownValueError as e:
logger.error("Could not understand audio. {}".format(e))
return
except sr.RequestError as e:
logger.error("Could not request results; {0}".format(e))
return
play_dong()
speak(text="BEEP. THANK YOU! GIVE ME A MINUTE TO GENERATE AND READ YOUR POEM")
t1b = time.time()
logger.debug('transcribe took %s', t1b - t1)
t1 = time.time()
logger.debug('listen took %s', t1 - t0)
return
try:
logger.debug("using google speech to text...")
USER_INPUT = r.recognize_google_cloud(audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS)
logger.info("Google thinks you said: " + USER_INPUT)
except sr.UnknownValueError as e:
logger.error("Could not understand audio. {}".format(e))
return
except sr.RequestError as e:
logger.error("Could not request results; {0}".format(e))
return
# Generate poem from user seed
text_generated = poem(USER_INPUT)
t2 = time.time()
logger.info("ML POEM is: %s", text_generated)
logger.debug('poem and rhyme generation took %s', t2 - t1)
t1b = time.time()
logger.debug('transcribe took %s', t1b - t1)
# TEXT CONVERSION IN AUDIO
# FEED POEM TO TRANSCRIBER
tts = gTTS(text=text_generated)
ts = datetime.datetime.utcnow().strftime('%Y%m%d_%H-%M-%S')
poem_mp3 = "outputs/BitLit_{}.mp3".format(ts)
tts.save(poem_mp3)
play_mp3(poem_mp3)
# Generate poem from user seed
text_generated = poem(USER_INPUT)
t2 = time.time()
logger.info("ML POEM is: %s", text_generated)
logger.debug('poem and rhyme generation took %s', t2 - t1)
outfile = cache_gtts(text="THANK YOU! BEEP.")
play_mp3(outfile)
# FEED POEM TO TRANSCRIBER
tts = gTTS(text=text_generated)
poem_mp3 = "outputs/BitLit_poem.mp3"
tts.save(poem_mp3)
play_mp3(poem_mp3)
######
t3 = time.time()
logger.debug('Poem to speech took %s', t3 - t2)
logger.debug("Total time spent is about: %s seconds", np.round(t3 - t0))
speak(text="THANK YOU! BEEP.")
######
t3 = time.time()
logger.debug('Poem to speech took %s', t3 - t2)
logger.debug("Total time spent is about: %s seconds", np.round(t3 - t0))
if __name__ == "__main__":
Binary file not shown.
Binary file not shown.
+1 -1
View File
@@ -145,7 +145,7 @@ def poem(USER_INPUT):
input_eval = tf.expand_dims([predicted_id], 0)
rhymes += [idx2word_rhymes[predicted_id]]
logger.info("rhymes:", rhymes)
logger.info("rhymes: %s", rhymes)
####################
# POEM GENERATION #