diff --git a/BitLit_decoder.py b/BitLit_decoder.py deleted file mode 100644 index afce2e6..0000000 --- a/BitLit_decoder.py +++ /dev/null @@ -1,56 +0,0 @@ -#### RUNNING THE MODEL -# cd documents/pmlg/wake/decoder -# python demo.py resources/HiBitLit.pmdl -from __future__ import print_function -import os -import sys - -import snowboydecoder -from snowboydecoder import play_ding, play_dong -import signal - -import time -import numpy as np - -import BitLit_main - - -t0 = time.time() ## Time counter -interrupted = False - - -def signal_handler(signal, frame): - global interrupted - interrupted = True - - -def interrupt_callback(): - global interrupted - return interrupted - - -if len(sys.argv) == 1: - print("Error: need to specify model name") - print("Usage: python demo.py your.model") - sys.exit(-1) - -model = sys.argv[1] - -# capture SIGINT signal, e.g., Ctrl+C -signal.signal(signal.SIGINT, signal_handler) - -play_ding() -detector = snowboydecoder.HotwordDetector(model, sensitivity=0.5) -print("Listening... Press Ctrl+C to exit") - - -detector.start( - detected_callback=BitLit_main.generate_poem, - interrupt_check=interrupt_callback, - sleep_time=0.03, -) - -detector.terminate() -t1 = time.time() -total = t1 - t0 -print(("Time spent is about:", np.round(total), "seconds")) diff --git a/BitLit_main.py b/BitLit_main.py index 796e9c6..207c6e0 100644 --- a/BitLit_main.py +++ b/BitLit_main.py @@ -32,6 +32,8 @@ for index, name in enumerate(sr.Microphone.list_microphone_names()): from poem_generator import poem +snowboy_configuration = ('./snowboy', os.listdir('hotwords')) + # Load credentials try: GOOGLE_CLOUD_SPEECH_CREDENTIALS = open("secrets/google_cloud_credentials.json").read() @@ -45,7 +47,7 @@ def play_mp3(mp3_file): source = pyglet.media.load(filename=mp3_file, streaming=False) source.play() print(mp3_file, source.duration) - time.sleep(source.duration + 4) # must be a better way to wait untill the media has played + time.sleep(source.duration + 2) # must be a better way to wait untill the media has played print(mp3_file, source.duration) def cache_gtts(text, lang="en-nz", cache_file=None): @@ -80,6 +82,10 @@ def cache_gtts(text, lang="en-nz", cache_file=None): tts.save(cache_file) return cache_file +def speak(text): + mp3_file = cache_gtts(text) + play_mp3(mp3_file) + def generate_poem(): @@ -89,67 +95,70 @@ def generate_poem(): t0 = time.time() r = sr.Recognizer() with sr.Microphone() as source: - - # print(r.energy_threshold) - # r.adjust_for_ambient_noise(source) - # print('energy_threshold', r.energy_threshold) - r.energy_threshold=50 - - print('mic', source) - outfile1 = cache_gtts(text="Hi! My Name is BIT-LIT. PLEASE SPEAK SOME IDEAS FOR A POEM AFTER THE BEEP.") - play_mp3(outfile1) + speak('please be quiet while I calibrate the microphones. I will ding when I am finished') + time.sleep(5) + r.pause_threshold = 5 + r.adjust_for_ambient_noise(source) + print('calibrate mic energy_threshold to', r.energy_threshold) + speak('Thanks. When you want to talk to me say "Hi BitLit"') play_ding() - print('speak now', time.time()) - audio = r.listen(source) - logger.debug('done recording %s', time.time()) - logger.info('recorded %s s', len(audio.frame_data)/audio.sample_rate) + while True: + print('waiting for hotword "Hi BitLit" or Alexa or SnowBoy') + audio_null = r.listen(source, snowboy_configuration=snowboy_configuration) + play_dong() - play_dong() - outfile2 = cache_gtts(text="BEEP. THANK YOU! GIVE ME A MINUTE TO GENERATE AND READ YOUR POEM") - play_mp3(outfile2) + speak(text="Hi! My Name is BIT-LIT. PLEASE SPEAK SOME IDEAS FOR A POEM AFTER THE DING.") + play_ding() - t1 = time.time() - logger.debug('listen took %s', t1 - t0) + print('speak now', time.time()) + try: + audio = r.listen(source, timeout=30, phrase_time_limit=30) + except sr.WaitTimeoutError as e: + print('WaitTimeoutError', e) + continue + logger.debug('done recording %s', time.time()) + logger.info('recorded %s s', len(audio.frame_data)/audio.sample_rate) - try: - logger.debug("using google speech to text...") - USER_INPUT = r.recognize_google_cloud(audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS) - logger.info("Google thinks you said: " + USER_INPUT) - except sr.UnknownValueError as e: - logger.error("Could not understand audio. {}".format(e)) - return - except sr.RequestError as e: - logger.error("Could not request results; {0}".format(e)) - return + play_dong() + speak(text="BEEP. THANK YOU! GIVE ME A MINUTE TO GENERATE AND READ YOUR POEM") - t1b = time.time() - logger.debug('transcribe took %s', t1b - t1) + t1 = time.time() + logger.debug('listen took %s', t1 - t0) - return + try: + logger.debug("using google speech to text...") + USER_INPUT = r.recognize_google_cloud(audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS) + logger.info("Google thinks you said: " + USER_INPUT) + except sr.UnknownValueError as e: + logger.error("Could not understand audio. {}".format(e)) + return + except sr.RequestError as e: + logger.error("Could not request results; {0}".format(e)) + return - # Generate poem from user seed - text_generated = poem(USER_INPUT) - t2 = time.time() - logger.info("ML POEM is: %s", text_generated) - logger.debug('poem and rhyme generation took %s', t2 - t1) + t1b = time.time() + logger.debug('transcribe took %s', t1b - t1) - # TEXT CONVERSION IN AUDIO - # FEED POEM TO TRANSCRIBER - tts = gTTS(text=text_generated) - ts = datetime.datetime.utcnow().strftime('%Y%m%d_%H-%M-%S') - poem_mp3 = "outputs/BitLit_{}.mp3".format(ts) - tts.save(poem_mp3) - play_mp3(poem_mp3) + # Generate poem from user seed + text_generated = poem(USER_INPUT) + t2 = time.time() + logger.info("ML POEM is: %s", text_generated) + logger.debug('poem and rhyme generation took %s', t2 - t1) - outfile = cache_gtts(text="THANK YOU! BEEP.") - play_mp3(outfile) + # FEED POEM TO TRANSCRIBER + tts = gTTS(text=text_generated) + poem_mp3 = "outputs/BitLit_poem.mp3" + tts.save(poem_mp3) + play_mp3(poem_mp3) - ###### - t3 = time.time() - logger.debug('Poem to speech took %s', t3 - t2) - logger.debug("Total time spent is about: %s seconds", np.round(t3 - t0)) + speak(text="THANK YOU! BEEP.") + + ###### + t3 = time.time() + logger.debug('Poem to speech took %s', t3 - t2) + logger.debug("Total time spent is about: %s seconds", np.round(t3 - t0)) if __name__ == "__main__": diff --git a/HiBitLit.pmdl b/hotwords/HiBitLit.pmdl similarity index 100% rename from HiBitLit.pmdl rename to hotwords/HiBitLit.pmdl diff --git a/hotwords/alexa.umdl b/hotwords/alexa.umdl new file mode 100644 index 0000000..0d9db6f Binary files /dev/null and b/hotwords/alexa.umdl differ diff --git a/hotwords/snowboy.umdl b/hotwords/snowboy.umdl new file mode 100644 index 0000000..bb68185 Binary files /dev/null and b/hotwords/snowboy.umdl differ diff --git a/poem_generator.py b/poem_generator.py index 09d8ace..ff8722b 100644 --- a/poem_generator.py +++ b/poem_generator.py @@ -145,7 +145,7 @@ def poem(USER_INPUT): input_eval = tf.expand_dims([predicted_id], 0) rhymes += [idx2word_rhymes[predicted_id]] - logger.info("rhymes:", rhymes) + logger.info("rhymes: %s", rhymes) #################### # POEM GENERATION #