From eeff8923678d6fd03ed6aeb9db7f1f8621a6d9f5 Mon Sep 17 00:00:00 2001 From: wassname Date: Sat, 12 Jan 2019 12:49:11 +0800 Subject: [PATCH] fix overlapping recording --- BitLit_main.py | 150 +++++++++++++++++++++++++++------------------- README.md | 2 +- poem_generator.py | 4 +- 3 files changed, 92 insertions(+), 64 deletions(-) diff --git a/BitLit_main.py b/BitLit_main.py index 207c6e0..06c8a9b 100644 --- a/BitLit_main.py +++ b/BitLit_main.py @@ -13,7 +13,6 @@ import os import numpy as np import re -from textblob import TextBlob import random import pyglet import json @@ -21,6 +20,7 @@ import time import datetime import hashlib import tempfile +import glob from logger import logger from snowboydecoder import play_ding, play_dong @@ -31,8 +31,9 @@ for index, name in enumerate(sr.Microphone.list_microphone_names()): from poem_generator import poem +DEBUG = True -snowboy_configuration = ('./snowboy', os.listdir('hotwords')) +snowboy_configuration = ('./snowboy', glob.glob('hotwords/*')) # Load credentials try: @@ -45,10 +46,9 @@ except: def play_mp3(mp3_file): """Play mp3 file with pyglet.""" source = pyglet.media.load(filename=mp3_file, streaming=False) + logger.debug('playing %s second file') source.play() - print(mp3_file, source.duration) time.sleep(source.duration + 2) # must be a better way to wait untill the media has played - print(mp3_file, source.duration) def cache_gtts(text, lang="en-nz", cache_file=None): """ @@ -73,92 +73,122 @@ def cache_gtts(text, lang="en-nz", cache_file=None): en: English """ - logger.debug('say: %s', text) + logger.info('%s say: %s', lang, text) if not cache_file: - hash_filename = hashlib.md5(text.encode()).hexdigest() + '.mp3' + hash_filename = hashlib.md5(text.encode()).hexdigest() + lang + '.mp3' cache_file = os.path.join(tempfile.gettempdir(), hash_filename) if not os.path.isfile(cache_file): tts = gTTS(text=text, lang=lang) tts.save(cache_file) return cache_file -def speak(text): - mp3_file = cache_gtts(text) +def speak(text, lang="en-nz", cache_file=None): + mp3_file = cache_gtts(text, lang=lang, cache_file=cache_file) play_mp3(mp3_file) +def record_audio(audio, output_file, play=False): + # write audio to a WAV file for debugging + with open(output_file, "wb") as f: + f.write(audio.get_flac_data()) + logger.info('recorded %s s. Saved as %s', len(audio.frame_data)/audio.sample_rate, output_file) + if play: + speak("DEBUG: I recorded the following") + play_mp3(output_file) + def generate_poem(): + if DEBUG: + speak("I'm in debug mode") + ############ AUDIO CONVERSION TO TEST play_dong() t0 = time.time() r = sr.Recognizer() + + speak("Hi I'm bit-lit. Silence Humans. I must calibrate the microphone. I will ding when I am finished") + time.sleep(2) with sr.Microphone() as source: - print('mic', source) - speak('please be quiet while I calibrate the microphones. I will ding when I am finished') - time.sleep(5) - r.pause_threshold = 5 - r.adjust_for_ambient_noise(source) - print('calibrate mic energy_threshold to', r.energy_threshold) - speak('Thanks. When you want to talk to me say "Hi BitLit"') + logger.debug('microphone source is %s', source) + r.adjust_for_ambient_noise(source, duration=2) + r.energy_threshold = max(r.energy_threshold, 50) + r.energy_threshold = min(r.energy_threshold, 500) + + logger.info('calibrate mic energy_threshold to %s', r.energy_threshold) + play_dong() + + while True: + speak('When you want me to make a poem say "Hi BitLit" or Alexa or Snowboy') play_ding() + with sr.Microphone() as source: + audio_hotword = r.listen(source, snowboy_configuration=snowboy_configuration) + if DEBUG: + record_audio(audio_hotword, "outputs/hotword-results.flac", play=DEBUG) + play_dong() - while True: - print('waiting for hotword "Hi BitLit" or Alexa or SnowBoy') - audio_null = r.listen(source, snowboy_configuration=snowboy_configuration) - play_dong() + speak(text="Hi! My Name is BIT-LIT. Please speak some ideas for a poem after the bing. You have 20 seconds.") - speak(text="Hi! My Name is BIT-LIT. PLEASE SPEAK SOME IDEAS FOR A POEM AFTER THE DING.") - play_ding() + play_ding() + with sr.Microphone() as source: + audio = r.record(source, duration=20) + play_dong() - print('speak now', time.time()) - try: - audio = r.listen(source, timeout=30, phrase_time_limit=30) - except sr.WaitTimeoutError as e: - print('WaitTimeoutError', e) - continue - logger.debug('done recording %s', time.time()) - logger.info('recorded %s s', len(audio.frame_data)/audio.sample_rate) + # write audio to a WAV file for debugging + if DEBUG: + record_audio(audio, "outputs/record-results.flac", play=DEBUG) - play_dong() - speak(text="BEEP. THANK YOU! GIVE ME A MINUTE TO GENERATE AND READ YOUR POEM") + logger.debug('done recording %s', time.time()) + logger.info('recorded %s s', len(audio.frame_data)/audio.sample_rate) - t1 = time.time() - logger.debug('listen took %s', t1 - t0) + speak(text="THANK YOU! GIVE ME A MINUTE TO GENERATE AND READ YOUR POEM") - try: - logger.debug("using google speech to text...") - USER_INPUT = r.recognize_google_cloud(audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS) - logger.info("Google thinks you said: " + USER_INPUT) - except sr.UnknownValueError as e: - logger.error("Could not understand audio. {}".format(e)) - return - except sr.RequestError as e: - logger.error("Could not request results; {0}".format(e)) - return + t1 = time.time() + logger.debug('listen took %s', t1 - t0) - t1b = time.time() - logger.debug('transcribe took %s', t1b - t1) + try: + logger.debug("using google speech to text...") + USER_INPUT = r.recognize_google_cloud(audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS) + logger.info("Google thinks you said: " + USER_INPUT) + except sr.UnknownValueError as e: + logger.error("Could not understand audio. {}".format(e)) + speak("I could not understand that audio") + continue + except sr.RequestError as e: + logger.error("Could not request results; {0}".format(e)) + speak("I'm sorry I could not communicate with the speech to _text the internet'") + continue - # Generate poem from user seed - text_generated = poem(USER_INPUT) - t2 = time.time() - logger.info("ML POEM is: %s", text_generated) - logger.debug('poem and rhyme generation took %s', t2 - t1) + t1b = time.time() + logger.debug('transcribe took %s', t1b - t1) - # FEED POEM TO TRANSCRIBER - tts = gTTS(text=text_generated) - poem_mp3 = "outputs/BitLit_poem.mp3" - tts.save(poem_mp3) - play_mp3(poem_mp3) + if DEBUG: + speak('DEBUG: I think you said %s' % USER_INPUT) - speak(text="THANK YOU! BEEP.") + # Generate poem from user seed + text_generated, rhymes = poem(USER_INPUT) + t2 = time.time() + logger.info("rhymes: %s", rhymes) + logger.info("ML POEM is: %s", text_generated) + logger.debug('poem and rhyme generation took %s', t2 - t1) - ###### - t3 = time.time() - logger.debug('Poem to speech took %s', t3 - t2) - logger.debug("Total time spent is about: %s seconds", np.round(t3 - t0)) + if DEBUG: + speak('DEBUG: your rhymes are '+ ' '.join(rhymes)) + + # FEED POEM TO TRANSCRIBER + speak(text=text_generated, cache_file="outputs/BitLit_last_poem.mp3") + + if random.random()>0.90: + speak(text="THANK YOU!") + else: + speak(text="THANK YOU PUNY HUMANS.") + + ###### + t3 = time.time() + logger.debug('Poem to speech took %s', t3 - t2) + logger.debug("Total time spent is about: %s seconds", np.round(t3 - t0)) + + play_ding() if __name__ == "__main__": diff --git a/README.md b/README.md index d49ea86..e3f5ec4 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ Forked from https://github.com/ShebMichel/BitLit_test1 # Run To run the program once installed ->> python BitLit_decoder.py HiBitLit.pmdl +>> python3 BitLit_main.py This will start snowboy listening for the phrase "Hi BitLit". Then it will ask for seed phrases for a poem. diff --git a/poem_generator.py b/poem_generator.py index ff8722b..e90b361 100644 --- a/poem_generator.py +++ b/poem_generator.py @@ -145,8 +145,6 @@ def poem(USER_INPUT): input_eval = tf.expand_dims([predicted_id], 0) rhymes += [idx2word_rhymes[predicted_id]] - logger.info("rhymes: %s", rhymes) - #################### # POEM GENERATION # #################### @@ -184,4 +182,4 @@ def poem(USER_INPUT): text_generated = re.sub(" +", " ", text_generated) text_generated = str(TextBlob(text_generated).correct()) - return text_generated + return text_generated, rhymes