From eeff8923678d6fd03ed6aeb9db7f1f8621a6d9f5 Mon Sep 17 00:00:00 2001
From: wassname <git@wassname.org>
Date: Sat, 12 Jan 2019 12:49:11 +0800
Subject: [PATCH] fix overlapping recording

---
 BitLit_main.py    | 150 +++++++++++++++++++++++++++-------------------
 README.md         |   2 +-
 poem_generator.py |   4 +-
 3 files changed, 92 insertions(+), 64 deletions(-)

diff --git a/BitLit_main.py b/BitLit_main.py
index 207c6e0..06c8a9b 100644
--- a/BitLit_main.py
+++ b/BitLit_main.py
@@ -13,7 +13,6 @@ import os
 
 import numpy as np
 import re
-from textblob import TextBlob
 import random
 import pyglet
 import json
@@ -21,6 +20,7 @@ import time
 import datetime
 import hashlib
 import tempfile
+import glob
 from logger import logger
 
 from snowboydecoder import play_ding, play_dong
@@ -31,8 +31,9 @@ for index, name in enumerate(sr.Microphone.list_microphone_names()):
 
 from poem_generator import poem
 
+DEBUG = True
 
-snowboy_configuration = ('./snowboy', os.listdir('hotwords'))
+snowboy_configuration = ('./snowboy', glob.glob('hotwords/*'))
 
 # Load credentials
 try:
@@ -45,10 +46,9 @@ except:
 def play_mp3(mp3_file):
     """Play mp3 file with pyglet."""
     source = pyglet.media.load(filename=mp3_file, streaming=False)
+    logger.debug('playing %s second file: %s', source.duration, mp3_file)
     source.play()
-    print(mp3_file, source.duration)
     time.sleep(source.duration + 2)  # must be a better way to wait untill the media has played
-    print(mp3_file, source.duration)
 
 def cache_gtts(text, lang="en-nz", cache_file=None):
     """
@@ -73,92 +73,122 @@ def cache_gtts(text, lang="en-nz", cache_file=None):
         en: English
 
     """
-    logger.debug('say: %s', text)
+    logger.info('%s say: %s', lang, text)
     if not cache_file:
-        hash_filename = hashlib.md5(text.encode()).hexdigest() + '.mp3'
+        hash_filename = hashlib.md5(text.encode()).hexdigest() + lang + '.mp3'
         cache_file = os.path.join(tempfile.gettempdir(), hash_filename)
     if not os.path.isfile(cache_file):
         tts = gTTS(text=text, lang=lang)
         tts.save(cache_file)
     return cache_file
 
-def speak(text):
-    mp3_file = cache_gtts(text)
+def speak(text, lang="en-nz", cache_file=None):  # get the mp3 path for `text` from cache_gtts, then play it
+    mp3_file = cache_gtts(text, lang=lang, cache_file=cache_file)
     play_mp3(mp3_file)
 
+def record_audio(audio, output_file, play=False):
+    """Write `audio` as FLAC data to `output_file` (debug aid); if `play`, announce and play it back."""
+    with open(output_file, "wb") as f:
+        f.write(audio.get_flac_data())
+    logger.info('recorded %s s. Saved as %s', len(audio.frame_data) / (audio.sample_rate * audio.sample_width), output_file)  # frame_data is bytes: divide by bytes-per-sample too
+    if play:
+        speak("DEBUG: I recorded the following")
+        play_mp3(output_file)
+
 
 def generate_poem():
 
+    if DEBUG:
+        speak("I'm in debug mode")
+
 
     ############ AUDIO CONVERSION TO TEST
     play_dong()
     t0 = time.time()
     r = sr.Recognizer()
+
+    speak("Hi I'm bit-lit. Silence Humans. I must calibrate the microphone. I will ding when I am finished")
+    time.sleep(2)
     with sr.Microphone() as source:
-        print('mic', source)
-        speak('please be quiet while I calibrate the microphones. I will ding when I am finished')
-        time.sleep(5)
-        r.pause_threshold = 5
-        r.adjust_for_ambient_noise(source) 
-        print('calibrate mic energy_threshold to', r.energy_threshold)
-        speak('Thanks. When you want to talk to me say "Hi BitLit"')
+        logger.debug('microphone source is %s', source)
+        r.adjust_for_ambient_noise(source, duration=2)
+    r.energy_threshold = max(r.energy_threshold, 50)
+    r.energy_threshold = min(r.energy_threshold, 500)
+
+    logger.info('calibrate mic energy_threshold to %s', r.energy_threshold)
+    play_dong()
+
+    while True:
+        speak('When you want me to make a poem say "Hi BitLit" or Alexa or Snowboy')
         play_ding()
+        with sr.Microphone() as source:
+            audio_hotword = r.listen(source, snowboy_configuration=snowboy_configuration)
+        if DEBUG:
+            record_audio(audio_hotword, "outputs/hotword-results.flac", play=DEBUG)
+        play_dong()
 
-        while True:
-            print('waiting for hotword "Hi BitLit" or Alexa or SnowBoy')
-            audio_null = r.listen(source, snowboy_configuration=snowboy_configuration)
-            play_dong()
+        speak(text="Hi! My Name is BIT-LIT. Please speak some ideas for a poem after the ding. You have 20 seconds.")
 
-            speak(text="Hi! My Name is BIT-LIT. PLEASE SPEAK SOME IDEAS FOR A POEM AFTER THE DING.")
-            play_ding()
+        play_ding()
+        with sr.Microphone() as source:
+            audio = r.record(source, duration=20)
+        play_dong()
 
-            print('speak now', time.time())
-            try:
-                audio = r.listen(source, timeout=30, phrase_time_limit=30)
-            except sr.WaitTimeoutError as e:
-                print('WaitTimeoutError', e)
-                continue
-            logger.debug('done recording %s', time.time())
-            logger.info('recorded %s s', len(audio.frame_data)/audio.sample_rate)
+        # in debug mode, save the recording as a FLAC file and play it back
+        if DEBUG:
+            record_audio(audio, "outputs/record-results.flac", play=DEBUG)
 
-            play_dong()
-            speak(text="BEEP. THANK YOU! GIVE ME A MINUTE TO GENERATE AND READ YOUR POEM")
+        logger.debug('done recording %s', time.time())
+        logger.info('recorded %s s', len(audio.frame_data) / (audio.sample_rate * audio.sample_width))
 
-            t1 = time.time()
-            logger.debug('listen took %s', t1 - t0)
+        speak(text="THANK YOU! GIVE ME A MINUTE TO GENERATE AND READ YOUR POEM")
 
-            try:
-                logger.debug("using google speech to text...")
-                USER_INPUT = r.recognize_google_cloud(audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS)
-                logger.info("Google thinks you said: " + USER_INPUT)
-            except sr.UnknownValueError as e:
-                logger.error("Could not understand audio. {}".format(e))
-                return 
-            except sr.RequestError as e:
-                logger.error("Could not request results; {0}".format(e))
-                return
+        t1 = time.time()
+        logger.debug('listen took %s', t1 - t0)  # NOTE: t0 is taken once before the loop, so t0-based timings accumulate across iterations
 
-            t1b = time.time()
-            logger.debug('transcribe took %s', t1b - t1)
+        try:
+            logger.debug("using google speech to text...")
+            USER_INPUT = r.recognize_google_cloud(audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS)
+            logger.info("Google thinks you said: " + USER_INPUT)
+        except sr.UnknownValueError as e:
+            logger.error("Could not understand audio. {}".format(e))
+            speak("I could not understand that audio")
+            continue
+        except sr.RequestError as e:
+            logger.error("Could not request results; {0}".format(e))
+            speak("I'm sorry, I could not reach the speech to text service on the internet")
+            continue
 
-            # Generate poem from user seed
-            text_generated = poem(USER_INPUT)
-            t2 = time.time()
-            logger.info("ML POEM is: %s", text_generated)
-            logger.debug('poem and rhyme generation took %s', t2 - t1)
+        t1b = time.time()
+        logger.debug('transcribe took %s', t1b - t1)
 
-            # FEED POEM TO TRANSCRIBER
-            tts = gTTS(text=text_generated)
-            poem_mp3 = "outputs/BitLit_poem.mp3"
-            tts.save(poem_mp3)
-            play_mp3(poem_mp3)
+        if DEBUG:
+            speak('DEBUG: I think you said %s' % USER_INPUT)
 
-            speak(text="THANK YOU! BEEP.")
+        # Generate poem from user seed
+        text_generated, rhymes = poem(USER_INPUT)
+        t2 = time.time()
+        logger.info("rhymes: %s", rhymes)
+        logger.info("ML POEM is: %s", text_generated)
+        logger.debug('poem and rhyme generation took %s', t2 - t1b)  # measured from the end of transcription
 
-            ######
-            t3 = time.time()
-            logger.debug('Poem to speech took %s', t3 - t2)
-            logger.debug("Total time spent is about: %s seconds", np.round(t3 - t0))
+        if DEBUG:
+            speak('DEBUG: your rhymes are ' + ' '.join(rhymes))
+
+        # FEED POEM TO TRANSCRIBER
+        speak(text=text_generated, cache_file="outputs/BitLit_last_poem.mp3")
+
+        if random.random() > 0.90:
+            speak(text="THANK YOU!")
+        else:
+            speak(text="THANK YOU PUNY HUMANS.")
+
+        ######
+        t3 = time.time()
+        logger.debug('Poem to speech took %s', t3 - t2)
+        logger.debug("Total time spent is about: %s seconds", np.round(t3 - t0))
+
+        play_ding()
 
 
 if __name__ == "__main__":
diff --git a/README.md b/README.md
index d49ea86..e3f5ec4 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@ Forked from https://github.com/ShebMichel/BitLit_test1
 
 # Run
 To run the program once installed 
->> python BitLit_decoder.py HiBitLit.pmdl
+>> python3 BitLit_main.py
 
 This will start snowboy listening for the phrase "Hi BitLit". Then it will ask for seed phrases for a poem.
 
diff --git a/poem_generator.py b/poem_generator.py
index ff8722b..e90b361 100644
--- a/poem_generator.py
+++ b/poem_generator.py
@@ -145,8 +145,6 @@ def poem(USER_INPUT):
         input_eval = tf.expand_dims([predicted_id], 0)
         rhymes += [idx2word_rhymes[predicted_id]]
 
-    logger.info("rhymes: %s", rhymes)
-
     ####################
     #  POEM GENERATION #
     ####################
@@ -184,4 +182,4 @@ def poem(USER_INPUT):
 
     text_generated = re.sub(" +", " ", text_generated)
     text_generated = str(TextBlob(text_generated).correct())
-    return text_generated
+    return text_generated, rhymes