fix overlapping recording

2026-06-27 15:13:28 +08:00 · 2019-01-12 12:49:11 +08:00
parent ab1b07329c
commit eeff892367
3 changed files with 92 additions and 64 deletions
@@ -13,7 +13,6 @@ import os

 import numpy as np
 import re
-from textblob import TextBlob
 import random
 import pyglet
 import json
@@ -21,6 +20,7 @@ import time
 import datetime
 import hashlib
 import tempfile
+import glob
 from logger import logger

 from snowboydecoder import play_ding, play_dong
@@ -31,8 +31,9 @@ for index, name in enumerate(sr.Microphone.list_microphone_names()):

 from poem_generator import poem

+DEBUG = True

-snowboy_configuration = ('./snowboy', os.listdir('hotwords'))
+snowboy_configuration = ('./snowboy', glob.glob('hotwords/*'))

 # Load credentials
 try:
@@ -45,10 +46,9 @@ except:
 def play_mp3(mp3_file):
    """Play mp3 file with pyglet."""
    source = pyglet.media.load(filename=mp3_file, streaming=False)
+    logger.debug('playing %s second file')
    source.play()
-    print(mp3_file, source.duration)
    time.sleep(source.duration + 2)  # must be a better way to wait untill the media has played
-    print(mp3_file, source.duration)

 def cache_gtts(text, lang="en-nz", cache_file=None):
    """
@@ -73,92 +73,122 @@ def cache_gtts(text, lang="en-nz", cache_file=None):
        en: English

    """
-    logger.debug('say: %s', text)
+    logger.info('%s say: %s', lang, text)
    if not cache_file:
-        hash_filename = hashlib.md5(text.encode()).hexdigest() + '.mp3'
+        hash_filename = hashlib.md5(text.encode()).hexdigest() + lang + '.mp3'
        cache_file = os.path.join(tempfile.gettempdir(), hash_filename)
    if not os.path.isfile(cache_file):
        tts = gTTS(text=text, lang=lang)
        tts.save(cache_file)
    return cache_file

-def speak(text):
-    mp3_file = cache_gtts(text)
+def speak(text, lang="en-nz", cache_file=None):
+    mp3_file = cache_gtts(text, lang=lang, cache_file=cache_file)
    play_mp3(mp3_file)

+def record_audio(audio, output_file, play=False):
+    # write audio to a FLAC file for debugging
+    with open(output_file, "wb") as f:
+        f.write(audio.get_flac_data())
+    logger.info('recorded %s s. Saved as %s', len(audio.frame_data)/audio.sample_rate, output_file)
+    if play:
+        speak("DEBUG: I recorded the following")
+        play_mp3(output_file)
+

 def generate_poem():

+    if DEBUG:
+        speak("I'm in debug mode")
+

    ############ AUDIO CONVERSION TO TEST
    play_dong()
    t0 = time.time()
    r = sr.Recognizer()
+
+    speak("Hi I'm bit-lit. Silence Humans. I must calibrate the microphone. I will ding when I am finished")
+    time.sleep(2)
    with sr.Microphone() as source:
-        print('mic', source)
-        speak('please be quiet while I calibrate the microphones. I will ding when I am finished')
-        time.sleep(5)
-        r.pause_threshold = 5
-        r.adjust_for_ambient_noise(source) 
-        print('calibrate mic energy_threshold to', r.energy_threshold)
-        speak('Thanks. When you want to talk to me say "Hi BitLit"')
+        logger.debug('microphone source is %s', source)
+        r.adjust_for_ambient_noise(source, duration=2) 
+    r.energy_threshold = max(r.energy_threshold, 50)
+    r.energy_threshold = min(r.energy_threshold, 500)
+
+    logger.info('calibrate mic energy_threshold to %s', r.energy_threshold)
+    play_dong()
+
+    while True:
+        speak('When you want me to make a poem say "Hi BitLit" or Alexa or Snowboy')
        play_ding()
+        with sr.Microphone() as source:
+            audio_hotword = r.listen(source, snowboy_configuration=snowboy_configuration)
+        if DEBUG:
+            record_audio(audio_hotword, "outputs/hotword-results.flac", play=DEBUG)
+        play_dong()

-        while True:
-            print('waiting for hotword "Hi BitLit" or Alexa or SnowBoy')
-            audio_null = r.listen(source, snowboy_configuration=snowboy_configuration)
-            play_dong()
+        speak(text="Hi! My Name is BIT-LIT. Please speak some ideas for a poem after the bing. You have 20 seconds.")

-            speak(text="Hi! My Name is BIT-LIT. PLEASE SPEAK SOME IDEAS FOR A POEM AFTER THE DING.")
-            play_ding()
+        play_ding()
+        with sr.Microphone() as source:
+            audio = r.record(source, duration=20)
+        play_dong()

-            print('speak now', time.time())
-            try:
-                audio = r.listen(source, timeout=30, phrase_time_limit=30)
-            except sr.WaitTimeoutError as e:
-                print('WaitTimeoutError', e)
-                continue
-            logger.debug('done recording %s', time.time())
-            logger.info('recorded %s s', len(audio.frame_data)/audio.sample_rate)
+        # write audio to a FLAC file for debugging
+        if DEBUG:
+            record_audio(audio, "outputs/record-results.flac", play=DEBUG)

-            play_dong()
-            speak(text="BEEP. THANK YOU! GIVE ME A MINUTE TO GENERATE AND READ YOUR POEM")
+        logger.debug('done recording %s', time.time())
+        logger.info('recorded %s s', len(audio.frame_data)/audio.sample_rate)

-            t1 = time.time()
-            logger.debug('listen took %s', t1 - t0)
+        speak(text="THANK YOU! GIVE ME A MINUTE TO GENERATE AND READ YOUR POEM")

-            try:
-                logger.debug("using google speech to text...")
-                USER_INPUT = r.recognize_google_cloud(audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS)
-                logger.info("Google thinks you said: " + USER_INPUT)
-            except sr.UnknownValueError as e:
-                logger.error("Could not understand audio. {}".format(e))
-                return 
-            except sr.RequestError as e:
-                logger.error("Could not request results; {0}".format(e))
-                return
+        t1 = time.time()
+        logger.debug('listen took %s', t1 - t0)

-            t1b = time.time()
-            logger.debug('transcribe took %s', t1b - t1)
+        try:
+            logger.debug("using google speech to text...")
+            USER_INPUT = r.recognize_google_cloud(audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS)
+            logger.info("Google thinks you said: " + USER_INPUT)
+        except sr.UnknownValueError as e:
+            logger.error("Could not understand audio. {}".format(e))
+            speak("I could not understand that audio")
+            continue 
+        except sr.RequestError as e:
+            logger.error("Could not request results; {0}".format(e))
+            speak("I'm sorry I could not communicate with the speech to _text the internet'")
+            continue

-            # Generate poem from user seed
-            text_generated = poem(USER_INPUT)
-            t2 = time.time()
-            logger.info("ML POEM is: %s", text_generated)
-            logger.debug('poem and rhyme generation took %s', t2 - t1)
+        t1b = time.time()
+        logger.debug('transcribe took %s', t1b - t1)

-            # FEED POEM TO TRANSCRIBER
-            tts = gTTS(text=text_generated)
-            poem_mp3 = "outputs/BitLit_poem.mp3"
-            tts.save(poem_mp3)
-            play_mp3(poem_mp3)
+        if DEBUG:
+            speak('DEBUG: I think you said %s' % USER_INPUT)

-            speak(text="THANK YOU! BEEP.")
+        # Generate poem from user seed
+        text_generated, rhymes = poem(USER_INPUT)
+        t2 = time.time()
+        logger.info("rhymes: %s", rhymes)
+        logger.info("ML POEM is: %s", text_generated)
+        logger.debug('poem and rhyme generation took %s', t2 - t1)

-            ######
-            t3 = time.time()
-            logger.debug('Poem to speech took %s', t3 - t2)
-            logger.debug("Total time spent is about: %s seconds", np.round(t3 - t0))
+        if DEBUG:
+            speak('DEBUG: your rhymes are '+ ' '.join(rhymes))
+
+        # FEED POEM TO TRANSCRIBER
+        speak(text=text_generated, cache_file="outputs/BitLit_last_poem.mp3")
+
+        if random.random()>0.90:
+            speak(text="THANK YOU!")
+        else:
+            speak(text="THANK YOU PUNY HUMANS.")
+
+        ######
+        t3 = time.time()
+        logger.debug('Poem to speech took %s', t3 - t2)
+        logger.debug("Total time spent is about: %s seconds", np.round(t3 - t0))
+
+        play_ding()


 if __name__ == "__main__":
@@ -3,7 +3,7 @@ Forked from https://github.com/ShebMichel/BitLit_test1

 # Run
 To run the program once installed 
->> python BitLit_decoder.py HiBitLit.pmdl
+>> python3 BitLit_main.py

 This will start snowboy listening for the phrase "Hi BitLit". Then it will ask for seed phrases for a poem.

@@ -145,8 +145,6 @@ def poem(USER_INPUT):
        input_eval = tf.expand_dims([predicted_id], 0)
        rhymes += [idx2word_rhymes[predicted_id]]

-    logger.info("rhymes: %s", rhymes)
-
    ####################
    #  POEM GENERATION #
    ####################
@@ -184,4 +182,4 @@ def poem(USER_INPUT):

    text_generated = re.sub(" +", " ", text_generated)
    text_generated = str(TextBlob(text_generated).correct())
-    return text_generated
+    return text_generated, rhymes