fix overlapping recording

2026-06-27 16:43:35 +08:00 · 2019-01-12 12:49:11 +08:00
parent ab1b07329c
commit eeff892367
3 changed files with 92 additions and 64 deletions
@@ -13,7 +13,6 @@ import os
 import numpy as np
 import re
 from textblob import TextBlob
 import random
 import pyglet
 import json
@@ -21,6 +20,7 @@ import time
 import datetime
 import hashlib
 import tempfile
 import glob
 from logger import logger
 from snowboydecoder import play_ding, play_dong
@@ -31,8 +31,9 @@ for index, name in enumerate(sr.Microphone.list_microphone_names()):
 from poem_generator import poem
 DEBUG = True
-snowboy_configuration = ('./snowboy', os.listdir('hotwords'))
+snowboy_configuration = ('./snowboy', glob.glob('hotwords/*'))
 # Load credentials
 try:
@@ -45,10 +46,9 @@ except:
 def play_mp3(mp3_file):
    """Play mp3 file with pyglet."""
    source = pyglet.media.load(filename=mp3_file, streaming=False)
    logger.debug('playing %s second file')
    source.play()
    print(mp3_file, source.duration)
    time.sleep(source.duration + 2)  # there must be a better way to wait until the media has played
    print(mp3_file, source.duration)
 def cache_gtts(text, lang="en-nz", cache_file=None):
    """
@@ -73,92 +73,122 @@ def cache_gtts(text, lang="en-nz", cache_file=None):
        en: English
    """
-    logger.debug('say: %s', text)
+    logger.info('%s say: %s', lang, text)
    if not cache_file:
-        hash_filename = hashlib.md5(text.encode()).hexdigest() + '.mp3'
+        hash_filename = hashlib.md5(text.encode()).hexdigest() + lang + '.mp3'
        cache_file = os.path.join(tempfile.gettempdir(), hash_filename)
    if not os.path.isfile(cache_file):
        tts = gTTS(text=text, lang=lang)
        tts.save(cache_file)
    return cache_file
-def speak(text):
+def speak(text, lang="en-nz", cache_file=None):
-    mp3_file = cache_gtts(text)
+    mp3_file = cache_gtts(text, lang=lang, cache_file=cache_file)
    play_mp3(mp3_file)
 def record_audio(audio, output_file, play=False):
    # write audio to a WAV file for debugging
    with open(output_file, "wb") as f:
        f.write(audio.get_flac_data())
    logger.info('recorded %s s. Saved as %s', len(audio.frame_data)/audio.sample_rate, output_file)
    if play:
        speak("DEBUG: I recorded the following")
        play_mp3(output_file)
 def generate_poem():
    if DEBUG:
        speak("I'm in debug mode")
    ############ AUDIO CONVERSION TO TEST
    play_dong()
    t0 = time.time()
    r = sr.Recognizer()
    speak("Hi I'm bit-lit. Silence Humans. I must calibrate the microphone. I will ding when I am finished")
    time.sleep(2)
    with sr.Microphone() as source:
-        print('mic', source)
+        logger.debug('microphone source is %s', source)
-        speak('please be quiet while I calibrate the microphones. I will ding when I am finished')
+        r.adjust_for_ambient_noise(source, duration=2) 
-        time.sleep(5)
+    r.energy_threshold = max(r.energy_threshold, 50)
-        r.pause_threshold = 5
+    r.energy_threshold = min(r.energy_threshold, 500)
-        r.adjust_for_ambient_noise(source) 
+
-        print('calibrate mic energy_threshold to', r.energy_threshold)
+    logger.info('calibrate mic energy_threshold to %s', r.energy_threshold)
-        speak('Thanks. When you want to talk to me say "Hi BitLit"')
+    play_dong()
    while True:
        speak('When you want me to make a poem say "Hi BitLit" or Alexa or Snowboy')
        play_ding()
        with sr.Microphone() as source:
            audio_hotword = r.listen(source, snowboy_configuration=snowboy_configuration)
        if DEBUG:
            record_audio(audio_hotword, "outputs/hotword-results.flac", play=DEBUG)
        play_dong()
-        while True:
+        speak(text="Hi! My Name is BIT-LIT. Please speak some ideas for a poem after the bing. You have 20 seconds.")
            print('waiting for hotword "Hi BitLit" or Alexa or SnowBoy')
            audio_null = r.listen(source, snowboy_configuration=snowboy_configuration)
            play_dong()
-            speak(text="Hi! My Name is BIT-LIT. PLEASE SPEAK SOME IDEAS FOR A POEM AFTER THE DING.")
+        play_ding()
-            play_ding()
+        with sr.Microphone() as source:
            audio = r.record(source, duration=20)
        play_dong()
-            print('speak now', time.time())
+        # write audio to a WAV file for debugging
-            try:
+        if DEBUG:
-                audio = r.listen(source, timeout=30, phrase_time_limit=30)
+            record_audio(audio, "outputs/record-results.flac", play=DEBUG)
            except sr.WaitTimeoutError as e:
                print('WaitTimeoutError', e)
                continue
            logger.debug('done recording %s', time.time())
            logger.info('recorded %s s', len(audio.frame_data)/audio.sample_rate)
-            play_dong()
+        logger.debug('done recording %s', time.time())
-            speak(text="BEEP. THANK YOU! GIVE ME A MINUTE TO GENERATE AND READ YOUR POEM")
+        logger.info('recorded %s s', len(audio.frame_data)/audio.sample_rate)
-            t1 = time.time()
+        speak(text="THANK YOU! GIVE ME A MINUTE TO GENERATE AND READ YOUR POEM")
            logger.debug('listen took %s', t1 - t0)
-            try:
+        t1 = time.time()
-                logger.debug("using google speech to text...")
+        logger.debug('listen took %s', t1 - t0)
                USER_INPUT = r.recognize_google_cloud(audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS)
                logger.info("Google thinks you said: " + USER_INPUT)
            except sr.UnknownValueError as e:
                logger.error("Could not understand audio. {}".format(e))
                return 
            except sr.RequestError as e:
                logger.error("Could not request results; {0}".format(e))
                return
-            t1b = time.time()
+        try:
-            logger.debug('transcribe took %s', t1b - t1)
+            logger.debug("using google speech to text...")
            USER_INPUT = r.recognize_google_cloud(audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS)
            logger.info("Google thinks you said: " + USER_INPUT)
        except sr.UnknownValueError as e:
            logger.error("Could not understand audio. {}".format(e))
            speak("I could not understand that audio")
            continue 
        except sr.RequestError as e:
            logger.error("Could not request results; {0}".format(e))
            speak("I'm sorry I could not communicate with the speech to _text the internet'")
            continue
-            # Generate poem from user seed
+        t1b = time.time()
-            text_generated = poem(USER_INPUT)
+        logger.debug('transcribe took %s', t1b - t1)
            t2 = time.time()
            logger.info("ML POEM is: %s", text_generated)
            logger.debug('poem and rhyme generation took %s', t2 - t1)
-            # FEED POEM TO TRANSCRIBER
+        if DEBUG:
-            tts = gTTS(text=text_generated)
+            speak('DEBUG: I think you said %s' % USER_INPUT)
            poem_mp3 = "outputs/BitLit_poem.mp3"
            tts.save(poem_mp3)
            play_mp3(poem_mp3)
-            speak(text="THANK YOU! BEEP.")
+        # Generate poem from user seed
        text_generated, rhymes = poem(USER_INPUT)
        t2 = time.time()
        logger.info("rhymes: %s", rhymes)
        logger.info("ML POEM is: %s", text_generated)
        logger.debug('poem and rhyme generation took %s', t2 - t1)
-            ######
+        if DEBUG:
-            t3 = time.time()
+            speak('DEBUG: your rhymes are '+ ' '.join(rhymes))
-            logger.debug('Poem to speech took %s', t3 - t2)
+
-            logger.debug("Total time spent is about: %s seconds", np.round(t3 - t0))
+        # FEED POEM TO TRANSCRIBER
        speak(text=text_generated, cache_file="outputs/BitLit_last_poem.mp3")
        if random.random()>0.90:
            speak(text="THANK YOU!")
        else:
            speak(text="THANK YOU PUNY HUMANS.")
        ######
        t3 = time.time()
        logger.debug('Poem to speech took %s', t3 - t2)
        logger.debug("Total time spent is about: %s seconds", np.round(t3 - t0))
        play_ding()
 if __name__ == "__main__":
@@ -3,7 +3,7 @@ Forked from https://github.com/ShebMichel/BitLit_test1
 # Run
 To run the program once it is installed:
->> python BitLit_decoder.py HiBitLit.pmdl
+>> python3 BitLit_main.py
 This will start snowboy listening for the phrase "Hi BitLit". Then it will ask for seed phrases for a poem.
@@ -145,8 +145,6 @@ def poem(USER_INPUT):
        input_eval = tf.expand_dims([predicted_id], 0)
        rhymes += [idx2word_rhymes[predicted_id]]
    logger.info("rhymes: %s", rhymes)
    ####################
    #  POEM GENERATION #
    ####################
@@ -184,4 +182,4 @@ def poem(USER_INPUT):
    text_generated = re.sub(" +", " ", text_generated)
    text_generated = str(TextBlob(text_generated).correct())
-    return text_generated
+    return text_generated, rhymes