use speechrecognition look and calibrate mic

2026-06-27 16:43:35 +08:00 · 2019-01-12 11:35:10 +08:00
parent a67bc9531d
commit ab1b07329c
6 changed files with 60 additions and 107 deletions
@@ -1,56 +0,0 @@
-####  RUNNING THE MODEL
-# cd documents/pmlg/wake/decoder
-# python demo.py resources/HiBitLit.pmdl
-from __future__ import print_function
-import os
-import sys
-
-import snowboydecoder
-from snowboydecoder import play_ding, play_dong
-import signal
-
-import time
-import numpy as np
-
-import BitLit_main
-
-
-t0 = time.time()  ## Time counter
-interrupted = False
-
-
-def signal_handler(signal, frame):
-    global interrupted
-    interrupted = True
-
-
-def interrupt_callback():
-    global interrupted
-    return interrupted
-
-
-if len(sys.argv) == 1:
-    print("Error: need to specify model name")
-    print("Usage: python demo.py your.model")
-    sys.exit(-1)
-
-model = sys.argv[1]
-
-# capture SIGINT signal, e.g., Ctrl+C
-signal.signal(signal.SIGINT, signal_handler)
-
-play_ding()
-detector = snowboydecoder.HotwordDetector(model, sensitivity=0.5)
-print("Listening... Press Ctrl+C to exit")
-
-
-detector.start(
-    detected_callback=BitLit_main.generate_poem,
-    interrupt_check=interrupt_callback,
-    sleep_time=0.03,
-)
-
-detector.terminate()
-t1 = time.time()
-total = t1 - t0
-print(("Time spent is about:", np.round(total), "seconds"))
@@ -32,6 +32,8 @@ for index, name in enumerate(sr.Microphone.list_microphone_names()):
 from poem_generator import poem


+snowboy_configuration = ('./snowboy', os.listdir('hotwords'))
+
 # Load credentials
 try:
    GOOGLE_CLOUD_SPEECH_CREDENTIALS = open("secrets/google_cloud_credentials.json").read()
@@ -45,7 +47,7 @@ def play_mp3(mp3_file):
    source = pyglet.media.load(filename=mp3_file, streaming=False)
    source.play()
    print(mp3_file, source.duration)
-    time.sleep(source.duration + 4)  # must be a better way to wait untill the media has played
+    time.sleep(source.duration + 2)  # must be a better way to wait untill the media has played
    print(mp3_file, source.duration)

 def cache_gtts(text, lang="en-nz", cache_file=None):
@@ -80,6 +82,10 @@ def cache_gtts(text, lang="en-nz", cache_file=None):
        tts.save(cache_file)
    return cache_file

+def speak(text):
+    mp3_file = cache_gtts(text)
+    play_mp3(mp3_file)
+

 def generate_poem():

@@ -89,67 +95,70 @@ def generate_poem():
    t0 = time.time()
    r = sr.Recognizer()
    with sr.Microphone() as source:
-
-        # print(r.energy_threshold)
-        # r.adjust_for_ambient_noise(source) 
-        # print('energy_threshold', r.energy_threshold)
-        r.energy_threshold=50
-
-
        print('mic', source)
-        outfile1 = cache_gtts(text="Hi! My Name is BIT-LIT. PLEASE SPEAK SOME IDEAS FOR A POEM AFTER THE BEEP.")
-        play_mp3(outfile1)
+        speak('please be quiet while I calibrate the microphones. I will ding when I am finished')
+        time.sleep(5)
+        r.pause_threshold = 5
+        r.adjust_for_ambient_noise(source) 
+        print('calibrate mic energy_threshold to', r.energy_threshold)
+        speak('Thanks. When you want to talk to me say "Hi BitLit"')
        play_ding()

-        print('speak now', time.time())
-        audio = r.listen(source)
-        logger.debug('done recording %s', time.time())
-        logger.info('recorded %s s', len(audio.frame_data)/audio.sample_rate)
+        while True:
+            print('waiting for hotword "Hi BitLit" or Alexa or SnowBoy')
+            audio_null = r.listen(source, snowboy_configuration=snowboy_configuration)
+            play_dong()

-        play_dong()
-        outfile2 = cache_gtts(text="BEEP. THANK YOU! GIVE ME A MINUTE TO GENERATE AND READ YOUR POEM")
-        play_mp3(outfile2)
+            speak(text="Hi! My Name is BIT-LIT. PLEASE SPEAK SOME IDEAS FOR A POEM AFTER THE DING.")
+            play_ding()

-    t1 = time.time()
-    logger.debug('listen took %s', t1 - t0)
+            print('speak now', time.time())
+            try:
+                audio = r.listen(source, timeout=30, phrase_time_limit=30)
+            except sr.WaitTimeoutError as e:
+                print('WaitTimeoutError', e)
+                continue
+            logger.debug('done recording %s', time.time())
+            logger.info('recorded %s s', len(audio.frame_data)/audio.sample_rate)

-    try:
-        logger.debug("using google speech to text...")
-        USER_INPUT = r.recognize_google_cloud(audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS)
-        logger.info("Google thinks you said: " + USER_INPUT)
-    except sr.UnknownValueError as e:
-        logger.error("Could not understand audio. {}".format(e))
-        return 
-    except sr.RequestError as e:
-        logger.error("Could not request results; {0}".format(e))
-        return
+            play_dong()
+            speak(text="BEEP. THANK YOU! GIVE ME A MINUTE TO GENERATE AND READ YOUR POEM")

-    t1b = time.time()
-    logger.debug('transcribe took %s', t1b - t1)
+            t1 = time.time()
+            logger.debug('listen took %s', t1 - t0)

-    return
+            try:
+                logger.debug("using google speech to text...")
+                USER_INPUT = r.recognize_google_cloud(audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS)
+                logger.info("Google thinks you said: " + USER_INPUT)
+            except sr.UnknownValueError as e:
+                logger.error("Could not understand audio. {}".format(e))
+                return 
+            except sr.RequestError as e:
+                logger.error("Could not request results; {0}".format(e))
+                return

-    # Generate poem from user seed
-    text_generated = poem(USER_INPUT)
-    t2 = time.time()
-    logger.info("ML POEM is: %s", text_generated)
-    logger.debug('poem and rhyme generation took %s', t2 - t1)
+            t1b = time.time()
+            logger.debug('transcribe took %s', t1b - t1)

-    # TEXT CONVERSION IN AUDIO
-    # FEED POEM TO TRANSCRIBER
-    tts = gTTS(text=text_generated)
-    ts = datetime.datetime.utcnow().strftime('%Y%m%d_%H-%M-%S')
-    poem_mp3 = "outputs/BitLit_{}.mp3".format(ts)
-    tts.save(poem_mp3)
-    play_mp3(poem_mp3)
+            # Generate poem from user seed
+            text_generated = poem(USER_INPUT)
+            t2 = time.time()
+            logger.info("ML POEM is: %s", text_generated)
+            logger.debug('poem and rhyme generation took %s', t2 - t1)

-    outfile = cache_gtts(text="THANK YOU! BEEP.")
-    play_mp3(outfile)
+            # FEED POEM TO TRANSCRIBER
+            tts = gTTS(text=text_generated)
+            poem_mp3 = "outputs/BitLit_poem.mp3"
+            tts.save(poem_mp3)
+            play_mp3(poem_mp3)

-    ######
-    t3 = time.time()
-    logger.debug('Poem to speech took %s', t3 - t2)
-    logger.debug("Total time spent is about: %s seconds", np.round(t3 - t0))
+            speak(text="THANK YOU! BEEP.")
+
+            ######
+            t3 = time.time()
+            logger.debug('Poem to speech took %s', t3 - t2)
+            logger.debug("Total time spent is about: %s seconds", np.round(t3 - t0))


 if __name__ == "__main__":
@@ -145,7 +145,7 @@ def poem(USER_INPUT):
        input_eval = tf.expand_dims([predicted_id], 0)
        rhymes += [idx2word_rhymes[predicted_id]]

-    logger.info("rhymes:", rhymes)
+    logger.info("rhymes: %s", rhymes)

    ####################
    #  POEM GENERATION #