working and using google cloud speech

2026-07-03 14:49:40 +08:00 · 2018-12-29 14:20:04 +08:00
parent 76bee538ca
commit c328ccfeac
8 changed files with 192 additions and 108 deletions
@@ -2,6 +2,7 @@
 /outputs/
 /snowboy/
 /secrets/
+.pytest_cache/

 # Created by https://www.gitignore.io/api/c,c++,cmake,linux,macos,python
 # Edit at https://www.gitignore.io/?templates=c,c++,cmake,linux,macos,python
@@ -10,10 +10,11 @@ import signal

 ####
 from gtts import gTTS           ## Packages for Text to voice
-import speech_recognition as sr ## Packages for voice recognizer
-import tensorflow as tf
-tf.enable_eager_execution()
-from tensorflow.keras.layers import Embedding, GRU, Dense
+import speech_recognition as sr  ## Packages for voice recognizer
+import BitLit_main
+# import tensorflow as tf
+# tf.enable_eager_execution()
+# from tensorflow.keras.layers import Embedding, GRU, Dense
 import numpy as np
 import re
 from textblob import TextBlob
@@ -21,11 +22,11 @@ import random
 import poem_generator           ## POEM GENERATOR IMPORT
 from poem_generator import*
 import time
-####
+
+
 t0=time.time()   ## Time counter
 interrupted = False

-
 def signal_handler(signal, frame):
    global interrupted
    interrupted = True
@@ -48,20 +49,10 @@ signal.signal(signal.SIGINT, signal_handler)
 detector = snowboydecoder.HotwordDetector(model, sensitivity=0.5)
 print('Listening... Press Ctrl+C to exit')

-# main loop
-detector.start(detected_callback=snowboydecoder.run,
-              interrupt_check=interrupt_callback, 
-               sleep_time=0.01)
-##detector.start(detected_callback=snowboydecoder.play_audio_file,
-##              interrupt_check=interrupt_callback, 
-##               sleep_time=0.00)
-##detector.start(detected_callback= os.system('python BitLit_main.py'),
-##              interrupt_check=interrupt_callback, 
-##               sleep_time=0.00) ##0.03
-#import BitLit_main
-##detector.start(detected_callback=BitLit_main,
-##              interrupt_check=interrupt_callback,
-##              sleep_time=0.03)
+
+detector.start(detected_callback=BitLit_main.generate_poem,
+             interrupt_check=interrupt_callback,
+             sleep_time=0.03)

 detector.terminate()
 t1   =time.time()
@@ -1,76 +1,128 @@
-'''
+"""
 Voice to text to poem to speech
 Credits: Michel, Lauren, Thomas
-'''
+"""

-#https://pythonprogramminglanguage.com/text-to-speech/
+# https://pythonprogramminglanguage.com/text-to-speech/
 ## cmd 1::::  sudo pip install gTTS
 ## cmd 2::::  sudo pip install pyttsx
 import sys
-from gtts import gTTS           ## Packages for Text to voice
+from gtts import gTTS  ## Packages for Text to voice
 import os
-import speech_recognition as sr ## Packages for voice recognizer
-import tensorflow as tf
-tf.enable_eager_execution()
-from tensorflow.keras.layers import Embedding, GRU, Dense
+
+
 import numpy as np
 import re
 from textblob import TextBlob
 import random
-from poem_generator import*
+import pyglet
+import json
 import time
-##### KNOWN PARAMETERS
-#######################################################
-##sys.path
-##sys.path.append('/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python')
-##sys.path.append('/Users/ShebMichel/Library/Python/2.7/lib/python/site-packages'
-################################################################################
-############ AUDIO CONVERSION TO TEST
-t0=time.time()
-r = sr.Recognizer()                                                                                   
-with sr.Microphone() as source:                                                                       
-    tts = gTTS(text='HELLO! My Name is BIT-LIT. PLEASE SPEAK IN ABOUT 3 SECONDS.', lang='en')
-    tts.save("outputs/BitLit.mp3")
-    os.system("afplay outputs/BitLit.mp3")
-#    ######
-    
-    print("SPEAK NOW-SPEAK NOW-SPEAK NOW:")
-    audio = r.listen(source)   
-    tts = gTTS(text='THANK YOU! GIVE ME A SECOND TO READ OUT YOUR POEM', lang='en')
-    tts.save("outputs/BitLit.mp3")
-    os.system("afplay outputs/BitLit.mp3")
-try:
-    # for testing purposes, we're just using the default API key
-    # to use another API key, use `r.recognize_google(audio, key="GOOGLE_SPEECH_RECOGNITION_API_KEY")`
-    # instead of `r.recognize_google(audio)
-    AA0=r.recognize_google(audio)
-    USER_INPUT=AA0
-    print("You said: " + r.recognize_google(audio))
-except sr.UnknownValueError:
-    print("Could not understand audio")
-except sr.RequestError as e:
-    print("Could not request results; {0}".format(e))    
-text_generated=poem(USER_INPUT)
-#### END CODE
-#########################################################
-################# TEXT CONVERSION IN AUDIO
-################# FEED POEM TO TRANSCRIBER
-print('ML POEM is:', text_generated)
-tts = gTTS(text=text_generated, lang='en')
-tts.save("outputs/BitLit.mp3")
-os.system("afplay outputs/BitLit.mp3")
-#########################################################
-####
-print("BIT-LIT ENDING STATEMENT:")   
-tts = gTTS(text='THANK YOU! CHECK ME OUT IN THE NEWS SOON.', lang='en')
-tts.save("outputs/BitLit.mp3")
-os.system("afplay outputs/BitLit.mp3")
-######
-t1   =time.time()
-total=t1-t0
-print 'Time spent is about:', np.round(total), 'seconds'
+import datetime
+import hashlib
+import tempfile
+from logger import logger

-### USING JUPITER
-# import IPython.display as ipd
-# ipd.Audio(filename='path/to/file.mp3')
-#tk.mainloop()
+from snowboydecoder import play_audio_file
+
+import speech_recognition as sr  ## Packages for voice recognizer
+# Log (at debug level) every microphone name reported by speech_recognition
+# together with the device index that would select it.
+for index, name in enumerate(sr.Microphone.list_microphone_names()):
+    logger.debug("Microphone with name \"{1}\" found for `Microphone(device_index={0})`".format(index, name))
+
+from poem_generator import poem
+
+
+# Load the Google Cloud credentials JSON as a raw string.
+# A missing or unreadable file is not fatal here: the constant is set to None
+# and a hint is printed. Only I/O errors are caught so that programming errors
+# and Ctrl+C are not silently swallowed.
+try:
+    with open("secrets/google_cloud_credentials.json") as credentials_file:
+        GOOGLE_CLOUD_SPEECH_CREDENTIALS = credentials_file.read()
+except (IOError, OSError):
+    print('you should place google cloud json credentials at "secrets/google_cloud_credentials.json", make sure you enable the speech recognition api')
+    GOOGLE_CLOUD_SPEECH_CREDENTIALS = None
+
+
+def play_mp3(mp3_file):
+    """Play the mp3 at `mp3_file` with pyglet and wait for roughly its duration."""
+    sound = pyglet.media.load(filename=mp3_file, streaming=False)
+    sound.play()
+    # Wait for the clip length plus a 2 second margin; there is probably a
+    # better way to wait until the media has finished playing.
+    wait_seconds = sound.duration + 2
+    time.sleep(wait_seconds)
+
+
+def cache_gtts(text, lang="en", cache_file=None):
+    """
+    Cache calls to gtts.
+
+    Synthesizes `text` to an mp3 once and reuses that file on later calls.
+
+    Args:
+        text: the sentence to synthesize.
+        lang: language code handed to gTTS (default "en").
+        cache_file: explicit path for the mp3. When omitted, a file in the
+            system temp directory named after the md5 of language and text
+            is used.
+
+    Returns:
+        Path of the mp3 file.
+
+    Raises:
+        IOError: if the mp3 does not exist after gTTS was asked to save it.
+    """
+    if not cache_file:
+        # The language is part of the key so the same text requested in two
+        # languages does not end up sharing one cached file.
+        cache_key = "{}:{}".format(lang, text)
+        hash_filename = hashlib.md5(cache_key.encode("utf-8")).hexdigest() + '.mp3'
+        cache_file = os.path.join(tempfile.gettempdir(), hash_filename)
+    if not os.path.isfile(cache_file):
+        tts = gTTS(text=text, lang=lang)
+        tts.save(cache_file)
+    # Explicit check instead of `assert`, which is stripped under `python -O`.
+    if not os.path.isfile(cache_file):
+        raise IOError("gTTS did not write {}".format(cache_file))
+    return cache_file
+
+
+def generate_poem():
+    """
+    Run one voice -> text -> poem -> speech cycle.
+
+    Plays the detection sound, speaks a prompt, records up to 20 seconds of
+    speech from the default microphone, transcribes it with Google Cloud
+    Speech, passes the transcript to `poem()` and reads the result out loud.
+    The poem audio is saved under `outputs/` with a UTC timestamp in its name.
+
+    Returns:
+        None. When no speech starts within the listen timeout, or the
+        transcription fails, the problem is printed and the cycle ends early
+        instead of raising, so a caller using this as a callback keeps running.
+    """
+
+    # https://github.com/Uberi/speech_recognition/blob/master/reference/library-reference.rst
+    # snowboy_configuration = (SNOWBOY_LOCATION, LIST_OF_HOT_WORD_FILES)
+    # Currently unused: only takes effect if passed to r.listen() below.
+    snowboy_configuration = ('./snowboy', ['./HiBitLit.pmdl', './snowboy/resources/alexa.umdl', './snowboy/resources/snowboy.umdl'])
+
+    ############ AUDIO CONVERSION TO TEST
+    play_audio_file()
+    t0 = time.time()
+    r = sr.Recognizer()
+    with sr.Microphone() as source:
+        outfile1 = cache_gtts(text="Hi! My Name is BIT-LIT. PLEASE SPEAK. You have 20 seconds before the beep.", lang="en")
+        play_mp3(outfile1)
+
+        print("SPEAK NOW-SPEAK:", source)
+        try:
+            audio = r.listen(source, timeout=5, phrase_time_limit=20)#, snowboy_configuration=snowboy_configuration)
+        except sr.WaitTimeoutError as e:
+            # listen() raises when no phrase starts within `timeout` seconds
+            print("No speech detected. {}".format(e))
+            return
+        print('Recorded audio', audio)
+
+        outfile2 = cache_gtts(text="BEEP. THANK YOU! GIVE ME A SECOND TO READ OUT YOUR POEM", lang="en")
+        play_mp3(outfile2)
+
+    t1 = time.time()
+    print('listen took', t1 - t0)
+
+    # Without a transcript there is no seed for the poem, so stop here rather
+    # than hitting an unbound USER_INPUT further down.
+    try:
+        print("using google speech to text...")
+        USER_INPUT = r.recognize_google_cloud(audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS)
+        print("Google thinks you said: " + USER_INPUT)
+    except sr.UnknownValueError as e:
+        print("Could not understand audio. {}".format(e))
+        return
+    except sr.RequestError as e:
+        print("Could not request results; {0}".format(e))
+        return
+
+    t1b = time.time()
+    print('transcribe took', t1b - t1)
+
+    # Generate poem from user seed
+    text_generated = poem(USER_INPUT)
+    t2 = time.time()
+    # Measured from the end of transcription so only poem generation is counted
+    print("ML POEM is:", text_generated, 'in', t2 - t1b)
+
+    # TEXT CONVERSION IN AUDIO
+    # FEED POEM TO TRANSCRIBER
+    tts = gTTS(text=text_generated, lang="en")
+    ts = datetime.datetime.utcnow().strftime('%Y%m%d_%H-%M-%S')
+    # outputs/ is git-ignored, so it does not exist on a fresh checkout
+    if not os.path.isdir("outputs"):
+        os.makedirs("outputs")
+    poem_mp3 = "outputs/BitLit_{}.mp3".format(ts)
+    tts.save(poem_mp3)
+    play_mp3(poem_mp3)
+
+    print("BIT-LIT ENDING STATEMENT:")
+    outfile = cache_gtts(text="THANK YOU! CHECK ME OUT IN THE NEWS SOON.", lang="en")
+    play_mp3(outfile)
+
+    ######
+    t3 = time.time()
+    print('Poem to speech took', t3 - t2)
+    print("Time spent is about:", np.round(t3 - t0), "seconds")
+
+
+# When executed as a script, run a single listen -> poem -> speech cycle.
+if __name__ == "__main__":
+    generate_poem()
@@ -3,7 +3,7 @@

 # Run
 To run the program once installed 
->> python BitLit_decoder1.py HiBitLit.pmdl
+>> python BitLit_decoder.py HiBitLit.pmdl
 After a moment, say "Hi BitLit", then wait until the poetbot speaks to you and enjoy the rest. At the end, just say "Hi BitLit" again and give a different vocal input so that the poetbot generates a new poem; the cycle can go on for days and weeks.
 WARNING::: Might be slow depending on your computer capabilities..

@@ -31,7 +31,7 @@ API Keys:
 - `cp secrets.template.json to secrets.json`
 - get an API key for google cloud
  - make a project or use an existing project
-  - download a credentials json for that project
-  - enable the speech recognition api https://console.cloud.google.com/apis/api/speech.googleapis.com/overview
-  - put the credentials in `./secrets/google_cloud_credintials.json`
+  - download a credentials json for that project (using edit, create key) https://console.cloud.google.com/apis/api/speech.googleapis.com/credentials
+  - enable the speech recognition api for that project (make sure you've selected your project) https://console.cloud.google.com/apis/api/speech.googleapis.com/overview
+  - place the credentials json at this path `./secrets/google_cloud_credentials.json`
  - never commit this file!
@@ -0,0 +1,37 @@
+"""
+from https://gist.github.com/wassname/d17325f36c36fa663dd7de3c09a55e74
+Setup simple logging in python. This logs info message to stdout and debug messages to file.
+Sure it's long but this is as simple as I could make it for this outcome.
+Note: We must set the root logger at DEBUG level, since it must be higher than its children to pass them on.
+Then set filehandler at debug and stream handler at info.
+"""
+import logging
+import sys
+import os
+import datetime
+import tempfile
+
+# Use different log levels for file and console: the file receives DEBUG and
+# above, the console only INFO and above.
+timestamp = datetime.datetime.utcnow().strftime('%Y%m%d_%H-%M-%S')
+# Join a bare file name: an absolute second argument makes os.path.join drop
+# the temp directory, which hard-coded /tmp and broke hosts without it.
+filename = os.path.join(tempfile.gettempdir(), 'bitlit_log_{}.log'.format(timestamp))
+formatter = logging.Formatter('[%(asctime)s] %(name)s {%(filename)s:%(lineno)d} %(levelname)s - %(message)s')
+
+file_handler = logging.FileHandler(filename=filename)
+file_handler.setLevel(logging.DEBUG)
+file_handler.setFormatter(formatter)
+
+stream_handler = logging.StreamHandler(sys.stdout)
+stream_handler.setLevel(logging.INFO)
+
+# The handlers have to be at a root level since they are the final output
+logging.basicConfig(
+    level=logging.DEBUG, 
+    format='[{%(filename)s:%(lineno)d} %(levelname)s - %(message)s',
+    handlers=[
+        file_handler,
+        stream_handler
+    ]
+)
+
+# Shared logger for the project; other modules import it from here.
+logger = logging.getLogger('bitlit')
+logger.info('Logging to STDOUT and {}'.format(filename))
@@ -7,14 +7,16 @@ import sys
 from gtts import gTTS           ## Packages for Text to voice
 import os
 import numpy as np
-import speech_recognition as sr ## Packages for voice recognizer
+import speech_recognition as sr  ## Packages for voice recognizer
+
+os.environ['CUDA_VISIBLE_DEVICES']=""
 import tensorflow as tf
 tf.enable_eager_execution()
 from tensorflow.keras.layers import Embedding, GRU, Dense
 import re
 from textblob import TextBlob
 import random
-from BitLit_param import*
+from BitLit_param import *

 # Architechture of the GRU

@@ -103,10 +105,9 @@ def poem(USER_INPUT):
    ### ML POEM PREDICTOR
    
    
-            ###########################
-            #  USER INPUT a line      #
-            ###########################
-    
+    ###########################
+    #  USER INPUT a line      #
+    ###########################
    USER_INPUT = USER_INPUT.lower()
    USER_INPUT = re.sub('[^a-z\n]', ' ', USER_INPUT)
    text_generated = USER_INPUT[::-1]
@@ -114,11 +115,9 @@ def poem(USER_INPUT):
    
    
    
-            ######################
-            #  RHYMES GENERATION #
-            ######################
-            
-            
+    ######################
+    #  RHYMES GENERATION #
+    ######################          
    temperature = 0.09
    
    num_generate = 5  # number of characters to generate
@@ -145,9 +144,9 @@ def poem(USER_INPUT):
    print('rhymes:', rhymes)
    
    
-            ####################
-            #  POEM GENERATION #
-            ####################
+    ####################
+    #  POEM GENERATION #
+    ####################
    
    
    temperature = 0.8
@@ -1,4 +1,11 @@
 PyAudio==0.2.9
-
-   
-  
+gTTS==2.0.1
+SpeechRecognition==3.8.1
+tensorflow==1.10.1
+# or tensorflow-gpu==1.10.1
+textblob==0.15.2
+# for playing mp3s
+pyglet==1.2.4
+# using google cloud speech recognition
+google-api-python-client==1.7.7
+oauth2client==4.1.3
@@ -19,7 +19,6 @@ TOP_DIR = os.path.dirname(os.path.abspath(__file__))
 RESOURCE_FILE = os.path.join(TOP_DIR, "snowboy/resources/common.res")
 DETECT_DING = os.path.join(TOP_DIR, "snowboy/resources/ding.wav")
 DETECT_DONG = os.path.join(TOP_DIR, "snowboy/resources/dong.wav")
-DETECT_PY   = os.path.join(TOP_DIR, "BitLit_main.py")  ## ADDED


 class RingBuffer(object):
@@ -37,9 +36,7 @@ class RingBuffer(object):
        self._buf.clear()
        return tmp

-###
-def run(fname=DETECT_PY):
-    Py_wav=os.system('python BitLit_main.py')
+
 ###    
 def play_audio_file(fname=DETECT_DING):
    """Simple callback function to play a wave file. By default it plays