calib, small changes

This commit is contained in:
wassname
2019-01-17 06:46:15 +08:00
parent d0e90959fd
commit 34e3bb98f5
+50 -34
View File
@@ -24,7 +24,7 @@ import tempfile
import glob
from logger import logger
import argparse
from snowboydecoder import play_ding, play_dong
from snowboydecoder import play_ding
## Packages for voice recognizer
import speech_recognition as sr
@@ -42,6 +42,9 @@ except:
GOOGLE_CLOUD_SPEECH_CREDENTIALS = None
def play_ding():
speak('ding')
def play_mp3(mp3_file):
"""Play mp3 file with pyglet."""
source = pyglet.media.load(filename=mp3_file, streaming=False)
@@ -95,49 +98,59 @@ def record_audio(audio, output_file, play=False):
play_mp3(output_file)
def generate_poem():
def generate_poem(args):
if DEBUG:
speak("I'm in debug mode")
############ AUDIO CONVERSION TO TEST
play_dong()
play_ding()
t0 = time.time()
r = sr.Recognizer()
# This section could use work. But since I'm frequently initialising the mic,
# I don't think it has time to dynamically adapt, so I'm doing a specific auto calibrate here first
speak("Hi I'm bit-lit. Silence puny Humans. I must calibrate the microphone. I will make a dong sound when I am finished")
time.sleep(1)
r.dynamic_energy_threshold = False
with sr.Microphone() as source:
logger.debug('microphone source is %s', source)
print(dir(source))
r.adjust_for_ambient_noise(source, duration=4)
# https://github.com/Uberi/speech_recognition/blob/master/reference/library-reference.rst#recognizer_instanceenergy_threshold--300---type-float
logger.info('calibrate mic energy_threshold to %s. This should be between 150 and 3500 for speaking. If its higher you should turn down your mic', r.energy_threshold)
r.energy_threshold = max(r.energy_threshold, 150)
r.energy_threshold = min(r.energy_threshold, 3500)
logger.info('maxmin mic energy_threshold to %s', r.energy_threshold)
play_dong()
if args.pre_calib:
# This section could use work. But since I'm frequently initialising the mic,
# I don't think it has time to dynamically adapt, so I'm doing a specific auto calibrate here first
speak("Hi I'm bit-lit. Silence puny Humans. I must calibrate the microphone. I will make a dong sound when I am finished")
time.sleep(1)
r.dynamic_energy_threshold = False
with sr.Microphone() as source:
logger.debug('microphone source is %s', source)
print(dir(source))
r.adjust_for_ambient_noise(source, duration=4)
# https://github.com/Uberi/speech_recognition/blob/master/reference/library-reference.rst#recognizer_instanceenergy_threshold--300---type-float
logger.info('calibrate mic energy_threshold to %s. This should be between 150 and 3500 for speaking. If its higher you should turn down your mic', r.energy_threshold)
r.energy_threshold = max(r.energy_threshold, 150)
r.energy_threshold = min(r.energy_threshold, 3500)
logger.info('maxmin mic energy_threshold to %s', r.energy_threshold)
play_ding()
elif args.energy_threshold:
r.dynamic_energy_threshold = False
r.energy_threshold = args.energy_threshold
logger.info("setting constant energy_threshold to %s", args.energy_threshold)
else:
logger.info("using dynamic background energy_threshold calibration")
while True:
try:
speak('When you want me to make a poem summon me with "Hi BitLit" or "computer"')
logger.info('mic energy_threshold to %s', r.energy_threshold)
play_ding()
with sr.Microphone() as source:
audio_hotword = r.listen(source, snowboy_configuration=snowboy_configuration)
if DEBUG:
record_audio(audio_hotword, "outputs/hotword-results.flac", play=DEBUG)
play_dong()
if not args.woke:
speak('When you want me to make a poem summon me with "Hi BitLit" or "computer"')
logger.info('mic energy_threshold to %s', r.energy_threshold)
time.sleep(1)
with sr.Microphone() as source:
time.sleep(1)
play_ding()
audio_hotword = r.listen(source, snowboy_configuration=snowboy_configuration)
if DEBUG:
record_audio(audio_hotword, "outputs/hotword-results.flac", play=DEBUG)
play_ding()
speak(text="Hi Humans! My Name is BIT-LIT. Inspire me with the first line of a poem. You may speak for 10 seconds after the bing.")
speak('ding')
speak(text="Hi Humans! My Name is BIT-LIT. Inspire me with the first line of a poem: You may speek for 10 seconds after the bing.")
play_ding()
with sr.Microphone() as source:
audio = r.record(source, duration=10)
play_dong()
play_ding()
# write audio to a WAV file for debugging
if DEBUG:
@@ -146,7 +159,8 @@ def generate_poem():
logger.debug('done recording %s', time.time())
logger.info('recorded %s s', len(audio.frame_data)/audio.sample_rate)
speak(text="Thank you! Give me a minute to generate and read your poem.")
# Text to speech
speak(text="Thank you! Give me a minute to generate and reed your poem")
t1 = time.time()
logger.debug('listen took %s', t1 - t0)
@@ -167,8 +181,7 @@ def generate_poem():
t1b = time.time()
logger.debug('transcribe took %s', t1b - t1)
if DEBUG:
speak('DEBUG: I think you said %s' % USER_INPUT)
speak('I think you said %s' % USER_INPUT)
# Generate poem from user seed
text_generated, rhymes = poem(USER_INPUT)
@@ -178,7 +191,7 @@ def generate_poem():
logger.debug('poem and rhyme generation took %s', t2 - t1)
if DEBUG:
speak('DEBUG: your rhymes are '+ ' '.join(rhymes))
speak('DEBUG: your rhymes are: '+ ' '.join(rhymes))
# FEED POEM TO TRANSCRIBER
cache_file = "outputs/BitLit_last_poem.mp3"
@@ -210,6 +223,9 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--debug", help="increase output verbosity",
action="store_true")
parser.add_argument("-e", "--energy-threshold", help="Instead of using dynamic or pre calibration, set an integer for the level of background noise. Ideally between 40-4000.", default=None)
parser.add_argument("-p", "--pre-calib", help="Pre calibration instead of the default deynamic calibration", action='store_true')
parser.add_argument("-w", "--woke", help="Woke youself. In this mode bitlit will be so woke it wont need a wokeword.", action='store_true')
args = parser.parse_args()
DEBUG = args.debug
@@ -222,4 +238,4 @@ if __name__ == "__main__":
logger.debug("Microphone with name \"{1}\" found for `Microphone(device_index={0})`".format(index, name))
generate_poem()
generate_poem(args)