mirror of
https://github.com/wassname/pysle.git
synced 2026-06-26 16:00:06 +08:00
INITIAL COMMIT: pysle - python interface to ISLEX dictionary
Code tested over a decent corpus. Features: - look up phone and syllable info for the canonical pronunciation of a word - map an actual pronunciation to a dictionary pronunciation (can be used to automatically find speech errors) - automatically syllabify a textgrid containing words and phones (e.g. force-aligned text)
This commit is contained in:
+33
@@ -0,0 +1,33 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
env/
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
*.DS_Store
|
||||
|
||||
*.project
|
||||
*.pydevproject
|
||||
@@ -0,0 +1,21 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2014 Tim
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
+67
@@ -0,0 +1,67 @@
|
||||
|
||||
---------
|
||||
pysle
|
||||
---------
|
||||
|
||||
Pronounced like 'p' + 'isle'.
|
||||
|
||||
An interface for the ISLEX (international speech lexicon) dictionary,
|
||||
along with some tools for working with comparing and aligning
|
||||
pronunciations (e.g. a list of phones someone said versus a standard or
|
||||
canonical dictionary pronunciation).
|
||||
|
||||
|
||||
Requirements
|
||||
================
|
||||
|
||||
- Before you use this library (before or after installing it) you will need
|
||||
to download the ISLEX dictionary. It can be downloaded here:
|
||||
|
||||
`ISLEX project page <http://www.isle.illinois.edu/sst/data/dict/>`_
|
||||
|
||||
`Direct link to the ISLEX file used in this project
|
||||
<http://www.isle.illinois.edu/sst/data/dict/islev2.txt>`_
|
||||
|
||||
- ``Python 2.7.*`` or above
|
||||
|
||||
Installation
|
||||
================
|
||||
|
||||
From a command-line shell, navigate to the directory this is located in
|
||||
and type::
|
||||
|
||||
python setup.py install
|
||||
|
||||
If python is not in your path, you'll need to enter the full path e.g.::
|
||||
|
||||
C:\Python27\python.exe setup.py install
|
||||
|
||||
|
||||
Example usage
|
||||
================
|
||||
|
||||
Here is a typical common usage::
|
||||
|
||||
    from pysle import isletool
    isleDict = isletool.LexicalTool('C:\islev2.txt')
    print isleDict.lookup('catatonic')[0] # Get the first pronunciation
    >> [['kh', '@,'], ['t_(', '&'], ['th', "A'"], ['n', 'I', 'kh']] [2]
|
||||
|
||||
and another::
|
||||
|
||||
    from pysle import isletool
    from pysle import pronunciationtools

    searchWord = 'another'
    anotherPhoneList = ['n', '@', 'th', 'r'] # Actually produced

    returnList = pronunciationtools.findBestSyllabification(isleDict,
                                                            searchWord,
                                                            anotherPhoneList)
    print returnList[1] # The syllabified version of anotherPhoneList
    >> [["''"], ['n', '@'], ['th', 'r']]

    stressedSyllable, syllableList, syllabification, stressedIndex = returnList
|
||||
|
||||
Please see the ``test`` folder for example usage.
|
||||
|
||||
@@ -0,0 +1,136 @@
|
||||
'''
|
||||
Created on Oct 11, 2012
|
||||
|
||||
@author: timmahrt
|
||||
'''
|
||||
|
||||
|
||||
vowelList = ['a', '@', 'e', 'i', 'o', 'u', '^', '&', '>',]
|
||||
|
||||
|
||||
class WordNotInISLE(Exception):
    '''
    Raised when a word has no entry in the ISLE dictionary

    word - the word that could not be found
    '''

    def __init__(self, word):
        # Forward the word to Exception so that .args, repr() and pickling
        # carry it as well (previously .args was left empty)
        super(WordNotInISLE, self).__init__(word)
        self.word = word

    def __str__(self):
        return ("Word '%s' not in ISLE dictionary. Please add it to continue."
                % self.word)
|
||||
|
||||
|
||||
|
||||
class LexicalTool(object):
    '''
    Lazy reader for an ISLE dictionary file

    The file at islePath is read on the first call to lookup(); its text
    is then cached in self.data and reused by later lookups.
    '''

    def __init__(self, islePath):
        self.islePath = islePath
        self.data = None  # Full text of the dictionary, loaded on demand
        self.pronDict = None  # Not used by the methods of this class

    def _loadData(self):
        '''Read the dictionary file into self.data'''
        with open(self.islePath, "r") as fd:
            data = fd.read()

        # Entries are located by searching for "\n<word>(".  Make sure the
        # entry on the very first line is preceded by a newline as well.
        if not data.startswith("\n"):
            data = "\n" + data

        self.data = data

    def lookup(self, word):
        '''
        Returns every pronunciation listed for a word

        The word is lowercased and stripped of surrounding whitespace
        before searching.

        Returns a list with one (syllableList, stressList) tuple per
        dictionary entry.  syllableList is a list of syllables, each a list
        of phone strings.  stressList holds syllable indices: syllables
        marked with ' (primary stress) are placed at the front, syllables
        marked with " (secondary stress) are appended.

        Raises WordNotInISLE if the word has no entry.
        '''
        # All words must be lowercase with no extraneous whitespace
        word = word.lower().strip()

        if self.data is None:
            self._loadData()

        # Find the span of every entry for this word
        target = "\n" + word + "("
        entrySpanList = []
        searchIndex = 0
        while True:
            matchIndex = self.data.find(target, searchIndex)
            if matchIndex == -1:
                break

            # (The +1 skips over the "\n" which marks the start of the entry)
            startIndex = matchIndex + 1
            endIndex = self.data.find("\n", startIndex)
            if endIndex == -1:
                # Final entry of a file that has no trailing newline
                endIndex = len(self.data)

            entrySpanList.append((startIndex, endIndex))
            searchIndex = endIndex

        if not entrySpanList:
            raise WordNotInISLE(word)

        returnList = []
        for startIndex, endIndex in entrySpanList:
            isleWord = self.data[startIndex:endIndex]

            # The pronunciation sits between the first pair of '#' marks
            syllableTxt = isleWord.split("#")[1].strip()
            syllableTxtList = syllableTxt.split(' . ')

            # Find stress
            stressList = []
            for i, syllable in enumerate(syllableTxtList):
                if "'" in syllable:  # Primary stress
                    stressList.insert(0, i)
                elif '"' in syllable:  # Secondary stress
                    stressList.append(i)

            syllableList = [syllable.split(" ")
                            for syllable in syllableTxtList]
            returnList.append((syllableList, stressList))

        return returnList
|
||||
|
||||
|
||||
def getNumPhones(isleDict, label, maxFlag):
    '''
    Counts the syllables and phones in a label of one or more words

    The label is split on whitespace and each word is looked up with
    isleDict.lookup().  A word can have several pronunciations:
    if maxFlag is true, the longest pronunciation is used.  Otherwise, the
    average length over all pronunciations is used.

    Returns (syllableCount, phoneCount) -- note the order.
    '''
    phoneCount = 0
    syllableCount = 0
    for word in label.split():

        pronunciationList = isleDict.lookup(word)

        syllableCountList = [len(syllableList)
                             for syllableList, _ in pronunciationList]
        phoneCountList = [sum(len(phoneList) for phoneList in syllableList)
                          for syllableList, _ in pronunciationList]

        if maxFlag:
            syllableCount += max(syllableCountList)
            phoneCount += max(phoneCountList)
        else:
            # We have no reason to believe one pronunciation is more
            # likely than another, so take the average of all of them
            numPronunciations = float(len(pronunciationList))
            syllableCount += sum(syllableCountList) / numPronunciations
            phoneCount += sum(phoneCountList) / numPronunciations

    return syllableCount, phoneCount
|
||||
|
||||
|
||||
def findOODWords(isleDict, wordList):
    '''
    Returns all of the out-of-dictionary words found in a list of words

    A word is out-of-dictionary if isleDict.lookup() raises WordNotInISLE
    for it.  The result is sorted and contains no duplicates.
    '''
    oodSet = set()
    for word in wordList:
        try:
            isleDict.lookup(word)
        except WordNotInISLE:
            oodSet.add(word)

    return sorted(oodSet)
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,104 @@
|
||||
'''
|
||||
Created on Oct 22, 2014
|
||||
|
||||
@author: tmahrt
|
||||
'''
|
||||
|
||||
class OptionalFeatureError(ImportError):
    '''
    Raised when praatio, which this module depends on, cannot be imported
    '''

    def __str__(self):
        return "ERROR: You must have praatio installed to use pysle.praatTools"
|
||||
|
||||
try:
|
||||
import praatio
|
||||
except ImportError:
|
||||
raise OptionalFeatureError()
|
||||
|
||||
from pysle import isletool
|
||||
from pysle import pronunciationtools
|
||||
|
||||
|
||||
def syllabifyTextgrid(isleDict, tg, wordTierName, phoneTierName,
                      skipLabelList=None):
    '''
    Given a textgrid, syllabifies the phones in the textgrid

    skipLabelList allows you to skip labels without generating warnings
    (e.g. '', 'sp', etc.)

    The textgrid must have a word tier and a phone tier.

    Words that are not in the dictionary, or that have no phones, are
    reported on stdout and skipped.

    Returns a textgrid with only two tiers containing syllable information
    (syllabification of the phone tier and a tier marking word-stress).
    '''
    wordTier = tg.tierDict[wordTierName]
    phoneTier = tg.tierDict[phoneTierName]

    if skipLabelList is None:
        skipLabelList = []

    syllableEntryList = []
    tonicEntryList = []
    for start, stop, word in wordTier.entryList:

        if word in skipLabelList:
            continue

        # The crop() flags are passed through as in the original code;
        # see praatio for their meaning
        subPhoneTier = phoneTier.crop(start, stop, True, False)[0]

        phoneList = [entry[2] for entry in subPhoneTier.entryList
                     if entry[2] != '']

        try:
            returnList = pronunciationtools.findBestSyllabification(isleDict,
                                                                    word,
                                                                    phoneList)
        except isletool.WordNotInISLE:
            print("Word ('%s') not in ISLE -- skipping syllabification"
                  % word)
            continue
        except pronunciationtools.NullPronunciationError:
            print("Word ('%s') has no provided pronunciation" % word)
            continue

        _, syllableList, _, stressIndexList = returnList

        # Only the first listed stress index is marked on the tonic tier.
        # An empty list means no stress (function word probably).
        stressIndex = stressIndexList[0] if stressIndexList else None

        # NOTE(review): the slices below index subPhoneTier.entryList, which
        # still holds the blank-labelled entries that were filtered out of
        # phoneList above -- confirm the two always stay in step.
        i = 0
        for k, syllable in enumerate(syllableList):

            # Create the syllable tier entry
            j = len(syllable)
            stubEntryList = subPhoneTier.entryList[i:i + j]
            i += j

            # The whole syllable was deleted
            if len(stubEntryList) == 0:
                continue

            syllableStart = stubEntryList[0][0]
            syllableEnd = stubEntryList[-1][1]
            label = "-".join([entry[2] for entry in stubEntryList])

            syllableEntryList.append((syllableStart, syllableEnd, label))

            # Create the tonic tier entry
            tonicLabel = 'T' if k == stressIndex else ''
            tonicEntryList.append((syllableStart, syllableEnd, tonicLabel))

    # Create a textgrid with the two syllable-level tiers
    syllableTier = praatio.TextgridTier("syllable", syllableEntryList,
                                        praatio.INTERVAL_TIER)
    tonicTier = praatio.TextgridTier('tonic', tonicEntryList,
                                     praatio.INTERVAL_TIER)

    syllableTG = praatio.Textgrid()
    syllableTG.addTier(syllableTier)
    syllableTG.addTier(tonicTier)

    return syllableTG
|
||||
|
||||
@@ -0,0 +1,306 @@
|
||||
'''
|
||||
Created on Oct 15, 2014
|
||||
|
||||
@author: tmahrt
|
||||
'''
|
||||
|
||||
import itertools
|
||||
|
||||
from pysle import isletool
|
||||
|
||||
|
||||
|
||||
class NullPronunciationError(Exception):
    '''
    Raised when an empty pronunciation (no phones) is given for a word

    word - the word whose pronunciation was empty
    '''

    def __init__(self, word):
        # Forward the word to Exception so that .args, repr() and pickling
        # carry it as well (previously .args was left empty)
        super(NullPronunciationError, self).__init__(word)
        self.word = word

    def __str__(self):
        return "No pronunciation given for word '%s'" % self.word
|
||||
|
||||
|
||||
class NullPhoneError(Exception):
    '''
    Raised when a pronunciation list contains an empty phone
    '''

    # Was misspelled '__str', so str() never used this message
    def __str__(self):
        return "Received an empty phone in the pronunciation list"
|
||||
|
||||
|
||||
def _lcs_lens(xs, ys):
|
||||
curr = list(itertools.repeat(0, 1 + len(ys)))
|
||||
for x in xs:
|
||||
prev = list(curr)
|
||||
for i, y in enumerate(ys):
|
||||
if x == y:
|
||||
curr[i + 1] = prev[i] + 1
|
||||
else:
|
||||
curr[i + 1] = max(curr[i], prev[i + 1])
|
||||
return curr
|
||||
|
||||
|
||||
def _lcs(xs, ys):
|
||||
nx, ny = len(xs), len(ys)
|
||||
if nx == 0:
|
||||
return []
|
||||
elif nx == 1:
|
||||
return [xs[0]] if xs[0] in ys else []
|
||||
else:
|
||||
i = nx // 2
|
||||
xb, xe = xs[:i], xs[i:]
|
||||
ll_b = _lcs_lens(xb, ys)
|
||||
ll_e = _lcs_lens(xe[::-1], ys[::-1])
|
||||
_, k = max((ll_b[j] + ll_e[ny - j], j)
|
||||
for j in range(ny + 1))
|
||||
yb, ye = ys[:k], ys[k:]
|
||||
return _lcs(xb, yb) + _lcs(xe, ye)
|
||||
|
||||
|
||||
def _prepPronunciation(phoneList):
|
||||
retList = []
|
||||
for phone in phoneList:
|
||||
if 'r' in phone:
|
||||
phone = ['r',]
|
||||
try:
|
||||
phone = phone[0] # Only represent the str by its first letter
|
||||
except IndexError:
|
||||
raise NullPhoneError()
|
||||
|
||||
phone = phone.lower()
|
||||
|
||||
if phone in isletool.vowelList:
|
||||
phone = 'V'
|
||||
retList.append(phone)
|
||||
|
||||
return retList
|
||||
|
||||
|
||||
def _adjustSyllabification(adjustedPhoneList, syllableList):
|
||||
'''
|
||||
Inserts spaces into a syllable if needed
|
||||
|
||||
Originally the phone list and syllable list contained the same number
|
||||
of phones. But the adjustedPhoneList may have some insertions which are
|
||||
not accounted for in the syllableList.
|
||||
'''
|
||||
i = 0
|
||||
retSyllableList = []
|
||||
for syllable in syllableList:
|
||||
j = len(syllable)
|
||||
tmpPhoneList = adjustedPhoneList[i:i+j]
|
||||
numBlanks = -1
|
||||
phoneList = tmpPhoneList[:]
|
||||
while numBlanks != 0:
|
||||
|
||||
numBlanks = tmpPhoneList.count("''")
|
||||
if numBlanks > 0:
|
||||
tmpPhoneList = adjustedPhoneList[i+j:i+j+numBlanks]
|
||||
phoneList.extend(tmpPhoneList)
|
||||
j += numBlanks
|
||||
|
||||
for k, phone in enumerate(phoneList):
|
||||
if phone == "''":
|
||||
syllable.insert(k, "''")
|
||||
|
||||
i += j
|
||||
|
||||
retSyllableList.append(syllable)
|
||||
|
||||
return retSyllableList
|
||||
|
||||
|
||||
def _findBestPronunciation(isleDict, wordText, aPron):
    '''
    Words may have multiple candidates in ISLE; returns the 'optimal' one.

    aPron is the list of phones actually produced.  Each dictionary
    pronunciation is aligned against it; the best candidate is the one
    with the fewest misalignments.  On a tie, a candidate that carries a
    stress mark is preferred.

    Returns (isleWordList, alignedActualPronunciationList,
    alignedSyllabificationList, bestIndex).  The two aligned lists have one
    item per entry in isleWordList; bestIndex selects the winner in all
    three.

    Raises NullPronunciationError if aPron is empty.
    '''
    if len(aPron) == 0:
        raise NullPronunciationError(wordText)

    isleWordList = isleDict.lookup(wordText)

    aP = _prepPronunciation(aPron)  # Mapping to simplified phone inventory

    numDiffList = []
    withStress = set()
    alignedSyllabificationList = []
    alignedActualPronunciationList = []
    for i, (syllableList, stressList) in enumerate(isleWordList):

        iP = [phone for phoneList in syllableList for phone in phoneList]
        iP = _prepPronunciation(iP)

        alignedIP, alignedAP = alignPronunciations(iP, aP)

        # Remapping to actual phones.  The alignment only inserts "''"
        # blanks, so the non-blank items line up one-to-one, in order, with
        # aPron.  (A simplified->actual dict cannot be used here: distinct
        # phones share a simplified form, e.g. every vowel is 'V'.)
        actualPhones = iter(aPron)
        alignedAP = [phon if phon == "''" else next(actualPhones)
                     for phon in alignedAP]
        alignedActualPronunciationList.append(alignedAP)

        # Adjusting the syllabification for differences between the
        # dictionary pronunciation and the actual pronunciation.  A copy is
        # passed so that the dictionary entry itself is left untouched.
        syllableListCopy = [list(syllable) for syllable in syllableList]
        alignedSyllabification = _adjustSyllabification(alignedIP,
                                                        syllableListCopy)
        alignedSyllabificationList.append(alignedSyllabification)

        # Count the number of misalignments between the two
        numDiff = alignedIP.count("''") + alignedAP.count("''")
        numDiffList.append(numDiff)

        # Is there stress in this word
        if any("'" in phone for syllable in syllableList
               for phone in syllable):
            withStress.add(i)

    # Keep the pronunciations that had the fewest differences
    # to the actual pronunciation
    minDiff = min(numDiffList)
    candidateList = [i for i, numDiff in enumerate(numDiffList)
                     if numDiff == minDiff]

    # When there are multiple candidates that have the minimum number
    # of differences, prefer the first one that has stress in it
    stressedCandidateList = [i for i in candidateList if i in withStress]
    if stressedCandidateList:
        bestIndex = stressedCandidateList[0]
    else:
        bestIndex = candidateList[0]

    return (isleWordList, alignedActualPronunciationList,
            alignedSyllabificationList, bestIndex)
|
||||
|
||||
|
||||
def _syllabifyPhones(phoneList, syllableList, isleStressList):
|
||||
'''
|
||||
Given a phone list and a syllable list, syllabify the phones
|
||||
|
||||
Typically used by findBestSyllabification which first aligns the phoneList
|
||||
with a dictionary phoneList and then uses the dictionary syllabification
|
||||
to syllabify the input phoneList.
|
||||
'''
|
||||
try:
|
||||
stressIndex = isleStressList[0]
|
||||
except IndexError:
|
||||
stressIndex = None
|
||||
|
||||
numPhoneList = [len(syllable) for syllable in syllableList]
|
||||
|
||||
start = 0
|
||||
syllabifiedList = []
|
||||
for i, end in enumerate(numPhoneList):
|
||||
|
||||
syllable = phoneList[start:start+end]
|
||||
syllabifiedList.append(syllable)
|
||||
|
||||
start += end
|
||||
|
||||
return stressIndex, syllabifiedList
|
||||
|
||||
|
||||
def alignPronunciations(pronI, pronA):
    '''
    Align the phones in two pronunciations

    The longest common subsequence of the two lists is found and "''"
    blanks are inserted into copies of the lists so that the shared phones
    sit at the same index in both, and both have the same length.

    Returns (alignedPronI, alignedPronA).  The inputs are not modified.
    '''
    # Work on copies; blanks get inserted below
    pronI = list(pronI)
    pronA = list(pronA)

    # Find the longest sequence
    sequence = _lcs(pronI, pronA)

    # Find the index of the sequence
    # TODO: investigate ambiguous cases
    searchA = 0
    searchI = 0
    sequenceIndexListA = []
    sequenceIndexListI = []
    for phone in sequence:
        indexA = pronA.index(phone, searchA)
        indexI = pronI.index(phone, searchI)

        sequenceIndexListA.append(indexA)
        sequenceIndexListI.append(indexI)

        # Resume after the match, otherwise a repeated phone (e.g. two
        # vowels, both 'V') would be matched to the same position twice
        searchA = indexA + 1
        searchI = indexI + 1

    # An index on the tail of both will be used to create output strings
    # of the same length
    sequenceIndexListA.append(len(pronA))
    sequenceIndexListI.append(len(pronI))

    # Fill in any blanks such that the sequential items have the same
    # index and the two strings are the same length
    for n in range(len(sequenceIndexListA)):
        indexA = sequenceIndexListA[n]
        indexI = sequenceIndexListI[n]
        if indexA < indexI:
            numBlanks = indexI - indexA
            pronA[indexA:indexA] = ["''"] * numBlanks
            # Everything from here on has moved right by numBlanks
            sequenceIndexListA = [val + numBlanks
                                  for val in sequenceIndexListA]
        elif indexA > indexI:
            numBlanks = indexA - indexI
            pronI[indexI:indexI] = ["''"] * numBlanks
            sequenceIndexListI = [val + numBlanks
                                  for val in sequenceIndexListI]

    return pronI, pronA
|
||||
|
||||
|
||||
def findBestSyllabification(isleDict, wordText, actualPronunciationList):
    '''
    Find the best syllabification for a word

    First find the closest pronunciation to a given pronunciation.  Then
    take the syllabification for that pronunciation and map it onto the
    input pronunciation.

    Returns (stressedSyllable, syllableList, syllabification, stressedIndex):
    stressedSyllable - first index in the stress list, or None if empty
    syllableList - the input pronunciation (aligned), cut into syllables
    syllabification - the syllables of the chosen dictionary entry
    stressedIndex - the stress list of the chosen dictionary entry
    '''
    retList = _findBestPronunciation(isleDict, wordText,
                                     actualPronunciationList)
    isleWordList, alignedAPronList, alignedSyllableList, bestIndex = retList

    alignedPhoneList = alignedAPronList[bestIndex]
    alignedSyllables = alignedSyllableList[bestIndex]
    syllabification, stressedIndex = isleWordList[bestIndex]

    stressedSyllable, syllableList = _syllabifyPhones(alignedPhoneList,
                                                      alignedSyllables,
                                                      stressedIndex)

    return stressedSyllable, syllableList, syllabification, stressedIndex
|
||||
|
||||
|
||||
def findClosestPronunciation(isleDict, wordText, aPron):
    '''
    Find the closest dictionary pronunciation to a provided pronunciation

    Returns the chosen entry of isleDict.lookup(wordText).
    '''
    # _findBestPronunciation returns four items; only the first and last
    # are needed here (unpacking into three names raised ValueError)
    retList = _findBestPronunciation(isleDict, wordText, aPron)
    isleWordList = retList[0]
    bestIndex = retList[3]

    return isleWordList[bestIndex]
|
||||
|
||||
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
'''
|
||||
Created on Oct 15, 2014
|
||||
|
||||
@author: tmahrt
|
||||
'''
|
||||
from distutils.core import setup
|
||||
# Read the long description up front so the file handle is closed promptly
with open('README.rst', 'r') as readmeFile:
    longDescription = readmeFile.read()

setup(name='pysle',
      version='1.0.0',
      author='Tim Mahrt',
      author_email='timmahrt@gmail.com',
      package_dir={'pysle': 'pysle'},
      packages=['pysle'],
      license='LICENSE',
      long_description=longDescription,
      # install_requires=[], # No requirements! # requires 'from setuptools import setup'
      )
|
||||
@@ -0,0 +1,44 @@
|
||||
'''
|
||||
Created on Oct 22, 2014
|
||||
|
||||
@author: tmahrt
|
||||
|
||||
Basic examples of common usage.
|
||||
'''
|
||||
|
||||
from pysle import isletool
|
||||
from pysle import pronunciationtools
|
||||
|
||||
# In this first example we look up the syllabification of a word and get its
# stress information.

searchWord = 'pumpkins'
isleDict = isletool.LexicalTool('islev2.txt')
lookupResults = isleDict.lookup(searchWord)

# Each lookup result is a (syllableList, stressList) pair
firstEntry = lookupResults[0]
firstSyllableList = firstEntry[0]
firstStressList = firstEntry[1]

# The print calls take a single argument so that they behave the same
# under Python 2 and Python 3
print(searchWord)
print("%s %s" % (firstSyllableList, firstStressList))  # Syllables, then indices of the stressed syllables


# Here we determine the syllabification of a word, as it was said.
# (Of course, this is just a guess)
print('-' * 50)

searchWord = 'another'
anotherPhoneList = ['n', '@', 'th', 'r']

returnList = pronunciationtools.findBestSyllabification(isleDict,
                                                        searchWord,
                                                        anotherPhoneList)

stressedSyllable, syllableList, syllabification, stressedIndex = returnList

print(searchWord)
print(anotherPhoneList)
print(syllableList)  # We can see the first syllable was elided
|
||||
|
||||
|
||||
@@ -0,0 +1,140 @@
|
||||
File type = "ooTextFile short"
|
||||
"TextGrid"
|
||||
|
||||
0.0124716553288
|
||||
3.25510204082
|
||||
<exists>
|
||||
2
|
||||
"IntervalTier"
|
||||
"word"
|
||||
0.0124716553288
|
||||
3.25510204082
|
||||
8
|
||||
0.0124716553288
|
||||
0.052380952381
|
||||
""
|
||||
0.052380952381
|
||||
0.491383219955
|
||||
"INNATE"
|
||||
0.491383219955
|
||||
0.591156462585
|
||||
""
|
||||
0.591156462585
|
||||
1.31950113379
|
||||
"PUMPKINS"
|
||||
1.31950113379
|
||||
1.96802721088
|
||||
"PARLEY"
|
||||
1.96802721088
|
||||
2.12766439909
|
||||
"WITH"
|
||||
2.12766439909
|
||||
3.09546485261
|
||||
"PORCUPINES"
|
||||
3.09546485261
|
||||
3.25510204082
|
||||
""
|
||||
"IntervalTier"
|
||||
"phone"
|
||||
0.0124716553288
|
||||
3.25510204082
|
||||
33
|
||||
0.0124716553288
|
||||
0.052380952381
|
||||
""
|
||||
0.052380952381
|
||||
0.1820861678
|
||||
"IH2"
|
||||
0.1820861678
|
||||
0.241950113379
|
||||
"N"
|
||||
0.241950113379
|
||||
0.451473922902
|
||||
"EY1"
|
||||
0.451473922902
|
||||
0.491383219955
|
||||
"T"
|
||||
0.491383219955
|
||||
0.591156462585
|
||||
""
|
||||
0.591156462585
|
||||
0.651020408163
|
||||
"P"
|
||||
0.651020408163
|
||||
0.700907029478
|
||||
"AH1"
|
||||
0.700907029478
|
||||
0.800680272109
|
||||
"M"
|
||||
0.800680272109
|
||||
0.830612244898
|
||||
"P"
|
||||
0.830612244898
|
||||
0.910430839002
|
||||
"K"
|
||||
0.910430839002
|
||||
1.04013605442
|
||||
"AH0"
|
||||
1.04013605442
|
||||
1.15986394558
|
||||
"N"
|
||||
1.15986394558
|
||||
1.31950113379
|
||||
"Z"
|
||||
1.31950113379
|
||||
1.43922902494
|
||||
"P"
|
||||
1.43922902494
|
||||
1.53900226757
|
||||
"AA1"
|
||||
1.53900226757
|
||||
1.60884353741
|
||||
"R"
|
||||
1.60884353741
|
||||
1.73854875283
|
||||
"L"
|
||||
1.73854875283
|
||||
1.96802721088
|
||||
"IY0"
|
||||
1.96802721088
|
||||
2.00793650794
|
||||
"W"
|
||||
2.00793650794
|
||||
2.03786848073
|
||||
"IH1"
|
||||
2.03786848073
|
||||
2.12766439909
|
||||
"TH"
|
||||
2.12766439909
|
||||
2.23741496599
|
||||
"P"
|
||||
2.23741496599
|
||||
2.30725623583
|
||||
"AO1"
|
||||
2.30725623583
|
||||
2.36712018141
|
||||
"R"
|
||||
2.36712018141
|
||||
2.42698412698
|
||||
"K"
|
||||
2.42698412698
|
||||
2.4768707483
|
||||
"Y"
|
||||
2.4768707483
|
||||
2.50680272109
|
||||
"AH0"
|
||||
2.50680272109
|
||||
2.57664399093
|
||||
"P"
|
||||
2.57664399093
|
||||
2.80612244898
|
||||
"AY2"
|
||||
2.80612244898
|
||||
2.94580498866
|
||||
"N"
|
||||
2.94580498866
|
||||
3.09546485261
|
||||
"Z"
|
||||
3.09546485261
|
||||
3.25510204082
|
||||
""
|
||||
Binary file not shown.
@@ -0,0 +1,36 @@
|
||||
'''
|
||||
Created on Oct 22, 2014
|
||||
|
||||
@author: tmahrt
|
||||
|
||||
This example was originally meant to show how you can use the library
|
||||
to modify a textgrid. It still shows that, but all that code is now in
|
||||
the main library (pysle.praattools.syllabifyTextgrid)
|
||||
|
||||
This snippet shows you how to use this function.
|
||||
'''
|
||||
|
||||
from os.path import join
|
||||
|
||||
import praatio
|
||||
from pysle import isletool
|
||||
from pysle import praattools
|
||||
|
||||
# Input and output files are resolved relative to the current working
# directory (the hard-coded developer path that used to override this
# has been removed)
path = join('.', 'files')

tg = praatio.openTextGrid(join(path, "pumpkins.TextGrid"))

# Point this at your copy of the ISLE dictionary file
isleDict = isletool.LexicalTool(join('.', 'islev2.txt'))

# Get the syllabification tiers and add it to the textgrid
syllableTG = praattools.syllabifyTextgrid(isleDict, tg, "word", "phone",
                                          skipLabelList=["", ])
tg.addTier(syllableTG.tierDict["syllable"])
tg.addTier(syllableTG.tierDict["tonic"])

tg.save(join(path, "pumpkins_with_syllables.TextGrid"))
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user