diff --git a/pysle/praattools.py b/pysle/praattools.py index df59655..0db6803 100644 --- a/pysle/praattools.py +++ b/pysle/praattools.py @@ -4,6 +4,7 @@ Created on Oct 22, 2014 @author: tmahrt ''' + class OptionalFeatureError(ImportError): def __str__(self): @@ -18,7 +19,7 @@ from pysle import isletool from pysle import pronunciationtools -def syllabifyTextgrid(isleDict, tg, wordTierName, phoneTierName, +def syllabifyTextgrid(isleDict, tg, wordTierName, phoneTierName, skipLabelList=None): ''' Given a textgrid, syllabifies the phones in the textgrid @@ -34,7 +35,7 @@ def syllabifyTextgrid(isleDict, tg, wordTierName, phoneTierName, wordTier = tg.tierDict[wordTierName] phoneTier = tg.tierDict[phoneTierName] - if skipLabelList == None: + if skipLabelList is None: skipLabelList = [] syllableEntryList = [] @@ -46,11 +47,13 @@ def syllabifyTextgrid(isleDict, tg, wordTierName, phoneTierName, subPhoneTier = phoneTier.crop(start, stop, True, False)[0] - phoneList = [phone for startP, endP, phone in subPhoneTier.entryList if phone != ''] + # entry = (start, stop, phone) + phoneList = [entry[2] for entry in subPhoneTier.entryList + if entry[2] != ''] try: - returnList = pronunciationtools.findBestSyllabification(isleDict, - word, + returnList = pronunciationtools.findBestSyllabification(isleDict, + word, phoneList) except isletool.WordNotInISLE: print "Word ('%s') not is isle -- skipping syllabification" % word @@ -58,8 +61,9 @@ def syllabifyTextgrid(isleDict, tg, wordTierName, phoneTierName, except (pronunciationtools.NullPronunciationError): print "Word ('%s') has no provided pronunciation" % word continue - - stressedSyllable, syllableList, syllabification, stressIndexList = returnList + + syllableList = returnList[1] + stressIndexList = returnList[3] i = 0 # print syllableList @@ -67,7 +71,7 @@ def syllabifyTextgrid(isleDict, tg, wordTierName, phoneTierName, # Create the syllable tier entry j = len(syllable) - stubEntryList = subPhoneTier.entryList[i:i+j] + stubEntryList = subPhoneTier.entryList[i:i + j] i += j # The whole syllable was deleted @@ -76,21 +80,21 @@ def syllabifyTextgrid(isleDict, tg, wordTierName, phoneTierName, syllableStart = stubEntryList[0][0] syllableEnd = stubEntryList[-1][1] - label = "-".join([phone for start, end, phone in stubEntryList]) + label = "-".join([entry[2] for entry in stubEntryList]) - syllableEntryList.append( (syllableStart, syllableEnd, label) ) + syllableEntryList.append((syllableStart, syllableEnd, label)) # Create the tonic tier entry try: stressIndex = stressIndexList[0] except IndexError: - stressIndex = None # Function word probably + stressIndex = None # Function word probably tonicLabel = '' if k == stressIndex: tonicLabel = 'T' - tonicEntryList.append( (syllableStart, syllableEnd, tonicLabel) ) + tonicEntryList.append((syllableStart, syllableEnd, tonicLabel)) # Create a textgrid with the two syllable-level tiers syllableTier = praatio.IntervalTier("syllable", syllableEntryList) @@ -101,4 +105,3 @@ def syllabifyTextgrid(isleDict, tg, wordTierName, phoneTierName, syllableTG.addTier(tonicTier) return syllableTG - diff --git a/pysle/pronunciationtools.py b/pysle/pronunciationtools.py index ca6f91d..1f56dd8 100644 --- a/pysle/pronunciationtools.py +++ b/pysle/pronunciationtools.py @@ -9,10 +9,10 @@ import itertools from pysle import isletool - class NullPronunciationError(Exception): def __init__(self, word): + super(NullPronunciationError, self).__init__() self.word = word def __str__(self): @@ -49,7 +49,7 @@ def _lcs(xs, ys): ll_b = _lcs_lens(xb, ys) ll_e = _lcs_lens(xe[::-1], ys[::-1]) _, k = max((ll_b[j] + ll_e[ny - j], j) - for j in range(ny + 1)) + for j in range(ny + 1)) yb, ye = ys[:k], ys[k:] return _lcs(xb, yb) + _lcs(xe, ye) @@ -58,14 +58,13 @@ def _prepPronunciation(phoneList): retList = [] for phone in phoneList: if 'r' in phone: - phone = ['r',] + phone = ['r', ] try: - phone = phone[0] # Only represent the str by its first letter + phone = phone[0] # Only represent the string by its first letter + phone = phone.lower() except IndexError: raise NullPhoneError() - phone = phone.lower() - if phone in isletool.vowelList: phone = 'V' retList.append(phone) @@ -85,14 +84,14 @@ def _adjustSyllabification(adjustedPhoneList, syllableList): retSyllableList = [] for syllable in syllableList: j = len(syllable) - tmpPhoneList = adjustedPhoneList[i:i+j] + tmpPhoneList = adjustedPhoneList[i:i + j] numBlanks = -1 phoneList = tmpPhoneList[:] while numBlanks != 0: numBlanks = tmpPhoneList.count("''") if numBlanks > 0: - tmpPhoneList = adjustedPhoneList[i+j:i+j+numBlanks] + tmpPhoneList = adjustedPhoneList[i + j:i + j + numBlanks] phoneList.extend(tmpPhoneList) j += numBlanks @@ -116,27 +115,32 @@ def _findBestPronunciation(isleDict, wordText, aPron): isleWordList = isleDict.lookup(wordText) - aP = _prepPronunciation(aPron) # Mapping to simplified phone inventory + aP = _prepPronunciation(aPron) # Mapping to simplified phone inventory - origPronDict = dict((newPron,oldPron) for newPron, oldPron in zip(aP, aPron)) + origPronDict = dict((newPron, oldPron) + for newPron, oldPron in zip(aP, aPron)) numDiffList = [] withStress = [] i = 0 alignedSyllabificationList = [] alignedActualPronunciationList = [] - for syllableList, stressList in isleWordList: + for wordTuple in isleWordList: + syllableList = wordTuple[0] # syllableList, stressList iP = [phone for phoneList in syllableList for phone in phoneList] iP = _prepPronunciation(iP) alignedIP, alignedAP = alignPronunciations(iP, aP) - alignedAP = [origPronDict.get(phon, "''") for phon in alignedAP] # Remapping to actual phones + + # Remapping to actual phones + alignedAP = [origPronDict.get(phon, "''") for phon in alignedAP] alignedActualPronunciationList.append(alignedAP) # Adjusting the syllabification for differences between the dictionary # pronunciation and the actual pronunciation - alignedSyllabification = _adjustSyllabification(alignedIP, syllableList) + alignedSyllabification = _adjustSyllabification(alignedIP, + syllableList) alignedSyllabificationList.append(alignedSyllabification) # Count the number of misalignments between the two @@ -147,7 +151,7 @@ def _findBestPronunciation(isleDict, wordText, aPron): hasStress = False for syllable in syllableList: for phone in syllable: - hasStress = "'" in phone or hasStress + hasStress = "'" in phone or hasStress if hasStress: withStress.append(i) @@ -164,16 +168,16 @@ def _findBestPronunciation(isleDict, wordText, aPron): for i, numDiff in enumerate(numDiffList): if numDiff != minDiff: continue - if bestIndex == None: + if bestIndex is None: bestIndex = i bestIsStressed = i in withStress else: if not bestIsStressed and i in withStress: bestIndex = i bestIsStressed = True - - return isleWordList, alignedActualPronunciationList, alignedSyllabificationList, bestIndex + return (isleWordList, alignedActualPronunciationList, + alignedSyllabificationList, bestIndex) def _syllabifyPhones(phoneList, syllableList, isleStressList): @@ -193,9 +197,9 @@ def _syllabifyPhones(phoneList, syllableList, isleStressList): start = 0 syllabifiedList = [] - for i, end in enumerate(numPhoneList): + for end in numPhoneList: - syllable = phoneList[start:start+end] + syllable = phoneList[start:start + end] syllabifiedList.append(syllable) start += end @@ -212,21 +216,6 @@ def alignPronunciations(pronI, pronA): pronI = [char for char in pronI] pronA = [char for char in pronA] - # -- allow for some flexibility in pronunciation - correctionsTuple = (('d', 't'), ('t', 'd'), ('s', 'z'), ('z', 's'), - ('m', 'n'), ('n', 'm'),) - - doMatch = lambda i, a: ((i == a) or - ((i, a) in correctionsTuple)) - - def matchExists(targetPhone, pron): - match = False - for phone in pron: - match = match or doMatch(targetPhone, phone) - return match - - # Remove vowels - # Remove any elements not in the other list (but maintain order) pronITmp = pronI pronATmp = pronA @@ -244,7 +233,7 @@ def alignPronunciations(pronI, pronA): startA = pronA.index(phone, startA) startI = pronI.index(phone, startI) - sequenceIndexListA.append(startA) + sequenceIndexListA.append(startA) sequenceIndexListI.append(startI) # An index on the tail of both will be used to create output strings @@ -257,14 +246,16 @@ def alignPronunciations(pronI, pronA): for x in xrange(len(sequenceIndexListA)): indexA = sequenceIndexListA[x] indexI = sequenceIndexListI[x] - if indexA < indexI : + if indexA < indexI: for x in xrange(indexI - indexA): pronA.insert(indexA, "''") - sequenceIndexListA = [val + indexI - indexA for val in sequenceIndexListA] + sequenceIndexListA = [val + indexI - indexA + for val in sequenceIndexListA] elif indexA > indexI: for x in xrange(indexA - indexI): pronI.insert(indexI, "''") - sequenceIndexListI = [val + indexA - indexI for val in sequenceIndexListI] + sequenceIndexListI = [val + indexA - indexI + for val in sequenceIndexListI] return pronI, pronA @@ -273,11 +264,12 @@ def findBestSyllabification(isleDict, wordText, actualPronunciationList): ''' Find the best syllabification for a word - First find the closest pronunciation to a given pronunciation. Then take - the syllabification for that pronunciation and map it onto the + First find the closest pronunciation to a given pronunciation. Then take + the syllabification for that pronunciation and map it onto the input pronunciation. ''' - retList = _findBestPronunciation(isleDict, wordText, actualPronunciationList) + retList = _findBestPronunciation(isleDict, wordText, + actualPronunciationList) isleWordList, alignedAPronList, alignedSyllableList, bestIndex = retList alignedPhoneList = alignedAPronList[bestIndex] @@ -285,8 +277,8 @@ def findBestSyllabification(isleDict, wordText, actualPronunciationList): syllabification = isleWordList[bestIndex][0] stressedIndex = isleWordList[bestIndex][1] - stressedSyllable, syllableList = _syllabifyPhones(alignedPhoneList, - alignedSyllables, + stressedSyllable, syllableList = _syllabifyPhones(alignedPhoneList, + alignedSyllables, stressedIndex) return stressedSyllable, syllableList, syllabification, stressedIndex @@ -298,9 +290,7 @@ def findClosestPronunciation(isleDict, wordText, aPron): ''' retList = _findBestPronunciation(isleDict, wordText, aPron) - isleWordList, actualPronunciationList, bestIndex = retList + isleWordList = retList[0] + bestIndex = retList[3] return isleWordList[bestIndex] - - - diff --git a/test/syllabifyTextgrid.py b/test/syllabifyTextgrid.py index edbafb0..3793774 100644 --- a/test/syllabifyTextgrid.py +++ b/test/syllabifyTextgrid.py @@ -20,7 +20,10 @@ path = join('.', 'files') path = "/Users/tmahrt/Dropbox/workspace/pysle/test/files" tg = praatio.openTextGrid(join(path, "pumpkins.TextGrid")) -isleDict = isletool.LexicalTool('/Users/tmahrt/Dropbox/workspace/pysle/test/islev2.txt') # Needs the full path to the file + +# Needs the full path to the file +islevPath = '/Users/tmahrt/Dropbox/workspace/pysle/test/islev2.txt' +isleDict = isletool.LexicalTool(islevPath) # Get the syllabification tiers and add it to the textgrid syllableTG = praattools.syllabifyTextgrid(isleDict, tg, "word", "phone",