mirror of
https://github.com/wassname/pysle.git
synced 2026-06-27 16:10:05 +08:00
Compare commits
22 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| a36d7c8d17 | |||
| 65ac652dea | |||
| ee08c347d5 | |||
| c16c68a6ac | |||
| bc4f19c74c | |||
| c19cde7165 | |||
| 38ebc7f3f9 | |||
| 102e8a7488 | |||
| 6b786cd00a | |||
| fb1e638cb8 | |||
| e5acdfce30 | |||
| d47c312de7 | |||
| 303d9bfcf2 | |||
| 9c0ccd5748 | |||
| 393182500e | |||
| 985d68da6c | |||
| 0e53ed654e | |||
| ce633d0590 | |||
| e2a2025f5b | |||
| c10e3cf05f | |||
| 06222bf176 | |||
| 6353e0172e |
+53
-5
@@ -3,6 +3,9 @@
|
||||
pysle
|
||||
---------
|
||||
|
||||
.. image:: https://img.shields.io/badge/license-MIT-blue.svg?
|
||||
:target: http://opensource.org/licenses/MIT
|
||||
|
||||
Pronounced like 'p' + 'isle'.
|
||||
|
||||
An interface for the ISLEX (international speech lexicon) dictionary,
|
||||
@@ -37,6 +40,26 @@ What can you do with this library?
|
||||
pysle.syllabifyTextgrid(isleDict, praatioTextgrid, "words", "phones")
|
||||
|
||||
|
||||
Major revisions
|
||||
================
|
||||
|
||||
Ver 1.3 (March 15, 2016)
|
||||
|
||||
- added indices for stressed vowels
|
||||
|
||||
Ver 1.2 (June 20, 2015)
|
||||
|
||||
- Python 3.x support
|
||||
|
||||
Ver 1.1 (January 30, 2015)
|
||||
|
||||
- word lookup ~65 times faster
|
||||
|
||||
Ver 1.0 (October 23, 2014)
|
||||
|
||||
- first public release.
|
||||
|
||||
|
||||
Requirements
|
||||
================
|
||||
|
||||
@@ -46,10 +69,12 @@ Requirements
|
||||
`ISLEX project page <http://www.isle.illinois.edu/sst/data/dict/>`_
|
||||
|
||||
`Direct link to the ISLEX file used in this project
|
||||
<http://www.isle.illinois.edu/sst/data/dict/islev2.txt)>`_ (islev2.txt)
|
||||
<http://www.isle.illinois.edu/sst/data/dict/islex/islev2.txt>`_ (islev2.txt)
|
||||
|
||||
- ``Python 2.7.*`` or above
|
||||
|
||||
- ``Python 3.3.*`` or above
|
||||
|
||||
- The `praatIO <https://github.com/timmahrt/praatIO>`_ library is required IF
|
||||
you want to use the textgrid functionality. It is not required
|
||||
for normal use.
|
||||
@@ -58,10 +83,12 @@ Requirements
|
||||
Installation
|
||||
================
|
||||
|
||||
From a command-line shell, navigate to the directory this is located in
|
||||
and type::
|
||||
If you are on Windows, you can use the installer found here (check that it is up to date, though)
|
||||
`Windows installer <http://www.timmahrt.com/python_installers>`_
|
||||
|
||||
python setup.py install
|
||||
Otherwise, to manually install, after downloading the source from github, from a command-line shell, navigate to the directory containing setup.py and type::
|
||||
|
||||
python setup.py install
|
||||
|
||||
If python is not in your path, you'll need to enter the full path e.g.::
|
||||
|
||||
@@ -93,5 +120,26 @@ and another::
|
||||
>> [["''"], ['n', '@'], ['th', 'r']]
|
||||
|
||||
|
||||
Please see \\test for example usage
|
||||
Please see \\examples for example usage
|
||||
|
||||
|
||||
Citing pysle
|
||||
===============
|
||||
|
||||
Pysle is general-purpose code and doesn't need to be cited
|
||||
(you should cite the
|
||||
`ISLEX project <http://www.isle.illinois.edu/sst/data/dict/islex/index.shtml>`_
|
||||
instead) but if you would like to, it can be cited like so:
|
||||
|
||||
Tim Mahrt. Pysle. https://github.com/timmahrt/pysle, 2016.
|
||||
|
||||
|
||||
Acknowledgements
|
||||
================
|
||||
|
||||
Development of Pysle was possible thanks to NSF grant **IIS 07-03624**
|
||||
to Jennifer Cole and Mark Hasegawa-Johnson, NSF grant **BCS 12-51343**
|
||||
to Jennifer Cole, José Hualde, and Caroline Smith, and
|
||||
to the A*MIDEX project (n° **ANR-11-IDEX-0001-02**) to James Sneed German
|
||||
funded by the Investissements d’Avenir French Government program, managed
|
||||
by the French National Research Agency (ANR).
|
||||
|
||||
+36
-35
@@ -5,41 +5,41 @@ Created on Oct 11, 2012
|
||||
'''
|
||||
|
||||
|
||||
vowelList = ['a', '@', 'e', 'i', 'o', 'u', '^', '&', '>',]
|
||||
vowelList = ['a', '@', 'e', 'i', 'o', 'u', '^', '&', '>', ]
|
||||
|
||||
|
||||
class WordNotInISLE(Exception):
|
||||
|
||||
def __init__(self, word):
|
||||
super(WordNotInISLE, self).__init__()
|
||||
self.word = word
|
||||
|
||||
def __str__(self):
|
||||
return "Word '%s' not in ISLE dictionary. Please add it to continue." % self.word
|
||||
return ("Word '%s' not in ISLE dictionary. "
|
||||
"Please add it to continue." % self.word)
|
||||
|
||||
|
||||
class LexicalTool():
|
||||
|
||||
|
||||
def __init__(self, islePath):
|
||||
self.islePath = islePath
|
||||
self.data = self._buildDict()
|
||||
|
||||
|
||||
def _buildDict(self):
|
||||
'''
|
||||
Builds the isle textfile into a dictionary for fast searching
|
||||
'''
|
||||
dict = {}
|
||||
wordList = open(self.islePath, "r").read().split("\n")
|
||||
lexDict = {}
|
||||
wordList = [line.rstrip('\n') for line in open(self.islePath, "rU")]
|
||||
|
||||
for row in wordList:
|
||||
word, pronunciation = row.split(" ", 1)
|
||||
word = word.split("(")[0]
|
||||
|
||||
dict.setdefault(word, [])
|
||||
dict[word].append(pronunciation)
|
||||
lexDict.setdefault(word, [])
|
||||
lexDict[word].append(pronunciation)
|
||||
|
||||
return dict
|
||||
|
||||
return lexDict
|
||||
|
||||
def lookup(self, word):
|
||||
'''
|
||||
@@ -52,10 +52,10 @@ class LexicalTool():
|
||||
|
||||
pronList = self.data.get(word, None)
|
||||
|
||||
if pronList == None:
|
||||
if pronList is None:
|
||||
raise WordNotInISLE(word)
|
||||
else:
|
||||
pronList = [_parsePronunciation(pronunciationStr)
|
||||
pronList = [_parsePronunciation(pronunciationStr)
|
||||
for pronunciationStr in pronList]
|
||||
|
||||
return pronList
|
||||
@@ -65,31 +65,32 @@ def _parsePronunciation(pronunciationStr):
|
||||
'''
|
||||
Parses the pronunciation string
|
||||
|
||||
Returns the list of syllables and a list of primary and
|
||||
secondary stress locations
|
||||
Returns the list of syllables and a list of primary and
|
||||
secondary stress locations
|
||||
'''
|
||||
syllableTxt = pronunciationStr.split("#")[1].strip()
|
||||
syllableList = [x for x in syllableTxt.split(' . ')]
|
||||
syllableList = [x.split() for x in syllableTxt.split(' . ')]
|
||||
|
||||
# Find stress
|
||||
stressList = []
|
||||
stressedSyllableList = []
|
||||
stressedPhoneList = []
|
||||
for i, syllable in enumerate(syllableList):
|
||||
# Primary stress
|
||||
if "'" in syllable:
|
||||
stressList.insert(0, i)
|
||||
# Secondary stress
|
||||
elif '"' in syllable:
|
||||
stressList.append(i)
|
||||
for j, phone in enumerate(syllable):
|
||||
if "'" in phone:
|
||||
stressedSyllableList.insert(0, i)
|
||||
stressedPhoneList.insert(0, j)
|
||||
break
|
||||
elif '"' in phone:
|
||||
stressedSyllableList.insert(i)
|
||||
stressedPhoneList.insert(j)
|
||||
|
||||
syllableList = [x.split(" ") for x in syllableList]
|
||||
|
||||
return syllableList, stressList
|
||||
return syllableList, stressedSyllableList, stressedPhoneList
|
||||
|
||||
|
||||
def getNumPhones(isleDict, label, maxFlag):
|
||||
'''
|
||||
|
||||
If maxFlag=True, use the longest pronunciation. Otherwise, take the
|
||||
If maxFlag=True, use the longest pronunciation. Otherwise, take the
|
||||
average length.
|
||||
'''
|
||||
phoneCount = 0
|
||||
@@ -99,25 +100,28 @@ def getNumPhones(isleDict, label, maxFlag):
|
||||
phoneListOfLists = isleDict.lookup(word)
|
||||
|
||||
syllableCountList = []
|
||||
for syllableList, stressIndex in phoneListOfLists:
|
||||
for row in phoneListOfLists:
|
||||
syllableList = row[0]
|
||||
syllableCountList.append(len(syllableList))
|
||||
|
||||
# In ISLE, there can be multiple pronunciations for each word
|
||||
# as we have no reason to believe one pronunciation is more
|
||||
# likely than another, we take the average of all of them
|
||||
phoneCountList = []
|
||||
for syllableList, stressIndex in phoneListOfLists:
|
||||
phoneCountList.append(len([phon for phoneList in syllableList for
|
||||
for row in phoneListOfLists:
|
||||
syllableList = row[0]
|
||||
phoneCountList.append(len([phon for phoneList in syllableList for
|
||||
phon in phoneList]))
|
||||
|
||||
# The average number of phones for all possible pronunciations
|
||||
# of this word
|
||||
if maxFlag == True:
|
||||
if maxFlag is True:
|
||||
syllableCount += max(syllableCountList)
|
||||
phoneCount += max(phoneCountList)
|
||||
else:
|
||||
syllableCount += sum(syllableCountList) / float(len(syllableCountList))
|
||||
phoneCount += sum(phoneCountList) / float(len(phoneCountList))
|
||||
syllableCount += (sum(syllableCountList) /
|
||||
float(len(syllableCountList)))
|
||||
phoneCount += sum(phoneCountList) / float(len(phoneCountList))
|
||||
|
||||
return syllableCount, phoneCount
|
||||
|
||||
@@ -137,6 +141,3 @@ def findOODWords(isleDict, wordList):
|
||||
oodList.sort()
|
||||
|
||||
return oodList
|
||||
|
||||
|
||||
|
||||
+51
-31
@@ -4,13 +4,14 @@ Created on Oct 22, 2014
|
||||
@author: tmahrt
|
||||
'''
|
||||
|
||||
|
||||
class OptionalFeatureError(ImportError):
|
||||
|
||||
def __str__(self):
|
||||
return "ERROR: You must have praatio installed to use pysle.praatTools"
|
||||
|
||||
try:
|
||||
import praatio
|
||||
from praatio import tgio
|
||||
except ImportError:
|
||||
raise OptionalFeatureError()
|
||||
|
||||
@@ -18,7 +19,7 @@ from pysle import isletool
|
||||
from pysle import pronunciationtools
|
||||
|
||||
|
||||
def syllabifyTextgrid(isleDict, tg, wordTierName, phoneTierName,
|
||||
def syllabifyTextgrid(isleDict, tg, wordTierName, phoneTierName,
|
||||
skipLabelList=None):
|
||||
'''
|
||||
Given a textgrid, syllabifies the phones in the textgrid
|
||||
@@ -34,11 +35,12 @@ def syllabifyTextgrid(isleDict, tg, wordTierName, phoneTierName,
|
||||
wordTier = tg.tierDict[wordTierName]
|
||||
phoneTier = tg.tierDict[phoneTierName]
|
||||
|
||||
if skipLabelList == None:
|
||||
if skipLabelList is None:
|
||||
skipLabelList = []
|
||||
|
||||
syllableEntryList = []
|
||||
tonicEntryList = []
|
||||
tonicSEntryList = []
|
||||
tonicPEntryList = []
|
||||
for start, stop, word in wordTier.entryList:
|
||||
|
||||
if word in skipLabelList:
|
||||
@@ -46,28 +48,43 @@ def syllabifyTextgrid(isleDict, tg, wordTierName, phoneTierName,
|
||||
|
||||
subPhoneTier = phoneTier.crop(start, stop, True, False)[0]
|
||||
|
||||
phoneList = [phone for startP, endP, phone in subPhoneTier.entryList if phone != '']
|
||||
# entry = (start, stop, phone)
|
||||
phoneList = [entry[2] for entry in subPhoneTier.entryList
|
||||
if entry[2] != '']
|
||||
|
||||
try:
|
||||
returnList = pronunciationtools.findBestSyllabification(isleDict,
|
||||
word,
|
||||
returnList = pronunciationtools.findBestSyllabification(isleDict,
|
||||
word,
|
||||
phoneList)
|
||||
except isletool.WordNotInISLE:
|
||||
print "Word ('%s') not is isle -- skipping syllabification" % word
|
||||
print("Word ('%s') not is isle -- skipping syllabification" % word)
|
||||
continue
|
||||
except (pronunciationtools.NullPronunciationError):
|
||||
print "Word ('%s') has no provided pronunciation" % word
|
||||
print("Word ('%s') has no provided pronunciation" % word)
|
||||
continue
|
||||
|
||||
stressedSyllable, syllableList, syllabification, stressIndexList = returnList
|
||||
|
||||
syllableList = returnList[1]
|
||||
stressedSyllableIndexList = returnList[3]
|
||||
stressedPhoneIndexList = returnList[4]
|
||||
flattenedPhoneIndexList = returnList[5]
|
||||
|
||||
try:
|
||||
stressI = stressedSyllableIndexList[0]
|
||||
stressJ = stressedPhoneIndexList[0]
|
||||
except IndexError:
|
||||
stressI = None # Function word probably
|
||||
stressJ = None #
|
||||
|
||||
if stressI is not None:
|
||||
syllableList[stressI][stressJ] += "'"
|
||||
|
||||
i = 0
|
||||
# print syllableList
|
||||
# print(syllableList)
|
||||
for k, syllable in enumerate(syllableList):
|
||||
|
||||
# Create the syllable tier entry
|
||||
j = len(syllable)
|
||||
stubEntryList = subPhoneTier.entryList[i:i+j]
|
||||
stubEntryList = subPhoneTier.entryList[i:i + j]
|
||||
i += j
|
||||
|
||||
# The whole syllable was deleted
|
||||
@@ -76,29 +93,32 @@ def syllabifyTextgrid(isleDict, tg, wordTierName, phoneTierName,
|
||||
|
||||
syllableStart = stubEntryList[0][0]
|
||||
syllableEnd = stubEntryList[-1][1]
|
||||
label = "-".join([phone for start, end, phone in stubEntryList])
|
||||
label = "-".join([entry[2] for entry in stubEntryList])
|
||||
|
||||
syllableEntryList.append( (syllableStart, syllableEnd, label) )
|
||||
syllableEntryList.append((syllableStart, syllableEnd, label))
|
||||
|
||||
# Create the tonic tier entry
|
||||
try:
|
||||
stressIndex = stressIndexList[0]
|
||||
except IndexError:
|
||||
stressIndex = None # Function word probably
|
||||
|
||||
tonicLabel = ''
|
||||
if k == stressIndex:
|
||||
tonicLabel = 'T'
|
||||
|
||||
tonicEntryList.append( (syllableStart, syllableEnd, tonicLabel) )
|
||||
# Create the tonic syllable tier entry
|
||||
if k == stressI:
|
||||
tonicSEntryList.append((syllableStart, syllableEnd, 'T'))
|
||||
|
||||
# Create the tonic phone tier entry
|
||||
if k == stressI:
|
||||
syllablePhoneTier = phoneTier.crop(syllableStart, syllableEnd,
|
||||
True, False)[0]
|
||||
|
||||
phoneList = [entry for entry in syllablePhoneTier.entryList
|
||||
if entry[2] != '']
|
||||
phoneStart, phoneEnd = phoneList[stressJ][:2]
|
||||
tonicPEntryList.append((phoneStart, phoneEnd, 'T'))
|
||||
|
||||
# Create a textgrid with the two syllable-level tiers
|
||||
syllableTier = praatio.TextgridTier("syllable", syllableEntryList, praatio.INTERVAL_TIER)
|
||||
tonicTier = praatio.TextgridTier('tonic', tonicEntryList, praatio.INTERVAL_TIER)
|
||||
syllableTier = tgio.IntervalTier("syllable", syllableEntryList)
|
||||
tonicSTier = tgio.IntervalTier('tonicSyllable', tonicSEntryList)
|
||||
tonicPTier = tgio.IntervalTier('tonicVowel', tonicPEntryList)
|
||||
|
||||
syllableTG = praatio.Textgrid()
|
||||
syllableTG = tgio.Textgrid()
|
||||
syllableTG.addTier(syllableTier)
|
||||
syllableTG.addTier(tonicTier)
|
||||
syllableTG.addTier(tonicSTier)
|
||||
syllableTG.addTier(tonicPTier)
|
||||
|
||||
return syllableTG
|
||||
|
||||
|
||||
+55
-53
@@ -9,10 +9,10 @@ import itertools
|
||||
from pysle import isletool
|
||||
|
||||
|
||||
|
||||
class NullPronunciationError(Exception):
|
||||
|
||||
def __init__(self, word):
|
||||
super(NullPronunciationError, self).__init__()
|
||||
self.word = word
|
||||
|
||||
def __str__(self):
|
||||
@@ -49,7 +49,7 @@ def _lcs(xs, ys):
|
||||
ll_b = _lcs_lens(xb, ys)
|
||||
ll_e = _lcs_lens(xe[::-1], ys[::-1])
|
||||
_, k = max((ll_b[j] + ll_e[ny - j], j)
|
||||
for j in range(ny + 1))
|
||||
for j in range(ny + 1))
|
||||
yb, ye = ys[:k], ys[k:]
|
||||
return _lcs(xb, yb) + _lcs(xe, ye)
|
||||
|
||||
@@ -58,14 +58,13 @@ def _prepPronunciation(phoneList):
|
||||
retList = []
|
||||
for phone in phoneList:
|
||||
if 'r' in phone:
|
||||
phone = ['r',]
|
||||
phone = ['r', ]
|
||||
try:
|
||||
phone = phone[0] # Only represent the str by its first letter
|
||||
phone = phone[0] # Only represent the string by its first letter
|
||||
phone = phone.lower()
|
||||
except IndexError:
|
||||
raise NullPhoneError()
|
||||
|
||||
phone = phone.lower()
|
||||
|
||||
if phone in isletool.vowelList:
|
||||
phone = 'V'
|
||||
retList.append(phone)
|
||||
@@ -85,14 +84,14 @@ def _adjustSyllabification(adjustedPhoneList, syllableList):
|
||||
retSyllableList = []
|
||||
for syllable in syllableList:
|
||||
j = len(syllable)
|
||||
tmpPhoneList = adjustedPhoneList[i:i+j]
|
||||
tmpPhoneList = adjustedPhoneList[i:i + j]
|
||||
numBlanks = -1
|
||||
phoneList = tmpPhoneList[:]
|
||||
while numBlanks != 0:
|
||||
|
||||
numBlanks = tmpPhoneList.count("''")
|
||||
if numBlanks > 0:
|
||||
tmpPhoneList = adjustedPhoneList[i+j:i+j+numBlanks]
|
||||
tmpPhoneList = adjustedPhoneList[i + j:i + j + numBlanks]
|
||||
phoneList.extend(tmpPhoneList)
|
||||
j += numBlanks
|
||||
|
||||
@@ -116,27 +115,32 @@ def _findBestPronunciation(isleDict, wordText, aPron):
|
||||
|
||||
isleWordList = isleDict.lookup(wordText)
|
||||
|
||||
aP = _prepPronunciation(aPron) # Mapping to simplified phone inventory
|
||||
aP = _prepPronunciation(aPron) # Mapping to simplified phone inventory
|
||||
|
||||
origPronDict = dict((newPron,oldPron) for newPron, oldPron in zip(aP, aPron))
|
||||
origPronDict = dict((newPron, oldPron)
|
||||
for newPron, oldPron in zip(aP, aPron))
|
||||
|
||||
numDiffList = []
|
||||
withStress = []
|
||||
i = 0
|
||||
alignedSyllabificationList = []
|
||||
alignedActualPronunciationList = []
|
||||
for syllableList, stressList in isleWordList:
|
||||
for wordTuple in isleWordList:
|
||||
syllableList = wordTuple[0] # syllableList, stressList
|
||||
|
||||
iP = [phone for phoneList in syllableList for phone in phoneList]
|
||||
iP = _prepPronunciation(iP)
|
||||
|
||||
alignedIP, alignedAP = alignPronunciations(iP, aP)
|
||||
alignedAP = [origPronDict.get(phon, "''") for phon in alignedAP] # Remapping to actual phones
|
||||
|
||||
# Remapping to actual phones
|
||||
alignedAP = [origPronDict.get(phon, "''") for phon in alignedAP]
|
||||
alignedActualPronunciationList.append(alignedAP)
|
||||
|
||||
# Adjusting the syllabification for differences between the dictionary
|
||||
# pronunciation and the actual pronunciation
|
||||
alignedSyllabification = _adjustSyllabification(alignedIP, syllableList)
|
||||
alignedSyllabification = _adjustSyllabification(alignedIP,
|
||||
syllableList)
|
||||
alignedSyllabificationList.append(alignedSyllabification)
|
||||
|
||||
# Count the number of misalignments between the two
|
||||
@@ -147,7 +151,7 @@ def _findBestPronunciation(isleDict, wordText, aPron):
|
||||
hasStress = False
|
||||
for syllable in syllableList:
|
||||
for phone in syllable:
|
||||
hasStress = "'" in phone or hasStress
|
||||
hasStress = "'" in phone or hasStress
|
||||
|
||||
if hasStress:
|
||||
withStress.append(i)
|
||||
@@ -164,16 +168,16 @@ def _findBestPronunciation(isleDict, wordText, aPron):
|
||||
for i, numDiff in enumerate(numDiffList):
|
||||
if numDiff != minDiff:
|
||||
continue
|
||||
if bestIndex == None:
|
||||
if bestIndex is None:
|
||||
bestIndex = i
|
||||
bestIsStressed = i in withStress
|
||||
else:
|
||||
if not bestIsStressed and i in withStress:
|
||||
bestIndex = i
|
||||
bestIsStressed = True
|
||||
|
||||
|
||||
return isleWordList, alignedActualPronunciationList, alignedSyllabificationList, bestIndex
|
||||
return (isleWordList, alignedActualPronunciationList,
|
||||
alignedSyllabificationList, bestIndex)
|
||||
|
||||
|
||||
def _syllabifyPhones(phoneList, syllableList, isleStressList):
|
||||
@@ -193,9 +197,9 @@ def _syllabifyPhones(phoneList, syllableList, isleStressList):
|
||||
|
||||
start = 0
|
||||
syllabifiedList = []
|
||||
for i, end in enumerate(numPhoneList):
|
||||
for end in numPhoneList:
|
||||
|
||||
syllable = phoneList[start:start+end]
|
||||
syllable = phoneList[start:start + end]
|
||||
syllabifiedList.append(syllable)
|
||||
|
||||
start += end
|
||||
@@ -212,21 +216,6 @@ def alignPronunciations(pronI, pronA):
|
||||
pronI = [char for char in pronI]
|
||||
pronA = [char for char in pronA]
|
||||
|
||||
# -- allow for some flexibility in pronunciation
|
||||
correctionsTuple = (('d', 't'), ('t', 'd'), ('s', 'z'), ('z', 's'),
|
||||
('m', 'n'), ('n', 'm'),)
|
||||
|
||||
doMatch = lambda i, a: ((i == a) or
|
||||
((i, a) in correctionsTuple))
|
||||
|
||||
def matchExists(targetPhone, pron):
|
||||
match = False
|
||||
for phone in pron:
|
||||
match = match or doMatch(targetPhone, phone)
|
||||
return match
|
||||
|
||||
# Remove vowels
|
||||
|
||||
# Remove any elements not in the other list (but maintain order)
|
||||
pronITmp = pronI
|
||||
pronATmp = pronA
|
||||
@@ -244,7 +233,7 @@ def alignPronunciations(pronI, pronA):
|
||||
startA = pronA.index(phone, startA)
|
||||
startI = pronI.index(phone, startI)
|
||||
|
||||
sequenceIndexListA.append(startA)
|
||||
sequenceIndexListA.append(startA)
|
||||
sequenceIndexListI.append(startI)
|
||||
|
||||
# An index on the tail of both will be used to create output strings
|
||||
@@ -254,17 +243,19 @@ def alignPronunciations(pronI, pronA):
|
||||
|
||||
# Fill in any blanks such that the sequential items have the same
|
||||
# index and the two strings are the same length
|
||||
for x in xrange(len(sequenceIndexListA)):
|
||||
for x in range(len(sequenceIndexListA)):
|
||||
indexA = sequenceIndexListA[x]
|
||||
indexI = sequenceIndexListI[x]
|
||||
if indexA < indexI :
|
||||
for x in xrange(indexI - indexA):
|
||||
if indexA < indexI:
|
||||
for x in range(indexI - indexA):
|
||||
pronA.insert(indexA, "''")
|
||||
sequenceIndexListA = [val + indexI - indexA for val in sequenceIndexListA]
|
||||
sequenceIndexListA = [val + indexI - indexA
|
||||
for val in sequenceIndexListA]
|
||||
elif indexA > indexI:
|
||||
for x in xrange(indexA - indexI):
|
||||
for x in range(indexA - indexI):
|
||||
pronI.insert(indexI, "''")
|
||||
sequenceIndexListI = [val + indexA - indexI for val in sequenceIndexListI]
|
||||
sequenceIndexListI = [val + indexA - indexI
|
||||
for val in sequenceIndexListI]
|
||||
|
||||
return pronI, pronA
|
||||
|
||||
@@ -273,23 +264,36 @@ def findBestSyllabification(isleDict, wordText, actualPronunciationList):
|
||||
'''
|
||||
Find the best syllabification for a word
|
||||
|
||||
First find the closest pronunciation to a given pronunciation. Then take
|
||||
the syllabification for that pronunciation and map it onto the
|
||||
First find the closest pronunciation to a given pronunciation. Then take
|
||||
the syllabification for that pronunciation and map it onto the
|
||||
input pronunciation.
|
||||
'''
|
||||
retList = _findBestPronunciation(isleDict, wordText, actualPronunciationList)
|
||||
retList = _findBestPronunciation(isleDict, wordText,
|
||||
actualPronunciationList)
|
||||
isleWordList, alignedAPronList, alignedSyllableList, bestIndex = retList
|
||||
|
||||
alignedPhoneList = alignedAPronList[bestIndex]
|
||||
alignedSyllables = alignedSyllableList[bestIndex]
|
||||
syllabification = isleWordList[bestIndex][0]
|
||||
stressedIndex = isleWordList[bestIndex][1]
|
||||
stressedSyllableIndexList = isleWordList[bestIndex][1]
|
||||
stressedPhoneIndexList = isleWordList[bestIndex][2]
|
||||
|
||||
stressedSyllable, syllableList = _syllabifyPhones(alignedPhoneList,
|
||||
alignedSyllables,
|
||||
stressedIndex)
|
||||
stressedSyllable, syllableList = _syllabifyPhones(alignedPhoneList,
|
||||
alignedSyllables,
|
||||
stressedSyllableIndexList)
|
||||
|
||||
return stressedSyllable, syllableList, syllabification, stressedIndex
|
||||
# Count the index of the stressed phones, if the stress list has
|
||||
# become flattened (no syllable information)
|
||||
flattenedStressIndexList = []
|
||||
for i, j in zip(stressedSyllableIndexList, stressedPhoneIndexList):
|
||||
k = j
|
||||
for l in range(i):
|
||||
k += len(syllableList[l])
|
||||
flattenedStressIndexList.append(k)
|
||||
|
||||
return (stressedSyllable, syllableList, syllabification,
|
||||
stressedSyllableIndexList, stressedPhoneIndexList,
|
||||
flattenedStressIndexList)
|
||||
|
||||
|
||||
def findClosestPronunciation(isleDict, wordText, aPron):
|
||||
@@ -298,9 +302,7 @@ def findClosestPronunciation(isleDict, wordText, aPron):
|
||||
'''
|
||||
|
||||
retList = _findBestPronunciation(isleDict, wordText, aPron)
|
||||
isleWordList, actualPronunciationList, bestIndex = retList
|
||||
isleWordList = retList[0]
|
||||
bestIndex = retList[3]
|
||||
|
||||
return isleWordList[bestIndex]
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ Created on Oct 15, 2014
|
||||
'''
|
||||
from distutils.core import setup
|
||||
setup(name='pysle',
|
||||
version='1.0.0',
|
||||
version='1.3.0',
|
||||
author='Tim Mahrt',
|
||||
author_email='timmahrt@gmail.com',
|
||||
package_dir={'pysle':'pysle'},
|
||||
@@ -13,4 +13,4 @@ setup(name='pysle',
|
||||
license='LICENSE',
|
||||
long_description=open('README.rst', 'r').read(),
|
||||
# install_requires=[], # No requirements! # requires 'from setuptools import setup'
|
||||
)
|
||||
)
|
||||
|
||||
@@ -20,13 +20,13 @@ firstEntry = lookupResults[0]
|
||||
firstSyllableList = firstEntry[0]
|
||||
firstStressList = firstEntry[1]
|
||||
|
||||
print searchWord
|
||||
print firstSyllableList, firstStressList # 3rd syllable carries stress
|
||||
print(searchWord)
|
||||
print(firstSyllableList, firstStressList) # 3rd syllable carries stress
|
||||
|
||||
|
||||
# Here we determine the syllabification of a word, as it was said.
|
||||
# (Of course, this is just a guess)
|
||||
print '-'*50
|
||||
print('-'*50)
|
||||
|
||||
searchWord = 'another'
|
||||
anotherPhoneList = ['n', '@', 'th', 'r']
|
||||
@@ -37,8 +37,8 @@ returnList = pronunciationtools.findBestSyllabification(isleDict,
|
||||
|
||||
stressedSyllable, syllableList, syllabification, stressedIndex = returnList
|
||||
|
||||
print searchWord
|
||||
print anotherPhoneList
|
||||
print syllableList # We can see the first syllable was elided
|
||||
print(searchWord)
|
||||
print(anotherPhoneList)
|
||||
print(syllableList) # We can see the first syllable was elided
|
||||
|
||||
|
||||
|
||||
@@ -12,21 +12,25 @@ This snippet shows you how to use this function.
|
||||
|
||||
from os.path import join
|
||||
|
||||
import praatio
|
||||
from praatio import tgio
|
||||
from pysle import isletool
|
||||
from pysle import praattools
|
||||
|
||||
path = join('.', 'files')
|
||||
path = "/Users/tmahrt/Dropbox/workspace/pysle/test/files"
|
||||
|
||||
tg = praatio.openTextGrid(join(path, "pumpkins.TextGrid"))
|
||||
isleDict = isletool.LexicalTool('/Users/tmahrt/Dropbox/workspace/pysle/test/islev2.txt') # Needs the full path to the file
|
||||
tg = tgio.openTextGrid(join(path, "pumpkins.TextGrid"))
|
||||
|
||||
# Needs the full path to the file
|
||||
islevPath = '/Users/tmahrt/Dropbox/workspace/pysle/test/islev2.txt'
|
||||
isleDict = isletool.LexicalTool(islevPath)
|
||||
|
||||
# Get the syllabification tiers and add them to the textgrid
|
||||
syllableTG = praattools.syllabifyTextgrid(isleDict, tg, "word", "phone",
|
||||
skipLabelList=["",])
|
||||
tg.addTier(syllableTG.tierDict["syllable"])
|
||||
tg.addTier(syllableTG.tierDict["tonic"])
|
||||
tg.addTier(syllableTG.tierDict["tonicSyllable"])
|
||||
tg.addTier(syllableTG.tierDict["tonicVowel"])
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user