mirror of
https://github.com/wassname/pysle.git
synced 2026-06-27 16:10:05 +08:00
REFACTOR: PEP 8 compliance
This commit is contained in:
+22
-23
@@ -5,41 +5,40 @@ Created on Oct 11, 2012
|
||||
'''
|
||||
|
||||
|
||||
vowelList = ['a', '@', 'e', 'i', 'o', 'u', '^', '&', '>',]
|
||||
vowelList = ['a', '@', 'e', 'i', 'o', 'u', '^', '&', '>', ]
|
||||
|
||||
|
||||
class WordNotInISLE(Exception):
|
||||
|
||||
def __init__(self, word):
|
||||
super(WordNotInISLE, self).__init__()
|
||||
self.word = word
|
||||
|
||||
def __str__(self):
|
||||
return "Word '%s' not in ISLE dictionary. Please add it to continue." % self.word
|
||||
return ("Word '%s' not in ISLE dictionary. "
|
||||
"Please add it to continue." % self.word)
|
||||
|
||||
|
||||
class LexicalTool():
|
||||
|
||||
|
||||
def __init__(self, islePath):
|
||||
self.islePath = islePath
|
||||
self.data = self._buildDict()
|
||||
|
||||
|
||||
def _buildDict(self):
|
||||
'''
|
||||
Builds the isle textfile into a dictionary for fast searching
|
||||
'''
|
||||
dict = {}
|
||||
lexDict = {}
|
||||
wordList = open(self.islePath, "r").read().split("\n")
|
||||
for row in wordList:
|
||||
word, pronunciation = row.split(" ", 1)
|
||||
word = word.split("(")[0]
|
||||
|
||||
dict.setdefault(word, [])
|
||||
dict[word].append(pronunciation)
|
||||
lexDict.setdefault(word, [])
|
||||
lexDict[word].append(pronunciation)
|
||||
|
||||
return dict
|
||||
|
||||
return lexDict
|
||||
|
||||
def lookup(self, word):
|
||||
'''
|
||||
@@ -52,10 +51,10 @@ class LexicalTool():
|
||||
|
||||
pronList = self.data.get(word, None)
|
||||
|
||||
if pronList == None:
|
||||
if pronList is None:
|
||||
raise WordNotInISLE(word)
|
||||
else:
|
||||
pronList = [_parsePronunciation(pronunciationStr)
|
||||
pronList = [_parsePronunciation(pronunciationStr)
|
||||
for pronunciationStr in pronList]
|
||||
|
||||
return pronList
|
||||
@@ -65,8 +64,8 @@ def _parsePronunciation(pronunciationStr):
|
||||
'''
|
||||
Parses the pronunciation string
|
||||
|
||||
Returns the list of syllables and a list of primary and
|
||||
secondary stress locations
|
||||
Returns the list of syllables and a list of primary and
|
||||
secondary stress locations
|
||||
'''
|
||||
syllableTxt = pronunciationStr.split("#")[1].strip()
|
||||
syllableList = [x for x in syllableTxt.split(' . ')]
|
||||
@@ -89,7 +88,7 @@ def _parsePronunciation(pronunciationStr):
|
||||
def getNumPhones(isleDict, label, maxFlag):
|
||||
'''
|
||||
|
||||
If maxFlag=True, use the longest pronunciation. Otherwise, take the
|
||||
If maxFlag=True, use the longest pronunciation. Otherwise, take the
|
||||
average length.
|
||||
'''
|
||||
phoneCount = 0
|
||||
@@ -99,25 +98,28 @@ def getNumPhones(isleDict, label, maxFlag):
|
||||
phoneListOfLists = isleDict.lookup(word)
|
||||
|
||||
syllableCountList = []
|
||||
for syllableList, stressIndex in phoneListOfLists:
|
||||
for row in phoneListOfLists:
|
||||
syllableList = row[0]
|
||||
syllableCountList.append(len(syllableList))
|
||||
|
||||
# In ISLE, there can be multiple pronunciations for each word
|
||||
# as we have no reason to believe one pronunciation is more
|
||||
# likely than another, we take the average of all of them
|
||||
phoneCountList = []
|
||||
for syllableList, stressIndex in phoneListOfLists:
|
||||
phoneCountList.append(len([phon for phoneList in syllableList for
|
||||
for row in phoneListOfLists:
|
||||
syllableList = row[0]
|
||||
phoneCountList.append(len([phon for phoneList in syllableList for
|
||||
phon in phoneList]))
|
||||
|
||||
# The average number of phones for all possible pronunciations
|
||||
# of this word
|
||||
if maxFlag == True:
|
||||
if maxFlag is True:
|
||||
syllableCount += max(syllableCountList)
|
||||
phoneCount += max(phoneCountList)
|
||||
else:
|
||||
syllableCount += sum(syllableCountList) / float(len(syllableCountList))
|
||||
phoneCount += sum(phoneCountList) / float(len(phoneCountList))
|
||||
syllableCount += (sum(syllableCountList) /
|
||||
float(len(syllableCountList)))
|
||||
phoneCount += sum(phoneCountList) / float(len(phoneCountList))
|
||||
|
||||
return syllableCount, phoneCount
|
||||
|
||||
@@ -137,6 +139,3 @@ def findOODWords(isleDict, wordList):
|
||||
oodList.sort()
|
||||
|
||||
return oodList
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user