diff --git a/pysle/isletool.py b/pysle/isletool.py index 9d2c271..0959aac 100644 --- a/pysle/isletool.py +++ b/pysle/isletool.py @@ -5,41 +5,40 @@ Created on Oct 11, 2012 ''' -vowelList = ['a', '@', 'e', 'i', 'o', 'u', '^', '&', '>',] +vowelList = ['a', '@', 'e', 'i', 'o', 'u', '^', '&', '>', ] class WordNotInISLE(Exception): def __init__(self, word): + super(WordNotInISLE, self).__init__() self.word = word def __str__(self): - return "Word '%s' not in ISLE dictionary. Please add it to continue." % self.word + return ("Word '%s' not in ISLE dictionary. " + "Please add it to continue." % self.word) class LexicalTool(): - def __init__(self, islePath): self.islePath = islePath self.data = self._buildDict() - def _buildDict(self): ''' Builds the isle textfile into a dictionary for fast searching ''' - dict = {} + lexDict = {} wordList = open(self.islePath, "r").read().split("\n") for row in wordList: word, pronunciation = row.split(" ", 1) word = word.split("(")[0] - dict.setdefault(word, []) - dict[word].append(pronunciation) + lexDict.setdefault(word, []) + lexDict[word].append(pronunciation) - return dict - + return lexDict def lookup(self, word): ''' @@ -52,10 +51,10 @@ class LexicalTool(): pronList = self.data.get(word, None) - if pronList == None: + if pronList is None: raise WordNotInISLE(word) else: - pronList = [_parsePronunciation(pronunciationStr) + pronList = [_parsePronunciation(pronunciationStr) for pronunciationStr in pronList] return pronList @@ -65,8 +64,8 @@ def _parsePronunciation(pronunciationStr): ''' Parses the pronunciation string - Returns the list of syllables and a list of primary and - secondary stress locations + Returns the list of syllables and a list of primary and + secondary stress locations ''' syllableTxt = pronunciationStr.split("#")[1].strip() syllableList = [x for x in syllableTxt.split(' . ')] @@ -89,7 +88,7 @@ def _parsePronunciation(pronunciationStr): def getNumPhones(isleDict, label, maxFlag): ''' - If maxFlag=True, use the longest pronunciation. Otherwise, take the + If maxFlag=True, use the longest pronunciation. Otherwise, take the average length. ''' phoneCount = 0 @@ -99,25 +98,28 @@ def getNumPhones(isleDict, label, maxFlag): phoneListOfLists = isleDict.lookup(word) syllableCountList = [] - for syllableList, stressIndex in phoneListOfLists: + for row in phoneListOfLists: + syllableList = row[0] syllableCountList.append(len(syllableList)) # In ISLE, there can be multiple pronunciations for each word # as we have no reason to believe one pronunciation is more # likely than another, we take the average of all of them phoneCountList = [] - for syllableList, stressIndex in phoneListOfLists: - phoneCountList.append(len([phon for phoneList in syllableList for + for row in phoneListOfLists: + syllableList = row[0] + phoneCountList.append(len([phon for phoneList in syllableList for phon in phoneList])) # The average number of phones for all possible pronunciations # of this word - if maxFlag == True: + if maxFlag is True: syllableCount += max(syllableCountList) phoneCount += max(phoneCountList) else: - syllableCount += sum(syllableCountList) / float(len(syllableCountList)) - phoneCount += sum(phoneCountList) / float(len(phoneCountList)) + syllableCount += (sum(syllableCountList) / + float(len(syllableCountList))) + phoneCount += sum(phoneCountList) / float(len(phoneCountList)) return syllableCount, phoneCount @@ -137,6 +139,3 @@ def findOODWords(isleDict, wordList): oodList.sort() return oodList - - - \ No newline at end of file