From bc4f19c74c22c2f3c5a8ba5dc797062bfb7ed233 Mon Sep 17 00:00:00 2001 From: Tim Mahrt Date: Tue, 15 Mar 2016 17:42:33 +0100 Subject: [PATCH] FEATURE: Index to stressed vowel; marking of stressed vowels on textgrids - the index to the stressed syllable was provided in the past. Now the library also includes the index to the stressed vowel. This is provided with relation to the phones in the syllable and all phones in the word. - the code that marks the stressed syllables in the textgrids also now marks the stressed vowels - several variables renamed to be more informative --- README.rst | 6 ++++- pysle/isletool.py | 23 ++++++++--------- pysle/praattools.py | 49 +++++++++++++++++++++++++------------ pysle/pronunciationtools.py | 18 +++++++++++--- test/syllabifyTextgrid.py | 3 ++- 5 files changed, 67 insertions(+), 32 deletions(-) diff --git a/README.rst b/README.rst index 4bac1fe..706d186 100644 --- a/README.rst +++ b/README.rst @@ -43,6 +43,10 @@ What can you do with this library? Major revisions ================ +Ver 1.3 (March 15, 2016) + +- added indices for stressed vowels + Ver 1.2 (June 20, 2015) - Python 3.x support @@ -82,7 +86,7 @@ Installation If you on Windows, you can use the installer found here (check that it is up to date though) `Windows installer `_ -Otherwise, to manually install, after downloading, from a command-line shell, navigate to the directory containing setup.py and type:: +Otherwise, to manually install, after downloading the source from github, from a command-line shell, navigate to the directory containing setup.py and type:: python setup.py install diff --git a/pysle/isletool.py b/pysle/isletool.py index 3a2e5c7..093b2a5 100644 --- a/pysle/isletool.py +++ b/pysle/isletool.py @@ -69,21 +69,22 @@ def _parsePronunciation(pronunciationStr): secondary stress locations ''' syllableTxt = pronunciationStr.split("#")[1].strip() - syllableList = [x for x in syllableTxt.split(' . ')] + syllableList = [x.split() for x in syllableTxt.split(' . 
')] # Find stress - stressList = [] + stressedSyllableList = [] + stressedPhoneList = [] for i, syllable in enumerate(syllableList): - # Primary stress - if "'" in syllable: - stressList.insert(0, i) - # Secondary stress - elif '"' in syllable: - stressList.append(i) + for j, phone in enumerate(syllable): + if "'" in phone: + stressedSyllableList.insert(0, i) + stressedPhoneList.insert(0, j) + break + elif '"' in phone: + stressedSyllableList.insert(i) + stressedPhoneList.insert(j) - syllableList = [x.split(" ") for x in syllableList] - - return syllableList, stressList + return syllableList, stressedSyllableList, stressedPhoneList def getNumPhones(isleDict, label, maxFlag): diff --git a/pysle/praattools.py b/pysle/praattools.py index 70a9913..421b716 100644 --- a/pysle/praattools.py +++ b/pysle/praattools.py @@ -39,7 +39,8 @@ def syllabifyTextgrid(isleDict, tg, wordTierName, phoneTierName, skipLabelList = [] syllableEntryList = [] - tonicEntryList = [] + tonicSEntryList = [] + tonicPEntryList = [] for start, stop, word in wordTier.entryList: if word in skipLabelList: @@ -63,8 +64,20 @@ def syllabifyTextgrid(isleDict, tg, wordTierName, phoneTierName, continue syllableList = returnList[1] - stressIndexList = returnList[3] - + stressedSyllableIndexList = returnList[3] + stressedPhoneIndexList = returnList[4] + flattenedPhoneIndexList = returnList[5] + + try: + stressI = stressedSyllableIndexList[0] + stressJ = stressedPhoneIndexList[0] + except IndexError: + stressI = None # Function word probably + stressJ = None # + + if stressI is not None: + syllableList[stressI][stressJ] += "'" + i = 0 # print(syllableList) for k, syllable in enumerate(syllableList): @@ -84,24 +97,28 @@ def syllabifyTextgrid(isleDict, tg, wordTierName, phoneTierName, syllableEntryList.append((syllableStart, syllableEnd, label)) - # Create the tonic tier entry - try: - stressIndex = stressIndexList[0] - except IndexError: - stressIndex = None # Function word probably - - tonicLabel = '' - if k == 
stressIndex: - tonicLabel = 'T' - - tonicEntryList.append((syllableStart, syllableEnd, tonicLabel)) + # Create the tonic syllable tier entry + if k == stressI: + tonicSEntryList.append((syllableStart, syllableEnd, 'T')) + + # Create the tonic phone tier entry + if k == stressI: + syllablePhoneTier = phoneTier.crop(syllableStart, syllableEnd, + True, False)[0] + + phoneList = [entry for entry in syllablePhoneTier.entryList + if entry[2] != ''] + phoneStart, phoneEnd = phoneList[stressJ][:2] + tonicPEntryList.append((phoneStart, phoneEnd, 'T')) # Create a textgrid with the two syllable-level tiers syllableTier = tgio.IntervalTier("syllable", syllableEntryList) - tonicTier = tgio.IntervalTier('tonic', tonicEntryList) + tonicSTier = tgio.IntervalTier('tonicSyllable', tonicSEntryList) + tonicPTier = tgio.IntervalTier('tonicPhone', tonicPEntryList) syllableTG = tgio.Textgrid() syllableTG.addTier(syllableTier) - syllableTG.addTier(tonicTier) + syllableTG.addTier(tonicSTier) + syllableTG.addTier(tonicPTier) return syllableTG diff --git a/pysle/pronunciationtools.py b/pysle/pronunciationtools.py index ad8979a..b34e447 100644 --- a/pysle/pronunciationtools.py +++ b/pysle/pronunciationtools.py @@ -275,13 +275,25 @@ def findBestSyllabification(isleDict, wordText, actualPronunciationList): alignedPhoneList = alignedAPronList[bestIndex] alignedSyllables = alignedSyllableList[bestIndex] syllabification = isleWordList[bestIndex][0] - stressedIndex = isleWordList[bestIndex][1] + stressedSyllableIndexList = isleWordList[bestIndex][1] + stressedPhoneIndexList = isleWordList[bestIndex][2] stressedSyllable, syllableList = _syllabifyPhones(alignedPhoneList, alignedSyllables, - stressedIndex) + stressedSyllableIndexList) - return stressedSyllable, syllableList, syllabification, stressedIndex + # Count the index of the stressed phones, if the stress list has + # become flattened (no syllable information) + flattenedStressIndexList = [] + for i, j in zip(stressedSyllableIndexList, 
stressedPhoneIndexList): + k = j + for l in range(i): + k += len(syllableList[l]) + flattenedStressIndexList.append(k) + + return (stressedSyllable, syllableList, syllabification, + stressedSyllableIndexList, stressedPhoneIndexList, + flattenedStressIndexList) def findClosestPronunciation(isleDict, wordText, aPron): diff --git a/test/syllabifyTextgrid.py b/test/syllabifyTextgrid.py index bfac6fb..9405a88 100644 --- a/test/syllabifyTextgrid.py +++ b/test/syllabifyTextgrid.py @@ -29,7 +29,8 @@ isleDict = isletool.LexicalTool(islevPath) syllableTG = praattools.syllabifyTextgrid(isleDict, tg, "word", "phone", skipLabelList=["",]) tg.addTier(syllableTG.tierDict["syllable"]) -tg.addTier(syllableTG.tierDict["tonic"]) +tg.addTier(syllableTG.tierDict["tonicSyllable"]) +tg.addTier(syllableTG.tierDict["tonicPhone"])