saysynth.lib.nlp
Utilities for extracting phonemes and syllables from english text.
"""
Utilities for extracting phonemes and syllables from english text.
"""

from functools import lru_cache
from typing import List, Tuple

import nltk
from g2p_en import G2p
from nltk.corpus import cmudict
from nltk.tokenize import word_tokenize

# ensure NLTK resources are downloaded
try:
    cmudict.ensure_loaded()
except LookupError:
    nltk.download("cmudict")
try:
    word_tokenize("test")
except Exception:
    nltk.download("punkt")


from ..constants import G2P_PHONEMES_TO_SAY_PHONEMES

# Both resources are created lazily on first use so that importing this
# module does not pay for building them.
G2P = None
CMU = None
VOWELS = "aeiouy"


@lru_cache(maxsize=1024)
def word_to_g2p_phonemes(text: str) -> List[str]:
    """
    Extract g2p phonemes from a text.

    The module-level ``G2P`` converter is created on the first call and
    reused afterwards. Results are memoized per ``text``, so the returned
    list object is shared between calls and should not be mutated.

    Args:
        text: A text to extract phonemes from
    """
    global G2P
    G2P = G2P or G2p()
    return G2P(text)


def word_to_say_phonemes(text: str) -> List[str]:
    """
    Extract Apple SpeechSynthesis phonemes from a text.

    Each g2p phoneme is translated through ``G2P_PHONEMES_TO_SAY_PHONEMES``;
    a phoneme with no entry in that table becomes an empty string.

    Args:
        text: A text to extract phonemes from
    """
    translate = G2P_PHONEMES_TO_SAY_PHONEMES.get
    return [translate(phoneme, "") for phoneme in word_to_g2p_phonemes(text)]


def _heuristic_syllable_count(word: str) -> int:
    """
    Estimate the syllable count of a lowercase word from its spelling.

    Counts the starts of vowel groups, then adjusts for a trailing "e"
    and a trailing "le". The result is never less than one.
    Adapted from stackoverflow.com/questions/14541303/count-the-number-of-syllables-in-a-word
    """
    count = 0
    previous_was_vowel = False
    # Iterating characters (instead of indexing word[0]) keeps this safe
    # for the empty string.
    for char in word:
        is_vowel = char in VOWELS
        if is_vowel and not previous_was_vowel:
            count += 1
        previous_was_vowel = is_vowel
    if word.endswith("e"):
        count -= 1
    if word.endswith("le"):
        count += 1
    # every word is treated as having at least one syllable
    return count if count > 0 else 1


@lru_cache(maxsize=1024)
def word_to_syllable_count(word: str) -> int:
    """
    Compute the syllable count of a word, using CMU if the word is present in the
    corpus, otherwise [follow this implementation](https://stackoverflow.com/questions/14541303/count-the-number-of-syllables-in-a-word).

    The lookup is case-insensitive. An empty string yields 1 (the minimum
    count) instead of raising ``IndexError``.

    Args:
        word: A word to compute syllables for.
    """
    global CMU
    if not CMU:
        CMU = cmudict.dict()
    word = word.lower()
    try:
        # only the first listed pronunciation is used
        pronunciation = CMU[word][0]
    except KeyError:
        # word not found in cmudict
        return _heuristic_syllable_count(word)
    # a phoneme ending in a digit is counted as one syllable
    return sum(1 for phoneme in pronunciation if phoneme[-1].isdigit())


@lru_cache(maxsize=1024)
def process_text_for_say(text: str) -> List[Tuple[str, int, List[str]]]:
    """
    Get a list of phonemes + syllable counts for each word in a text.

    Each token produced by ``word_tokenize`` becomes a
    ``(word, syllable_count, say_phonemes)`` tuple. Punctuation tokens are
    replaced by a one-syllable silence entry ``("", 1, ["%"])``. Results
    are memoized per ``text``, so the returned list object is shared
    between calls and should not be mutated.

    Args:
        text: A text to process.
    """
    silence_tokens = [",", ".", "?", "!", "-", ":", ";"]
    processed = []
    for token in word_tokenize(text):
        if token in silence_tokens:
            processed.append(("", 1, ["%"]))  # silence
        else:
            processed.append(
                (
                    token,
                    word_to_syllable_count(token),
                    word_to_say_phonemes(token),
                )
            )
    return processed
@lru_cache(maxsize=1024)
def
word_to_g2p_phonemes(text: str) -> List[str]:
@lru_cache(maxsize=1024)
def word_to_g2p_phonemes(text: str) -> List[str]:
    """
    Extract g2p phonemes from a text.

    The module-level ``G2P`` converter is created on the first call and
    reused afterwards. Results are memoized per ``text``, so the returned
    list object is shared between calls and should not be mutated.

    Args:
        text: A text to extract phonemes from
    """
    global G2P
    # build the converter lazily; keep the existing one when already set
    G2P = G2P or G2p()
    return G2P(text)
Extract g2p phonemes from a text.
Arguments:
- text: A text to extract phonemes from
def
word_to_say_phonemes(text: str) -> List[str]:
def word_to_say_phonemes(text: str) -> List[str]:
    """
    Extract Apple SpeechSynthesis phonemes from a text.

    Each g2p phoneme is translated through ``G2P_PHONEMES_TO_SAY_PHONEMES``;
    a phoneme with no entry in that table becomes an empty string.

    Args:
        text: A text to extract phonemes from
    """
    translate = G2P_PHONEMES_TO_SAY_PHONEMES.get
    say_phonemes = []
    for phoneme in word_to_g2p_phonemes(text):
        say_phonemes.append(translate(phoneme, ""))
    return say_phonemes
Extract Apple SpeechSynthesis phonemes from a text.
Arguments:
- text: A text to extract phonemes from
@lru_cache(maxsize=1024)
def
word_to_syllable_count(word: str) -> int:
def _heuristic_syllable_count(word: str) -> int:
    """
    Estimate the syllable count of a lowercase word from its spelling.

    Counts the starts of vowel groups, then adjusts for a trailing "e"
    and a trailing "le". The result is never less than one.
    Adapted from stackoverflow.com/questions/14541303/count-the-number-of-syllables-in-a-word
    """
    count = 0
    previous_was_vowel = False
    # Iterating characters (instead of indexing word[0]) keeps this safe
    # for the empty string.
    for char in word:
        is_vowel = char in VOWELS
        if is_vowel and not previous_was_vowel:
            count += 1
        previous_was_vowel = is_vowel
    if word.endswith("e"):
        count -= 1
    if word.endswith("le"):
        count += 1
    # every word is treated as having at least one syllable
    return count if count > 0 else 1


@lru_cache(maxsize=1024)
def word_to_syllable_count(word: str) -> int:
    """
    Compute the syllable count of a word, using CMU if the word is present in the
    corpus, otherwise [follow this implementation](https://stackoverflow.com/questions/14541303/count-the-number-of-syllables-in-a-word).

    The lookup is case-insensitive. An empty string yields 1 (the minimum
    count) instead of raising ``IndexError``.

    Args:
        word: A word to compute syllables for.
    """
    global CMU
    if not CMU:
        CMU = cmudict.dict()
    word = word.lower()
    try:
        # only the first listed pronunciation is used
        pronunciation = CMU[word][0]
    except KeyError:
        # word not found in cmudict
        return _heuristic_syllable_count(word)
    # a phoneme ending in a digit is counted as one syllable
    return sum(1 for phoneme in pronunciation if phoneme[-1].isdigit())
Compute the syllable count of a word, using CMU if the word is present in the corpus, otherwise follow this implementation: https://stackoverflow.com/questions/14541303/count-the-number-of-syllables-in-a-word
Arguments:
- word: A word to compute syllables for.
@lru_cache(maxsize=1024)
def
process_text_for_say(text: str) -> List[Tuple[str, int, List[str]]]:
@lru_cache(maxsize=1024)
def process_text_for_say(text: str) -> List[Tuple[str, int, List[str]]]:
    """
    Get a list of phonemes + syllable counts for each word in a text.

    Each token produced by ``word_tokenize`` becomes a
    ``(word, syllable_count, say_phonemes)`` tuple. Punctuation tokens are
    replaced by a one-syllable silence entry ``("", 1, ["%"])``. Results
    are memoized per ``text``, so the returned list object is shared
    between calls and should not be mutated.

    Args:
        text: A text to process.
    """
    silence_tokens = [",", ".", "?", "!", "-", ":", ";"]
    processed = []
    for token in word_tokenize(text):
        if token in silence_tokens:
            processed.append(("", 1, ["%"]))  # silence
        else:
            processed.append(
                (
                    token,
                    word_to_syllable_count(token),
                    word_to_say_phonemes(token),
                )
            )
    return processed
Get a list of phonemes + syllable counts for each word in a text.
Arguments:
- text: A text to process.