saysynth.constants

Constants for use throughout saysynth

View Source

  1"""
  2Constants for use throughout `saysynth`
  3"""
  4import os
  5
  6SAY_EXECUTABLE = os.getenv("SAYSYNTH_SAY_EXECUTABLE", "/usr/bin/say")
  7"""The path to where the `say` command exists."""
  8
  9SAY_FILE_FORMATS = ["wav", "aiff"]
 10"""Valid output file formats."""
 11
 12SAY_TUNED_VOICES = ["Alex", "Fred", "Victoria"]
 13"""`say` voices which respect [[TUNE]] input"""
 14
 15SAY_TUNE_TAG = "[[inpt TUNE]]"
 16"""Opening tag for tuned input to `say`"""
 17
 18SAY_COLORS = [
 19    "black",
 20    "red",
 21    "green",
 22    "yellow",
 23    "blue",
 24    "magenta",
 25    "cyan",
 26    "white",
 27]
 28"""Colors which can be used to style the interactive output of `say`"""
 29
 30SAY_ENDIANNESS = ["BE", "LE"]
 31"""Values for --data-format to determine the endianness"""
 32
 33SAY_DATA_TYPES = ["F", "I", "UI"]
 34"""Values for --data-format to determine the data type"""
 35
 36SAY_SAMPLE_SIZES = [8, 16, 24, 32, 64]
 37"""Values for --data-format to determine the sample size"""
 38
 39SAY_FILE_FORMATS = {
 40    "mp3": "mp4f",
 41    "aiff": "AIFF",
 42    "aif": "AIFF",
 43    "flac": "FLAC",
 44    "m4a": "m4af",
 45    "wav": "WAVE",
 46}
 47"""Mapping of file extension to say's --file-format arguments"""
 48
 49SAY_BIG_ENDIAN_ONLY_FILE_FORMATS = ["AIFF", "m4af", "FLAC"]
 50"""List of say's --file-format arguments which accept only BE endianness"""
 51
 52SAY_DEFAULT_FLOAT_SAMPLE_SIZE = 32
 53"""The default sample size to use when the data_type is F"""
 54
 55SAY_VALID_FLOAT_SAMPLE_SIZES = [32, 64]
 56"""A list of valid sample sizes when the data_type is F"""
 57
 58SAY_MAX_SAMPLE_RATE = 22050
 59"""
 60This is the max sample rate,
 61anything above this [will generate up-sampled audio](https://stackoverflow.com/questions/9729153/error-on-say-when-output-format-is-wave)
 62"""
 63
 64SAY_ALL_PHONEMES = [
 65    "AE",
 66    "EY",
 67    "AO",
 68    "AX",
 69    "IY",
 70    "EH",
 71    "IH",
 72    "AY",
 73    "IX",
 74    "AA",
 75    "UW",
 76    "UH",
 77    "UX",
 78    "OW",
 79    "AW",
 80    "OY",
 81    "b",
 82    "C",
 83    "d",
 84    "D",
 85    "f",
 86    "g",
 87    "h",
 88    "J",
 89    "k",
 90    "l",
 91    "m",
 92    "n",
 93    "N",
 94    "p",
 95    "r",
 96    "s",
 97    "S",
 98    "t",
 99    "T",
100    "v",
101    "w",
102    "y",
103    "z",
104    "Z",
105]
106"""
107[A list of all valid phonemes to pass into `say`]( https://developer.apple.com/library/archive/documentation/UserExperience/Conceptual/SpeechSynthesisProgrammingGuide/Phonemes/Phonemes.html#//apple_ref/doc/uid/TP40004365-CH9-SW1).
108"""
109
110SAY_PHONEME_CLASSES = ["drone", "noise", "note"]
111""" Classes of phonemes as defined in scripts/classify_phonemes.py """
112
113SAY_PHONEME_VOICE_CLASSES = {
114    "Alex": {
115        "drone": [
116            "AE",
117            "EY",
118            "AO",
119            "AX",
120            "IY",
121            "EH",
122            "IH",
123            "AY",
124            "IX",
125            "UW",
126            "OW",
127            "OY",
128            "h",
129            "l",
130            "m",
131            "n",
132            "N",
133            "r",
134            "Z",
135        ],
136        "noise": [
137            "C",
138            "d",
139            "D",
140            "f",
141            "J",
142            "k",
143            "p",
144            "s",
145            "S",
146            "t",
147            "T",
148            "v",
149            "z",
150            "Z",
151        ],
152        "note": ["AA", "UH", "UX", "AW", "b", "g", "w", "y", "Z"],
153    },
154    "Fred": {
155        "drone": [
156            "AE",
157            "EY",
158            "AO",
159            "AX",
160            "IY",
161            "EH",
162            "IH",
163            "AY",
164            "IX",
165            "AA",
166            "UW",
167            "UH",
168            "UX",
169            "OW",
170            "AW",
171            "OY",
172            "D",
173            "l",
174            "m",
175            "n",
176            "N",
177            "r",
178            "v",
179            "w",
180            "y",
181            # "z",
182            # "Z",
183        ],
184        "note": [  # all of Fred's drones work as notes.
185            "AE",
186            "EY",
187            "AO",
188            "AX",
189            "IY",
190            "EH",
191            "IH",
192            "AY",
193            "IX",
194            "AA",
195            "UW",
196            "UH",
197            "UX",
198            "OW",
199            "AW",
200            "OY",
201            "D",
202            "l",
203            "m",
204            "n",
205            "N",
206            "r",
207            "v",
208            "w",
209            "y",
210            "z",
211            "Z",
212        ],
213        "noise": [
214            "b",
215            "C",
216            "d",
217            "f",
218            "g",
219            "h",
220            "J",
221            "k",
222            "p",
223            "s",
224            "S",
225            "t",
226            "T",
227        ],
228    },
229    "Victoria": {
230        "drone": [
231            "AE",
232            "EY",
233            "AO",
234            "AX",
235            "IY",
236            "EH",
237            "IH",
238            "AY",
239            "IX",
240            "AA",
241            "UW",
242            "UH",
243            "UX",
244            "AW",
245            "OY",
246            "l",
247            "m",
248            "n",
249            "N",
250            "r",
251            "v",
252            "w",
253            "y",
254        ],
255        "noise": [
256            "C",
257            "d",
258            "D",
259            "f",
260            "h",
261            "k",
262            "p",
263            "s",
264            "S",
265            "t",
266            "T",
267            "z",
268            "Z",
269        ],
270        "note": ["OW", "b", "g", "J"],
271    },
272}
273""" Generated by running scripts/classify_phonemes.py """
274
275SAY_PHONEME_SILENCE = "%"
276""" This is the sound of silence """
277
278G2P_PHONEMES_TO_SAY_PHONEMES = {
279    "AA0": "AA",
280    "AA1": "1AA",
281    "AA2": "2AA",
282    "AE0": "AE",
283    "AE1": "1AE",
284    "AE2": "2AE",
285    "AH0": "AAh",
286    "AH1": "1AAh",
287    "AH2": "2AAh",
288    "AO0": "AO",
289    "AO1": "1AO",
290    "AO2": "2AO",
291    "AW0": "AW",
292    "AW1": "1AW",
293    "AW2": "2AW",
294    "AY0": "AY",
295    "AY1": "1AY",
296    "AY2": "2AY",
297    "B": "b",
298    "CH": "C",
299    "D": "d",
300    "DH": "T",
301    "EH0": "EH",
302    "EH1": "1EH",
303    "EH2": "2EH",
304    "ER0": "AXr",
305    "ER1": "1AXr",
306    "ER2": "2AXr",
307    "EY0": "EY",
308    "EY1": "1EY",
309    "EY2": "2EY",
310    "F": "f",
311    "G": "g",
312    "HH": "h",
313    "IH0": "IH",
314    "IH1": "1IH",
315    "IH2": "2IH",
316    "IY0": "IY",
317    "IY1": "1IY",
318    "IY2": "2IY",
319    "JH": "J",
320    "K": "k",
321    "L": "l",
322    "M": "m",
323    "N": "n",
324    "NG": "N",
325    "OW0": "OW",
326    "OW1": "1OW",
327    "OW2": "2OW",
328    "OY0": "OY",
329    "OY1": "1OY",
330    "OY2": "2OY",
331    "P": "p",
332    "R": "r",
333    "S": "s",
334    "SH": "S",
335    "T": "t",
336    "TH": "T",
337    "UH0": "UH",
338    "UH1": "1UH",
339    "UH2": "2UH",
340    "UW": "UW",
341    "UW0": "0UW",
342    "UW1": "1UW",
343    "UW2": "2UW",
344    "V": "v",
345    "W": "w",
346    "Y": "y",
347    "Z": "z",
348    "ZH": "Z",
349}
350"""
351A lookup between phonemes in [G2P](https://github.com/Kyubyong/g2p/blob/master/g2p_en/g2p.py#L55)
352and [say](https://developer.apple.com/library/archive/documentation/UserExperience/Conceptual/SpeechSynthesisProgrammingGuide/Phonemes/Phonemes.html#//apple_ref/doc/uid/TP40004365-CH9-SW1).
353"""
354
355SAY_SEGMENT_MAX_DURATION = 1200
356"""
357The number of milliseconds at which the duration
358of an individual phoneme stops changing
359"""
360
361SAY_SEGMENT_SILENCE_DURATION = 1000
362"""
363This is the number of milliseconds to use for an individual segment of silence.
364"""
365
366SAY_EMPHASIS = [75, 100]
367"""
368The midi velocity values above which we add an emphasis to a phoneme.
369"""
370
371SAY_VOLUME_RANGE = [0.0, 1.0]
372"""
373The min and max range of volume levels to map to from midi velocities.
374"""
375
376SAY_VOLUME_LEVEL_PER_NOTE = 2
377"""
378The number of notes per sequence to show volume tags.
379Including too many volume tags in a single command can cause random drop-outs.
380"""
381
382SAY_VOLUME_LEVEL_PER_SEGMENT = 4
383"""
384The number of segments per note to show volume tags.
385Including too many volume tags in a single command can cause random drop-outs.
386"""
387
388DEFAULT_SEQUENCE_NAME = "sy"
389"""
390The sequence name to assign to a process when launched outside the context
391of a sequence.
392"""
393
394DEFAULT_BPM_TIME_SIG = "4/4"
395DEFAULT_BPM_TIME_BPM = 120
396DEFAULT_BPM_TIME_COUNT = 1
397
398SAY_EXTRA_OPTION_DELIMITER = "__"

SAY_EXECUTABLE = '/usr/bin/say'

The path to where the say command exists.

SAY_FILE_FORMATS = {'mp3': 'mp4f', 'aiff': 'AIFF', 'aif': 'AIFF', 'flac': 'FLAC', 'm4a': 'm4af', 'wav': 'WAVE'}

Mapping of file extension to say's --file-format arguments

SAY_TUNED_VOICES = ['Alex', 'Fred', 'Victoria']

say voices which respect [[TUNE]] input

SAY_TUNE_TAG = '[[inpt TUNE]]'

Opening tag for tuned input to say

SAY_COLORS = ['black', 'red', 'green', 'yellow', 'blue', 'magenta', 'cyan', 'white']

Colors which can be used to style the interactive output of say

SAY_ENDIANNESS = ['BE', 'LE']

Values for --data-format to determine the endianness

SAY_DATA_TYPES = ['F', 'I', 'UI']

Values for --data-format to determine the data type

SAY_SAMPLE_SIZES = [8, 16, 24, 32, 64]

Values for --data-format to determine the sample size

SAY_BIG_ENDIAN_ONLY_FILE_FORMATS = ['AIFF', 'm4af', 'FLAC']

List of say's --file-format arguments which accept only BE endianness

SAY_DEFAULT_FLOAT_SAMPLE_SIZE = 32

The default sample size to use when the data_type is F

SAY_VALID_FLOAT_SAMPLE_SIZES = [32, 64]

A list of valid sample sizes when the data_type is F

SAY_MAX_SAMPLE_RATE = 22050

This is the max sample rate, anything above this will generate up-sampled audio

SAY_ALL_PHONEMES = ['AE', 'EY', 'AO', 'AX', 'IY', 'EH', 'IH', 'AY', 'IX', 'AA', 'UW', 'UH', 'UX', 'OW', 'AW', 'OY', 'b', 'C', 'd', 'D', 'f', 'g', 'h', 'J', 'k', 'l', 'm', 'n', 'N', 'p', 'r', 's', 'S', 't', 'T', 'v', 'w', 'y', 'z', 'Z']

A list of all valid phonemes to pass into say.

SAY_PHONEME_CLASSES = ['drone', 'noise', 'note']

Classes of phonemes as defined in scripts/classify_phonemes.py

SAY_PHONEME_VOICE_CLASSES = {'Alex': {'drone': ['AE', 'EY', 'AO', 'AX', 'IY', 'EH', 'IH', 'AY', 'IX', 'UW', 'OW', 'OY', 'h', 'l', 'm', 'n', 'N', 'r', 'Z'], 'noise': ['C', 'd', 'D', 'f', 'J', 'k', 'p', 's', 'S', 't', 'T', 'v', 'z', 'Z'], 'note': ['AA', 'UH', 'UX', 'AW', 'b', 'g', 'w', 'y', 'Z']}, 'Fred': {'drone': ['AE', 'EY', 'AO', 'AX', 'IY', 'EH', 'IH', 'AY', 'IX', 'AA', 'UW', 'UH', 'UX', 'OW', 'AW', 'OY', 'D', 'l', 'm', 'n', 'N', 'r', 'v', 'w', 'y'], 'note': ['AE', 'EY', 'AO', 'AX', 'IY', 'EH', 'IH', 'AY', 'IX', 'AA', 'UW', 'UH', 'UX', 'OW', 'AW', 'OY', 'D', 'l', 'm', 'n', 'N', 'r', 'v', 'w', 'y', 'z', 'Z'], 'noise': ['b', 'C', 'd', 'f', 'g', 'h', 'J', 'k', 'p', 's', 'S', 't', 'T']}, 'Victoria': {'drone': ['AE', 'EY', 'AO', 'AX', 'IY', 'EH', 'IH', 'AY', 'IX', 'AA', 'UW', 'UH', 'UX', 'AW', 'OY', 'l', 'm', 'n', 'N', 'r', 'v', 'w', 'y'], 'noise': ['C', 'd', 'D', 'f', 'h', 'k', 'p', 's', 'S', 't', 'T', 'z', 'Z'], 'note': ['OW', 'b', 'g', 'J']}}

Generated by running scripts/classify_phonemes.py

SAY_PHONEME_SILENCE = '%'

This is the sound of silence

G2P_PHONEMES_TO_SAY_PHONEMES = {'AA0': 'AA', 'AA1': '1AA', 'AA2': '2AA', 'AE0': 'AE', 'AE1': '1AE', 'AE2': '2AE', 'AH0': 'AAh', 'AH1': '1AAh', 'AH2': '2AAh', 'AO0': 'AO', 'AO1': '1AO', 'AO2': '2AO', 'AW0': 'AW', 'AW1': '1AW', 'AW2': '2AW', 'AY0': 'AY', 'AY1': '1AY', 'AY2': '2AY', 'B': 'b', 'CH': 'C', 'D': 'd', 'DH': 'T', 'EH0': 'EH', 'EH1': '1EH', 'EH2': '2EH', 'ER0': 'AXr', 'ER1': '1AXr', 'ER2': '2AXr', 'EY0': 'EY', 'EY1': '1EY', 'EY2': '2EY', 'F': 'f', 'G': 'g', 'HH': 'h', 'IH0': 'IH', 'IH1': '1IH', 'IH2': '2IH', 'IY0': 'IY', 'IY1': '1IY', 'IY2': '2IY', 'JH': 'J', 'K': 'k', 'L': 'l', 'M': 'm', 'N': 'n', 'NG': 'N', 'OW0': 'OW', 'OW1': '1OW', 'OW2': '2OW', 'OY0': 'OY', 'OY1': '1OY', 'OY2': '2OY', 'P': 'p', 'R': 'r', 'S': 's', 'SH': 'S', 'T': 't', 'TH': 'T', 'UH0': 'UH', 'UH1': '1UH', 'UH2': '2UH', 'UW': 'UW', 'UW0': '0UW', 'UW1': '1UW', 'UW2': '2UW', 'V': 'v', 'W': 'w', 'Y': 'y', 'Z': 'z', 'ZH': 'Z'}

A lookup between phonemes in G2P and say.

SAY_SEGMENT_MAX_DURATION = 1200

The number of milliseconds at which the duration of an individual phoneme stops changing

SAY_SEGMENT_SILENCE_DURATION = 1000

This is the number of milliseconds to use for an individual segment of silence.

SAY_EMPHASIS = [75, 100]

The midi velocity values above which we add an emphasis to a phoneme.

SAY_VOLUME_RANGE = [0.0, 1.0]

The min and max range of volume levels to map to from midi velocities.

SAY_VOLUME_LEVEL_PER_NOTE = 2

The number of notes per sequence to show volume tags. Including too many volume tags in a single command can cause random drop-outs.

SAY_VOLUME_LEVEL_PER_SEGMENT = 4

The number of segments per note to show volume tags. Including too many volume tags in a single command can cause random drop-outs.

DEFAULT_SEQUENCE_NAME = 'sy'

The sequence name to assign to a process when launched outside the context of a sequence.