saysynth.constants
Constants for use throughout saysynth
"""
Constants for use throughout `saysynth`
"""
import os

SAY_EXECUTABLE = os.getenv("SAYSYNTH_SAY_EXECUTABLE", "/usr/bin/say")
"""The path to where the `say` command exists (overridable via the SAYSYNTH_SAY_EXECUTABLE environment variable)."""

SAY_TUNED_VOICES = ["Alex", "Fred", "Victoria"]
"""`say` voices which respect [[TUNE]] input"""

SAY_TUNE_TAG = "[[inpt TUNE]]"
"""Opening tag for tuned input to `say`"""

SAY_COLORS = [
    "black",
    "red",
    "green",
    "yellow",
    "blue",
    "magenta",
    "cyan",
    "white",
]
"""Colors which can be used to style the interactive output of `say`"""

SAY_ENDIANNESS = ["BE", "LE"]
"""Values for --data-format to determine the endianness"""

SAY_DATA_TYPES = ["F", "I", "UI"]
"""Values for --data-format to determine the data type"""

SAY_SAMPLE_SIZES = [8, 16, 24, 32, 64]
"""Values for --data-format to determine the sample size"""

# NOTE: this module previously also assigned SAY_FILE_FORMATS = ["wav", "aiff"]
# near the top of the file. That list was unconditionally overwritten by the
# mapping below before anything could read it, so the dead assignment was
# dropped; the value importers see is unchanged.
SAY_FILE_FORMATS = {
    "mp3": "mp4f",
    "aiff": "AIFF",
    "aif": "AIFF",
    "flac": "FLAC",
    "m4a": "m4af",
    "wav": "WAVE",
}
"""Mapping of file extension to say's --file-format arguments"""

SAY_BIG_ENDIAN_ONLY_FILE_FORMATS = ["AIFF", "m4af", "FLAC"]
"""List of say's --file-format arguments which accept only BE endianness"""

SAY_DEFAULT_FLOAT_SAMPLE_SIZE = 32
"""The default sample size to use when the data_type is F"""

SAY_VALID_FLOAT_SAMPLE_SIZES = [32, 64]
"""A list of valid sample sizes when the data_type is F"""

SAY_MAX_SAMPLE_RATE = 22050
"""
This is the max sample rate,
anything above this [will generate up-sampled audio](https://stackoverflow.com/questions/9729153/error-on-say-when-output-format-is-wave)
"""

SAY_ALL_PHONEMES = [
    "AE",
    "EY",
    "AO",
    "AX",
    "IY",
    "EH",
    "IH",
    "AY",
    "IX",
    "AA",
    "UW",
    "UH",
    "UX",
    "OW",
    "AW",
    "OY",
    "b",
    "C",
    "d",
    "D",
    "f",
    "g",
    "h",
    "J",
    "k",
    "l",
    "m",
    "n",
    "N",
    "p",
    "r",
    "s",
    "S",
    "t",
    "T",
    "v",
    "w",
    "y",
    "z",
    "Z",
]
"""
[A list of all valid phonemes to pass into `say`](https://developer.apple.com/library/archive/documentation/UserExperience/Conceptual/SpeechSynthesisProgrammingGuide/Phonemes/Phonemes.html#//apple_ref/doc/uid/TP40004365-CH9-SW1).
"""

SAY_PHONEME_CLASSES = ["drone", "noise", "note"]
""" Classes of phonemes as defined in scripts/classify_phonemes.py """

# NOTE(review): for "Alex", the phoneme "Z" is listed under all three classes
# ("drone", "noise" and "note"). The table is described as generated output,
# so it is reproduced as-is — confirm whether the overlap is intended.
SAY_PHONEME_VOICE_CLASSES = {
    "Alex": {
        "drone": [
            "AE",
            "EY",
            "AO",
            "AX",
            "IY",
            "EH",
            "IH",
            "AY",
            "IX",
            "UW",
            "OW",
            "OY",
            "h",
            "l",
            "m",
            "n",
            "N",
            "r",
            "Z",
        ],
        "noise": [
            "C",
            "d",
            "D",
            "f",
            "J",
            "k",
            "p",
            "s",
            "S",
            "t",
            "T",
            "v",
            "z",
            "Z",
        ],
        "note": ["AA", "UH", "UX", "AW", "b", "g", "w", "y", "Z"],
    },
    "Fred": {
        "drone": [
            "AE",
            "EY",
            "AO",
            "AX",
            "IY",
            "EH",
            "IH",
            "AY",
            "IX",
            "AA",
            "UW",
            "UH",
            "UX",
            "OW",
            "AW",
            "OY",
            "D",
            "l",
            "m",
            "n",
            "N",
            "r",
            "v",
            "w",
            "y",
            # "z",
            # "Z",
        ],
        "note": [  # all of Fred's drones work as notes.
            "AE",
            "EY",
            "AO",
            "AX",
            "IY",
            "EH",
            "IH",
            "AY",
            "IX",
            "AA",
            "UW",
            "UH",
            "UX",
            "OW",
            "AW",
            "OY",
            "D",
            "l",
            "m",
            "n",
            "N",
            "r",
            "v",
            "w",
            "y",
            "z",
            "Z",
        ],
        "noise": [
            "b",
            "C",
            "d",
            "f",
            "g",
            "h",
            "J",
            "k",
            "p",
            "s",
            "S",
            "t",
            "T",
        ],
    },
    "Victoria": {
        "drone": [
            "AE",
            "EY",
            "AO",
            "AX",
            "IY",
            "EH",
            "IH",
            "AY",
            "IX",
            "AA",
            "UW",
            "UH",
            "UX",
            "AW",
            "OY",
            "l",
            "m",
            "n",
            "N",
            "r",
            "v",
            "w",
            "y",
        ],
        "noise": [
            "C",
            "d",
            "D",
            "f",
            "h",
            "k",
            "p",
            "s",
            "S",
            "t",
            "T",
            "z",
            "Z",
        ],
        "note": ["OW", "b", "g", "J"],
    },
}
""" Generated by running scripts/classify_phonemes.py """

SAY_PHONEME_SILENCE = "%"
""" This is the sound of silence """

# NOTE(review): "DH" and "TH" both map to "T" below, while "D" is also a valid
# `say` phoneme (see SAY_ALL_PHONEMES) — confirm "DH" should not map to "D".
# NOTE(review): "UW0" maps to "0UW", whereas every other 0-stress entry maps to
# the bare phoneme, and an unstressed "UW" key exists too — confirm intended.
G2P_PHONEMES_TO_SAY_PHONEMES = {
    "AA0": "AA",
    "AA1": "1AA",
    "AA2": "2AA",
    "AE0": "AE",
    "AE1": "1AE",
    "AE2": "2AE",
    "AH0": "AAh",
    "AH1": "1AAh",
    "AH2": "2AAh",
    "AO0": "AO",
    "AO1": "1AO",
    "AO2": "2AO",
    "AW0": "AW",
    "AW1": "1AW",
    "AW2": "2AW",
    "AY0": "AY",
    "AY1": "1AY",
    "AY2": "2AY",
    "B": "b",
    "CH": "C",
    "D": "d",
    "DH": "T",
    "EH0": "EH",
    "EH1": "1EH",
    "EH2": "2EH",
    "ER0": "AXr",
    "ER1": "1AXr",
    "ER2": "2AXr",
    "EY0": "EY",
    "EY1": "1EY",
    "EY2": "2EY",
    "F": "f",
    "G": "g",
    "HH": "h",
    "IH0": "IH",
    "IH1": "1IH",
    "IH2": "2IH",
    "IY0": "IY",
    "IY1": "1IY",
    "IY2": "2IY",
    "JH": "J",
    "K": "k",
    "L": "l",
    "M": "m",
    "N": "n",
    "NG": "N",
    "OW0": "OW",
    "OW1": "1OW",
    "OW2": "2OW",
    "OY0": "OY",
    "OY1": "1OY",
    "OY2": "2OY",
    "P": "p",
    "R": "r",
    "S": "s",
    "SH": "S",
    "T": "t",
    "TH": "T",
    "UH0": "UH",
    "UH1": "1UH",
    "UH2": "2UH",
    "UW": "UW",
    "UW0": "0UW",
    "UW1": "1UW",
    "UW2": "2UW",
    "V": "v",
    "W": "w",
    "Y": "y",
    "Z": "z",
    "ZH": "Z",
}
"""
A lookup between phonemes in [G2P](https://github.com/Kyubyong/g2p/blob/master/g2p_en/g2p.py#L55)
and [say](https://developer.apple.com/library/archive/documentation/UserExperience/Conceptual/SpeechSynthesisProgrammingGuide/Phonemes/Phonemes.html#//apple_ref/doc/uid/TP40004365-CH9-SW1).
"""

SAY_SEGMENT_MAX_DURATION = 1200
"""
The number of milliseconds at which the duration
of an individual phoneme stops changing
"""

SAY_SEGMENT_SILENCE_DURATION = 1000
"""
This is the number of milliseconds to use for an individual segment of silence.
"""

SAY_EMPHASIS = [75, 100]
"""
The midi velocity values above which we add an emphasis to a phoneme.
"""

SAY_VOLUME_RANGE = [0.0, 1.0]
"""
The min and max range of volume levels to map to from midi velocities.
"""

SAY_VOLUME_LEVEL_PER_NOTE = 2
"""
The number of notes per sequence to show volume tags.
Including too many volume tags in a single command can cause random drop-outs.
"""

SAY_VOLUME_LEVEL_PER_SEGMENT = 4
"""
The number of segments per note to show volume tags.
Including too many volume tags in a single command can cause random drop-outs.
"""

DEFAULT_SEQUENCE_NAME = "sy"
"""
The sequence name to assign to a process when launched outside the context
of a sequence.
"""

# Defaults for BPM-based timing; presumably the time signature, tempo and
# count used when none is supplied — TODO confirm against callers.
DEFAULT_BPM_TIME_SIG = "4/4"
DEFAULT_BPM_TIME_BPM = 120
DEFAULT_BPM_TIME_COUNT = 1

# NOTE(review): the name suggests this separates extra option names/values
# passed through to `say`; its usage is not visible in this module.
SAY_EXTRA_OPTION_DELIMITER = "__"
The path to where the `say` command exists.
Mapping of file extension to say's --file-format arguments
`say` voices which respect [[TUNE]] input
Opening tag for tuned input to say
Colors which can be used to style the interactive output of say
Values for --data-format to determine the endianness
Values for --data-format to determine the data type
Values for --data-format to determine the sample size
List of say's --file-format arguments which accept only BE endianness
The default sample size to use when the data_type is F
A list of valid sample sizes when the data_type is F
This is the max sample rate, anything above this will generate up-sampled audio
Classes of phonemes as defined in scripts/classify_phonemes.py
Generated by running scripts/classify_phonemes.py
This is the sound of silence
The number of milliseconds at which the duration of an individual phoneme stops changing
This is the number of milliseconds to use for an individual segment of silence.
The midi velocity values above which we add an emphasis to a phoneme.
The min and max range of volume levels to map to from midi velocities.
The number of notes per sequence to show volume tags. Including too many volume tags in a single command can cause random drop-outs.
The number of segments per note to show volume tags. Including too many volume tags in a single command can cause random drop-outs.
The sequence name to assign to a process when launched outside the context of a sequence.