saysynth.lib.say
A Python wrapper for Apple's `say` command.
"""
A Python wrapper for Apple's [`say`](https://ss64.com/osx/say.html) command.
"""

import os
import subprocess
import warnings
from typing import Any, Dict, List, Optional

from ..constants import (SAY_BIG_ENDIAN_ONLY_FILE_FORMATS, SAY_COLORS,
                         SAY_DATA_TYPES, SAY_DEFAULT_FLOAT_SAMPLE_SIZE,
                         SAY_ENDIANNESS, SAY_EXECUTABLE, SAY_FILE_FORMATS,
                         SAY_MAX_SAMPLE_RATE, SAY_SAMPLE_SIZES,
                         SAY_VALID_FLOAT_SAMPLE_SIZES)
from ..core import controller
from ..utils import make_tempfile


def _gen_data_format_arg(
    file_format: str,
    endianness: str,
    data_type: str,
    sample_size: int,
    sample_rate: int,
) -> str:
    """
    Generate a string to pass to --data-format

    Args:
        file_format: The `say` file format name the audio is written in.
        endianness: `LE` (little endian) or `BE` (big endian).
        data_type: `F` (float), `I` (integer) or `UI` (unsigned integer).
        sample_size: The sample size; must be one of `SAY_SAMPLE_SIZES`.
        sample_rate: The sample rate. Values below 1000 are multiplied by
            1000 and values above `SAY_MAX_SAMPLE_RATE` are clamped to it.

    Returns:
        A string of the form `{endianness}{data_type}{sample_size}@{sample_rate}`.

    Raises:
        ValueError: If `endianness`, `data_type` or `sample_size` is invalid.
    """
    if endianness not in SAY_ENDIANNESS:
        raise ValueError(
            "Invalid `endianness`. Choose from: LE (little endian) or BE (big endian)"
        )
    if data_type not in SAY_DATA_TYPES:
        raise ValueError(
            "Invalid `data_type`. Choose from: F (float), I (integer), UI (unsigned integer)"
        )
    if sample_size not in SAY_SAMPLE_SIZES:
        # str() each choice: str.join raises TypeError on non-string items.
        choices = ", ".join(str(size) for size in SAY_SAMPLE_SIZES)
        raise ValueError(f"Invalid `sample_size`. Choose from: {choices}")

    # allow passing sample rate as small number (eg: 24 -> 24000)
    if sample_rate < 1000:
        sample_rate *= 1000

    # don't allow a sample rate greater than the maximum
    if sample_rate > SAY_MAX_SAMPLE_RATE:
        sample_rate = SAY_MAX_SAMPLE_RATE

    # big endian-only formats: only warn/override when the caller did not
    # already ask for big endian (the check must look at `endianness`,
    # not at `file_format`).
    if file_format in SAY_BIG_ENDIAN_ONLY_FILE_FORMATS and endianness != "BE":
        msg = f"file_format '{file_format}' only accepts an endianness of 'BE'"
        warnings.warn(msg, SyntaxWarning)
        endianness = "BE"

    # check sample size by data_type
    if data_type == "F" and sample_size not in SAY_VALID_FLOAT_SAMPLE_SIZES:
        msg = f"data_type 'F' only accepts sample_sizes of '32' and '64', setting '{sample_size}' to '{SAY_DEFAULT_FLOAT_SAMPLE_SIZE}'"
        warnings.warn(msg, SyntaxWarning)
        sample_size = SAY_DEFAULT_FLOAT_SAMPLE_SIZE

    return f"{endianness}{data_type}{sample_size}@{int(sample_rate)}"


def _gen_interactive_arg(
    text_color: Optional[str] = "white", bg_color: Optional[str] = "black"
) -> str:
    """
    Generate a string to pass to --interactive

    Args:
        text_color: Foreground color name; falls back to "white" when empty.
        bg_color: Background color name; falls back to "black" when empty.

    Returns:
        A string of the form `--interactive={text_color}/{bg_color}`.

    Raises:
        ValueError: If either color is not in `SAY_COLORS`.
    """
    # `cmd` forwards None for colors the caller did not set, which bypasses
    # the signature defaults; apply them here instead of rejecting None.
    text_color = text_color or "white"
    bg_color = bg_color or "black"
    if text_color not in SAY_COLORS:
        raise ValueError(
            f'Invalid `text_color`, choose from: {", ".join(SAY_COLORS)}'
        )
    if bg_color not in SAY_COLORS:
        raise ValueError(
            f'Invalid `bg_color`, choose from: {", ".join(SAY_COLORS)}'
        )
    return f"--interactive={text_color}/{bg_color}"


def cmd(
    input_text: Optional[str] = None,
    voice: Optional[str] = None,
    rate: Optional[int] = None,
    input_file: Optional[str] = None,
    audio_output_file: Optional[str] = None,
    service_name: Optional[str] = None,
    network_send: Optional[str] = None,
    audio_device: Optional[str] = None,
    stereo: bool = False,  # whether or not
    endianness: str = "LE",  # LE/BE
    data_type: str = "I",  # F/I/UI
    sample_size: Optional[int] = 8,
    sample_rate: Optional[int] = 22050,
    quality: int = 127,
    progress: bool = False,
    interactive: bool = False,
    text_color: Optional[str] = None,
    bg_color: Optional[str] = None,
    executable: str = SAY_EXECUTABLE,
    **kwargs,
) -> List[str]:
    """
    A python wrapper around the say command.

    Args:
        input_text: The text to speak
        voice: Specify the voice to be used. Default is the voice selected in System Preferences. To obtain a list of voices installed in the system, specify "?" as the voice name.
        rate: Speech rate to be used, in words per minute.
        input_file: Specify a file to be spoken. Only used when `input_text` is empty.
        audio_output_file: Specify the path for an audio file to be written. AIFF is the default and should be supported for most voices, but some
            voices support many more file formats.
        service_name: Specify a service name (default "AUNetSend")
        network_send: Specify an IP and port to be used for redirecting the speech output through AUNetSend.
        audio_device: Specify, by ID or name prefix, an audio device to be used to play the audio. To obtain a list of audio output devices, specify "?" as the device name.
        stereo: Whether or not to output a stereo signal
        endianness: Byte order of the audio written to `audio_output_file`: "LE" (little endian) or "BE" (big endian).
        data_type: Sample data type of the audio written to `audio_output_file`: "F" (float), "I" (integer) or "UI" (unsigned integer).
        sample_size: One of 8, 16, 24, 32, 64.
        sample_rate: Sample rate of the audio written to `audio_output_file`. Values below 1000 are multiplied by 1000 (eg: 24 -> 24000) and values above the maximum are clamped to it.
        quality: The audio converter quality level between 0 (lowest) and 127 (highest).
        progress: Display a progress meter during synthesis.
        interactive: Print the text line by line during synthesis, highlighting words as they are spoken. Markup can be one of:
            * A terminfo capability as described in terminfo(5), e.g. bold, smul, setaf 1.
            * A color name, one of black, red, green, yellow, blue, magenta, cyan, or white.
            * A foreground and background color from the above list, separated by a slash, e.g. green/black. If the foreground color is omitted, only the background color is set.
            * If markup is not specified, it defaults to smso, i.e. reverse video.
            * If the input is a TTY, text is spoken line by line, and the output file, if specified, will only contain audio for the last line of the input. Otherwise, text is spoken all at once.
        text_color: A color name, one of black, red, green, yellow, blue, magenta, cyan, or white.
        bg_color: A color name, one of black, red, green, yellow, blue, magenta, cyan, or white.
        executable: The path to the `say` executable (default '/usr/bin/say')
        **kwargs: Accepted and ignored.

    Returns:
        The command as a list of strings, suitable for `subprocess.Popen`.

    Raises:
        ValueError: If neither `input_text` nor `input_file` is given, if
            `input_file` does not exist, if `quality` is out of range, if the
            extension of `audio_output_file` is unsupported, or if a data
            format or color option is invalid.
    """  # noqa: E501
    if not input_text and not input_file:
        raise ValueError("Must provide `input_text` or `input_file`")

    # verify that input file exists
    if input_file and not os.path.exists(input_file):
        raise ValueError(f"`input_file`: {input_file} does not exist!")

    # verify quality
    if quality < 0 or quality > 127:
        raise ValueError("`quality` must be between 0 and 127")

    # construct base command
    command: List[Any] = [executable]
    if input_text:
        command.append(input_text)
    elif input_file:
        command.extend(["-f", input_file])
    if voice:
        command.extend(["-v", voice])
    if rate:
        command.extend(["-r", rate])

    # NOTE(review): the listing this was recovered from had lost its
    # indentation; the nesting of `if stereo` / `else` below follows the
    # "if output file is not specified" comment -- confirm against the repo.
    if audio_output_file:
        # verify file_format:
        extension = audio_output_file.lower().split(".")[-1]
        if extension not in SAY_FILE_FORMATS:
            raise ValueError(
                f"Invalid extension: '.{extension}'. Choose from: {', '.join(SAY_FILE_FORMATS.keys())}"
            )
        file_format = SAY_FILE_FORMATS.get(extension)
        command.extend(["--file-format", file_format])
        command.extend(["-o", audio_output_file])
        data_format = _gen_data_format_arg(
            file_format, endianness, data_type, sample_size, sample_rate
        )
        command.extend(["--data-format", data_format])

        if stereo:
            command.append("--channels=2")
    else:
        command.append(f"--quality={quality}")
        # handle network output if output file is not specified
        if service_name:
            command.extend(["-n", service_name])
        if network_send:
            # append, not extend: extending with a str adds one list item
            # per character.
            command.append(f"--network-send={network_send}")
        if audio_device:
            command.extend(["-a", audio_device])

    # progress bar
    if progress:
        command.append("--progress")

    # interactivity
    if interactive:
        command.append(_gen_interactive_arg(text_color, bg_color))

    # `rate` may be an int; the process API needs strings
    return [str(arg) for arg in command]


def run(args: Optional[List] = None, **kwargs) -> None:
    """
    Execute a command given a list of arguments outputted by `cmd`
    or by supplying the kwargs that `cmd` accepts

    Args:
        args: A list of args generated by `cmd`
        **kwargs: Options for `cmd` (used only when `args` is empty), plus:
            wait: Block until the process exits (default False).
            parent_pid: The pid to register the process under
                (default: the current process id).
            parent_pid_file: Passed through to `controller.add_child_pid`.
    """
    wait_for_process = kwargs.pop("wait", False)
    # Pop these before branching on `args`: they are needed to register the
    # child below even when a prebuilt `args` list is supplied.
    parent_pid = kwargs.pop("parent_pid", os.getpid())
    parent_pid_file = kwargs.pop("parent_pid_file", None)
    if not args:
        # write text as input file
        if kwargs.get("input_text") and not kwargs.get("input_file"):
            text = kwargs.pop("input_text")
            # override text with tempfile
            # NOTE(review): the temp file is not removed here; presumably it
            # is cleaned up elsewhere -- confirm.
            text_path = make_tempfile()
            with open(text_path, "w") as f:
                f.write(text)
            kwargs["input_file"] = text_path
        args = cmd(**kwargs)
    try:
        process = subprocess.Popen(args, stdout=subprocess.PIPE)
        # register each process with the parent pid.
        controller.add_child_pid(process.pid, parent_pid, parent_pid_file)
        if wait_for_process:
            # communicate() drains the stdout pipe while waiting; a bare
            # wait() can deadlock once the child fills the pipe buffer.
            process.communicate()
    except KeyboardInterrupt:
        pass


def _run_spawn(kwargs: Dict[str, Any]) -> None:
    """
    Utility for passing kwargs into `run` within `spawn`
    """
    return run(**kwargs)


def spawn(commands: List[Dict[str, Any]]) -> None:
    """
    Spawn multiple say processes by calling `run` once per entry.

    Args:
        commands: A list of dicts of keyword arguments accepted by `run`.
            Entries without `wait=True` are started without waiting for
            the previous process to exit.
    """
    for command in commands:
        _run_spawn(command)
def
cmd( input_text: Optional[str] = None, voice: Optional[str] = None, rate: Optional[int] = None, input_file: Optional[str] = None, audio_output_file: Optional[str] = None, service_name: Optional[str] = None, network_send: Optional[str] = None, audio_device: Optional[str] = None, stereo: bool = False, endianness: str = 'LE', data_type: str = 'I', sample_size: Optional[int] = 8, sample_rate: Optional[int] = 22050, quality: int = 127, progress: bool = False, interactive: bool = False, text_color: Optional[str] = None, bg_color: Optional[str] = None, executable: str = '/usr/bin/say', **kwargs) -> List[str]:
def cmd(
    input_text: Optional[str] = None,
    voice: Optional[str] = None,
    rate: Optional[int] = None,
    input_file: Optional[str] = None,
    audio_output_file: Optional[str] = None,
    service_name: Optional[str] = None,
    network_send: Optional[str] = None,
    audio_device: Optional[str] = None,
    stereo: bool = False,  # whether or not
    endianness: str = "LE",  # LE/BE
    data_type: str = "I",  # F/I/UI
    sample_size: Optional[int] = 8,
    sample_rate: Optional[int] = 22050,
    quality: int = 127,
    progress: bool = False,
    interactive: bool = False,
    text_color: Optional[str] = None,
    bg_color: Optional[str] = None,
    executable: str = SAY_EXECUTABLE,
    **kwargs,
) -> List[str]:
    """
    A python wrapper around the say command.

    Args:
        input_text: The text to speak
        voice: Specify the voice to be used. Default is the voice selected in System Preferences. To obtain a list of voices installed in the system, specify "?" as the voice name.
        rate: Speech rate to be used, in words per minute.
        input_file: Specify a file to be spoken. Only used when `input_text` is empty.
        audio_output_file: Specify the path for an audio file to be written. AIFF is the default and should be supported for most voices, but some
            voices support many more file formats.
        service_name: Specify a service name (default "AUNetSend")
        network_send: Specify an IP and port to be used for redirecting the speech output through AUNetSend.
        audio_device: Specify, by ID or name prefix, an audio device to be used to play the audio. To obtain a list of audio output devices, specify "?" as the device name.
        stereo: Whether or not to output a stereo signal
        endianness: Byte order of the audio written to `audio_output_file`: "LE" (little endian) or "BE" (big endian).
        data_type: Sample data type of the audio written to `audio_output_file`: "F" (float), "I" (integer) or "UI" (unsigned integer).
        sample_size: One of 8, 16, 24, 32, 64.
        sample_rate: Sample rate of the audio written to `audio_output_file`, forwarded to the data-format helper.
        quality: The audio converter quality level between 0 (lowest) and 127 (highest).
        progress: Display a progress meter during synthesis.
        interactive: Print the text line by line during synthesis, highlighting words as they are spoken. Markup can be one of:
            * A terminfo capability as described in terminfo(5), e.g. bold, smul, setaf 1.
            * A color name, one of black, red, green, yellow, blue, magenta, cyan, or white.
            * A foreground and background color from the above list, separated by a slash, e.g. green/black. If the foreground color is omitted, only the background color is set.
            * If markup is not specified, it defaults to smso, i.e. reverse video.
            * If the input is a TTY, text is spoken line by line, and the output file, if specified, will only contain audio for the last line of the input. Otherwise, text is spoken all at once.
        text_color: A color name, one of black, red, green, yellow, blue, magenta, cyan, or white. Falls back to "white" when not set.
        bg_color: A color name, one of black, red, green, yellow, blue, magenta, cyan, or white. Falls back to "black" when not set.
        executable: The path to the `say` executable (default '/usr/bin/say')
        **kwargs: Accepted and ignored.

    Returns:
        The command as a list of strings, suitable for `subprocess.Popen`.

    Raises:
        ValueError: If neither `input_text` nor `input_file` is given, if
            `input_file` does not exist, if `quality` is out of range, if the
            extension of `audio_output_file` is unsupported, or if a data
            format or color option is invalid.
    """  # noqa: E501
    if not input_text and not input_file:
        raise ValueError("Must provide `input_text` or `input_file`")

    # verify that input file exists
    if input_file and not os.path.exists(input_file):
        raise ValueError(f"`input_file`: {input_file} does not exist!")

    # verify quality
    if quality < 0 or quality > 127:
        raise ValueError("`quality` must be between 0 and 127")

    # construct base command
    command = [executable]
    if input_text:
        command.append(input_text)
    elif input_file:
        command.extend(["-f", input_file])
    if voice:
        command.extend(["-v", voice])
    if rate:
        command.extend(["-r", rate])

    # NOTE(review): the listing this was recovered from had lost its
    # indentation; the nesting of `if stereo` / `else` below follows the
    # "if output file is not specified" comment -- confirm against the repo.
    if audio_output_file:
        # verify file_format:
        extension = audio_output_file.lower().split(".")[-1]
        if extension not in SAY_FILE_FORMATS:
            raise ValueError(
                f"Invalid extension: '.{extension}'. Choose from: {', '.join(SAY_FILE_FORMATS.keys())}"
            )
        file_format = SAY_FILE_FORMATS.get(extension)
        command.extend(["--file-format", file_format])
        command.extend(["-o", audio_output_file])
        data_format = _gen_data_format_arg(
            file_format, endianness, data_type, sample_size, sample_rate
        )
        command.extend(["--data-format", data_format])

        if stereo:
            command.append("--channels=2")
    else:
        command.append(f"--quality={quality}")
        # handle network output if output file is not specified
        if service_name:
            command.extend(["-n", service_name])
        if network_send:
            # append, not extend: extending with a str adds one list item
            # per character.
            command.append(f"--network-send={network_send}")
        if audio_device:
            command.extend(["-a", audio_device])

    # progress bar
    if progress:
        command.append("--progress")

    # interactivity
    if interactive:
        # Unset colors arrive here as None; substitute the white-on-black
        # defaults so `interactive=True` works without explicit colors.
        command.append(
            _gen_interactive_arg(text_color or "white", bg_color or "black")
        )

    # `rate` may be an int; the process API needs strings
    return [str(arg) for arg in command]
A python wrapper around the say command.
Arguments:
- input_text: The text to speak
- voice: Specify the voice to be used. Default is the voice selected in System Preferences. To obtain a list of voices installed in the system, specify "?" as the voice name.
- rate: Speech rate to be used, in words per minute.
- input_file: Specify a file to be spoken. If file is - or neither this parameter nor a message is specified, read from standard input.
- audio_output_file: Specify the path for an audio file to be written. AIFF is the default and should be supported for most voices, but some voices support many more file formats.
- service_name: Specify a service name (default "AUNetSend")
- network_send: Specify an IP and port to be used for redirecting the speech output through AUNetSend.
- audio_device: Specify, by ID or name prefix, an audio device to be used to play the audio. To obtain a list of audio output devices, specify "?" as the device name.
- stereo: Whether or not to output a stereo signal
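- endianness: Byte order of the audio written to audio_output_file: "LE" (little endian) or "BE" (big endian). Defaults to "LE".
- data_type: Sample data type of the audio written to audio_output_file: "F" (float), "I" (integer), or "UI" (unsigned integer). Defaults to "I".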
- sample_size: One of 8, 16, 24, 32, 64.
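- sample_rate: Sample rate of the audio written to audio_output_file. Values below 1000 are multiplied by 1000 (e.g. 24 -> 24000), and values above the supported maximum are clamped to it. Defaults to 22050.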
- quality: The audio converter quality level between 0 (lowest) and 127 (highest).
- progress: Display a progress meter during synthesis.
- interactive: Print the text line by line during synthesis, highlighting words as they are spoken. Markup can be one of:
- A terminfo capability as described in terminfo(5), e.g. bold, smul, setaf 1.
- A color name, one of black, red, green, yellow, blue, magenta, cyan, or white.
- A foreground and background color from the above list, separated by a slash, e.g. green/black. If the foreground color is omitted, only the background color is set.
- If markup is not specified, it defaults to smso, i.e. reverse video.
- If the input is a TTY, text is spoken line by line, and the output file, if specified, will only contain audio for the last line of the input. Otherwise, text is spoken all at once.
- text_color: A color name, one of black, red, green, yellow, blue, magenta, cyan, or white.
- bg_color: A color name, one of black, red, green, yellow, blue, magenta, cyan, or white.
- executable: The path to the `say` executable (default '/usr/bin/say')
def
run(args: Optional[List] = None, **kwargs) -> None:
def run(args: Optional[List] = None, **kwargs) -> None:
    """
    Execute a command given a list of arguments outputted by `cmd`
    or by supplying the kwargs that `cmd` accepts

    Args:
        args: A list of args generated by `cmd`
        **kwargs: Options for `cmd` (used only when `args` is empty), plus:
            wait: Block until the process exits (default False).
            parent_pid: The pid to register the process under
                (default: the current process id).
            parent_pid_file: Passed through to `controller.add_child_pid`.
    """
    wait_for_process = kwargs.pop("wait", False)
    # Pop these before branching on `args`: they are needed to register the
    # child below even when a prebuilt `args` list is supplied (previously
    # that path hit a NameError because they were only bound in the branch).
    parent_pid = kwargs.pop("parent_pid", os.getpid())
    parent_pid_file = kwargs.pop("parent_pid_file", None)
    if not args:
        # write text as input file
        if kwargs.get("input_text") and not kwargs.get("input_file"):
            text = kwargs.pop("input_text")
            # override text with tempfile
            # NOTE(review): the temp file is not removed here; presumably it
            # is cleaned up elsewhere -- confirm.
            text_path = make_tempfile()
            with open(text_path, "w") as f:
                f.write(text)
            kwargs["input_file"] = text_path
        args = cmd(**kwargs)
    try:
        process = subprocess.Popen(args, stdout=subprocess.PIPE)
        # register each process with the parent pid.
        controller.add_child_pid(process.pid, parent_pid, parent_pid_file)
        if wait_for_process:
            # communicate() drains the stdout pipe while waiting; a bare
            # wait() can deadlock once the child fills the pipe buffer.
            process.communicate()
    except KeyboardInterrupt:
        pass
def
spawn(commands: List[Dict[str, Any]]) -> None: