saysynth.lib.say

A Python-wrapper for Apple's say command.

View Source

  1"""
  2A Python-wrapper for Apple's [`say`](https://ss64.com/osx/say.html) command.
  3"""
  4
  5import os
  6import subprocess
  7import warnings
  8from typing import Any, Dict, List, Optional
  9
 10from ..constants import (SAY_BIG_ENDIAN_ONLY_FILE_FORMATS, SAY_COLORS,
 11                         SAY_DATA_TYPES, SAY_DEFAULT_FLOAT_SAMPLE_SIZE,
 12                         SAY_ENDIANNESS, SAY_EXECUTABLE, SAY_FILE_FORMATS,
 13                         SAY_MAX_SAMPLE_RATE, SAY_SAMPLE_SIZES,
 14                         SAY_VALID_FLOAT_SAMPLE_SIZES)
 15from ..core import controller
 16from ..utils import make_tempfile
 17
 18
 19def _gen_data_format_arg(
 20    file_format: str,
 21    endianness: str,
 22    data_type: str,
 23    sample_size: int,
 24    sample_rate: int,
 25):
 26    """
 27    Generate a string to pass to --data-format
 28    """
 29    if endianness not in SAY_ENDIANNESS:
 30        raise ValueError(
 31            "Invalid `endianess`. Choose from: LE (little endian) or BE (big endian)"
 32        )
 33    if data_type not in SAY_DATA_TYPES:
 34        raise ValueError(
 35            "Invalid `data_type`. Choose from: F (float), I (integer), UI (unsigned integer)"
 36        )
 37    if sample_size not in SAY_SAMPLE_SIZES:
 38        raise ValueError(
 39            f'Invalid `sample_size`. Choose from: {", ".join(SAY_SAMPLE_SIZES)}'
 40        )
 41
 42    # allow pass passing sample rate as small number (eg: 24 -> 24000)
 43    if sample_rate < 1000:
 44        sample_rate *= 1000
 45
 46    # don't allow a sample rate greater than the maximum
 47    if sample_rate > SAY_MAX_SAMPLE_RATE:
 48        sample_rate = SAY_MAX_SAMPLE_RATE
 49
 50    # big endian-only formats:
 51    if file_format in SAY_BIG_ENDIAN_ONLY_FILE_FORMATS and file_format != "BE":
 52        msg = (
 53            f"file_format '{file_format}' only accepts and endianness of 'BE'"
 54        )
 55        warnings.warn(msg, SyntaxWarning)
 56        endianness = "BE"
 57
 58    # check sample size by data_type
 59    if data_type == "F" and sample_size not in SAY_VALID_FLOAT_SAMPLE_SIZES:
 60        msg = f"data_type 'F' only accepts sample_sizes of '32' and '64', setting '{sample_size}' to '{SAY_DEFAULT_FLOAT_SAMPLE_SIZE}'"
 61        warnings.warn(msg, SyntaxWarning)
 62        sample_size = SAY_DEFAULT_FLOAT_SAMPLE_SIZE
 63
 64    return f"{endianness}{data_type}{sample_size}@{int(sample_rate)}"
 65
 66
 67def _gen_interactive_arg(text_color: str = "white", bg_color: str = "black"):
 68    """
 69    Generate a string to pass to --interactive
 70    """
 71    if bg_color and not text_color:
 72        text_color = (
 73            "white"  # default text color if only background is supplied
 74        )
 75    if text_color not in SAY_COLORS:
 76        raise ValueError(
 77            f'Invalid `text_color`, choose from: {", ".join(SAY_COLORS)}'
 78        )
 79    if bg_color not in SAY_COLORS:
 80        raise ValueError(
 81            f'Invalid `bg_color`, choose from: {", ".join(SAY_COLORS)}'
 82        )
 83    return f"--interactive={text_color}/{bg_color}"
 84
 85
 86def cmd(
 87    input_text: Optional[str] = None,
 88    voice: Optional[str] = None,
 89    rate: Optional[int] = None,
 90    input_file: Optional[str] = None,
 91    audio_output_file: Optional[str] = None,
 92    service_name: Optional[str] = None,
 93    network_send: Optional[str] = None,
 94    audio_device: Optional[str] = None,
 95    stereo: bool = False,  # whether or not
 96    endianness: str = "LE",  # LE/BE
 97    data_type: str = "I",  # F/I/UI
 98    sample_size: Optional[int] = 8,
 99    sample_rate: Optional[int] = 22050,
100    quality: int = 127,
101    progress: bool = False,
102    interactive: bool = False,
103    text_color: Optional[str] = None,
104    bg_color: Optional[str] = None,
105    executable: str = SAY_EXECUTABLE,
106    **kwargs,
107) -> List[str]:
108    """
109    A python wrapper around the say command.
110
111    Args:
112        input_text: The text to speak
113        voice: Specify the voice to be used. Default is the voice selected in System Preferences. To obtain a list of voices installed in the system, specify "?" as the voice name.
114        rate:  Speech rate to be used, in words per minute.
115        input_file:  Specify a file to be spoken. If file is - or neither this parameter nor a message is specified, read from standard input.
116        audio_output_file: Specify the path for an audio file to be written. AIFF is the default and should be supported for most voices, but some
117        voices support many more file formats.
118        service_name:  Specify a service name (default "AUNetSend")
119        network_send: Specify an IP and port to be used for redirecting the speech output through AUNetSend.
120        audio_device: Specify, by ID or name prefix, an audio device to be used to play the audio. To obtain a list of audio output devices, specify "?" as the device name.
121        stereo: Whether or not to output a stereo signal
122        endianness: str = "LE",  # LE/BE
123        data_type: str = "F",  # F/I/U
124        sample_size: One of 8, 16, 24, 32, 64.
125        sample_rate: Optional[int] = 22050,
126        quality: The audio converter quality level between 0 (lowest) and 127 (highest).
127        progress: Display a progress meter during synthesis.
128        interactive: Print the text line by line during synthesis, highlighting words as they are spoken. Markup can be one of:
129            * A terminfo capability as described in terminfo(5), e.g. bold, smul, setaf 1.:
130            * A color name, one of black, red, green, yellow, blue, magenta, cyan, or white.:
131            * A foreground and background color from the above list, separated by a slash, e.g. green/black. If the foreground color is omitted, only the background color is set.:
132            * If markup is not specified, it defaults to smso, i.e. reverse video.:
133            * If the input is a TTY, text is spoken line by line, and the output file, if specified, will only contain audio for the last line of the input.  Otherwise, text is spoken all at once.
134        text_color: A color name, one of black, red, green, yellow, blue, magenta, cyan, or white.
135        bg_color: A color name, one of black, red, green, yellow, blue, magenta, cyan, or white.
136        executable: The path to the `say` executable (default '/usr/bin/say')
137
138    """  # noqa: E501
139    if not input_text and not input_file:
140        raise ValueError("Must provide `input_text` or `input_file`")
141
142    # override text if input file is provided
143    if input_file:
144        # verify that input file exists
145        if not os.path.exists(input_file):
146            raise ValueError("`input_file`: {input_file} does not exist!")
147
148    # verify quality
149    if quality < 0 or quality > 127:
150        raise ValueError("`quality` must be between 0 and 127")
151
152    # construct base command
153    cmd = [executable]
154    if input_text:
155        cmd.append(input_text)
156    elif input_file:
157        cmd.extend(["-f", input_file])
158    if voice:
159        cmd.extend(["-v", voice])
160    if rate:
161        cmd.extend(["-r", rate])
162
163    if audio_output_file:
164        # verify file_format:
165        extension = audio_output_file.lower().split(".")[-1]
166        if extension not in SAY_FILE_FORMATS:
167            raise ValueError(
168                f"Invalid extension: '.{extension}'. Choose from: {', '.join(SAY_FILE_FORMATS.keys())}"
169            )
170        file_format = SAY_FILE_FORMATS.get(extension)
171        cmd.extend(["--file-format", file_format])
172        cmd.extend(["-o", audio_output_file])
173        data_format = _gen_data_format_arg(
174            file_format, endianness, data_type, sample_size, sample_rate
175        )
176        cmd.extend(["--data-format", data_format])
177
178        if stereo:
179            cmd.append("--channels=2")
180    else:
181        cmd.append(f"--quality={quality}")
182        # handle network output if output file is not specified
183        if service_name:
184            cmd.extend(["-n", service_name])
185        if network_send:
186            cmd.extend(f"--network-send={network_send}")
187        if audio_device:
188            cmd.extend(["-a", audio_device])
189
190    # progress bar
191    if progress:
192        cmd.append("--progress")
193
194    # interactivity
195    if interactive:
196        cmd.append(_gen_interactive_arg(text_color, bg_color))
197    args = [str(a) for a in cmd]
198    # TODO: setup debug logging
199    # msg = f"Executing say command:\n$ {' '.join(args)}"
200    # print(msg)
201    return args
202
203
def run(args: Optional[List] = None, **kwargs) -> None:
    """
    Execute a command given a list of arguments outputted by `cmd`
    or by supplying the kwargs that `cmd` accepts

    Args:
        args: A list of args generated by `cmd`. When empty or None, the
            command is built from `kwargs` instead.
        **kwargs: Keyword arguments forwarded to `cmd`, plus:
            wait: Whether to block until the process exits (default False).
            parent_pid: The pid to register the child process under
                (default: the current process id).
            parent_pid_file: Passed through to
                `controller.add_child_pid` (default None).
    """
    wait_for_process = kwargs.pop("wait", False)
    # Resolve these unconditionally: they are needed to register the child
    # process below even when pre-built `args` are supplied.
    parent_pid = kwargs.pop("parent_pid", os.getpid())
    parent_pid_file = kwargs.pop("parent_pid_file", None)
    if not args:
        # write text as input file
        if kwargs.get("input_text") and not kwargs.get("input_file", None):
            text = kwargs.pop("input_text")
            # override text with tempfile
            text_file = make_tempfile()
            with open(text_file, "w") as f:
                f.write(text)
            kwargs["input_file"] = text_file
        args = cmd(**kwargs)
    try:
        process = subprocess.Popen(args, stdout=subprocess.PIPE)
        # register each process with the parent pid.
        controller.add_child_pid(process.pid, parent_pid, parent_pid_file)
        if wait_for_process:
            process.wait()
    except KeyboardInterrupt:
        # deliberately swallowed: an interrupt ends the call quietly
        pass
234
235
def _run_spawn(kwargs: Dict[str, Any]) -> None:
    """
    Unpack a dictionary of keyword arguments into a call to `run`.

    Exists so that `spawn` can hand over each command as a single value.
    """
    outcome = run(**kwargs)
    return outcome
241
242
def spawn(commands: List[Dict[str, Any]]) -> None:
    """
    Launch one `say` process per entry in `commands`.

    Each entry is passed, in order, to `_run_spawn`, which unpacks it as
    keyword arguments for `run`.

    Args:
        commands: A list of dictionaries of keyword arguments, one per
            process to start.
    """
    pending = iter(commands)
    for run_kwargs in pending:
        _run_spawn(run_kwargs)

def cmd( input_text: Optional[str] = None, voice: Optional[str] = None, rate: Optional[int] = None, input_file: Optional[str] = None, audio_output_file: Optional[str] = None, service_name: Optional[str] = None, network_send: Optional[str] = None, audio_device: Optional[str] = None, stereo: bool = False, endianness: str = 'LE', data_type: str = 'I', sample_size: Optional[int] = 8, sample_rate: Optional[int] = 22050, quality: int = 127, progress: bool = False, interactive: bool = False, text_color: Optional[str] = None, bg_color: Optional[str] = None, executable: str = '/usr/bin/say', **kwargs) -> List[str]: View Source

 87def cmd(
 88    input_text: Optional[str] = None,
 89    voice: Optional[str] = None,
 90    rate: Optional[int] = None,
 91    input_file: Optional[str] = None,
 92    audio_output_file: Optional[str] = None,
 93    service_name: Optional[str] = None,
 94    network_send: Optional[str] = None,
 95    audio_device: Optional[str] = None,
 96    stereo: bool = False,  # whether or not
 97    endianness: str = "LE",  # LE/BE
 98    data_type: str = "I",  # F/I/UI
 99    sample_size: Optional[int] = 8,
100    sample_rate: Optional[int] = 22050,
101    quality: int = 127,
102    progress: bool = False,
103    interactive: bool = False,
104    text_color: Optional[str] = None,
105    bg_color: Optional[str] = None,
106    executable: str = SAY_EXECUTABLE,
107    **kwargs,
108) -> List[str]:
109    """
110    A python wrapper around the say command.
111
112    Args:
113        input_text: The text to speak
114        voice: Specify the voice to be used. Default is the voice selected in System Preferences. To obtain a list of voices installed in the system, specify "?" as the voice name.
115        rate:  Speech rate to be used, in words per minute.
116        input_file:  Specify a file to be spoken. If file is - or neither this parameter nor a message is specified, read from standard input.
117        audio_output_file: Specify the path for an audio file to be written. AIFF is the default and should be supported for most voices, but some
118        voices support many more file formats.
119        service_name:  Specify a service name (default "AUNetSend")
120        network_send: Specify an IP and port to be used for redirecting the speech output through AUNetSend.
121        audio_device: Specify, by ID or name prefix, an audio device to be used to play the audio. To obtain a list of audio output devices, specify "?" as the device name.
122        stereo: Whether or not to output a stereo signal
123        endianness: str = "LE",  # LE/BE
124        data_type: str = "F",  # F/I/U
125        sample_size: One of 8, 16, 24, 32, 64.
126        sample_rate: Optional[int] = 22050,
127        quality: The audio converter quality level between 0 (lowest) and 127 (highest).
128        progress: Display a progress meter during synthesis.
129        interactive: Print the text line by line during synthesis, highlighting words as they are spoken. Markup can be one of:
130            * A terminfo capability as described in terminfo(5), e.g. bold, smul, setaf 1.:
131            * A color name, one of black, red, green, yellow, blue, magenta, cyan, or white.:
132            * A foreground and background color from the above list, separated by a slash, e.g. green/black. If the foreground color is omitted, only the background color is set.:
133            * If markup is not specified, it defaults to smso, i.e. reverse video.:
134            * If the input is a TTY, text is spoken line by line, and the output file, if specified, will only contain audio for the last line of the input.  Otherwise, text is spoken all at once.
135        text_color: A color name, one of black, red, green, yellow, blue, magenta, cyan, or white.
136        bg_color: A color name, one of black, red, green, yellow, blue, magenta, cyan, or white.
137        executable: The path to the `say` executable (default '/usr/bin/say')
138
139    """  # noqa: E501
140    if not input_text and not input_file:
141        raise ValueError("Must provide `input_text` or `input_file`")
142
143    # override text if input file is provided
144    if input_file:
145        # verify that input file exists
146        if not os.path.exists(input_file):
147            raise ValueError("`input_file`: {input_file} does not exist!")
148
149    # verify quality
150    if quality < 0 or quality > 127:
151        raise ValueError("`quality` must be between 0 and 127")
152
153    # construct base command
154    cmd = [executable]
155    if input_text:
156        cmd.append(input_text)
157    elif input_file:
158        cmd.extend(["-f", input_file])
159    if voice:
160        cmd.extend(["-v", voice])
161    if rate:
162        cmd.extend(["-r", rate])
163
164    if audio_output_file:
165        # verify file_format:
166        extension = audio_output_file.lower().split(".")[-1]
167        if extension not in SAY_FILE_FORMATS:
168            raise ValueError(
169                f"Invalid extension: '.{extension}'. Choose from: {', '.join(SAY_FILE_FORMATS.keys())}"
170            )
171        file_format = SAY_FILE_FORMATS.get(extension)
172        cmd.extend(["--file-format", file_format])
173        cmd.extend(["-o", audio_output_file])
174        data_format = _gen_data_format_arg(
175            file_format, endianness, data_type, sample_size, sample_rate
176        )
177        cmd.extend(["--data-format", data_format])
178
179        if stereo:
180            cmd.append("--channels=2")
181    else:
182        cmd.append(f"--quality={quality}")
183        # handle network output if output file is not specified
184        if service_name:
185            cmd.extend(["-n", service_name])
186        if network_send:
187            cmd.extend(f"--network-send={network_send}")
188        if audio_device:
189            cmd.extend(["-a", audio_device])
190
191    # progress bar
192    if progress:
193        cmd.append("--progress")
194
195    # interactivity
196    if interactive:
197        cmd.append(_gen_interactive_arg(text_color, bg_color))
198    args = [str(a) for a in cmd]
199    # TODO: setup debug logging
200    # msg = f"Executing say command:\n$ {' '.join(args)}"
201    # print(msg)
202    return args

A python wrapper around the say command.

Arguments:

input_text: The text to speak
voice: Specify the voice to be used. Default is the voice selected in System Preferences. To obtain a list of voices installed in the system, specify "?" as the voice name.
rate: Speech rate to be used, in words per minute.
input_file: Specify a file to be spoken. If file is - or neither this parameter nor a message is specified, read from standard input.
audio_output_file: Specify the path for an audio file to be written. AIFF is the default and should be supported for most voices, but some voices support many more file formats.
service_name: Specify a service name (default "AUNetSend")
network_send: Specify an IP and port to be used for redirecting the speech output through AUNetSend.
audio_device: Specify, by ID or name prefix, an audio device to be used to play the audio. To obtain a list of audio output devices, specify "?" as the device name.
stereo: Whether or not to output a stereo signal
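endianness: Byte order of the audio data, either "LE" (little endian) or "BE" (big endian). Defaults to "LE".
data_type: Sample data type, one of "F" (float), "I" (integer), or "UI" (unsigned integer). Defaults to "I".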
sample_size: One of 8, 16, 24, 32, 64.
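sample_rate: The sample rate of the audio data (default 22050). Values below 1000 are multiplied by 1000 (e.g. 24 -> 24000), and values above the maximum supported rate are lowered to that maximum.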
quality: The audio converter quality level between 0 (lowest) and 127 (highest).
progress: Display a progress meter during synthesis.
interactive: Print the text line by line during synthesis, highlighting words as they are spoken. Markup can be one of:
- A terminfo capability as described in terminfo(5), e.g. bold, smul, setaf 1.:
- A color name, one of black, red, green, yellow, blue, magenta, cyan, or white.:
- A foreground and background color from the above list, separated by a slash, e.g. green/black. If the foreground color is omitted, only the background color is set.:
- If markup is not specified, it defaults to smso, i.e. reverse video.:
- If the input is a TTY, text is spoken line by line, and the output file, if specified, will only contain audio for the last line of the input. Otherwise, text is spoken all at once.
text_color: A color name, one of black, red, green, yellow, blue, magenta, cyan, or white.
bg_color: A color name, one of black, red, green, yellow, blue, magenta, cyan, or white.
executable: The path to the say executable (default '/usr/bin/say')

def run(args: Optional[List] = None, **kwargs) -> None: View Source

def run(args: Optional[List] = None, **kwargs) -> None:
    """
    Execute a command given a list of arguments outputted by `cmd`
    or by supplying the kwargs that `cmd` accepts

    Args:
        args: A list of args generated by `cmd`. When empty or None, the
            command is built from `kwargs` instead.
        **kwargs: Keyword arguments forwarded to `cmd`, plus:
            wait: Whether to block until the process exits (default False).
            parent_pid: The pid to register the child process under
                (default: the current process id).
            parent_pid_file: Passed through to
                `controller.add_child_pid` (default None).
    """
    wait_for_process = kwargs.pop("wait", False)
    # Resolve these unconditionally: they are needed to register the child
    # process below even when pre-built `args` are supplied.
    parent_pid = kwargs.pop("parent_pid", os.getpid())
    parent_pid_file = kwargs.pop("parent_pid_file", None)
    if not args:
        # write text as input file
        if kwargs.get("input_text") and not kwargs.get("input_file", None):
            text = kwargs.pop("input_text")
            # override text with tempfile
            text_file = make_tempfile()
            with open(text_file, "w") as f:
                f.write(text)
            kwargs["input_file"] = text_file
        args = cmd(**kwargs)
    try:
        process = subprocess.Popen(args, stdout=subprocess.PIPE)
        # register each process with the parent pid.
        controller.add_child_pid(process.pid, parent_pid, parent_pid_file)
        if wait_for_process:
            process.wait()
    except KeyboardInterrupt:
        # deliberately swallowed: an interrupt ends the call quietly
        pass

Execute a command given a list of arguments outputted by cmd or by supplying the kwargs that cmd accepts

Arguments:

args: A list of args generated by cmd

def spawn(commands: List[Dict[str, Any]]) -> None: View Source

def spawn(commands: List[Dict[str, Any]]) -> None:
    """
    Launch one `say` process per entry in `commands`.

    Each entry is passed, in order, to `_run_spawn`, which unpacks it as
    keyword arguments for `run`.

    Args:
        commands: A list of dictionaries of keyword arguments, one per
            process to start.
    """
    pending = iter(commands)
    for run_kwargs in pending:
        _run_spawn(run_kwargs)

Spawn multiple say processes in parallel by passing in a list of commands generated by cmd

Arguments:

commands: A list of keyword-argument dictionaries, one per process; each is unpacked into run, which forwards the remaining keys to cmd.