saysynth.core.segment

A Segment is the base unit of text according to Apple's Speech Synthesis Programming Guide. A segment represents an individual part-of-speech (phoneme) in say and can have one or more phonemes, a duration, and a sequence of pitches.

Segments are combined together throughout saysynth to create musical passages.

  1"""
  2A `Segment` is the base unit of text according to
  3[Apple's Speech Synthesis Programming Guide](https://developer.apple.com/library/archive/documentation/UserExperience/Conceptual/SpeechSynthesisProgrammingGuide/SpeechOverview/SpeechOverview.html#//apple_ref/doc/uid/TP40004365-CH3-SW1).
  4A segment represents an individual part-of-speech (phoneme) in `say` and can have
  5one or more phonemes, a duration, and a sequence of pitches.
  6
  7Segments are combined together throughout `saysynth` to create musical passages.
  8
  9<center><img src="/assets/img/cell.png"></img></center>
 10
 11"""
 12from typing import Tuple, Union
 13
 14from midi_utils import midi_to_freq, note_to_midi
 15
 16from ..constants import (SAY_EMPHASIS, SAY_PHONEME_SILENCE,
 17                         SAY_SEGMENT_MAX_DURATION, SAY_VOLUME_RANGE)
 18from ..utils import rescale
 19
 20
 21class Segment(object):
 22    def __init__(
 23        self,
 24        note: Union[int, str],
 25        velocity: int = 127,
 26        phoneme: str = "m",
 27        duration: Union[float, int] = SAY_SEGMENT_MAX_DURATION,
 28        type: str = "note",
 29        emphasis: Tuple[int, int] = SAY_EMPHASIS,
 30        volume_range: Tuple[float, float] = SAY_VOLUME_RANGE,
 31        include_volume_level: bool = True,
 32        duration_sig_digits: int = 4,
 33        **kwargs,
 34    ):
 35        """
 36        An individual segment of speech in Apple's DSL
 37        Args:
 38            note: The note to map to a frequency, eg "A3"
 39            velocity: The midi velocity value to use for the segment (0-127).
 40            phoneme:  A valid combination of Phonemes documented in [Apple's Speech Synthesis guide](https://developer.apple.com/library/archive/documentation/UserExperience/Conceptual/SpeechSynthesisProgrammingGuide/Phonemes/Phonemes.html#//apple_ref/doc/uid/TP40004365-CH9-SW1).
 41            duration: The duration of the segment in milliseconds.
 42            type: Either "note" or "silence"
 43            emphasis: A level of emphasis to place on this segment (0,1, or 2)
 44            volume_range: A range between 0 and 127 representing the minimum and maximum velocity values to render.
 45            include_volume_level: Whether or not to the render the volume settings for this segment.
 46                         Over-rendering these settings can lead to audio drop-outs.
 47            duration_sig_digits: The number of significant digits to use when rendering the duration value.
 48                                 A higher value should yield more rhythmically precise results.
 49        """
 50        self._phoneme = phoneme
 51        self._duration = duration
 52        self._emphasis = emphasis
 53        self.velocity = velocity
 54        self.note = note
 55        self.is_silence = type == "silence"
 56        self.volume_range = volume_range
 57        self.include_volume_level = include_volume_level
 58        self.duration_sig_digits = duration_sig_digits
 59
 60    @property
 61    def phoneme(self):
 62        return self._phoneme
 63
 64    @property
 65    def phoneme_has_emphasis(self) -> bool:
 66        """
 67        Return True if the `phoneme` has an included
 68        emphasis.
 69        """
 70        if (
 71            self._phoneme.startswith("0")
 72            or self._phoneme.startswith("1")
 73            or self._phoneme.startswith("2")
 74        ):
 75            return True
 76        return False
 77
 78    @property
 79    def frequency_envelope(self) -> str:
 80        """
 81        Translate a note name to a frequency.
 82        **TODO:** Add intra-note modulation.
 83        """
 84        freq = midi_to_freq(note_to_midi(self.note))
 85        return f"P {freq}:0"
 86
 87    @property
 88    def duration(self) -> float:
 89        """
 90        Clamp segment duration at `SAY_SEGMENT_MAX_DURATION`
 91        and round it to `self.duration_sig_digits`
 92        """
 93        return round(
 94            min(self._duration, SAY_SEGMENT_MAX_DURATION),
 95            self.duration_sig_digits,
 96        )
 97
 98    @property
 99    def volume(self) -> str:
100        """
101        Translate a midi velocity value (0-127) into a pair
102        of say volume tags, eg: "[[ volm +0.1 ]]"
103        """
104        if self.include_volume_level:
105            volume = rescale(self.velocity, [0, 127], self.volume_range)
106            return f"[[ volm {volume} ]]"
107        return ""
108
109    @property
110    def emphasis(self) -> str:
111        """
112        Translate a midi velocity value (0-127) into a phoneme
113        emphasis value ("", "1", or "2")when provided with a tuple
114        of steps (step_1, step_2) eg: (75, 100)
115        This action is not performed when the phoneme already
116        has an emphasis included.
117        """
118        if not self.phoneme_has_emphasis:
119            if not self.velocity:
120                return ""
121            if self.velocity > self._emphasis[1]:
122                return "2"
123            if self.velocity > self._emphasis[0]:
124                return "1"
125        return ""
126
127    def to_text(self) -> str:
128        """
129        Render this Segment as Apple SpeechSynthesis DSL text.
130        """
131        if self.is_silence:
132            return f"{self.volume} {SAY_PHONEME_SILENCE} {{D {self.duration}}}"
133        return f"{self.volume} {self.emphasis}{self.phoneme} {{D {self.duration}; {self.frequency_envelope}}}"
134
135    def __eq__(self, other):
136        return self.to_text() == other.to_text()
137
138    def __str__(self) -> str:
139        return self.to_text()
class Segment:
 22class Segment(object):
 23    def __init__(
 24        self,
 25        note: Union[int, str],
 26        velocity: int = 127,
 27        phoneme: str = "m",
 28        duration: Union[float, int] = SAY_SEGMENT_MAX_DURATION,
 29        type: str = "note",
 30        emphasis: Tuple[int, int] = SAY_EMPHASIS,
 31        volume_range: Tuple[float, float] = SAY_VOLUME_RANGE,
 32        include_volume_level: bool = True,
 33        duration_sig_digits: int = 4,
 34        **kwargs,
 35    ):
 36        """
 37        An individual segment of speech in Apple's DSL
 38        Args:
 39            note: The note to map to a frequency, eg "A3"
 40            velocity: The midi velocity value to use for the segment (0-127).
 41            phoneme:  A valid combination of Phonemes documented in [Apple's Speech Synthesis guide](https://developer.apple.com/library/archive/documentation/UserExperience/Conceptual/SpeechSynthesisProgrammingGuide/Phonemes/Phonemes.html#//apple_ref/doc/uid/TP40004365-CH9-SW1).
 42            duration: The duration of the segment in milliseconds.
 43            type: Either "note" or "silence"
 44            emphasis: A level of emphasis to place on this segment (0,1, or 2)
 45            volume_range: A range between 0 and 127 representing the minimum and maximum velocity values to render.
 46            include_volume_level: Whether or not to the render the volume settings for this segment.
 47                         Over-rendering these settings can lead to audio drop-outs.
 48            duration_sig_digits: The number of significant digits to use when rendering the duration value.
 49                                 A higher value should yield more rhythmically precise results.
 50        """
 51        self._phoneme = phoneme
 52        self._duration = duration
 53        self._emphasis = emphasis
 54        self.velocity = velocity
 55        self.note = note
 56        self.is_silence = type == "silence"
 57        self.volume_range = volume_range
 58        self.include_volume_level = include_volume_level
 59        self.duration_sig_digits = duration_sig_digits
 60
 61    @property
 62    def phoneme(self):
 63        return self._phoneme
 64
 65    @property
 66    def phoneme_has_emphasis(self) -> bool:
 67        """
 68        Return True if the `phoneme` has an included
 69        emphasis.
 70        """
 71        if (
 72            self._phoneme.startswith("0")
 73            or self._phoneme.startswith("1")
 74            or self._phoneme.startswith("2")
 75        ):
 76            return True
 77        return False
 78
 79    @property
 80    def frequency_envelope(self) -> str:
 81        """
 82        Translate a note name to a frequency.
 83        **TODO:** Add intra-note modulation.
 84        """
 85        freq = midi_to_freq(note_to_midi(self.note))
 86        return f"P {freq}:0"
 87
 88    @property
 89    def duration(self) -> float:
 90        """
 91        Clamp segment duration at `SAY_SEGMENT_MAX_DURATION`
 92        and round it to `self.duration_sig_digits`
 93        """
 94        return round(
 95            min(self._duration, SAY_SEGMENT_MAX_DURATION),
 96            self.duration_sig_digits,
 97        )
 98
 99    @property
100    def volume(self) -> str:
101        """
102        Translate a midi velocity value (0-127) into a pair
103        of say volume tags, eg: "[[ volm +0.1 ]]"
104        """
105        if self.include_volume_level:
106            volume = rescale(self.velocity, [0, 127], self.volume_range)
107            return f"[[ volm {volume} ]]"
108        return ""
109
110    @property
111    def emphasis(self) -> str:
112        """
113        Translate a midi velocity value (0-127) into a phoneme
114        emphasis value ("", "1", or "2")when provided with a tuple
115        of steps (step_1, step_2) eg: (75, 100)
116        This action is not performed when the phoneme already
117        has an emphasis included.
118        """
119        if not self.phoneme_has_emphasis:
120            if not self.velocity:
121                return ""
122            if self.velocity > self._emphasis[1]:
123                return "2"
124            if self.velocity > self._emphasis[0]:
125                return "1"
126        return ""
127
128    def to_text(self) -> str:
129        """
130        Render this Segment as Apple SpeechSynthesis DSL text.
131        """
132        if self.is_silence:
133            return f"{self.volume} {SAY_PHONEME_SILENCE} {{D {self.duration}}}"
134        return f"{self.volume} {self.emphasis}{self.phoneme} {{D {self.duration}; {self.frequency_envelope}}}"
135
136    def __eq__(self, other):
137        return self.to_text() == other.to_text()
138
139    def __str__(self) -> str:
140        return self.to_text()
Segment( note: Union[int, str], velocity: int = 127, phoneme: str = 'm', duration: Union[float, int] = 1200, type: str = 'note', emphasis: Tuple[int, int] = [75, 100], volume_range: Tuple[float, float] = [0.0, 1.0], include_volume_level: bool = True, duration_sig_digits: int = 4, **kwargs)
23    def __init__(
24        self,
25        note: Union[int, str],
26        velocity: int = 127,
27        phoneme: str = "m",
28        duration: Union[float, int] = SAY_SEGMENT_MAX_DURATION,
29        type: str = "note",
30        emphasis: Tuple[int, int] = SAY_EMPHASIS,
31        volume_range: Tuple[float, float] = SAY_VOLUME_RANGE,
32        include_volume_level: bool = True,
33        duration_sig_digits: int = 4,
34        **kwargs,
35    ):
36        """
37        An individual segment of speech in Apple's DSL
38        Args:
39            note: The note to map to a frequency, eg "A3"
40            velocity: The midi velocity value to use for the segment (0-127).
41            phoneme:  A valid combination of Phonemes documented in [Apple's Speech Synthesis guide](https://developer.apple.com/library/archive/documentation/UserExperience/Conceptual/SpeechSynthesisProgrammingGuide/Phonemes/Phonemes.html#//apple_ref/doc/uid/TP40004365-CH9-SW1).
42            duration: The duration of the segment in milliseconds.
43            type: Either "note" or "silence"
44            emphasis: A level of emphasis to place on this segment (0,1, or 2)
45            volume_range: A range between 0 and 127 representing the minimum and maximum velocity values to render.
46            include_volume_level: Whether or not to the render the volume settings for this segment.
47                         Over-rendering these settings can lead to audio drop-outs.
48            duration_sig_digits: The number of significant digits to use when rendering the duration value.
49                                 A higher value should yield more rhythmically precise results.
50        """
51        self._phoneme = phoneme
52        self._duration = duration
53        self._emphasis = emphasis
54        self.velocity = velocity
55        self.note = note
56        self.is_silence = type == "silence"
57        self.volume_range = volume_range
58        self.include_volume_level = include_volume_level
59        self.duration_sig_digits = duration_sig_digits

An individual segment of speech in Apple's DSL

Arguments:
  • note: The note to map to a frequency, eg "A3"
  • velocity: The midi velocity value to use for the segment (0-127).
  • phoneme: A valid combination of Phonemes documented in Apple's Speech Synthesis guide.
  • duration: The duration of the segment in milliseconds.
  • type: Either "note" or "silence"
  • emphasis: A pair of velocity thresholds (step_1, step_2), eg: (75, 100). A velocity above step_1 renders emphasis "1" and a velocity above step_2 renders emphasis "2".
  • volume_range: The minimum and maximum volume levels to render, eg: (0.0, 1.0). The midi velocity (0-127) is rescaled into this range.
  • include_volume_level: Whether or not to render the volume settings for this segment. Over-rendering these settings can lead to audio drop-outs.
  • duration_sig_digits: The number of digits to round the duration value to when rendering it. A higher value should yield more rhythmically precise results.
phoneme_has_emphasis: bool

Return True if the phoneme has an included emphasis.

frequency_envelope: str

Translate a note name to a frequency. TODO: Add intra-note modulation.

duration: float

Clamp segment duration at SAY_SEGMENT_MAX_DURATION and round it to self.duration_sig_digits

volume: str

Translate a midi velocity value (0-127) into a pair of say volume tags, eg: "[[ volm +0.1 ]]"

emphasis: str

Translate a midi velocity value (0-127) into a phoneme emphasis value ("", "1", or "2") when provided with a tuple of steps (step_1, step_2), eg: (75, 100). This action is not performed when the phoneme already has an emphasis included.

def to_text(self) -> str:
128    def to_text(self) -> str:
129        """
130        Render this Segment as Apple SpeechSynthesis DSL text.
131        """
132        if self.is_silence:
133            return f"{self.volume} {SAY_PHONEME_SILENCE} {{D {self.duration}}}"
134        return f"{self.volume} {self.emphasis}{self.phoneme} {{D {self.duration}; {self.frequency_envelope}}}"

Render this Segment as Apple SpeechSynthesis DSL text.