saysynth.core.segment
A Segment
is the base unit of text according to
Apple's Speech Synthesis Programming Guide.
A segment represents an individual part-of-speech (phoneme) in say
and can have
one or more phonemes, a duration, and a sequence of pitches.
Segments are combined together throughout saysynth
to create musical passages.
"""
A `Segment` is the base unit of text according to
[Apple's Speech Synthesis Programming Guide](https://developer.apple.com/library/archive/documentation/UserExperience/Conceptual/SpeechSynthesisProgrammingGuide/SpeechOverview/SpeechOverview.html#//apple_ref/doc/uid/TP40004365-CH3-SW1).
A segment represents an individual part-of-speech (phoneme) in `say` and can have
one or more phonemes, a duration, and a sequence of pitches.

Segments are combined together throughout `saysynth` to create musical passages.

<center><img src="/assets/img/cell.png"></img></center>

"""
from typing import Tuple, Union

from midi_utils import midi_to_freq, note_to_midi

from ..constants import (SAY_EMPHASIS, SAY_PHONEME_SILENCE,
                         SAY_SEGMENT_MAX_DURATION, SAY_VOLUME_RANGE)
from ..utils import rescale


class Segment(object):
    def __init__(
        self,
        note: Union[int, str],
        velocity: int = 127,
        phoneme: str = "m",
        duration: Union[float, int] = SAY_SEGMENT_MAX_DURATION,
        type: str = "note",
        emphasis: Tuple[int, int] = SAY_EMPHASIS,
        volume_range: Tuple[float, float] = SAY_VOLUME_RANGE,
        include_volume_level: bool = True,
        duration_sig_digits: int = 4,
        **kwargs,
    ):
        """
        An individual segment of speech in Apple's DSL

        Args:
            note: The note to map to a frequency, eg "A3"
            velocity: The midi velocity value to use for the segment (0-127).
            phoneme: A valid combination of Phonemes documented in [Apple's Speech Synthesis guide](https://developer.apple.com/library/archive/documentation/UserExperience/Conceptual/SpeechSynthesisProgrammingGuide/Phonemes/Phonemes.html#//apple_ref/doc/uid/TP40004365-CH9-SW1).
            duration: The duration of the segment in milliseconds.
                      Values above `SAY_SEGMENT_MAX_DURATION` are clamped when rendered.
            type: Either "note" or "silence"
            emphasis: A pair of velocity thresholds `(step_1, step_2)`, eg: `(75, 100)`.
                      A velocity above `step_2` renders emphasis "2", a velocity
                      above `step_1` renders emphasis "1", anything else renders none.
            volume_range: The `(minimum, maximum)` output range which the
                          velocity (0-127) is rescaled into when rendering the volume tag.
            include_volume_level: Whether or not to render the volume settings for this segment.
                                  Over-rendering these settings can lead to audio drop-outs.
            duration_sig_digits: The number of decimal places the duration is rounded to when rendered.
                                 A higher value should yield more rhythmically precise results.
            **kwargs: Accepted and ignored.
        """
        self._phoneme = phoneme
        self._duration = duration
        self._emphasis = emphasis
        self.velocity = velocity
        self.note = note
        self.is_silence = type == "silence"
        self.volume_range = volume_range
        self.include_volume_level = include_volume_level
        self.duration_sig_digits = duration_sig_digits

    @property
    def phoneme(self):
        """
        The phoneme text supplied at construction time.
        """
        return self._phoneme

    @property
    def phoneme_has_emphasis(self) -> bool:
        """
        Return True if the `phoneme` has an included
        emphasis, ie: it starts with "0", "1", or "2".
        """
        return self._phoneme.startswith(("0", "1", "2"))

    @property
    def frequency_envelope(self) -> str:
        """
        Translate a note name to a frequency.
        **TODO:** Add intra-note modulation.
        """
        freq = midi_to_freq(note_to_midi(self.note))
        return f"P {freq}:0"

    @property
    def duration(self) -> float:
        """
        Clamp segment duration at `SAY_SEGMENT_MAX_DURATION`
        and round it to `self.duration_sig_digits` decimal places.
        """
        return round(
            min(self._duration, SAY_SEGMENT_MAX_DURATION),
            self.duration_sig_digits,
        )

    @property
    def volume(self) -> str:
        """
        Translate a midi velocity value (0-127) into a
        say volume tag, eg: "[[ volm +0.1 ]]".
        Returns an empty string when `include_volume_level` is False.
        """
        if self.include_volume_level:
            volume = rescale(self.velocity, [0, 127], self.volume_range)
            return f"[[ volm {volume} ]]"
        return ""

    @property
    def emphasis(self) -> str:
        """
        Translate a midi velocity value (0-127) into a phoneme
        emphasis value ("", "1", or "2") when provided with a tuple
        of steps (step_1, step_2) eg: (75, 100).
        This action is not performed when the phoneme already
        has an emphasis included.
        """
        # Nothing to add when the phoneme carries its own emphasis
        # or when the velocity is zero / unset.
        if self.phoneme_has_emphasis or not self.velocity:
            return ""
        if self.velocity > self._emphasis[1]:
            return "2"
        if self.velocity > self._emphasis[0]:
            return "1"
        return ""

    def to_text(self) -> str:
        """
        Render this Segment as Apple SpeechSynthesis DSL text.
        """
        if self.is_silence:
            return f"{self.volume} {SAY_PHONEME_SILENCE} {{D {self.duration}}}"
        return f"{self.volume} {self.emphasis}{self.phoneme} {{D {self.duration}; {self.frequency_envelope}}}"

    def __eq__(self, other):
        """
        Two segments are equal when they render to the same text.
        Objects without a `to_text` method are not comparable.
        """
        render_other = getattr(other, "to_text", None)
        if not callable(render_other):
            # Let Python fall back to its default comparison
            # instead of raising AttributeError.
            return NotImplemented
        return self.to_text() == render_other()

    def __str__(self) -> str:
        return self.to_text()
class Segment(object):
    def __init__(
        self,
        note: Union[int, str],
        velocity: int = 127,
        phoneme: str = "m",
        duration: Union[float, int] = SAY_SEGMENT_MAX_DURATION,
        type: str = "note",
        emphasis: Tuple[int, int] = SAY_EMPHASIS,
        volume_range: Tuple[float, float] = SAY_VOLUME_RANGE,
        include_volume_level: bool = True,
        duration_sig_digits: int = 4,
        **kwargs,
    ):
        """
        An individual segment of speech in Apple's DSL

        Args:
            note: The note to map to a frequency, eg "A3"
            velocity: The midi velocity value to use for the segment (0-127).
            phoneme: A valid combination of Phonemes documented in [Apple's Speech Synthesis guide](https://developer.apple.com/library/archive/documentation/UserExperience/Conceptual/SpeechSynthesisProgrammingGuide/Phonemes/Phonemes.html#//apple_ref/doc/uid/TP40004365-CH9-SW1).
            duration: The duration of the segment in milliseconds.
                      Values above `SAY_SEGMENT_MAX_DURATION` are clamped when rendered.
            type: Either "note" or "silence"
            emphasis: A pair of velocity thresholds `(step_1, step_2)`, eg: `(75, 100)`.
                      A velocity above `step_2` renders emphasis "2", a velocity
                      above `step_1` renders emphasis "1", anything else renders none.
            volume_range: The `(minimum, maximum)` output range which the
                          velocity (0-127) is rescaled into when rendering the volume tag.
            include_volume_level: Whether or not to render the volume settings for this segment.
                                  Over-rendering these settings can lead to audio drop-outs.
            duration_sig_digits: The number of decimal places the duration is rounded to when rendered.
                                 A higher value should yield more rhythmically precise results.
            **kwargs: Accepted and ignored.
        """
        self._phoneme = phoneme
        self._duration = duration
        self._emphasis = emphasis
        self.velocity = velocity
        self.note = note
        self.is_silence = type == "silence"
        self.volume_range = volume_range
        self.include_volume_level = include_volume_level
        self.duration_sig_digits = duration_sig_digits

    @property
    def phoneme(self):
        """
        The phoneme text supplied at construction time.
        """
        return self._phoneme

    @property
    def phoneme_has_emphasis(self) -> bool:
        """
        Return True if the `phoneme` has an included
        emphasis, ie: it starts with "0", "1", or "2".
        """
        return self._phoneme.startswith(("0", "1", "2"))

    @property
    def frequency_envelope(self) -> str:
        """
        Translate a note name to a frequency.
        **TODO:** Add intra-note modulation.
        """
        freq = midi_to_freq(note_to_midi(self.note))
        return f"P {freq}:0"

    @property
    def duration(self) -> float:
        """
        Clamp segment duration at `SAY_SEGMENT_MAX_DURATION`
        and round it to `self.duration_sig_digits` decimal places.
        """
        return round(
            min(self._duration, SAY_SEGMENT_MAX_DURATION),
            self.duration_sig_digits,
        )

    @property
    def volume(self) -> str:
        """
        Translate a midi velocity value (0-127) into a
        say volume tag, eg: "[[ volm +0.1 ]]".
        Returns an empty string when `include_volume_level` is False.
        """
        if self.include_volume_level:
            volume = rescale(self.velocity, [0, 127], self.volume_range)
            return f"[[ volm {volume} ]]"
        return ""

    @property
    def emphasis(self) -> str:
        """
        Translate a midi velocity value (0-127) into a phoneme
        emphasis value ("", "1", or "2") when provided with a tuple
        of steps (step_1, step_2) eg: (75, 100).
        This action is not performed when the phoneme already
        has an emphasis included.
        """
        # Nothing to add when the phoneme carries its own emphasis
        # or when the velocity is zero / unset.
        if self.phoneme_has_emphasis or not self.velocity:
            return ""
        if self.velocity > self._emphasis[1]:
            return "2"
        if self.velocity > self._emphasis[0]:
            return "1"
        return ""

    def to_text(self) -> str:
        """
        Render this Segment as Apple SpeechSynthesis DSL text.
        """
        if self.is_silence:
            return f"{self.volume} {SAY_PHONEME_SILENCE} {{D {self.duration}}}"
        return f"{self.volume} {self.emphasis}{self.phoneme} {{D {self.duration}; {self.frequency_envelope}}}"

    def __eq__(self, other):
        """
        Two segments are equal when they render to the same text.
        Objects without a `to_text` method are not comparable.
        """
        render_other = getattr(other, "to_text", None)
        if not callable(render_other):
            # Let Python fall back to its default comparison
            # instead of raising AttributeError.
            return NotImplemented
        return self.to_text() == render_other()

    def __str__(self) -> str:
        return self.to_text()
def __init__(
    self,
    note: Union[int, str],
    velocity: int = 127,
    phoneme: str = "m",
    duration: Union[float, int] = SAY_SEGMENT_MAX_DURATION,
    type: str = "note",
    emphasis: Tuple[int, int] = SAY_EMPHASIS,
    volume_range: Tuple[float, float] = SAY_VOLUME_RANGE,
    include_volume_level: bool = True,
    duration_sig_digits: int = 4,
    **kwargs,
):
    """
    An individual segment of speech in Apple's DSL

    Args:
        note: The note to map to a frequency, eg "A3"
        velocity: The midi velocity value to use for the segment (0-127).
        phoneme: A valid combination of Phonemes documented in [Apple's Speech Synthesis guide](https://developer.apple.com/library/archive/documentation/UserExperience/Conceptual/SpeechSynthesisProgrammingGuide/Phonemes/Phonemes.html#//apple_ref/doc/uid/TP40004365-CH9-SW1).
        duration: The duration of the segment in milliseconds.
        type: Either "note" or "silence"
        emphasis: A pair of velocity thresholds `(step_1, step_2)`, eg: `(75, 100)`,
                  stored for later translation of the velocity into an emphasis value.
        volume_range: The `(minimum, maximum)` range stored for rendering the volume tag.
        include_volume_level: Whether or not to render the volume settings for this segment.
                              Over-rendering these settings can lead to audio drop-outs.
        duration_sig_digits: The precision stored for rounding the duration value when rendered.
                             A higher value should yield more rhythmically precise results.
        **kwargs: Accepted and ignored.
    """
    self._phoneme = phoneme
    self._duration = duration
    self._emphasis = emphasis
    self.velocity = velocity
    self.note = note
    # Only the exact string "silence" marks a rest; any other type is a note.
    self.is_silence = type == "silence"
    self.volume_range = volume_range
    self.include_volume_level = include_volume_level
    self.duration_sig_digits = duration_sig_digits
An individual segment of speech in Apple's DSL
Arguments:
- note: The note to map to a frequency, eg "A3"
- velocity: The midi velocity value to use for the segment (0-127).
- phoneme: A valid combination of Phonemes documented in Apple's Speech Synthesis guide.
- duration: The duration of the segment in milliseconds.
- type: Either "note" or "silence"
- emphasis: A level of emphasis to place on this segment (0,1, or 2)
- volume_range: A range between 0 and 127 representing the minimum and maximum velocity values to render.
- include_volume_level: Whether or not to render the volume settings for this segment. Over-rendering these settings can lead to audio drop-outs.
- duration_sig_digits: The number of significant digits to use when rendering the duration value. A higher value should yield more rhythmically precise results.
Clamp segment duration at SAY_SEGMENT_MAX_DURATION
and round it to self.duration_sig_digits
Translate a midi velocity value (0-127) into a pair of say volume tags, eg: "[[ volm +0.1 ]]"
Translate a midi velocity value (0-127) into a phoneme emphasis value ("", "1", or "2") when provided with a tuple of steps (step_1, step_2), eg: (75, 100). This action is not performed when the phoneme already has an emphasis included.
def to_text(self) -> str:
    """
    Render this Segment as Apple SpeechSynthesis DSL text.

    A silent segment renders the volume tag, the silence phoneme and
    its duration. Any other segment renders the volume tag, the
    emphasis-prefixed phoneme, its duration and its frequency envelope.
    """
    if self.is_silence:
        return f"{self.volume} {SAY_PHONEME_SILENCE} {{D {self.duration}}}"
    return (
        f"{self.volume} {self.emphasis}{self.phoneme} "
        f"{{D {self.duration}; {self.frequency_envelope}}}"
    )
Render this Segment as Apple SpeechSynthesis DSL text.