saysynth.core.note

The Note class creates a list of `Segment` objects with configurable duration, pitch, phonemes, and volume envelope.

  1"""
  2The Note class creates a list of `Segment` with configurable
  3duration, pitch, phonemes, and volume envelope.
  4<center><img src="/assets/img/coffee.png"></img></center>
  5"""
  6import copy
  7import random
  8from typing import Any, Dict, List, Optional, Tuple, Union
  9
 10from midi_utils import ADSR, midi_to_note, note_to_midi
 11
 12from ..constants import (SAY_ALL_PHONEMES, SAY_PHONEME_CLASSES,
 13                         SAY_PHONEME_VOICE_CLASSES, SAY_SEGMENT_MAX_DURATION,
 14                         SAY_SEGMENT_SILENCE_DURATION, SAY_TUNED_VOICES)
 15from ..utils import bpm_to_time, frange
 16from .base import SayObject
 17from .lyrics import Lyrics
 18from .segment import Segment
 19
 20
 21class Note(SayObject):
 22    def __init__(
 23        self,
 24        note: Union[int, str] = "A3",
 25        phoneme: List[str] = ["m"],
 26        text: Optional[str] = None,
 27        # start position
 28        start: Optional[int] = 0,
 29        start_bpm: Optional[Union[float, int]] = 120,
 30        start_count: Union[str, float, int] = 0,
 31        start_time_sig: str = "4/4",
 32        # envelope
 33        velocity: int = 127,
 34        volume_level_per_segment: int = 3,
 35        include_volume_level: bool = True,
 36        attack: Union[float, int] = 0,
 37        decay: Union[float, int] = 0,
 38        sustain: Union[float, int] = 1,
 39        release: Union[float, int] = 0,
 40        # length
 41        duration: Optional[Union[float, int]] = None,
 42        duration_bpm: Optional[Union[float, int]] = 120,
 43        duration_count: Union[str, float, int] = 1,
 44        duration_time_sig: str = "4/4",
 45        # segmentation
 46        segment_duration: Optional[int] = None,
 47        segment_bpm: Optional[float] = 120,
 48        segment_count: Optional[Union[str, float, int]] = 1.0 / 8.0,
 49        segment_time_sig: Optional[str] = "4/4",
 50        # randomization
 51        randomize_phoneme: Optional[str] = None,
 52        randomize_velocity: Optional[Tuple[int, int]] = None,
 53        randomize_octave: Optional[List[int]] = [],
 54        randomize_segments: Optional[List[str]] = [],
 55        randomize_start: Optional[Tuple[int, int]] = None,
 56        **segment_options,
 57    ):
 58        f"""
 59        Generate say text for a collection of phonemes with adsr, pitch modulation, and more.
 60        Args:
 61            note: The note to play, eg "A3"
 62            phoneme: A valid combination of Phonemes documented in [Apple's Speech Synthesis guide](https://developer.apple.com/library/archive/documentation/UserExperience/Conceptual/SpeechSynthesisProgrammingGuide/Phonemes/Phonemes.html#//apple_ref/doc/uid/TP40004365-CH9-SW1).
 63            text: The text to "sing". If provided, this will override phoneme.
 64            start: The number of milliseconds of silence to add to the beginning of the track.
 65            start_bpm: A BPM to use when calculating the number of milliseconds of silence to add to the beginning of the track.
 66            start_count: A count to use when calculating the number of milliseconds of silence to add to the beginning of the track.
 67            start_time_sig: A time signature to use when calculating the number of milliseconds of silence to add to the beginning of the track.
 68            velocity: The midi velocity value to use for this note (0-127).
 69            volume_level_per_segment: The number of segments after which volume settings will be rendered (eg: "3" would mean one segment would have volume settings and then the next two would not, etc.))
 70            include_volume_level: Whether or not to the render the volume settings for this note.
 71                         Over-rendering these settings can lead to audio drop-outs.
 72            attack: A value between 0 and 1 representing the ratio of the note's total length during which the note will increase to it's amplitude.
 73                    A lower number is a faster attack while a larger number is a slow attack. (see `midi_utils.ADSR`).
 74            decay:  A value between 0 and 1 representing the ratio of note's total length during which the note will decrease in amplitude from the max amplitude to the sustain level. (see `midi_utils.ADSR`).
 75            sustain: A value between 0 and 1 representing the relative volume level of the sustain phase (0 is the min volume_range, 1 is the max).
 76            release: A value between 0 and 1 representing the ratio of the note's total length during which the note will decrease in amplitude from the sustain level to zero.
 77            duration: The duration of this note in number of milliseconds.
 78            duration_bpm: A BPM to use when calculating the note's duration.
 79            duration_count: A count to use when calculating the note's duration.
 80            duration_time_sig: A time signature to use when calculating the note's duration.
 81            segment_duration: The duration of each `Segment` of this note in number of milliseconds.
 82            segment_bpm: A BPM to use when calculating the duration of each `Segment` in this note.
 83            segment_count: A count to use when calculating duration of each `Segment` of this note
 84            segment_time_sig: A time signature to use when calculating the duration of each `Segment` in this note.
 85            randomize_phoneme: Randomize the phoneme for every note. "
 86                If "all" is passed, all valid phonemes will be used.
 87                Alternatively pass a list of phonemes (eg 'm,l,n') or a voice and style, eg: Fred:drone.
 88                Valid voices include: {', '.join(SAY_TUNED_VOICES)}.
 89                Valid styles include: {', '.join(SAY_PHONEME_CLASSES)}.
 90            randomize_velocity: Randomize a note's velocity by supplying a min and max midi velocity (eg: -rv [40, 120])
 91            randomize_octave: A list of octaves to randomly vary between.
 92                              You can weight certain octaves by providing them multiple times
 93                              (eg: [0,0-1,-1,2] would prefer the root octave first, one octave down second, and two octaves up third).
 94            randomize_segments: Randomize the 'phoneme', 'octave', and/or 'velocity' according to each respective randomization setting.
 95            randomize_start: Randomize the number of milliseconds to silence to add before the say text.
 96                             The first number passed in is the minimum of the range, the second is the max (eg: [4000, 12000] would set a range for four to twelve seconds).
 97            **segment_options: Additional options to pass to each `Segment`.
 98        """
 99
100        self.segment_options = segment_options
101        root = segment_options.pop("root", None)
102
103        if root or note:
104            self.note = note_to_midi(root or note)  # root == note
105            self.name = midi_to_note(self.note)
106        else:
107            self.note = 0
108            self.name = "silence"
109
110        # phoneme
111        self.phoneme = phoneme
112        if isinstance(self.phoneme, str):
113            self.phoneme = [phoneme]
114
115        # text / lyrics
116        self.lyrics = None
117        if text:
118            self.lyrics = Lyrics(text)
119
120        # start position
121        self.start = start
122        if not self.start:
123            self.start = bpm_to_time(start_bpm, start_count, start_time_sig)
124        if randomize_start:
125            self.start = random.choice(
126                range(self.randomize_start[0], self.randomize_start[1] + 1)
127            )
128
129        # duration
130        self.duration = duration
131        if not self.duration:
132            self.duration = bpm_to_time(
133                duration_bpm, duration_count, duration_time_sig
134            )
135
136        # velocity
137        self.velocity = velocity
138        self.volume_level_per_segment = volume_level_per_segment
139        self.include_volume_level = include_volume_level
140
141        # segmentation
142        self.segment_duration = segment_duration
143        if not self.segment_duration:
144            self.segment_duration = bpm_to_time(
145                segment_bpm, segment_count, segment_time_sig
146            )
147        self.segment_duration = min(
148            SAY_SEGMENT_MAX_DURATION, self.segment_duration
149        )
150        self.segment_count = int(self.duration / self.segment_duration) + 1
151
152        # adsr
153        self.adsr = ADSR(
154            attack, decay, sustain, release, samples=self.segment_count
155        )
156
157        # randomization
158        self.randomize_phoneme = randomize_phoneme
159        self.randomize_velocity = randomize_velocity
160        self.randomize_octave = randomize_octave
161        self.randomize_segments = randomize_segments
162        self.randomize_start = randomize_start
163
164    def _get_random_phoneme(self, index: int) -> str:
165        if self.randomize_phoneme == "all":
166            return random.choice(SAY_ALL_PHONEMES)
167        elif ":" in self.randomize_phoneme:
168            voice, style = self.randomize_phoneme.split(":")
169            voice = voice.title()  # allow for lowercase
170            try:
171                return random.choice(SAY_PHONEME_VOICE_CLASSES[voice][style])
172            except KeyError:
173                raise ValueError(
174                    f"Invalid `voice` '{voice}' or `style` '{style}'. "
175                    f"`voice` must be one of: {', '.join(SAY_TUNED_VOICES)}. "
176                    f"`style` must be one of: {', '.join(SAY_PHONEME_CLASSES)}"
177                )
178        else:
179            return random.choice(
180                [c.strip() for c in self.randomize_phoneme.split(",")]
181            )
182
183    def _get_phoneme(self, index: int) -> str:
184        # handle phoneme randomization
185        if self.randomize_phoneme:
186            return self._get_random_phoneme(index)
187
188        if self.lyrics:
189            return self.lyrics.get_phoneme_for_index(index)
190
191        return self.phoneme[index % len(self.phoneme)]
192
193    def _get_note(self) -> int:
194        if len(self.randomize_octave):
195            return (random.choice(self.randomize_octave) * 12) + note_to_midi(
196                self.note
197            )
198        return self.note
199
200    def _get_velocity(self) -> int:
201        if self.randomize_velocity:
202            return random.choice(
203                range(
204                    self.randomize_velocity[0], self.randomize_velocity[1] + 1
205                )
206            )
207        return self.velocity
208
209    def _get_segment_kwargs(self, **kwargs) -> Dict[str, Any]:
210        opts = copy.copy(self.segment_options)
211        opts.update(kwargs)
212        return opts
213
214    def _randomize_segment(self, note, velocity):
215
216        # optionally randomize every segment.
217        if "octave" in self.randomize_segments and self.randomize_octave:
218            note = self._get_note()
219        if "velocity" in self.randomize_segments and self.randomize_velocity:
220            velocity = self._get_velocity()
221        return note, velocity
222
223    def _get_segment(
224        self,
225        index: int = 0,
226        note: str = None,
227        velocity: int = 0,
228        duration: Optional[float] = None,
229        **kwargs,
230    ) -> Segment:
231        """
232        Generate each segment of the Note, applying randomization, ADSR settings, phoneme generation, and other Segment parameters.
233        """
234        note, velocity = self._randomize_segment(note, velocity)
235        return Segment(
236            note=note,
237            velocity=velocity * self.adsr.get_value(index),
238            phoneme=self._get_phoneme(index),
239            duration=duration or self.segment_duration,
240            include_volume_level=self.include_volume_level
241            and index % self.volume_level_per_segment == 0,
242            **self._get_segment_kwargs(**kwargs),
243        )
244
245    @property
246    def segments(self) -> List[Segment]:
247        """
248        The generated list of `Segment` within the note.
249        """
250        _segments = []
251        # get initial value of note + velocity + phoneme
252        note = self._get_note()
253        velocity = self._get_velocity()
254
255        if self.start and self.start > 0:
256            # create multiple silent phonemes which add up to the desired start position
257            start_breaks = list(
258                frange(0.0, self.start, SAY_SEGMENT_SILENCE_DURATION, 10)
259            )
260            for index, total_start_time in enumerate(start_breaks[1:]):
261                segment = self._get_segment(index, type="silence", velocity=0)
262                _segments.append(segment)
263
264            if total_start_time < self.start:
265                # add final step of silence
266                _segments.append(
267                    self._get_segment(index + 1, type="silence", velocity=0)
268                )
269
270        # create multiple phonemes which add up to the phoneme_duration
271        segment_breaks = list(
272            frange(0.0, self.duration, self.segment_duration, 10)
273        )
274        total_time = 0
275        index = 0
276        for index, total_time in enumerate(segment_breaks[1:]):
277            segment = self._get_segment(
278                index,
279                note,
280                velocity,
281                type=self.segment_options.get("type", "note"),
282            )
283            _segments.append(segment)
284
285        if total_time < self.duration and len(_segments) < self.segment_count:
286
287            # add final step
288            _segments.append(
289                self._get_segment(
290                    index + 1,
291                    note,
292                    velocity,
293                    duration=self.duration - total_time,
294                    type=self.segment_options.get("type", "note"),
295                )
296            )
297        return _segments
298
299    @property
300    def n_segments(self):
301        """
302        The number of Segments in the Note.
303        """
304        return len(self.segments)
305
306    def to_text(self) -> str:
307        """
308        Render this Note as Apple SpeechSynthesis DSL text.
309        """
310        return "\n".join([s.to_text() for s in self.segments])
class Note(saysynth.core.base.SayObject):
 22class Note(SayObject):
 23    def __init__(
 24        self,
 25        note: Union[int, str] = "A3",
 26        phoneme: List[str] = ["m"],
 27        text: Optional[str] = None,
 28        # start position
 29        start: Optional[int] = 0,
 30        start_bpm: Optional[Union[float, int]] = 120,
 31        start_count: Union[str, float, int] = 0,
 32        start_time_sig: str = "4/4",
 33        # envelope
 34        velocity: int = 127,
 35        volume_level_per_segment: int = 3,
 36        include_volume_level: bool = True,
 37        attack: Union[float, int] = 0,
 38        decay: Union[float, int] = 0,
 39        sustain: Union[float, int] = 1,
 40        release: Union[float, int] = 0,
 41        # length
 42        duration: Optional[Union[float, int]] = None,
 43        duration_bpm: Optional[Union[float, int]] = 120,
 44        duration_count: Union[str, float, int] = 1,
 45        duration_time_sig: str = "4/4",
 46        # segmentation
 47        segment_duration: Optional[int] = None,
 48        segment_bpm: Optional[float] = 120,
 49        segment_count: Optional[Union[str, float, int]] = 1.0 / 8.0,
 50        segment_time_sig: Optional[str] = "4/4",
 51        # randomization
 52        randomize_phoneme: Optional[str] = None,
 53        randomize_velocity: Optional[Tuple[int, int]] = None,
 54        randomize_octave: Optional[List[int]] = [],
 55        randomize_segments: Optional[List[str]] = [],
 56        randomize_start: Optional[Tuple[int, int]] = None,
 57        **segment_options,
 58    ):
 59        f"""
 60        Generate say text for a collection of phonemes with adsr, pitch modulation, and more.
 61        Args:
 62            note: The note to play, eg "A3"
 63            phoneme: A valid combination of Phonemes documented in [Apple's Speech Synthesis guide](https://developer.apple.com/library/archive/documentation/UserExperience/Conceptual/SpeechSynthesisProgrammingGuide/Phonemes/Phonemes.html#//apple_ref/doc/uid/TP40004365-CH9-SW1).
 64            text: The text to "sing". If provided, this will override phoneme.
 65            start: The number of milliseconds of silence to add to the beginning of the track.
 66            start_bpm: A BPM to use when calculating the number of milliseconds of silence to add to the beginning of the track.
 67            start_count: A count to use when calculating the number of milliseconds of silence to add to the beginning of the track.
 68            start_time_sig: A time signature to use when calculating the number of milliseconds of silence to add to the beginning of the track.
 69            velocity: The midi velocity value to use for this note (0-127).
 70            volume_level_per_segment: The number of segments after which volume settings will be rendered (eg: "3" would mean one segment would have volume settings and then the next two would not, etc.))
 71            include_volume_level: Whether or not to the render the volume settings for this note.
 72                         Over-rendering these settings can lead to audio drop-outs.
 73            attack: A value between 0 and 1 representing the ratio of the note's total length during which the note will increase to it's amplitude.
 74                    A lower number is a faster attack while a larger number is a slow attack. (see `midi_utils.ADSR`).
 75            decay:  A value between 0 and 1 representing the ratio of note's total length during which the note will decrease in amplitude from the max amplitude to the sustain level. (see `midi_utils.ADSR`).
 76            sustain: A value between 0 and 1 representing the relative volume level of the sustain phase (0 is the min volume_range, 1 is the max).
 77            release: A value between 0 and 1 representing the ratio of the note's total length during which the note will decrease in amplitude from the sustain level to zero.
 78            duration: The duration of this note in number of milliseconds.
 79            duration_bpm: A BPM to use when calculating the note's duration.
 80            duration_count: A count to use when calculating the note's duration.
 81            duration_time_sig: A time signature to use when calculating the note's duration.
 82            segment_duration: The duration of each `Segment` of this note in number of milliseconds.
 83            segment_bpm: A BPM to use when calculating the duration of each `Segment` in this note.
 84            segment_count: A count to use when calculating duration of each `Segment` of this note
 85            segment_time_sig: A time signature to use when calculating the duration of each `Segment` in this note.
 86            randomize_phoneme: Randomize the phoneme for every note. "
 87                If "all" is passed, all valid phonemes will be used.
 88                Alternatively pass a list of phonemes (eg 'm,l,n') or a voice and style, eg: Fred:drone.
 89                Valid voices include: {', '.join(SAY_TUNED_VOICES)}.
 90                Valid styles include: {', '.join(SAY_PHONEME_CLASSES)}.
 91            randomize_velocity: Randomize a note's velocity by supplying a min and max midi velocity (eg: -rv [40, 120])
 92            randomize_octave: A list of octaves to randomly vary between.
 93                              You can weight certain octaves by providing them multiple times
 94                              (eg: [0,0-1,-1,2] would prefer the root octave first, one octave down second, and two octaves up third).
 95            randomize_segments: Randomize the 'phoneme', 'octave', and/or 'velocity' according to each respective randomization setting.
 96            randomize_start: Randomize the number of milliseconds to silence to add before the say text.
 97                             The first number passed in is the minimum of the range, the second is the max (eg: [4000, 12000] would set a range for four to twelve seconds).
 98            **segment_options: Additional options to pass to each `Segment`.
 99        """
100
101        self.segment_options = segment_options
102        root = segment_options.pop("root", None)
103
104        if root or note:
105            self.note = note_to_midi(root or note)  # root == note
106            self.name = midi_to_note(self.note)
107        else:
108            self.note = 0
109            self.name = "silence"
110
111        # phoneme
112        self.phoneme = phoneme
113        if isinstance(self.phoneme, str):
114            self.phoneme = [phoneme]
115
116        # text / lyrics
117        self.lyrics = None
118        if text:
119            self.lyrics = Lyrics(text)
120
121        # start position
122        self.start = start
123        if not self.start:
124            self.start = bpm_to_time(start_bpm, start_count, start_time_sig)
125        if randomize_start:
126            self.start = random.choice(
127                range(self.randomize_start[0], self.randomize_start[1] + 1)
128            )
129
130        # duration
131        self.duration = duration
132        if not self.duration:
133            self.duration = bpm_to_time(
134                duration_bpm, duration_count, duration_time_sig
135            )
136
137        # velocity
138        self.velocity = velocity
139        self.volume_level_per_segment = volume_level_per_segment
140        self.include_volume_level = include_volume_level
141
142        # segmentation
143        self.segment_duration = segment_duration
144        if not self.segment_duration:
145            self.segment_duration = bpm_to_time(
146                segment_bpm, segment_count, segment_time_sig
147            )
148        self.segment_duration = min(
149            SAY_SEGMENT_MAX_DURATION, self.segment_duration
150        )
151        self.segment_count = int(self.duration / self.segment_duration) + 1
152
153        # adsr
154        self.adsr = ADSR(
155            attack, decay, sustain, release, samples=self.segment_count
156        )
157
158        # randomization
159        self.randomize_phoneme = randomize_phoneme
160        self.randomize_velocity = randomize_velocity
161        self.randomize_octave = randomize_octave
162        self.randomize_segments = randomize_segments
163        self.randomize_start = randomize_start
164
165    def _get_random_phoneme(self, index: int) -> str:
166        if self.randomize_phoneme == "all":
167            return random.choice(SAY_ALL_PHONEMES)
168        elif ":" in self.randomize_phoneme:
169            voice, style = self.randomize_phoneme.split(":")
170            voice = voice.title()  # allow for lowercase
171            try:
172                return random.choice(SAY_PHONEME_VOICE_CLASSES[voice][style])
173            except KeyError:
174                raise ValueError(
175                    f"Invalid `voice` '{voice}' or `style` '{style}'. "
176                    f"`voice` must be one of: {', '.join(SAY_TUNED_VOICES)}. "
177                    f"`style` must be one of: {', '.join(SAY_PHONEME_CLASSES)}"
178                )
179        else:
180            return random.choice(
181                [c.strip() for c in self.randomize_phoneme.split(",")]
182            )
183
184    def _get_phoneme(self, index: int) -> str:
185        # handle phoneme randomization
186        if self.randomize_phoneme:
187            return self._get_random_phoneme(index)
188
189        if self.lyrics:
190            return self.lyrics.get_phoneme_for_index(index)
191
192        return self.phoneme[index % len(self.phoneme)]
193
194    def _get_note(self) -> int:
195        if len(self.randomize_octave):
196            return (random.choice(self.randomize_octave) * 12) + note_to_midi(
197                self.note
198            )
199        return self.note
200
201    def _get_velocity(self) -> int:
202        if self.randomize_velocity:
203            return random.choice(
204                range(
205                    self.randomize_velocity[0], self.randomize_velocity[1] + 1
206                )
207            )
208        return self.velocity
209
210    def _get_segment_kwargs(self, **kwargs) -> Dict[str, Any]:
211        opts = copy.copy(self.segment_options)
212        opts.update(kwargs)
213        return opts
214
215    def _randomize_segment(self, note, velocity):
216
217        # optionally randomize every segment.
218        if "octave" in self.randomize_segments and self.randomize_octave:
219            note = self._get_note()
220        if "velocity" in self.randomize_segments and self.randomize_velocity:
221            velocity = self._get_velocity()
222        return note, velocity
223
224    def _get_segment(
225        self,
226        index: int = 0,
227        note: str = None,
228        velocity: int = 0,
229        duration: Optional[float] = None,
230        **kwargs,
231    ) -> Segment:
232        """
233        Generate each segment of the Note, applying randomization, ADSR settings, phoneme generation, and other Segment parameters.
234        """
235        note, velocity = self._randomize_segment(note, velocity)
236        return Segment(
237            note=note,
238            velocity=velocity * self.adsr.get_value(index),
239            phoneme=self._get_phoneme(index),
240            duration=duration or self.segment_duration,
241            include_volume_level=self.include_volume_level
242            and index % self.volume_level_per_segment == 0,
243            **self._get_segment_kwargs(**kwargs),
244        )
245
246    @property
247    def segments(self) -> List[Segment]:
248        """
249        The generated list of `Segment` within the note.
250        """
251        _segments = []
252        # get initial value of note + velocity + phoneme
253        note = self._get_note()
254        velocity = self._get_velocity()
255
256        if self.start and self.start > 0:
257            # create multiple silent phonemes which add up to the desired start position
258            start_breaks = list(
259                frange(0.0, self.start, SAY_SEGMENT_SILENCE_DURATION, 10)
260            )
261            for index, total_start_time in enumerate(start_breaks[1:]):
262                segment = self._get_segment(index, type="silence", velocity=0)
263                _segments.append(segment)
264
265            if total_start_time < self.start:
266                # add final step of silence
267                _segments.append(
268                    self._get_segment(index + 1, type="silence", velocity=0)
269                )
270
271        # create multiple phonemes which add up to the phoneme_duration
272        segment_breaks = list(
273            frange(0.0, self.duration, self.segment_duration, 10)
274        )
275        total_time = 0
276        index = 0
277        for index, total_time in enumerate(segment_breaks[1:]):
278            segment = self._get_segment(
279                index,
280                note,
281                velocity,
282                type=self.segment_options.get("type", "note"),
283            )
284            _segments.append(segment)
285
286        if total_time < self.duration and len(_segments) < self.segment_count:
287
288            # add final step
289            _segments.append(
290                self._get_segment(
291                    index + 1,
292                    note,
293                    velocity,
294                    duration=self.duration - total_time,
295                    type=self.segment_options.get("type", "note"),
296                )
297            )
298        return _segments
299
300    @property
301    def n_segments(self):
302        """
303        The number of Segments in the Note.
304        """
305        return len(self.segments)
306
307    def to_text(self) -> str:
308        """
309        Render this Note as Apple SpeechSynthesis DSL text.
310        """
311        return "\n".join([s.to_text() for s in self.segments])
Note( note: Union[int, str] = 'A3', phoneme: List[str] = ['m'], text: Optional[str] = None, start: Optional[int] = 0, start_bpm: Union[int, float, NoneType] = 120, start_count: Union[str, float, int] = 0, start_time_sig: str = '4/4', velocity: int = 127, volume_level_per_segment: int = 3, include_volume_level: bool = True, attack: Union[float, int] = 0, decay: Union[float, int] = 0, sustain: Union[float, int] = 1, release: Union[float, int] = 0, duration: Union[int, float, NoneType] = None, duration_bpm: Union[int, float, NoneType] = 120, duration_count: Union[str, float, int] = 1, duration_time_sig: str = '4/4', segment_duration: Optional[int] = None, segment_bpm: Optional[float] = 120, segment_count: Union[str, float, int, NoneType] = 0.125, segment_time_sig: Optional[str] = '4/4', randomize_phoneme: Optional[str] = None, randomize_velocity: Optional[Tuple[int, int]] = None, randomize_octave: Optional[List[int]] = [], randomize_segments: Optional[List[str]] = [], randomize_start: Optional[Tuple[int, int]] = None, **segment_options)
 23    def __init__(
 24        self,
 25        note: Union[int, str] = "A3",
 26        phoneme: List[str] = ["m"],
 27        text: Optional[str] = None,
 28        # start position
 29        start: Optional[int] = 0,
 30        start_bpm: Optional[Union[float, int]] = 120,
 31        start_count: Union[str, float, int] = 0,
 32        start_time_sig: str = "4/4",
 33        # envelope
 34        velocity: int = 127,
 35        volume_level_per_segment: int = 3,
 36        include_volume_level: bool = True,
 37        attack: Union[float, int] = 0,
 38        decay: Union[float, int] = 0,
 39        sustain: Union[float, int] = 1,
 40        release: Union[float, int] = 0,
 41        # length
 42        duration: Optional[Union[float, int]] = None,
 43        duration_bpm: Optional[Union[float, int]] = 120,
 44        duration_count: Union[str, float, int] = 1,
 45        duration_time_sig: str = "4/4",
 46        # segmentation
 47        segment_duration: Optional[int] = None,
 48        segment_bpm: Optional[float] = 120,
 49        segment_count: Optional[Union[str, float, int]] = 1.0 / 8.0,
 50        segment_time_sig: Optional[str] = "4/4",
 51        # randomization
 52        randomize_phoneme: Optional[str] = None,
 53        randomize_velocity: Optional[Tuple[int, int]] = None,
 54        randomize_octave: Optional[List[int]] = [],
 55        randomize_segments: Optional[List[str]] = [],
 56        randomize_start: Optional[Tuple[int, int]] = None,
 57        **segment_options,
 58    ):
 59        f"""
 60        Generate say text for a collection of phonemes with adsr, pitch modulation, and more.
 61        Args:
 62            note: The note to play, eg "A3"
 63            phoneme: A valid combination of Phonemes documented in [Apple's Speech Synthesis guide](https://developer.apple.com/library/archive/documentation/UserExperience/Conceptual/SpeechSynthesisProgrammingGuide/Phonemes/Phonemes.html#//apple_ref/doc/uid/TP40004365-CH9-SW1).
 64            text: The text to "sing". If provided, this will override phoneme.
 65            start: The number of milliseconds of silence to add to the beginning of the track.
 66            start_bpm: A BPM to use when calculating the number of milliseconds of silence to add to the beginning of the track.
 67            start_count: A count to use when calculating the number of milliseconds of silence to add to the beginning of the track.
 68            start_time_sig: A time signature to use when calculating the number of milliseconds of silence to add to the beginning of the track.
 69            velocity: The midi velocity value to use for this note (0-127).
 70            volume_level_per_segment: The number of segments after which volume settings will be rendered (eg: "3" would mean one segment would have volume settings and then the next two would not, etc.))
 71            include_volume_level: Whether or not to the render the volume settings for this note.
 72                         Over-rendering these settings can lead to audio drop-outs.
 73            attack: A value between 0 and 1 representing the ratio of the note's total length during which the note will increase to it's amplitude.
 74                    A lower number is a faster attack while a larger number is a slow attack. (see `midi_utils.ADSR`).
 75            decay:  A value between 0 and 1 representing the ratio of note's total length during which the note will decrease in amplitude from the max amplitude to the sustain level. (see `midi_utils.ADSR`).
 76            sustain: A value between 0 and 1 representing the relative volume level of the sustain phase (0 is the min volume_range, 1 is the max).
 77            release: A value between 0 and 1 representing the ratio of the note's total length during which the note will decrease in amplitude from the sustain level to zero.
 78            duration: The duration of this note in number of milliseconds.
 79            duration_bpm: A BPM to use when calculating the note's duration.
 80            duration_count: A count to use when calculating the note's duration.
 81            duration_time_sig: A time signature to use when calculating the note's duration.
 82            segment_duration: The duration of each `Segment` of this note in number of milliseconds.
 83            segment_bpm: A BPM to use when calculating the duration of each `Segment` in this note.
 84            segment_count: A count to use when calculating duration of each `Segment` of this note
 85            segment_time_sig: A time signature to use when calculating the duration of each `Segment` in this note.
 86            randomize_phoneme: Randomize the phoneme for every note. "
 87                If "all" is passed, all valid phonemes will be used.
 88                Alternatively pass a list of phonemes (eg 'm,l,n') or a voice and style, eg: Fred:drone.
 89                Valid voices include: {', '.join(SAY_TUNED_VOICES)}.
 90                Valid styles include: {', '.join(SAY_PHONEME_CLASSES)}.
 91            randomize_velocity: Randomize a note's velocity by supplying a min and max midi velocity (eg: -rv [40, 120])
 92            randomize_octave: A list of octaves to randomly vary between.
 93                              You can weight certain octaves by providing them multiple times
 94                              (eg: [0,0-1,-1,2] would prefer the root octave first, one octave down second, and two octaves up third).
 95            randomize_segments: Randomize the 'phoneme', 'octave', and/or 'velocity' according to each respective randomization setting.
 96            randomize_start: Randomize the number of milliseconds to silence to add before the say text.
 97                             The first number passed in is the minimum of the range, the second is the max (eg: [4000, 12000] would set a range for four to twelve seconds).
 98            **segment_options: Additional options to pass to each `Segment`.
 99        """
100
101        self.segment_options = segment_options
102        root = segment_options.pop("root", None)
103
104        if root or note:
105            self.note = note_to_midi(root or note)  # root == note
106            self.name = midi_to_note(self.note)
107        else:
108            self.note = 0
109            self.name = "silence"
110
111        # phoneme
112        self.phoneme = phoneme
113        if isinstance(self.phoneme, str):
114            self.phoneme = [phoneme]
115
116        # text / lyrics
117        self.lyrics = None
118        if text:
119            self.lyrics = Lyrics(text)
120
121        # start position
122        self.start = start
123        if not self.start:
124            self.start = bpm_to_time(start_bpm, start_count, start_time_sig)
125        if randomize_start:
126            self.start = random.choice(
127                range(self.randomize_start[0], self.randomize_start[1] + 1)
128            )
129
130        # duration
131        self.duration = duration
132        if not self.duration:
133            self.duration = bpm_to_time(
134                duration_bpm, duration_count, duration_time_sig
135            )
136
137        # velocity
138        self.velocity = velocity
139        self.volume_level_per_segment = volume_level_per_segment
140        self.include_volume_level = include_volume_level
141
142        # segmentation
143        self.segment_duration = segment_duration
144        if not self.segment_duration:
145            self.segment_duration = bpm_to_time(
146                segment_bpm, segment_count, segment_time_sig
147            )
148        self.segment_duration = min(
149            SAY_SEGMENT_MAX_DURATION, self.segment_duration
150        )
151        self.segment_count = int(self.duration / self.segment_duration) + 1
152
153        # adsr
154        self.adsr = ADSR(
155            attack, decay, sustain, release, samples=self.segment_count
156        )
157
158        # randomization
159        self.randomize_phoneme = randomize_phoneme
160        self.randomize_velocity = randomize_velocity
161        self.randomize_octave = randomize_octave
162        self.randomize_segments = randomize_segments
163        self.randomize_start = randomize_start

The generated list of Segment within the note.

n_segments

The number of Segments in the Note.

def to_text(self) -> str:
307    def to_text(self) -> str:
308        """
309        Render this Note as Apple SpeechSynthesis DSL text.
310        """
311        return "\n".join([s.to_text() for s in self.segments])

Render this Note as Apple SpeechSynthesis DSL text.