saysynth.core.note
The Note class creates a list of `Segment` objects with configurable duration, pitch, phonemes, and volume envelope.
"""
The Note class creates a list of `Segment` with configurable
duration, pitch, phonemes, and volume envelope.
<center><img src="/assets/img/coffee.png"></img></center>
"""
import copy
import random
from typing import Any, Dict, List, Optional, Tuple, Union

from midi_utils import ADSR, midi_to_note, note_to_midi

from ..constants import (SAY_ALL_PHONEMES, SAY_PHONEME_CLASSES,
                         SAY_PHONEME_VOICE_CLASSES, SAY_SEGMENT_MAX_DURATION,
                         SAY_SEGMENT_SILENCE_DURATION, SAY_TUNED_VOICES)
from ..utils import bpm_to_time, frange
from .base import SayObject
from .lyrics import Lyrics
from .segment import Segment


class Note(SayObject):
    """
    A note rendered as an optional run of silent `Segment`s (the start
    offset) followed by pitched `Segment`s that add up to the note's duration.
    """

    def __init__(
        self,
        note: Union[int, str] = "A3",
        phoneme: List[str] = ["m"],
        text: Optional[str] = None,
        # start position
        start: Optional[int] = 0,
        start_bpm: Optional[Union[float, int]] = 120,
        start_count: Union[str, float, int] = 0,
        start_time_sig: str = "4/4",
        # envelope
        velocity: int = 127,
        volume_level_per_segment: int = 3,
        include_volume_level: bool = True,
        attack: Union[float, int] = 0,
        decay: Union[float, int] = 0,
        sustain: Union[float, int] = 1,
        release: Union[float, int] = 0,
        # length
        duration: Optional[Union[float, int]] = None,
        duration_bpm: Optional[Union[float, int]] = 120,
        duration_count: Union[str, float, int] = 1,
        duration_time_sig: str = "4/4",
        # segmentation
        segment_duration: Optional[int] = None,
        segment_bpm: Optional[float] = 120,
        segment_count: Optional[Union[str, float, int]] = 1.0 / 8.0,
        segment_time_sig: Optional[str] = "4/4",
        # randomization
        randomize_phoneme: Optional[str] = None,
        randomize_velocity: Optional[Tuple[int, int]] = None,
        randomize_octave: Optional[List[int]] = [],
        randomize_segments: Optional[List[str]] = [],
        randomize_start: Optional[Tuple[int, int]] = None,
        **segment_options,
    ):
        # A plain string literal is used here on purpose: an f-string is an
        # expression, not a docstring, and would leave `__doc__` unset.
        """
        Generate say text for a collection of phonemes with adsr, pitch
        modulation, and more.

        Args:
            note: The note to play, eg "A3".
            phoneme: A valid combination of Phonemes documented in [Apple's Speech Synthesis guide](https://developer.apple.com/library/archive/documentation/UserExperience/Conceptual/SpeechSynthesisProgrammingGuide/Phonemes/Phonemes.html#//apple_ref/doc/uid/TP40004365-CH9-SW1).
                A single string is wrapped into a one-element list.
            text: The text to "sing". If provided, this will override phoneme.
            start: The number of milliseconds of silence to add to the beginning of the track.
            start_bpm: A BPM to use when calculating the number of milliseconds of silence to add to the beginning of the track.
            start_count: A count to use when calculating the number of milliseconds of silence to add to the beginning of the track.
            start_time_sig: A time signature to use when calculating the number of milliseconds of silence to add to the beginning of the track.
            velocity: The midi velocity value to use for this note (0-127).
            volume_level_per_segment: The number of segments after which volume settings will be rendered (eg: "3" would mean one segment would have volume settings and then the next two would not, etc.)
            include_volume_level: Whether or not to render the volume settings for this note.
                Over-rendering these settings can lead to audio drop-outs.
            attack: A value between 0 and 1 representing the ratio of the note's total length during which the note will increase to its max amplitude.
                A lower number is a faster attack while a larger number is a slow attack. (see `midi_utils.ADSR`).
            decay: A value between 0 and 1 representing the ratio of the note's total length during which the note will decrease in amplitude from the max amplitude to the sustain level. (see `midi_utils.ADSR`).
            sustain: A value between 0 and 1 representing the relative volume level of the sustain phase (0 is the min volume_range, 1 is the max).
            release: A value between 0 and 1 representing the ratio of the note's total length during which the note will decrease in amplitude from the sustain level to zero.
            duration: The duration of this note in number of milliseconds.
            duration_bpm: A BPM to use when calculating the note's duration.
            duration_count: A count to use when calculating the note's duration.
            duration_time_sig: A time signature to use when calculating the note's duration.
            segment_duration: The duration of each `Segment` of this note in number of milliseconds.
                Capped at `SAY_SEGMENT_MAX_DURATION`.
            segment_bpm: A BPM to use when calculating the duration of each `Segment` in this note.
            segment_count: A count to use when calculating the duration of each `Segment` of this note.
            segment_time_sig: A time signature to use when calculating the duration of each `Segment` in this note.
            randomize_phoneme: Randomize the phoneme for every segment.
                If "all" is passed, all valid phonemes will be used.
                Alternatively pass a list of phonemes (eg 'm,l,n') or a voice and style, eg: Fred:drone.
                Valid voices are listed in `SAY_TUNED_VOICES`.
                Valid styles are listed in `SAY_PHONEME_CLASSES`.
            randomize_velocity: Randomize a note's velocity by supplying a min and max midi velocity (eg: -rv [40, 120])
            randomize_octave: A list of octaves to randomly vary between.
                You can weight certain octaves by providing them multiple times
                (eg: [0,0,-1,-1,2] would prefer the root octave first, one octave down second, and two octaves up third).
            randomize_segments: Randomize the 'octave' and/or 'velocity' of every segment according to each respective randomization setting.
                (Phonemes are re-drawn for every segment whenever `randomize_phoneme` is set.)
            randomize_start: Randomize the number of milliseconds of silence to add before the say text.
                The first number passed in is the minimum of the range, the second is the max (eg: [4000, 12000] would set a range for four to twelve seconds).
            **segment_options: Additional options to pass to each `Segment`.
                A `root` option, if present, is removed and used in place of `note`.
        """
        # `self.segment_options` aliases the kwargs dict, so popping "root"
        # also keeps it from being forwarded to every `Segment`.
        self.segment_options = segment_options
        root = segment_options.pop("root", None)

        if root or note:
            self.note = note_to_midi(root or note)  # root == note
            self.name = midi_to_note(self.note)
        else:
            self.note = 0
            self.name = "silence"

        # phoneme
        self.phoneme = [phoneme] if isinstance(phoneme, str) else phoneme

        # text / lyrics
        self.lyrics = Lyrics(text) if text else None

        # start position
        self.start = start
        if not self.start:
            self.start = bpm_to_time(start_bpm, start_count, start_time_sig)
        if randomize_start:
            # Use the argument: the attribute is only assigned further down,
            # so reading `self.randomize_start` here raised AttributeError.
            self.start = random.randint(randomize_start[0], randomize_start[1])

        # duration
        self.duration = duration
        if not self.duration:
            self.duration = bpm_to_time(
                duration_bpm, duration_count, duration_time_sig
            )

        # velocity
        self.velocity = velocity
        self.volume_level_per_segment = volume_level_per_segment
        self.include_volume_level = include_volume_level

        # segmentation
        self.segment_duration = segment_duration
        if not self.segment_duration:
            self.segment_duration = bpm_to_time(
                segment_bpm, segment_count, segment_time_sig
            )
        self.segment_duration = min(
            SAY_SEGMENT_MAX_DURATION, self.segment_duration
        )
        self.segment_count = int(self.duration / self.segment_duration) + 1

        # adsr: one envelope sample per expected note segment
        self.adsr = ADSR(
            attack, decay, sustain, release, samples=self.segment_count
        )

        # randomization
        self.randomize_phoneme = randomize_phoneme
        self.randomize_velocity = randomize_velocity
        # `or []` accepts None (the parameters are Optional) and avoids
        # storing the shared mutable default list on the instance.
        self.randomize_octave = randomize_octave or []
        self.randomize_segments = randomize_segments or []
        self.randomize_start = randomize_start

    def _get_random_phoneme(self, index: int) -> str:
        """
        Draw a random phoneme according to `self.randomize_phoneme`.

        Raises:
            ValueError: if a `voice:style` spec names an unknown voice or style.
        """
        if self.randomize_phoneme == "all":
            return random.choice(SAY_ALL_PHONEMES)
        if ":" in self.randomize_phoneme:
            # maxsplit=1 so a stray extra colon reaches the descriptive
            # error below instead of failing on tuple unpacking.
            voice, style = self.randomize_phoneme.split(":", 1)
            voice = voice.title()  # allow for lowercase
            try:
                return random.choice(SAY_PHONEME_VOICE_CLASSES[voice][style])
            except KeyError as err:
                raise ValueError(
                    f"Invalid `voice` '{voice}' or `style` '{style}'. "
                    f"`voice` must be one of: {', '.join(SAY_TUNED_VOICES)}. "
                    f"`style` must be one of: {', '.join(SAY_PHONEME_CLASSES)}"
                ) from err
        return random.choice(
            [c.strip() for c in self.randomize_phoneme.split(",")]
        )

    def _get_phoneme(self, index: int) -> str:
        """
        Return the phoneme for segment `index`: a random one if
        `randomize_phoneme` is set, else from the lyrics, else by cycling
        through `self.phoneme`.
        """
        if self.randomize_phoneme:
            return self._get_random_phoneme(index)

        if self.lyrics:
            return self.lyrics.get_phoneme_for_index(index)

        return self.phoneme[index % len(self.phoneme)]

    def _get_note(self) -> int:
        """
        Return the midi note, shifted by a randomly chosen octave when
        `randomize_octave` is non-empty.
        """
        if self.randomize_octave:
            return (random.choice(self.randomize_octave) * 12) + note_to_midi(
                self.note
            )
        return self.note

    def _get_velocity(self) -> int:
        """
        Return the velocity, drawn uniformly from the inclusive
        `randomize_velocity` range when one is set.
        """
        if self.randomize_velocity:
            return random.randint(
                self.randomize_velocity[0], self.randomize_velocity[1]
            )
        return self.velocity

    def _get_segment_kwargs(self, **kwargs) -> Dict[str, Any]:
        """
        Merge per-call `kwargs` over a shallow copy of `self.segment_options`.
        """
        opts = copy.copy(self.segment_options)
        opts.update(kwargs)
        return opts

    def _randomize_segment(self, note, velocity):
        """
        Optionally re-draw `note` and/or `velocity` for a single segment,
        depending on which names are listed in `randomize_segments`.
        """
        if "octave" in self.randomize_segments and self.randomize_octave:
            note = self._get_note()
        if "velocity" in self.randomize_segments and self.randomize_velocity:
            velocity = self._get_velocity()
        return note, velocity

    def _get_segment(
        self,
        index: int = 0,
        note: Optional[Union[int, str]] = None,
        velocity: int = 0,
        duration: Optional[float] = None,
        **kwargs,
    ) -> Segment:
        """
        Generate each segment of the Note, applying randomization, ADSR
        settings, phoneme generation, and other Segment parameters.

        Args:
            index: Position of the segment; selects the ADSR value, the
                phoneme, and whether the volume level is rendered.
            note: The note for this segment.
            velocity: Velocity before the ADSR envelope is applied.
            duration: Segment length; falls back to `self.segment_duration`.
            **kwargs: Overrides merged on top of `self.segment_options`.
        """
        note, velocity = self._randomize_segment(note, velocity)
        return Segment(
            note=note,
            velocity=velocity * self.adsr.get_value(index),
            phoneme=self._get_phoneme(index),
            duration=duration or self.segment_duration,
            # only every `volume_level_per_segment`-th segment renders volume
            include_volume_level=self.include_volume_level
            and index % self.volume_level_per_segment == 0,
            **self._get_segment_kwargs(**kwargs),
        )

    @property
    def segments(self) -> List[Segment]:
        """
        The generated list of `Segment` within the note.

        The list is rebuilt on every access, so randomized settings are
        re-drawn each time.
        """
        _segments = []
        # get initial value of note + velocity
        note = self._get_note()
        velocity = self._get_velocity()

        if self.start and self.start > 0:
            # create multiple silent phonemes which add up to the desired start position
            start_breaks = list(
                frange(0.0, self.start, SAY_SEGMENT_SILENCE_DURATION, 10)
            )
            # Pre-set the loop variables: if there is no break beyond 0.0
            # the loop body never runs and the check below would otherwise
            # raise UnboundLocalError.
            index = -1
            total_start_time = 0.0
            for index, total_start_time in enumerate(start_breaks[1:]):
                _segments.append(
                    self._get_segment(index, type="silence", velocity=0)
                )

            if total_start_time < self.start:
                # add final step of silence
                _segments.append(
                    self._get_segment(index + 1, type="silence", velocity=0)
                )

        # Only note segments count towards `segment_count`; counting the
        # silence above as well would drop the trailing partial segment.
        n_silence = len(_segments)

        # create multiple phonemes which add up to the note's duration
        segment_breaks = list(
            frange(0.0, self.duration, self.segment_duration, 10)
        )
        segment_type = self.segment_options.get("type", "note")
        total_time = 0
        index = 0
        for index, total_time in enumerate(segment_breaks[1:]):
            _segments.append(
                self._get_segment(index, note, velocity, type=segment_type)
            )

        n_notes = len(_segments) - n_silence
        if total_time < self.duration and n_notes < self.segment_count:
            # add final step, covering whatever time remains
            _segments.append(
                self._get_segment(
                    index + 1,
                    note,
                    velocity,
                    duration=self.duration - total_time,
                    type=segment_type,
                )
            )
        return _segments

    @property
    def n_segments(self) -> int:
        """
        The number of Segments in the Note.
        """
        return len(self.segments)

    def to_text(self) -> str:
        """
        Render this Note as Apple SpeechSynthesis DSL text.
        """
        return "\n".join([s.to_text() for s in self.segments])
class Note(SayObject):
    """
    A note rendered as an optional run of silent `Segment`s (the start
    offset) followed by pitched `Segment`s that add up to the note's duration.
    """

    def __init__(
        self,
        note: Union[int, str] = "A3",
        phoneme: List[str] = ["m"],
        text: Optional[str] = None,
        # start position
        start: Optional[int] = 0,
        start_bpm: Optional[Union[float, int]] = 120,
        start_count: Union[str, float, int] = 0,
        start_time_sig: str = "4/4",
        # envelope
        velocity: int = 127,
        volume_level_per_segment: int = 3,
        include_volume_level: bool = True,
        attack: Union[float, int] = 0,
        decay: Union[float, int] = 0,
        sustain: Union[float, int] = 1,
        release: Union[float, int] = 0,
        # length
        duration: Optional[Union[float, int]] = None,
        duration_bpm: Optional[Union[float, int]] = 120,
        duration_count: Union[str, float, int] = 1,
        duration_time_sig: str = "4/4",
        # segmentation
        segment_duration: Optional[int] = None,
        segment_bpm: Optional[float] = 120,
        segment_count: Optional[Union[str, float, int]] = 1.0 / 8.0,
        segment_time_sig: Optional[str] = "4/4",
        # randomization
        randomize_phoneme: Optional[str] = None,
        randomize_velocity: Optional[Tuple[int, int]] = None,
        randomize_octave: Optional[List[int]] = [],
        randomize_segments: Optional[List[str]] = [],
        randomize_start: Optional[Tuple[int, int]] = None,
        **segment_options,
    ):
        # A plain string literal is used here on purpose: an f-string is an
        # expression, not a docstring, and would leave `__doc__` unset.
        """
        Generate say text for a collection of phonemes with adsr, pitch
        modulation, and more.

        Args:
            note: The note to play, eg "A3".
            phoneme: A valid combination of Phonemes documented in [Apple's Speech Synthesis guide](https://developer.apple.com/library/archive/documentation/UserExperience/Conceptual/SpeechSynthesisProgrammingGuide/Phonemes/Phonemes.html#//apple_ref/doc/uid/TP40004365-CH9-SW1).
                A single string is wrapped into a one-element list.
            text: The text to "sing". If provided, this will override phoneme.
            start: The number of milliseconds of silence to add to the beginning of the track.
            start_bpm: A BPM to use when calculating the number of milliseconds of silence to add to the beginning of the track.
            start_count: A count to use when calculating the number of milliseconds of silence to add to the beginning of the track.
            start_time_sig: A time signature to use when calculating the number of milliseconds of silence to add to the beginning of the track.
            velocity: The midi velocity value to use for this note (0-127).
            volume_level_per_segment: The number of segments after which volume settings will be rendered (eg: "3" would mean one segment would have volume settings and then the next two would not, etc.)
            include_volume_level: Whether or not to render the volume settings for this note.
                Over-rendering these settings can lead to audio drop-outs.
            attack: A value between 0 and 1 representing the ratio of the note's total length during which the note will increase to its max amplitude.
                A lower number is a faster attack while a larger number is a slow attack. (see `midi_utils.ADSR`).
            decay: A value between 0 and 1 representing the ratio of the note's total length during which the note will decrease in amplitude from the max amplitude to the sustain level. (see `midi_utils.ADSR`).
            sustain: A value between 0 and 1 representing the relative volume level of the sustain phase (0 is the min volume_range, 1 is the max).
            release: A value between 0 and 1 representing the ratio of the note's total length during which the note will decrease in amplitude from the sustain level to zero.
            duration: The duration of this note in number of milliseconds.
            duration_bpm: A BPM to use when calculating the note's duration.
            duration_count: A count to use when calculating the note's duration.
            duration_time_sig: A time signature to use when calculating the note's duration.
            segment_duration: The duration of each `Segment` of this note in number of milliseconds.
                Capped at `SAY_SEGMENT_MAX_DURATION`.
            segment_bpm: A BPM to use when calculating the duration of each `Segment` in this note.
            segment_count: A count to use when calculating the duration of each `Segment` of this note.
            segment_time_sig: A time signature to use when calculating the duration of each `Segment` in this note.
            randomize_phoneme: Randomize the phoneme for every segment.
                If "all" is passed, all valid phonemes will be used.
                Alternatively pass a list of phonemes (eg 'm,l,n') or a voice and style, eg: Fred:drone.
                Valid voices are listed in `SAY_TUNED_VOICES`.
                Valid styles are listed in `SAY_PHONEME_CLASSES`.
            randomize_velocity: Randomize a note's velocity by supplying a min and max midi velocity (eg: -rv [40, 120])
            randomize_octave: A list of octaves to randomly vary between.
                You can weight certain octaves by providing them multiple times
                (eg: [0,0,-1,-1,2] would prefer the root octave first, one octave down second, and two octaves up third).
            randomize_segments: Randomize the 'octave' and/or 'velocity' of every segment according to each respective randomization setting.
                (Phonemes are re-drawn for every segment whenever `randomize_phoneme` is set.)
            randomize_start: Randomize the number of milliseconds of silence to add before the say text.
                The first number passed in is the minimum of the range, the second is the max (eg: [4000, 12000] would set a range for four to twelve seconds).
            **segment_options: Additional options to pass to each `Segment`.
                A `root` option, if present, is removed and used in place of `note`.
        """
        # `self.segment_options` aliases the kwargs dict, so popping "root"
        # also keeps it from being forwarded to every `Segment`.
        self.segment_options = segment_options
        root = segment_options.pop("root", None)

        if root or note:
            self.note = note_to_midi(root or note)  # root == note
            self.name = midi_to_note(self.note)
        else:
            self.note = 0
            self.name = "silence"

        # phoneme
        self.phoneme = [phoneme] if isinstance(phoneme, str) else phoneme

        # text / lyrics
        self.lyrics = Lyrics(text) if text else None

        # start position
        self.start = start
        if not self.start:
            self.start = bpm_to_time(start_bpm, start_count, start_time_sig)
        if randomize_start:
            # Use the argument: the attribute is only assigned further down,
            # so reading `self.randomize_start` here raised AttributeError.
            self.start = random.randint(randomize_start[0], randomize_start[1])

        # duration
        self.duration = duration
        if not self.duration:
            self.duration = bpm_to_time(
                duration_bpm, duration_count, duration_time_sig
            )

        # velocity
        self.velocity = velocity
        self.volume_level_per_segment = volume_level_per_segment
        self.include_volume_level = include_volume_level

        # segmentation
        self.segment_duration = segment_duration
        if not self.segment_duration:
            self.segment_duration = bpm_to_time(
                segment_bpm, segment_count, segment_time_sig
            )
        self.segment_duration = min(
            SAY_SEGMENT_MAX_DURATION, self.segment_duration
        )
        self.segment_count = int(self.duration / self.segment_duration) + 1

        # adsr: one envelope sample per expected note segment
        self.adsr = ADSR(
            attack, decay, sustain, release, samples=self.segment_count
        )

        # randomization
        self.randomize_phoneme = randomize_phoneme
        self.randomize_velocity = randomize_velocity
        # `or []` accepts None (the parameters are Optional) and avoids
        # storing the shared mutable default list on the instance.
        self.randomize_octave = randomize_octave or []
        self.randomize_segments = randomize_segments or []
        self.randomize_start = randomize_start

    def _get_random_phoneme(self, index: int) -> str:
        """
        Draw a random phoneme according to `self.randomize_phoneme`.

        Raises:
            ValueError: if a `voice:style` spec names an unknown voice or style.
        """
        if self.randomize_phoneme == "all":
            return random.choice(SAY_ALL_PHONEMES)
        if ":" in self.randomize_phoneme:
            # maxsplit=1 so a stray extra colon reaches the descriptive
            # error below instead of failing on tuple unpacking.
            voice, style = self.randomize_phoneme.split(":", 1)
            voice = voice.title()  # allow for lowercase
            try:
                return random.choice(SAY_PHONEME_VOICE_CLASSES[voice][style])
            except KeyError as err:
                raise ValueError(
                    f"Invalid `voice` '{voice}' or `style` '{style}'. "
                    f"`voice` must be one of: {', '.join(SAY_TUNED_VOICES)}. "
                    f"`style` must be one of: {', '.join(SAY_PHONEME_CLASSES)}"
                ) from err
        return random.choice(
            [c.strip() for c in self.randomize_phoneme.split(",")]
        )

    def _get_phoneme(self, index: int) -> str:
        """
        Return the phoneme for segment `index`: a random one if
        `randomize_phoneme` is set, else from the lyrics, else by cycling
        through `self.phoneme`.
        """
        if self.randomize_phoneme:
            return self._get_random_phoneme(index)

        if self.lyrics:
            return self.lyrics.get_phoneme_for_index(index)

        return self.phoneme[index % len(self.phoneme)]

    def _get_note(self) -> int:
        """
        Return the midi note, shifted by a randomly chosen octave when
        `randomize_octave` is non-empty.
        """
        if self.randomize_octave:
            return (random.choice(self.randomize_octave) * 12) + note_to_midi(
                self.note
            )
        return self.note

    def _get_velocity(self) -> int:
        """
        Return the velocity, drawn uniformly from the inclusive
        `randomize_velocity` range when one is set.
        """
        if self.randomize_velocity:
            return random.randint(
                self.randomize_velocity[0], self.randomize_velocity[1]
            )
        return self.velocity

    def _get_segment_kwargs(self, **kwargs) -> Dict[str, Any]:
        """
        Merge per-call `kwargs` over a shallow copy of `self.segment_options`.
        """
        opts = copy.copy(self.segment_options)
        opts.update(kwargs)
        return opts

    def _randomize_segment(self, note, velocity):
        """
        Optionally re-draw `note` and/or `velocity` for a single segment,
        depending on which names are listed in `randomize_segments`.
        """
        if "octave" in self.randomize_segments and self.randomize_octave:
            note = self._get_note()
        if "velocity" in self.randomize_segments and self.randomize_velocity:
            velocity = self._get_velocity()
        return note, velocity

    def _get_segment(
        self,
        index: int = 0,
        note: Optional[Union[int, str]] = None,
        velocity: int = 0,
        duration: Optional[float] = None,
        **kwargs,
    ) -> Segment:
        """
        Generate each segment of the Note, applying randomization, ADSR
        settings, phoneme generation, and other Segment parameters.

        Args:
            index: Position of the segment; selects the ADSR value, the
                phoneme, and whether the volume level is rendered.
            note: The note for this segment.
            velocity: Velocity before the ADSR envelope is applied.
            duration: Segment length; falls back to `self.segment_duration`.
            **kwargs: Overrides merged on top of `self.segment_options`.
        """
        note, velocity = self._randomize_segment(note, velocity)
        return Segment(
            note=note,
            velocity=velocity * self.adsr.get_value(index),
            phoneme=self._get_phoneme(index),
            duration=duration or self.segment_duration,
            # only every `volume_level_per_segment`-th segment renders volume
            include_volume_level=self.include_volume_level
            and index % self.volume_level_per_segment == 0,
            **self._get_segment_kwargs(**kwargs),
        )

    @property
    def segments(self) -> List[Segment]:
        """
        The generated list of `Segment` within the note.

        The list is rebuilt on every access, so randomized settings are
        re-drawn each time.
        """
        _segments = []
        # get initial value of note + velocity
        note = self._get_note()
        velocity = self._get_velocity()

        if self.start and self.start > 0:
            # create multiple silent phonemes which add up to the desired start position
            start_breaks = list(
                frange(0.0, self.start, SAY_SEGMENT_SILENCE_DURATION, 10)
            )
            # Pre-set the loop variables: if there is no break beyond 0.0
            # the loop body never runs and the check below would otherwise
            # raise UnboundLocalError.
            index = -1
            total_start_time = 0.0
            for index, total_start_time in enumerate(start_breaks[1:]):
                _segments.append(
                    self._get_segment(index, type="silence", velocity=0)
                )

            if total_start_time < self.start:
                # add final step of silence
                _segments.append(
                    self._get_segment(index + 1, type="silence", velocity=0)
                )

        # Only note segments count towards `segment_count`; counting the
        # silence above as well would drop the trailing partial segment.
        n_silence = len(_segments)

        # create multiple phonemes which add up to the note's duration
        segment_breaks = list(
            frange(0.0, self.duration, self.segment_duration, 10)
        )
        segment_type = self.segment_options.get("type", "note")
        total_time = 0
        index = 0
        for index, total_time in enumerate(segment_breaks[1:]):
            _segments.append(
                self._get_segment(index, note, velocity, type=segment_type)
            )

        n_notes = len(_segments) - n_silence
        if total_time < self.duration and n_notes < self.segment_count:
            # add final step, covering whatever time remains
            _segments.append(
                self._get_segment(
                    index + 1,
                    note,
                    velocity,
                    duration=self.duration - total_time,
                    type=segment_type,
                )
            )
        return _segments

    @property
    def n_segments(self) -> int:
        """
        The number of Segments in the Note.
        """
        return len(self.segments)

    def to_text(self) -> str:
        """
        Render this Note as Apple SpeechSynthesis DSL text.
        """
        return "\n".join([s.to_text() for s in self.segments])
Note( note: Union[int, str] = 'A3', phoneme: List[str] = ['m'], text: Optional[str] = None, start: Optional[int] = 0, start_bpm: Union[int, float, NoneType] = 120, start_count: Union[str, float, int] = 0, start_time_sig: str = '4/4', velocity: int = 127, volume_level_per_segment: int = 3, include_volume_level: bool = True, attack: Union[float, int] = 0, decay: Union[float, int] = 0, sustain: Union[float, int] = 1, release: Union[float, int] = 0, duration: Union[int, float, NoneType] = None, duration_bpm: Union[int, float, NoneType] = 120, duration_count: Union[str, float, int] = 1, duration_time_sig: str = '4/4', segment_duration: Optional[int] = None, segment_bpm: Optional[float] = 120, segment_count: Union[str, float, int, NoneType] = 0.125, segment_time_sig: Optional[str] = '4/4', randomize_phoneme: Optional[str] = None, randomize_velocity: Optional[Tuple[int, int]] = None, randomize_octave: Optional[List[int]] = [], randomize_segments: Optional[List[str]] = [], randomize_start: Optional[Tuple[int, int]] = None, **segment_options)
def __init__(
    self,
    note: Union[int, str] = "A3",
    phoneme: List[str] = ["m"],
    text: Optional[str] = None,
    # start position
    start: Optional[int] = 0,
    start_bpm: Optional[Union[float, int]] = 120,
    start_count: Union[str, float, int] = 0,
    start_time_sig: str = "4/4",
    # envelope
    velocity: int = 127,
    volume_level_per_segment: int = 3,
    include_volume_level: bool = True,
    attack: Union[float, int] = 0,
    decay: Union[float, int] = 0,
    sustain: Union[float, int] = 1,
    release: Union[float, int] = 0,
    # length
    duration: Optional[Union[float, int]] = None,
    duration_bpm: Optional[Union[float, int]] = 120,
    duration_count: Union[str, float, int] = 1,
    duration_time_sig: str = "4/4",
    # segmentation
    segment_duration: Optional[int] = None,
    segment_bpm: Optional[float] = 120,
    segment_count: Optional[Union[str, float, int]] = 1.0 / 8.0,
    segment_time_sig: Optional[str] = "4/4",
    # randomization
    randomize_phoneme: Optional[str] = None,
    randomize_velocity: Optional[Tuple[int, int]] = None,
    randomize_octave: Optional[List[int]] = [],
    randomize_segments: Optional[List[str]] = [],
    randomize_start: Optional[Tuple[int, int]] = None,
    **segment_options,
):
    # A plain string literal is used here on purpose: an f-string is an
    # expression, not a docstring, and would leave `__doc__` unset.
    """
    Generate say text for a collection of phonemes with adsr, pitch
    modulation, and more.

    Args:
        note: The note to play, eg "A3".
        phoneme: A valid combination of Phonemes documented in [Apple's Speech Synthesis guide](https://developer.apple.com/library/archive/documentation/UserExperience/Conceptual/SpeechSynthesisProgrammingGuide/Phonemes/Phonemes.html#//apple_ref/doc/uid/TP40004365-CH9-SW1).
            A single string is wrapped into a one-element list.
        text: The text to "sing". If provided, this will override phoneme.
        start: The number of milliseconds of silence to add to the beginning of the track.
        start_bpm: A BPM to use when calculating the number of milliseconds of silence to add to the beginning of the track.
        start_count: A count to use when calculating the number of milliseconds of silence to add to the beginning of the track.
        start_time_sig: A time signature to use when calculating the number of milliseconds of silence to add to the beginning of the track.
        velocity: The midi velocity value to use for this note (0-127).
        volume_level_per_segment: The number of segments after which volume settings will be rendered (eg: "3" would mean one segment would have volume settings and then the next two would not, etc.)
        include_volume_level: Whether or not to render the volume settings for this note.
            Over-rendering these settings can lead to audio drop-outs.
        attack: A value between 0 and 1 representing the ratio of the note's total length during which the note will increase to its max amplitude.
            A lower number is a faster attack while a larger number is a slow attack. (see `midi_utils.ADSR`).
        decay: A value between 0 and 1 representing the ratio of the note's total length during which the note will decrease in amplitude from the max amplitude to the sustain level. (see `midi_utils.ADSR`).
        sustain: A value between 0 and 1 representing the relative volume level of the sustain phase (0 is the min volume_range, 1 is the max).
        release: A value between 0 and 1 representing the ratio of the note's total length during which the note will decrease in amplitude from the sustain level to zero.
        duration: The duration of this note in number of milliseconds.
        duration_bpm: A BPM to use when calculating the note's duration.
        duration_count: A count to use when calculating the note's duration.
        duration_time_sig: A time signature to use when calculating the note's duration.
        segment_duration: The duration of each `Segment` of this note in number of milliseconds.
            Capped at `SAY_SEGMENT_MAX_DURATION`.
        segment_bpm: A BPM to use when calculating the duration of each `Segment` in this note.
        segment_count: A count to use when calculating the duration of each `Segment` of this note.
        segment_time_sig: A time signature to use when calculating the duration of each `Segment` in this note.
        randomize_phoneme: Randomize the phoneme for every segment.
            If "all" is passed, all valid phonemes will be used.
            Alternatively pass a list of phonemes (eg 'm,l,n') or a voice and style, eg: Fred:drone.
            Valid voices are listed in `SAY_TUNED_VOICES`.
            Valid styles are listed in `SAY_PHONEME_CLASSES`.
        randomize_velocity: Randomize a note's velocity by supplying a min and max midi velocity (eg: -rv [40, 120])
        randomize_octave: A list of octaves to randomly vary between.
            You can weight certain octaves by providing them multiple times
            (eg: [0,0,-1,-1,2] would prefer the root octave first, one octave down second, and two octaves up third).
        randomize_segments: Randomize the 'phoneme', 'octave', and/or 'velocity' according to each respective randomization setting.
        randomize_start: Randomize the number of milliseconds of silence to add before the say text.
            The first number passed in is the minimum of the range, the second is the max (eg: [4000, 12000] would set a range for four to twelve seconds).
        **segment_options: Additional options to pass to each `Segment`.
            A `root` option, if present, is removed and used in place of `note`.
    """
    # `self.segment_options` aliases the kwargs dict, so popping "root"
    # also removes it from the options stored on the instance.
    self.segment_options = segment_options
    root = segment_options.pop("root", None)

    if root or note:
        self.note = note_to_midi(root or note)  # root == note
        self.name = midi_to_note(self.note)
    else:
        self.note = 0
        self.name = "silence"

    # phoneme
    self.phoneme = [phoneme] if isinstance(phoneme, str) else phoneme

    # text / lyrics
    self.lyrics = Lyrics(text) if text else None

    # start position
    self.start = start
    if not self.start:
        self.start = bpm_to_time(start_bpm, start_count, start_time_sig)
    if randomize_start:
        # Use the argument: the attribute is only assigned further down,
        # so reading `self.randomize_start` here raised AttributeError.
        self.start = random.randint(randomize_start[0], randomize_start[1])

    # duration
    self.duration = duration
    if not self.duration:
        self.duration = bpm_to_time(
            duration_bpm, duration_count, duration_time_sig
        )

    # velocity
    self.velocity = velocity
    self.volume_level_per_segment = volume_level_per_segment
    self.include_volume_level = include_volume_level

    # segmentation
    self.segment_duration = segment_duration
    if not self.segment_duration:
        self.segment_duration = bpm_to_time(
            segment_bpm, segment_count, segment_time_sig
        )
    self.segment_duration = min(
        SAY_SEGMENT_MAX_DURATION, self.segment_duration
    )
    self.segment_count = int(self.duration / self.segment_duration) + 1

    # adsr: one envelope sample per expected note segment
    self.adsr = ADSR(
        attack, decay, sustain, release, samples=self.segment_count
    )

    # randomization
    self.randomize_phoneme = randomize_phoneme
    self.randomize_velocity = randomize_velocity
    # `or []` accepts None (the parameters are Optional) and avoids
    # storing the shared mutable default list on the instance.
    self.randomize_octave = randomize_octave or []
    self.randomize_segments = randomize_segments or []
    self.randomize_start = randomize_start
def to_text(self) -> str:
def to_text(self) -> str:
    """
    Render this Note as Apple SpeechSynthesis DSL text.

    Every `Segment` in `self.segments` is rendered with its own `to_text()`
    and the results are joined, in order, with newlines.
    """
    rendered = (segment.to_text() for segment in self.segments)
    return "\n".join(rendered)
Render this Note as Apple SpeechSynthesis DSL text.