# generated from thinkode/modelRepository — initial commit, version 1.0
# moviepy/audio/AudioClip.py (441 lines, new file)
"""Implements AudioClip (base class for audio clips) and its main subclasses:
|
||||
|
||||
- Audio clips: AudioClip, AudioFileClip, AudioArrayClip
|
||||
- Composition: CompositeAudioClip
|
||||
"""
|
||||
|
||||
import numbers
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
import proglog
|
||||
|
||||
from moviepy.audio.io.ffmpeg_audiowriter import ffmpeg_audiowrite
|
||||
from moviepy.audio.io.ffplay_audiopreviewer import ffplay_audiopreview
|
||||
from moviepy.Clip import Clip
|
||||
from moviepy.decorators import convert_path_to_string, requires_duration
|
||||
from moviepy.tools import extensions_dict
|
||||
|
||||
|
||||
class AudioClip(Clip):
    """Base class for audio clips.

    See ``AudioFileClip`` and ``CompositeAudioClip`` for usable classes.

    An AudioClip is a Clip with a ``frame_function`` attribute of
    the form ``t -> [ f_t ]`` for mono sound and
    ``t -> [ f1_t, f2_t ]`` for stereo sound (the arrays are Numpy arrays).
    The `f_t` are floats between -1 and 1. These bounds can be
    trespassed without problems (the program will put the
    sound back into the bounds at conversion time, without much impact).

    Parameters
    ----------

    frame_function
      A function `t-> frame at time t`. The frame does not mean much
      for a sound, it is just a float. What 'makes' the sound are
      the variations of that float in the time.

    duration
      Duration of the clip (in seconds). Some clips are infinite, in
      this case their duration will be ``None``.

    nchannels
      Number of channels (one or two for mono or stereo).

    Examples
    --------

    .. code:: python

        # Plays the note A in mono (a sine wave of frequency 440 Hz)
        import numpy as np
        frame_function = lambda t: np.sin(440 * 2 * np.pi * t)
        clip = AudioClip(frame_function, duration=5, fps=44100)
        clip.preview()

        # Plays the note A in stereo (two sine waves of frequencies 440 and 880 Hz)
        frame_function = lambda t: np.array([
            np.sin(440 * 2 * np.pi * t),
            np.sin(880 * 2 * np.pi * t)
        ]).T.copy(order="C")
        clip = AudioClip(frame_function, duration=3, fps=44100)
        clip.preview()

    """

    def __init__(self, frame_function=None, duration=None, fps=None):
        super().__init__()

        if fps is not None:
            self.fps = fps

        if frame_function is not None:
            self.frame_function = frame_function
            # Probe one frame to detect the channel count: an iterable
            # frame means multichannel audio, a bare float means mono.
            frame0 = self.get_frame(0)
            if hasattr(frame0, "__iter__"):
                self.nchannels = len(list(frame0))
            else:
                self.nchannels = 1

        if duration is not None:
            self.duration = duration
            self.end = duration

    @requires_duration
    def iter_chunks(
        self,
        chunksize=None,
        chunk_duration=None,
        fps=None,
        quantize=False,
        nbytes=2,
        logger=None,
    ):
        """Iterator that returns the whole sound array of the clip by chunks.

        Parameters
        ----------

        chunksize
          Number of frames per chunk. One of ``chunksize`` or
          ``chunk_duration`` must be provided.

        chunk_duration
          Duration of each chunk in seconds; converted to a frame count
          using ``fps``. Takes precedence over ``chunksize`` when given.

        fps
          Frame rate used for the conversion. Defaults to ``self.fps``.

        quantize
          If True, chunks are quantized to integers (see ``to_soundarray``).

        nbytes
          Sample width used when ``quantize`` is True: 1, 2 or 4 bytes.

        logger
          Either ``"bar"`` for progress bar or ``None`` or any Proglog logger.

        Raises
        ------

        ValueError
          If neither ``chunksize`` nor ``chunk_duration`` is provided.
        """
        if fps is None:
            fps = self.fps
        logger = proglog.default_bar_logger(logger)
        if chunk_duration is not None:
            chunksize = int(chunk_duration * fps)
        if chunksize is None:
            # Fail early with a clear message instead of a cryptic
            # TypeError from the integer division below.
            raise ValueError(
                "You must provide either 'chunksize' or 'chunk_duration'"
            )

        total_size = int(fps * self.duration)

        nchunks = total_size // chunksize + 1

        positions = np.linspace(0, total_size, nchunks + 1, endpoint=True, dtype=int)

        for i in logger.iter_bar(chunk=list(range(nchunks))):
            size = positions[i + 1] - positions[i]
            assert size <= chunksize
            timings = (1.0 / fps) * np.arange(positions[i], positions[i + 1])
            yield self.to_soundarray(
                timings, nbytes=nbytes, quantize=quantize, fps=fps, buffersize=chunksize
            )

    @requires_duration
    def to_soundarray(
        self, tt=None, fps=None, quantize=False, nbytes=2, buffersize=50000
    ):
        """
        Transforms the sound into an array that can be played by pygame
        or written in a wav file. See ``AudioClip.preview``.

        Parameters
        ----------

        tt
          Explicit array of sample times. If ``None`` (the default),
          timings are derived from ``fps`` over the whole clip duration.

        fps
          Frame rate of the sound for the conversion.
          44100 for top quality.

        quantize
          If True, clip the samples into [-0.99, 0.99] and convert them
          to integers of width ``nbytes``.

        nbytes
          Number of bytes to encode the sound: 1 for 8bit sound,
          2 for 16bit, 4 for 32bit sound.

        buffersize
          Maximum number of frames rendered in a single pass; longer
          clips are rendered chunk by chunk and concatenated.
        """
        if tt is None:
            if fps is None:
                fps = self.fps

            max_duration = buffersize / fps
            if self.duration > max_duration:
                # Too long for one pass: render chunk by chunk and stack.
                stacker = np.vstack if self.nchannels == 2 else np.hstack
                return stacker(
                    tuple(
                        self.iter_chunks(
                            # Forward the caller's sample width; the old code
                            # hard-coded nbytes=2 here and silently ignored
                            # the 'nbytes' argument for long clips.
                            fps=fps,
                            quantize=quantize,
                            nbytes=nbytes,
                            chunksize=buffersize,
                        )
                    )
                )
            else:
                tt = np.arange(0, self.duration, 1.0 / fps)

        snd_array = self.get_frame(tt)

        if quantize:
            snd_array = np.maximum(-0.99, np.minimum(0.99, snd_array))
            inttype = {1: "int8", 2: "int16", 4: "int32"}[nbytes]
            snd_array = (2 ** (8 * nbytes - 1) * snd_array).astype(inttype)

        return snd_array

    def max_volume(self, stereo=False, chunksize=50000, logger=None):
        """Returns the maximum volume level of the clip.

        Parameters
        ----------

        stereo
          If True (and the clip has more than one channel), return the
          per-channel maxima instead of a single float.

        chunksize
          Number of frames scanned per iteration (see ``iter_chunks``).

        logger
          Either ``"bar"`` for progress bar or ``None`` or any Proglog logger.
        """
        # max volume separated by channels if ``stereo`` and not mono
        stereo = stereo and self.nchannels > 1

        # zero for each channel
        maxi = np.zeros(self.nchannels)
        for chunk in self.iter_chunks(chunksize=chunksize, logger=logger):
            maxi = np.maximum(maxi, abs(chunk).max(axis=0))

        # if mono returns float, otherwise array of volumes by channel
        return maxi if stereo else maxi[0]

    @requires_duration
    @convert_path_to_string("filename")
    def write_audiofile(
        self,
        filename,
        fps=None,
        nbytes=2,
        buffersize=2000,
        codec=None,
        bitrate=None,
        ffmpeg_params=None,
        write_logfile=False,
        logger="bar",
    ):
        """Writes an audio file from the AudioClip.


        Parameters
        ----------

        filename
          Name of the output file, as a string or a path-like object.

        fps
          Frames per second. If not set, it will try default to self.fps if
          already set, otherwise it will default to 44100.

        nbytes
          Sample width (set to 2 for 16-bit sound, 4 for 32-bit sound)

        buffersize
          The sound is not generated all at once, but rather made by bunches
          of frames (chunks). ``buffersize`` is the size of such a chunk.
          Try varying it if you meet audio problems (but you shouldn't
          have to). Default to 2000

        codec
          Which audio codec should be used. If None provided, the codec is
          determined based on the extension of the filename. Choose
          'pcm_s16le' for 16-bit wav and 'pcm_s32le' for 32-bit wav.

        bitrate
          Audio bitrate, given as a string like '50k', '500k', '3000k'.
          Will determine the size and quality of the output file.
          Note that it is mainly an indicative goal, the bitrate won't
          necessarily be the one in the output file.

        ffmpeg_params
          Any additional parameters you would like to pass, as a list
          of terms, like ['-option1', 'value1', '-option2', 'value2']

        write_logfile
          If true, produces a detailed logfile named filename + '.log'
          when writing the file

        logger
          Either ``"bar"`` for progress bar or ``None`` or any Proglog logger.

        Raises
        ------

        ValueError
          If ``codec`` is None and no codec is known for the filename's
          extension.
        """
        if not fps:
            # Fall back to the clip's own fps, then to 44100, exactly as
            # the docstring promises (the old code raised AttributeError
            # when self.fps had never been set).
            fps = getattr(self, "fps", None) or 44100

        if codec is None:
            # Infer the codec from the filename extension.
            name, ext = os.path.splitext(os.path.basename(filename))
            try:
                codec = extensions_dict[ext[1:]]["codec"][0]
            except KeyError:
                raise ValueError(
                    "MoviePy couldn't find the codec associated "
                    "with the filename. Provide the 'codec' "
                    "parameter in write_audiofile."
                )

        return ffmpeg_audiowrite(
            self,
            filename,
            fps,
            nbytes,
            buffersize,
            codec=codec,
            bitrate=bitrate,
            write_logfile=write_logfile,
            ffmpeg_params=ffmpeg_params,
            logger=logger,
        )

    @requires_duration
    def audiopreview(
        self, fps=None, buffersize=2000, nbytes=2, audio_flag=None, video_flag=None
    ):
        """
        Preview an AudioClip using ffplay

        Parameters
        ----------

        fps
          Frame rate of the sound. 44100 gives top quality, but may cause
          problems if your computer is not fast enough and your clip is
          complicated. If the sound jumps during the preview, lower it
          (11025 is still fine, 5000 is tolerable).

        buffersize
          The sound is not generated all at once, but rather made by bunches
          of frames (chunks). ``buffersize`` is the size of such a chunk.
          Try varying it if you meet audio problems (but you shouldn't
          have to).

        nbytes:
          Number of bytes to encode the sound: 1 for 8bit sound, 2 for
          16bit, 4 for 32bit sound. 2 bytes is fine.

        audio_flag, video_flag:
          Instances of class threading events that are used to synchronize
          video and audio during ``VideoClip.preview()``.
        """
        ffplay_audiopreview(
            clip=self,
            fps=fps,
            buffersize=buffersize,
            nbytes=nbytes,
            audio_flag=audio_flag,
            video_flag=video_flag,
        )

    def __add__(self, other):
        # Adding two audio clips means playing them one after the other.
        if isinstance(other, AudioClip):
            return concatenate_audioclips([self, other])
        return super(AudioClip, self).__add__(other)
|
||||
|
||||
|
||||
class AudioArrayClip(AudioClip):
    """An audio clip made from a sound array.

    Parameters
    ----------

    array
      A Numpy array representing the sound, of size Nx1 for mono,
      Nx2 for stereo. A flat array of size N is accepted and treated
      as mono.

    fps
      Frames per second : speed at which the sound is supposed to be
      played.
    """

    def __init__(self, array, fps):
        Clip.__init__(self)
        # Normalize the input: accept lists and flat mono arrays by
        # promoting them to an Nx1 ndarray (both previously crashed).
        array = np.asarray(array)
        if array.ndim == 1:
            array = array[:, np.newaxis]
        self.array = array
        self.fps = fps
        self.duration = 1.0 * len(array) / fps

        def frame_function(t):
            """Complicated, but must be able to handle the case where t
            is a list of the form sin(t).
            """
            if isinstance(t, np.ndarray):
                array_inds = np.round(self.fps * t).astype(int)
                # Out-of-range times yield silence rather than an IndexError.
                in_array = (array_inds >= 0) & (array_inds < len(self.array))
                # Match the channel count of the source array; the old code
                # hard-coded 2 columns, which corrupted mono Nx1 clips.
                result = np.zeros((len(t), self.array.shape[1]))
                result[in_array] = self.array[array_inds[in_array]]
                return result
            else:
                i = int(self.fps * t)
                if i < 0 or i >= len(self.array):
                    # Silence with the same shape as a real frame.
                    return 0 * self.array[0]
                else:
                    return self.array[i]

        self.frame_function = frame_function
        self.nchannels = len(list(self.get_frame(0)))
|
||||
|
||||
|
||||
class CompositeAudioClip(AudioClip):
    """Clip made by composing several AudioClips.

    An audio clip made by putting together several audio clips.

    Parameters
    ----------

    clips
      List of audio clips, which may start playing at different times or
      together, depends on their ``start`` attributes. If all have their
      ``duration`` attribute set, the duration of the composite clip is
      computed automatically.
    """

    def __init__(self, clips):
        self.clips = clips
        self.nchannels = max(clip.nchannels for clip in self.clips)

        # Composite duration: latest end among the clips; stays None-ish
        # (whatever accumulated so far) once an open-ended clip is found.
        composite_end = None
        for clip_end in self.ends:
            if clip_end is None:
                break
            composite_end = max(clip_end, composite_end or 0)

        # Composite fps: the highest numeric fps found among the clips.
        composite_fps = None
        for clip in self.clips:
            clip_fps = getattr(clip, "fps", None)
            if isinstance(clip_fps, numbers.Number):
                composite_fps = max(clip_fps, composite_fps or 0)

        # Delegates setting self.duration / self.fps to AudioClip.
        super().__init__(duration=composite_end, fps=composite_fps)

    @property
    def starts(self):
        """Returns starting times for all clips in the composition."""
        return (c.start for c in self.clips)

    @property
    def ends(self):
        """Returns ending times for all clips in the composition."""
        return (c.end for c in self.clips)

    def frame_function(self, t):
        """Renders a frame for the composition for the time ``t``."""
        # Collect the contribution of every clip that is playing at t,
        # weighted by its per-sample "is playing" mask.
        sounds = []
        for clip in self.clips:
            playing = clip.is_playing(t)
            if playing is False:
                continue
            sounds.append(clip.get_frame(t - clip.start) * np.array([playing]).T)

        # Silence buffer shaped like the requested output.
        if isinstance(t, np.ndarray):
            silence = np.zeros((len(t), self.nchannels))
        else:
            silence = np.zeros(self.nchannels)

        return silence + sum(sounds)
|
||||
|
||||
|
||||
def concatenate_audioclips(clips):
    """Concatenates one AudioClip after another, in the order that are passed
    to ``clips`` parameter.

    Parameters
    ----------

    clips
      List of audio clips, which will be played one after other.

    Raises
    ------

    ValueError
      If ``clips`` is empty.
    """
    if not clips:
        # An empty composition has no meaningful duration; fail early with
        # a clear message instead of a cryptic max()-on-empty error later.
        raise ValueError("concatenate_audioclips requires at least one clip")

    # start, end/start2, end2/start3... end
    starts_end = np.cumsum([0, *[clip.duration for clip in clips]])
    newclips = [clip.with_start(t) for clip, t in zip(clips, starts_end[:-1])]

    return CompositeAudioClip(newclips).with_duration(starts_end[-1])
|
||||
Reference in New Issue
Block a user