# generated from thinkode/modelRepository — initial commit, version 1.0
# moviepy/audio/AudioClip.py (441 lines, new file)
"""Implements AudioClip (base class for audio clips) and its main subclasses:
|
||||
|
||||
- Audio clips: AudioClip, AudioFileClip, AudioArrayClip
|
||||
- Composition: CompositeAudioClip
|
||||
"""
|
||||
|
||||
import numbers
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
import proglog
|
||||
|
||||
from moviepy.audio.io.ffmpeg_audiowriter import ffmpeg_audiowrite
|
||||
from moviepy.audio.io.ffplay_audiopreviewer import ffplay_audiopreview
|
||||
from moviepy.Clip import Clip
|
||||
from moviepy.decorators import convert_path_to_string, requires_duration
|
||||
from moviepy.tools import extensions_dict
|
||||
|
||||
|
||||
class AudioClip(Clip):
    """Base class for audio clips.

    See ``AudioFileClip`` and ``CompositeAudioClip`` for usable classes.

    An AudioClip is a Clip with a ``frame_function`` attribute of
    the form ``t -> [ f_t ]`` for mono sound and
    ``t -> [ f1_t, f2_t ]`` for stereo sound (the arrays are Numpy arrays).
    The `f_t` are floats between -1 and 1. These bounds can be
    trespassed without problems (the program will put the
    sound back into the bounds at conversion time, without much impact).

    Parameters
    ----------

    frame_function
      A function `t-> frame at time t`. The frame does not mean much
      for a sound, it is just a float. What 'makes' the sound are
      the variations of that float in the time.

    duration
      Duration of the clip (in seconds). Some clips are infinite, in
      this case their duration will be ``None``.

    nchannels
      Number of channels (one or two for mono or stereo).

    Examples
    --------

    .. code:: python

        # Plays the note A in mono (a sine wave of frequency 440 Hz)
        import numpy as np
        frame_function = lambda t: np.sin(440 * 2 * np.pi * t)
        clip = AudioClip(frame_function, duration=5, fps=44100)
        clip.preview()

        # Plays the note A in stereo (two sine waves of frequencies 440 and 880 Hz)
        frame_function = lambda t: np.array([
            np.sin(440 * 2 * np.pi * t),
            np.sin(880 * 2 * np.pi * t)
        ]).T.copy(order="C")
        clip = AudioClip(frame_function, duration=3, fps=44100)
        clip.preview()

    """

    def __init__(self, frame_function=None, duration=None, fps=None):
        super().__init__()

        if fps is not None:
            self.fps = fps

        if frame_function is not None:
            self.frame_function = frame_function
            # Probe one frame to detect the channel count: an iterable
            # frame means multichannel audio, a bare float means mono.
            frame0 = self.get_frame(0)
            if hasattr(frame0, "__iter__"):
                self.nchannels = len(list(frame0))
            else:
                self.nchannels = 1

        if duration is not None:
            self.duration = duration
            self.end = duration

    @requires_duration
    def iter_chunks(
        self,
        chunksize=None,
        chunk_duration=None,
        fps=None,
        quantize=False,
        nbytes=2,
        logger=None,
    ):
        """Iterator that returns the whole sound array of the clip by chunks.

        Parameters
        ----------

        chunksize
          Number of frames per chunk. One of ``chunksize`` or
          ``chunk_duration`` must be provided.

        chunk_duration
          Duration of each chunk in seconds; converted to a frame count
          using ``fps``. Takes precedence over ``chunksize`` when given.

        fps
          Frame rate used for the conversion. Defaults to ``self.fps``.

        quantize
          If True, chunks are quantized to integers (see ``to_soundarray``).

        nbytes
          Sample width used when ``quantize`` is True: 1, 2 or 4 bytes.

        logger
          Either ``"bar"`` for progress bar or ``None`` or any Proglog logger.

        Raises
        ------

        ValueError
          If neither ``chunksize`` nor ``chunk_duration`` is provided.
        """
        if fps is None:
            fps = self.fps
        logger = proglog.default_bar_logger(logger)
        if chunk_duration is not None:
            chunksize = int(chunk_duration * fps)
        if chunksize is None:
            # Fail early with a clear message instead of a cryptic
            # TypeError from the integer division below.
            raise ValueError(
                "You must provide either 'chunksize' or 'chunk_duration'"
            )

        total_size = int(fps * self.duration)

        nchunks = total_size // chunksize + 1

        positions = np.linspace(0, total_size, nchunks + 1, endpoint=True, dtype=int)

        for i in logger.iter_bar(chunk=list(range(nchunks))):
            size = positions[i + 1] - positions[i]
            assert size <= chunksize
            timings = (1.0 / fps) * np.arange(positions[i], positions[i + 1])
            yield self.to_soundarray(
                timings, nbytes=nbytes, quantize=quantize, fps=fps, buffersize=chunksize
            )

    @requires_duration
    def to_soundarray(
        self, tt=None, fps=None, quantize=False, nbytes=2, buffersize=50000
    ):
        """
        Transforms the sound into an array that can be played by pygame
        or written in a wav file. See ``AudioClip.preview``.

        Parameters
        ----------

        tt
          Explicit array of sample times. If ``None`` (the default),
          timings are derived from ``fps`` over the whole clip duration.

        fps
          Frame rate of the sound for the conversion.
          44100 for top quality.

        quantize
          If True, clip the samples into [-0.99, 0.99] and convert them
          to integers of width ``nbytes``.

        nbytes
          Number of bytes to encode the sound: 1 for 8bit sound,
          2 for 16bit, 4 for 32bit sound.

        buffersize
          Maximum number of frames rendered in a single pass; longer
          clips are rendered chunk by chunk and concatenated.
        """
        if tt is None:
            if fps is None:
                fps = self.fps

            max_duration = buffersize / fps
            if self.duration > max_duration:
                # Too long for one pass: render chunk by chunk and stack.
                stacker = np.vstack if self.nchannels == 2 else np.hstack
                return stacker(
                    tuple(
                        self.iter_chunks(
                            # Forward the caller's sample width; the old code
                            # hard-coded nbytes=2 here and silently ignored
                            # the 'nbytes' argument for long clips.
                            fps=fps,
                            quantize=quantize,
                            nbytes=nbytes,
                            chunksize=buffersize,
                        )
                    )
                )
            else:
                tt = np.arange(0, self.duration, 1.0 / fps)

        snd_array = self.get_frame(tt)

        if quantize:
            snd_array = np.maximum(-0.99, np.minimum(0.99, snd_array))
            inttype = {1: "int8", 2: "int16", 4: "int32"}[nbytes]
            snd_array = (2 ** (8 * nbytes - 1) * snd_array).astype(inttype)

        return snd_array

    def max_volume(self, stereo=False, chunksize=50000, logger=None):
        """Returns the maximum volume level of the clip.

        Parameters
        ----------

        stereo
          If True (and the clip has more than one channel), return the
          per-channel maxima instead of a single float.

        chunksize
          Number of frames scanned per iteration (see ``iter_chunks``).

        logger
          Either ``"bar"`` for progress bar or ``None`` or any Proglog logger.
        """
        # max volume separated by channels if ``stereo`` and not mono
        stereo = stereo and self.nchannels > 1

        # zero for each channel
        maxi = np.zeros(self.nchannels)
        for chunk in self.iter_chunks(chunksize=chunksize, logger=logger):
            maxi = np.maximum(maxi, abs(chunk).max(axis=0))

        # if mono returns float, otherwise array of volumes by channel
        return maxi if stereo else maxi[0]

    @requires_duration
    @convert_path_to_string("filename")
    def write_audiofile(
        self,
        filename,
        fps=None,
        nbytes=2,
        buffersize=2000,
        codec=None,
        bitrate=None,
        ffmpeg_params=None,
        write_logfile=False,
        logger="bar",
    ):
        """Writes an audio file from the AudioClip.


        Parameters
        ----------

        filename
          Name of the output file, as a string or a path-like object.

        fps
          Frames per second. If not set, it will try default to self.fps if
          already set, otherwise it will default to 44100.

        nbytes
          Sample width (set to 2 for 16-bit sound, 4 for 32-bit sound)

        buffersize
          The sound is not generated all at once, but rather made by bunches
          of frames (chunks). ``buffersize`` is the size of such a chunk.
          Try varying it if you meet audio problems (but you shouldn't
          have to). Default to 2000

        codec
          Which audio codec should be used. If None provided, the codec is
          determined based on the extension of the filename. Choose
          'pcm_s16le' for 16-bit wav and 'pcm_s32le' for 32-bit wav.

        bitrate
          Audio bitrate, given as a string like '50k', '500k', '3000k'.
          Will determine the size and quality of the output file.
          Note that it is mainly an indicative goal, the bitrate won't
          necessarily be the one in the output file.

        ffmpeg_params
          Any additional parameters you would like to pass, as a list
          of terms, like ['-option1', 'value1', '-option2', 'value2']

        write_logfile
          If true, produces a detailed logfile named filename + '.log'
          when writing the file

        logger
          Either ``"bar"`` for progress bar or ``None`` or any Proglog logger.

        Raises
        ------

        ValueError
          If ``codec`` is None and no codec is known for the filename's
          extension.
        """
        if not fps:
            # Fall back to the clip's own fps, then to 44100, exactly as
            # the docstring promises (the old code raised AttributeError
            # when self.fps had never been set).
            fps = getattr(self, "fps", None) or 44100

        if codec is None:
            # Infer the codec from the filename extension.
            name, ext = os.path.splitext(os.path.basename(filename))
            try:
                codec = extensions_dict[ext[1:]]["codec"][0]
            except KeyError:
                raise ValueError(
                    "MoviePy couldn't find the codec associated "
                    "with the filename. Provide the 'codec' "
                    "parameter in write_audiofile."
                )

        return ffmpeg_audiowrite(
            self,
            filename,
            fps,
            nbytes,
            buffersize,
            codec=codec,
            bitrate=bitrate,
            write_logfile=write_logfile,
            ffmpeg_params=ffmpeg_params,
            logger=logger,
        )

    @requires_duration
    def audiopreview(
        self, fps=None, buffersize=2000, nbytes=2, audio_flag=None, video_flag=None
    ):
        """
        Preview an AudioClip using ffplay

        Parameters
        ----------

        fps
          Frame rate of the sound. 44100 gives top quality, but may cause
          problems if your computer is not fast enough and your clip is
          complicated. If the sound jumps during the preview, lower it
          (11025 is still fine, 5000 is tolerable).

        buffersize
          The sound is not generated all at once, but rather made by bunches
          of frames (chunks). ``buffersize`` is the size of such a chunk.
          Try varying it if you meet audio problems (but you shouldn't
          have to).

        nbytes:
          Number of bytes to encode the sound: 1 for 8bit sound, 2 for
          16bit, 4 for 32bit sound. 2 bytes is fine.

        audio_flag, video_flag:
          Instances of class threading events that are used to synchronize
          video and audio during ``VideoClip.preview()``.
        """
        ffplay_audiopreview(
            clip=self,
            fps=fps,
            buffersize=buffersize,
            nbytes=nbytes,
            audio_flag=audio_flag,
            video_flag=video_flag,
        )

    def __add__(self, other):
        # Adding two audio clips means playing them one after the other.
        if isinstance(other, AudioClip):
            return concatenate_audioclips([self, other])
        return super(AudioClip, self).__add__(other)
|
||||
|
||||
|
||||
class AudioArrayClip(AudioClip):
    """An audio clip made from a sound array.

    Parameters
    ----------

    array
      A Numpy array representing the sound, of size Nx1 for mono,
      Nx2 for stereo. A flat array of size N is accepted and treated
      as mono.

    fps
      Frames per second : speed at which the sound is supposed to be
      played.
    """

    def __init__(self, array, fps):
        Clip.__init__(self)
        # Normalize the input: accept lists and flat mono arrays by
        # promoting them to an Nx1 ndarray (both previously crashed).
        array = np.asarray(array)
        if array.ndim == 1:
            array = array[:, np.newaxis]
        self.array = array
        self.fps = fps
        self.duration = 1.0 * len(array) / fps

        def frame_function(t):
            """Complicated, but must be able to handle the case where t
            is a list of the form sin(t).
            """
            if isinstance(t, np.ndarray):
                array_inds = np.round(self.fps * t).astype(int)
                # Out-of-range times yield silence rather than an IndexError.
                in_array = (array_inds >= 0) & (array_inds < len(self.array))
                # Match the channel count of the source array; the old code
                # hard-coded 2 columns, which corrupted mono Nx1 clips.
                result = np.zeros((len(t), self.array.shape[1]))
                result[in_array] = self.array[array_inds[in_array]]
                return result
            else:
                i = int(self.fps * t)
                if i < 0 or i >= len(self.array):
                    # Silence with the same shape as a real frame.
                    return 0 * self.array[0]
                else:
                    return self.array[i]

        self.frame_function = frame_function
        self.nchannels = len(list(self.get_frame(0)))
|
||||
|
||||
|
||||
class CompositeAudioClip(AudioClip):
    """Clip made by composing several AudioClips.

    An audio clip made by putting together several audio clips.

    Parameters
    ----------

    clips
      List of audio clips, which may start playing at different times or
      together, depends on their ``start`` attributes. If all have their
      ``duration`` attribute set, the duration of the composite clip is
      computed automatically.
    """

    def __init__(self, clips):
        self.clips = clips
        self.nchannels = max(clip.nchannels for clip in self.clips)

        # Composite duration: latest end among the clips; stays None-ish
        # (whatever accumulated so far) once an open-ended clip is found.
        composite_end = None
        for clip_end in self.ends:
            if clip_end is None:
                break
            composite_end = max(clip_end, composite_end or 0)

        # Composite fps: the highest numeric fps found among the clips.
        composite_fps = None
        for clip in self.clips:
            clip_fps = getattr(clip, "fps", None)
            if isinstance(clip_fps, numbers.Number):
                composite_fps = max(clip_fps, composite_fps or 0)

        # Delegates setting self.duration / self.fps to AudioClip.
        super().__init__(duration=composite_end, fps=composite_fps)

    @property
    def starts(self):
        """Returns starting times for all clips in the composition."""
        return (c.start for c in self.clips)

    @property
    def ends(self):
        """Returns ending times for all clips in the composition."""
        return (c.end for c in self.clips)

    def frame_function(self, t):
        """Renders a frame for the composition for the time ``t``."""
        # Collect the contribution of every clip that is playing at t,
        # weighted by its per-sample "is playing" mask.
        sounds = []
        for clip in self.clips:
            playing = clip.is_playing(t)
            if playing is False:
                continue
            sounds.append(clip.get_frame(t - clip.start) * np.array([playing]).T)

        # Silence buffer shaped like the requested output.
        if isinstance(t, np.ndarray):
            silence = np.zeros((len(t), self.nchannels))
        else:
            silence = np.zeros(self.nchannels)

        return silence + sum(sounds)
|
||||
|
||||
|
||||
def concatenate_audioclips(clips):
    """Concatenates one AudioClip after another, in the order that are passed
    to ``clips`` parameter.

    Parameters
    ----------

    clips
      List of audio clips, which will be played one after other.

    Raises
    ------

    ValueError
      If ``clips`` is empty.
    """
    if not clips:
        # An empty composition has no meaningful duration; fail early with
        # a clear message instead of a cryptic max()-on-empty error later.
        raise ValueError("concatenate_audioclips requires at least one clip")

    # start, end/start2, end2/start3... end
    starts_end = np.cumsum([0, *[clip.duration for clip in clips]])
    newclips = [clip.with_start(t) for clip, t in zip(clips, starts_end[:-1])]

    return CompositeAudioClip(newclips).with_duration(starts_end[-1])
|
||||
Reference in New Issue
Block a user