add read me

This commit is contained in:
2026-01-09 10:28:44 +11:00
commit edaf914b73
13417 changed files with 2952119 additions and 0 deletions

View File

@@ -0,0 +1,69 @@
# MUST import the core before anything else in order to initialize the underlying
# library that is being wrapped.
from av._core import time_base, library_versions, ffmpeg_version_info
# Capture logging (by importing it).
from av import logging
# For convenience, import all common attributes.
from av.about import __version__
from av.audio.codeccontext import AudioCodecContext
from av.audio.fifo import AudioFifo
from av.audio.format import AudioFormat
from av.audio.frame import AudioFrame
from av.audio.layout import AudioLayout
from av.audio.resampler import AudioResampler
from av.audio.stream import AudioStream
from av.bitstream import BitStreamFilterContext, bitstream_filters_available
from av.codec.codec import Codec, codecs_available
from av.codec.context import CodecContext
from av.codec.hwaccel import HWConfig
from av.container import open
from av.format import ContainerFormat, formats_available
from av.packet import Packet
from av.error import * # noqa: F403; This is limited to exception types.
from av.video.codeccontext import VideoCodecContext
from av.video.format import VideoFormat
from av.video.frame import VideoFrame
from av.video.stream import VideoStream
# Names exported by ``from av import *``.
# NOTE(review): HWConfig, logging, get_include and the names pulled in by
# ``from av.error import *`` are not listed here — confirm that is intentional.
__all__ = (
"__version__",
"time_base",
"ffmpeg_version_info",
"library_versions",
"AudioCodecContext",
"AudioFifo",
"AudioFormat",
"AudioFrame",
"AudioLayout",
"AudioResampler",
"AudioStream",
"BitStreamFilterContext",
"bitstream_filters_available",
"Codec",
"codecs_available",
"CodecContext",
"open",
"ContainerFormat",
"formats_available",
"Packet",
"VideoCodecContext",
"VideoFormat",
"VideoFrame",
"VideoStream",
)
def get_include() -> str:
    """
    Return the path of the `include` folder to use when building extensions to av.

    The `include` folder next to this module wins when it exists (installed
    package); otherwise the `include` folder one directory up is returned
    (running from the source directory). The fallback path is not checked
    for existence.
    """
    import os

    package_dir = os.path.dirname(__file__)
    installed_include = os.path.join(package_dir, "include")

    if not os.path.exists(installed_include):
        # Running from source directory
        return os.path.join(package_dir, os.pardir, "include")

    # Installed package
    return installed_include

View File

@@ -0,0 +1,54 @@
from __future__ import annotations
import argparse
def main() -> None:
    """Parse the command-line flags and print the requested information.

    The flags are independent and may be combined:

    * ``--version``: the PyAV version, then every library listed in
      ``av._core.library_meta`` grouped by (configuration, license).
    * ``--hwdevices``: each value yielded by ``hwdevices_available()``.
    * ``--hwconfigs``: whatever ``dump_hwconfigs()`` prints.
    * ``--codecs``: whatever ``dump_codecs()`` prints.
    """
    parser = argparse.ArgumentParser()
    for flag in ("--codecs", "--hwdevices", "--hwconfigs", "--version"):
        parser.add_argument(flag, action="store_true")
    args = parser.parse_args()

    if args.version:
        # Imports stay local to the branch that needs them.
        import av
        import av._core

        print(f"PyAV v{av.__version__}")

        # Bucket the libraries by their (configuration, license) pair,
        # skipping any whose major version is negative.
        by_config: dict = {}
        for libname, meta in sorted(av._core.library_meta.items()):
            if meta["version"][0] < 0:
                continue
            key = (meta["configuration"], meta["license"])
            by_config.setdefault(key, []).append((libname, meta))

        for (configuration, license_text), libs in sorted(by_config.items()):
            print("library configuration:", configuration)
            print("library license:", license_text)
            for libname, meta in libs:
                version = meta["version"]
                print(f"{libname:<13} {version[0]:3d}.{version[1]:3d}.{version[2]:3d}")

    if args.hwdevices:
        from av.codec.hwaccel import hwdevices_available

        print("Hardware device types:")
        for device_type in hwdevices_available():
            print(" ", device_type)

    if args.hwconfigs:
        from av.codec.codec import dump_hwconfigs

        dump_hwconfigs()

    if args.codecs:
        from av.codec.codec import dump_codecs

        dump_codecs()


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,12 @@
from typing import TypedDict
# Shape of one ``library_meta`` entry.
class _Meta(TypedDict):
# Three-integer version tuple.
version: tuple[int, int, int]
configuration: str
license: str
# Module-level values provided by the compiled ``av._core`` module.
# Metadata entries keyed by library name.
library_meta: dict[str, _Meta]
# Version tuples keyed by library name.
library_versions: dict[str, tuple[int, int, int]]
ffmpeg_version_info: str
time_base: int

View File

@@ -0,0 +1,65 @@
cimport libav as lib
# Initialise libraries.
# These calls run at module level, i.e. as a side effect of importing this module.
lib.avformat_network_init()
lib.avdevice_register_all()
# Exports.
# Taken directly from the library's AV_TIME_BASE constant.
time_base = lib.AV_TIME_BASE
cdef decode_version(v):
    """Split a packed version integer into a ``(major, minor, micro)`` tuple.

    A negative ``v`` yields ``(-1, -1, -1)``. Otherwise each component is one
    byte of ``v``: bits 16-23, bits 8-15 and bits 0-7 respectively.
    """
    if v >= 0:
        return ((v >> 16) & 0xff, (v >> 8) & 0xff, v & 0xff)
    return (-1, -1, -1)
# Return an informative version string.
# This usually is the actual release version number or a git commit
# description. This string has no fixed format and can change any time. It
# should never be parsed by code.
ffmpeg_version_info = lib.av_version_info()
# Decoded version, configuration string and license string of every wrapped
# library, keyed by library name. Built once, when this module is imported.
library_meta = {
"libavutil": dict(
version=decode_version(lib.avutil_version()),
configuration=lib.avutil_configuration(),
license=lib.avutil_license()
),
"libavcodec": dict(
version=decode_version(lib.avcodec_version()),
configuration=lib.avcodec_configuration(),
license=lib.avcodec_license()
),
"libavformat": dict(
version=decode_version(lib.avformat_version()),
configuration=lib.avformat_configuration(),
license=lib.avformat_license()
),
"libavdevice": dict(
version=decode_version(lib.avdevice_version()),
configuration=lib.avdevice_configuration(),
license=lib.avdevice_license()
),
"libavfilter": dict(
version=decode_version(lib.avfilter_version()),
configuration=lib.avfilter_configuration(),
license=lib.avfilter_license()
),
"libswscale": dict(
version=decode_version(lib.swscale_version()),
configuration=lib.swscale_configuration(),
license=lib.swscale_license()
),
"libswresample": dict(
version=decode_version(lib.swresample_version()),
configuration=lib.swresample_configuration(),
license=lib.swresample_license()
),
}
# Shortcut view of the table above: library name -> decoded version tuple only.
library_versions = {name: meta["version"] for name, meta in library_meta.items()}

View File

@@ -0,0 +1 @@
# Package version string; the package ``__init__`` re-exports it as ``av.__version__``.
__version__ = "16.0.1"

View File

@@ -0,0 +1,2 @@
from .frame import AudioFrame
from .stream import AudioStream

View File

@@ -0,0 +1,16 @@
from typing import Literal
from .frame import AudioFrame
from .stream import AudioStream
# Literal type enumerating audio codec name strings.
# NOTE(review): its consumers are not visible in this stub — presumably the
# stream-creation overloads; confirm before extending the list.
_AudioCodecName = Literal[
"aac",
"libopus",
"mp2",
"mp3",
"pcm_alaw",
"pcm_mulaw",
"pcm_s16le",
]
# Public names of the ``av.audio`` package.
__all__ = ("AudioFrame", "AudioStream")

View File

@@ -0,0 +1,11 @@
from av.audio.frame cimport AudioFrame
from av.audio.resampler cimport AudioResampler
from av.codec.context cimport CodecContext
# C-level attribute declarations for the audio codec context.
cdef class AudioCodecContext(CodecContext):
# Hold onto the frames that we will decode until we have a full one.
cdef AudioFrame next_frame
# For encoding.
# Created lazily by the implementation on the first encode call.
cdef AudioResampler resampler

View File

@@ -0,0 +1,105 @@
import cython
from cython.cimports import libav as lib
from cython.cimports.av.audio.format import AudioFormat, get_audio_format
from cython.cimports.av.audio.frame import AudioFrame, alloc_audio_frame
from cython.cimports.av.audio.layout import AudioLayout, get_audio_layout
from cython.cimports.av.frame import Frame
from cython.cimports.av.packet import Packet
@cython.cclass
class AudioCodecContext(CodecContext):
    """Codec context specialised for audio."""

    @cython.cfunc
    def _prepare_frames_for_encode(self, input_frame: Frame | None):
        # Returns the list of frames to hand to the encoder: the resampler
        # output for ``input_frame``, plus a trailing ``None`` when flushing.
        frame: AudioFrame | None = input_frame
        allow_var_frame_size: cython.bint = (
            self.ptr.codec.capabilities & lib.AV_CODEC_CAP_VARIABLE_FRAME_SIZE
        )

        # Note that the resampler will simply return an input frame if there is
        # no resampling to be done. The control flow was just a little easier this way.
        if not self.resampler:
            # Codecs that accept variable frame sizes get no fixed frame size.
            if allow_var_frame_size:
                target_frame_size = None
            else:
                target_frame_size = self.ptr.frame_size
            self.resampler = AudioResampler(
                format=self.format,
                layout=self.layout,
                rate=self.ptr.sample_rate,
                frame_size=target_frame_size,
            )

        frames = self.resampler.resample(frame)
        if frame is None:
            frames.append(None)  # flush if input frame is None

        return frames

    @cython.cfunc
    def _alloc_next_frame(self) -> Frame:
        # Decoded audio lands in a bare AudioFrame.
        return alloc_audio_frame()

    @cython.cfunc
    def _setup_decoded_frame(self, frame: Frame, packet: Packet):
        # Run the generic setup first, then refresh the audio-specific
        # layout/format wrappers of the decoded frame.
        CodecContext._setup_decoded_frame(self, frame, packet)
        audio_frame: AudioFrame = frame
        audio_frame._init_user_attributes()

    @property
    def frame_size(self):
        """
        How many samples per channel one audio frame holds.

        :type: int
        """
        return self.ptr.frame_size

    @property
    def sample_rate(self):
        """
        Audio sample rate, in samples per second.

        :type: int
        """
        return self.ptr.sample_rate

    @sample_rate.setter
    def sample_rate(self, value: cython.int):
        self.ptr.sample_rate = value

    @property
    def rate(self):
        """Alias of :attr:`sample_rate`."""
        return self.sample_rate

    @rate.setter
    def rate(self, value):
        self.sample_rate = value

    @property
    def channels(self):
        """Number of channels of :attr:`layout`."""
        return self.layout.nb_channels

    @property
    def layout(self):
        """
        The channel layout of the audio.

        :type: AudioLayout
        """
        return get_audio_layout(self.ptr.ch_layout)

    @layout.setter
    def layout(self, value):
        new_layout: AudioLayout = AudioLayout(value)
        self.ptr.ch_layout = new_layout.layout

    @property
    def format(self):
        """
        The sample format of the audio.

        :type: AudioFormat
        """
        return get_audio_format(self.ptr.sample_fmt)

    @format.setter
    def format(self, value):
        new_format: AudioFormat = AudioFormat(value)
        self.ptr.sample_fmt = new_format.sample_fmt

View File

@@ -0,0 +1,29 @@
from typing import Iterator, Literal
from av.codec.context import CodecContext
from av.packet import Packet
from .format import AudioFormat
from .frame import AudioFrame
from .layout import AudioLayout
# Descriptor typing for ``format``: reading yields an AudioFormat, while
# assignment accepts either an AudioFormat or a string.
class _Format:
def __get__(self, i: object | None, owner: type | None = None) -> AudioFormat: ...
def __set__(self, instance: object, value: AudioFormat | str) -> None: ...
# Descriptor typing for ``layout``: reading yields an AudioLayout, while
# assignment accepts either an AudioLayout or a string.
class _Layout:
def __get__(self, i: object | None, owner: type | None = None) -> AudioLayout: ...
def __set__(self, instance: object, value: AudioLayout | str) -> None: ...
# Typing stub for the audio codec context.
class AudioCodecContext(CodecContext):
# Number of samples per channel in an audio frame.
frame_size: int
# Samples per second; ``rate`` is another name for the same value.
sample_rate: int
rate: int
type: Literal["audio"]
# Typed through descriptors so that assignment may also take a string.
format: _Format
layout: _Layout
@property
def channels(self) -> int: ...
# Passing ``None`` (the default) is accepted by all three methods below.
def encode(self, frame: AudioFrame | None = None) -> list[Packet]: ...
def encode_lazy(self, frame: AudioFrame | None = None) -> Iterator[Packet]: ...
def decode(self, packet: Packet | None = None) -> list[AudioFrame]: ...

View File

@@ -0,0 +1,19 @@
cimport libav as lib
from libc.stdint cimport int64_t, uint64_t
from av.audio.frame cimport AudioFrame
# C-level declarations for the audio FIFO.
cdef class AudioFifo:
# Underlying libav FIFO; allocated by the first non-empty write().
cdef lib.AVAudioFifo *ptr
# Attributes of the first written frame, used to populate output frames.
cdef AudioFrame template
# Running totals of samples pushed in / pulled out.
cdef readonly uint64_t samples_written
cdef readonly uint64_t samples_read
# PTS units per sample; 0 when the first frame lacked a time base or sample rate.
cdef readonly double pts_per_sample
cpdef write(self, AudioFrame frame)
cpdef read(self, int samples=*, bint partial=*)
cpdef read_many(self, int samples, bint partial=*)

View File

@@ -0,0 +1,22 @@
from .format import AudioFormat
from .frame import AudioFrame
from .layout import AudioLayout
# Typing stub for the audio sample FIFO.
class AudioFifo:
def write(self, frame: AudioFrame) -> None: ...
# ``samples=0`` reads everything buffered; ``None`` is returned when empty
# or when fewer samples than requested are buffered and ``partial`` is false.
def read(self, samples: int = 0, partial: bool = False) -> AudioFrame | None: ...
def read_many(self, samples: int, partial: bool = False) -> list[AudioFrame]: ...
# format / layout / sample_rate raise AttributeError before the first write.
@property
def format(self) -> AudioFormat: ...
@property
def layout(self) -> AudioLayout: ...
@property
def sample_rate(self) -> int: ...
# Number of samples (per channel) currently buffered.
@property
def samples(self) -> int: ...
@property
def samples_written(self) -> int: ...
@property
def samples_read(self) -> int: ...
@property
def pts_per_sample(self) -> float: ...

View File

@@ -0,0 +1,197 @@
from av.audio.frame cimport alloc_audio_frame
from av.error cimport err_check
cdef class AudioFifo:
"""A simple audio sample FIFO (First In First Out) buffer."""
def __repr__(self):
# The sample_rate/layout/format properties raise AttributeError until the
# first write() has allocated the FIFO; that selects the fallback text.
try:
result = (
f"<av.{self.__class__.__name__} {self.samples} samples of "
f"{self.sample_rate}hz {self.layout} {self.format} at 0x{id(self):x}>"
)
except AttributeError:
result = (
f"<av.{self.__class__.__name__} uninitialized, use fifo.write(frame),"
f" at 0x{id(self):x}>"
)
return result
def __dealloc__(self):
# Release the libav FIFO if one was ever allocated.
if self.ptr:
lib.av_audio_fifo_free(self.ptr)
cpdef write(self, AudioFrame frame):
"""write(frame)
Push a frame of samples into the queue.
:param AudioFrame frame: The frame of samples to push.
The FIFO will remember the attributes from the first frame, and use those
to populate all output frames.
If there is a :attr:`~.Frame.pts` and :attr:`~.Frame.time_base` and
:attr:`~.AudioFrame.sample_rate`, then the FIFO will assert that the incoming
timestamps are continuous.
:raises TypeError: if ``frame`` is ``None``.
:raises ValueError: if the frame does not match the first frame's format,
sample rate or time base, or if its pts is not the expected one.
:raises RuntimeError: if the underlying FIFO cannot be allocated.
"""
if frame is None:
raise TypeError("AudioFifo must be given an AudioFrame.")
# Empty frames are ignored entirely (they do not even initialise the FIFO).
if not frame.ptr.nb_samples:
return
if not self.ptr:
# Hold onto a copy of the attributes of the first frame to populate
# output frames with.
self.template = alloc_audio_frame()
self.template._copy_internal_attributes(frame)
self.template._init_user_attributes()
# Figure out our "time_base".
# pts_per_sample = (den / num) / sample_rate, i.e. PTS ticks per sample.
if frame._time_base.num and frame.ptr.sample_rate:
self.pts_per_sample = frame._time_base.den / float(frame._time_base.num)
self.pts_per_sample /= frame.ptr.sample_rate
else:
self.pts_per_sample = 0
self.ptr = lib.av_audio_fifo_alloc(
<lib.AVSampleFormat>frame.ptr.format,
frame.layout.nb_channels,
frame.ptr.nb_samples * 2, # Just a default number of samples; it will adjust.
)
if not self.ptr:
raise RuntimeError("Could not allocate AVAudioFifo.")
# Make sure nothing changed.
# The time bases are only compared when both have a non-zero numerator.
elif (
frame.ptr.format != self.template.ptr.format or
# TODO: frame.ptr.ch_layout != self.template.ptr.ch_layout or
frame.ptr.sample_rate != self.template.ptr.sample_rate or
(frame._time_base.num and self.template._time_base.num and (
frame._time_base.num != self.template._time_base.num or
frame._time_base.den != self.template._time_base.den
))
):
raise ValueError("Frame does not match AudioFifo parameters.")
# Assert that the PTS are what we expect.
# Skipped when no time base is known or the frame carries no pts.
cdef int64_t expected_pts
if self.pts_per_sample and frame.ptr.pts != lib.AV_NOPTS_VALUE:
expected_pts = <int64_t>(self.pts_per_sample * self.samples_written)
if frame.ptr.pts != expected_pts:
raise ValueError(
"Frame.pts (%d) != expected (%d); fix or set to None." % (frame.ptr.pts, expected_pts)
)
err_check(lib.av_audio_fifo_write(
self.ptr,
<void **>frame.ptr.extended_data,
frame.ptr.nb_samples,
))
self.samples_written += frame.ptr.nb_samples
cpdef read(self, int samples=0, bint partial=False):
"""read(samples=0, partial=False)
Read samples from the queue.
:param int samples: The number of samples to pull; 0 gets all.
:param bool partial: Allow returning less than requested.
:returns: New :class:`AudioFrame` or ``None`` (if empty).
If the incoming frames had a valid :attr:`~.Frame.time_base`,
:attr:`~.AudioFrame.sample_rate` and :attr:`~.Frame.pts`, the returned frames
will have accurate timing.
"""
# Nothing was ever written.
if not self.ptr:
return
cdef int buffered_samples = lib.av_audio_fifo_size(self.ptr)
if buffered_samples < 1:
return
samples = samples or buffered_samples
# Not enough buffered: shrink the request when partial reads are allowed,
# otherwise return None and leave the buffer untouched.
if buffered_samples < samples:
if partial:
samples = buffered_samples
else:
return
cdef AudioFrame frame = alloc_audio_frame()
frame._copy_internal_attributes(self.template)
frame._init(
<lib.AVSampleFormat>self.template.ptr.format,
<lib.AVChannelLayout>self.template.ptr.ch_layout,
samples,
1, # Align?
)
err_check(lib.av_audio_fifo_read(
self.ptr,
<void **>frame.ptr.extended_data,
samples,
))
# Output pts is derived from the number of samples read so far.
# NOTE(review): write() casts the equivalent product to int64_t while this
# casts to uint64_t — confirm the difference is intentional.
if self.pts_per_sample:
frame.ptr.pts = <uint64_t>(self.pts_per_sample * self.samples_read)
else:
frame.ptr.pts = lib.AV_NOPTS_VALUE
self.samples_read += samples
return frame
cpdef read_many(self, int samples, bint partial=False):
"""read_many(samples, partial=False)
Read as many frames as we can.
:param int samples: How large for the frames to be.
:param bool partial: If we should return a partial frame.
:returns: A ``list`` of :class:`AudioFrame`.
"""
cdef AudioFrame frame
frames = []
# Keep reading until read() returns None.
while True:
frame = self.read(samples, partial=partial)
if frame is not None:
frames.append(frame)
else:
break
return frames
@property
def format(self):
"""The :class:`.AudioFormat` of this FIFO."""
# Unknown until the first write(); reported as a missing attribute.
if not self.ptr:
raise AttributeError(f"'{__name__}.AudioFifo' object has no attribute 'format'")
return self.template.format
@property
def layout(self):
"""The :class:`.AudioLayout` of this FIFO."""
if not self.ptr:
raise AttributeError(f"'{__name__}.AudioFifo' object has no attribute 'layout'")
return self.template.layout
# Sample rate of the first written frame; a missing attribute before then.
@property
def sample_rate(self):
if not self.ptr:
raise AttributeError(f"'{__name__}.AudioFifo' object has no attribute 'sample_rate'")
return self.template.sample_rate
@property
def samples(self):
"""Number of audio samples (per channel) in the buffer."""
return lib.av_audio_fifo_size(self.ptr) if self.ptr else 0

View File

@@ -0,0 +1,7 @@
cimport libav as lib
# C-level declarations for the audio sample format wrapper.
cdef class AudioFormat:
# The wrapped libav sample format value.
cdef lib.AVSampleFormat sample_fmt
# Builds an AudioFormat straight from a C sample format value.
cdef AudioFormat get_audio_format(lib.AVSampleFormat format)

View File

@@ -0,0 +1,142 @@
import sys
import cython
# Byte-order suffix of the host ("le" or "be"), appended to the multi-byte
# container format names built by AudioFormat.container_name.
container_format_postfix: str = "le" if sys.byteorder == "little" else "be"
# Private marker: passing it to AudioFormat() makes __cinit__ return early.
_cinit_bypass_sentinel = object()
@cython.cfunc
def get_audio_format(c_format: lib.AVSampleFormat) -> AudioFormat:
    """Wrap a C sample format value in an AudioFormat, bypassing name parsing.

    Negative values produce ``None`` instead of a wrapper.
    """
    if c_format < 0:
        return None

    # The sentinel makes __cinit__ return early; the C value is filled in here.
    wrapper: AudioFormat = AudioFormat(_cinit_bypass_sentinel)
    wrapper.sample_fmt = c_format
    return wrapper
@cython.cclass
class AudioFormat:
    """Descriptor of audio formats."""

    def __cinit__(self, name):
        # get_audio_format() passes the sentinel and fills in sample_fmt itself.
        if name is _cinit_bypass_sentinel:
            return

        sample_fmt: lib.AVSampleFormat
        if isinstance(name, AudioFormat):
            # Copy the C value straight from the other wrapper.
            sample_fmt = cython.cast(AudioFormat, name).sample_fmt
        else:
            sample_fmt = lib.av_get_sample_fmt(name)

        # A negative value is rejected as "not a sample format".
        if sample_fmt < 0:
            raise ValueError(f"Not a sample format: {name!r}")

        self.sample_fmt = sample_fmt

    def __repr__(self):
        return f"<av.AudioFormat {self.name}>"

    @property
    def name(self):
        """Canonical name of the sample format.

        >>> AudioFormat('s16p').name
        's16p'

        """
        return lib.av_get_sample_fmt_name(self.sample_fmt)

    @property
    def bytes(self):
        """Number of bytes per sample.

        >>> AudioFormat('s16p').bytes
        2

        """
        return lib.av_get_bytes_per_sample(self.sample_fmt)

    @property
    def bits(self):
        """Number of bits per sample.

        >>> AudioFormat('s16p').bits
        16

        """
        # Eight bits per byte.
        return lib.av_get_bytes_per_sample(self.sample_fmt) << 3

    @property
    def is_planar(self):
        """Is this a planar format?

        Strictly opposite of :attr:`is_packed`.

        """
        return bool(lib.av_sample_fmt_is_planar(self.sample_fmt))

    @property
    def is_packed(self):
        """Is this a packed format?

        Strictly opposite of :attr:`is_planar`.

        """
        return not lib.av_sample_fmt_is_planar(self.sample_fmt)

    @property
    def planar(self):
        """The planar variant of this format.

        Is itself when planar:

        >>> fmt = AudioFormat('s16p')
        >>> fmt.planar is fmt
        True

        """
        if self.is_planar:
            return self
        return get_audio_format(lib.av_get_planar_sample_fmt(self.sample_fmt))

    @property
    def packed(self):
        """The packed variant of this format.

        Is itself when packed:

        >>> fmt = AudioFormat('s16')
        >>> fmt.packed is fmt
        True

        """
        if self.is_packed:
            return self
        return get_audio_format(lib.av_get_packed_sample_fmt(self.sample_fmt))

    @property
    def container_name(self):
        """The name of a :class:`ContainerFormat` which directly accepts this data.

        :raises ValueError: when planar, since there are no such containers,
            or when no container name is known for this packed format.

        """
        if self.is_planar:
            raise ValueError("no planar container formats")

        if self.sample_fmt == lib.AV_SAMPLE_FMT_U8:
            # Single-byte samples carry no byte-order suffix.
            return "u8"
        elif self.sample_fmt == lib.AV_SAMPLE_FMT_S16:
            return "s16" + container_format_postfix
        elif self.sample_fmt == lib.AV_SAMPLE_FMT_S32:
            return "s32" + container_format_postfix
        elif self.sample_fmt == lib.AV_SAMPLE_FMT_FLT:
            return "f32" + container_format_postfix
        elif self.sample_fmt == lib.AV_SAMPLE_FMT_DBL:
            return "f64" + container_format_postfix

        # The failure concerns the sample format, not a channel layout.
        raise ValueError(f"no container format for sample format {self.name!r}")

View File

@@ -0,0 +1,11 @@
# Typing stub for the audio sample format descriptor.
class AudioFormat:
# Canonical name of the sample format, e.g. "s16p".
name: str
# Size of one sample, in bytes and in bits.
bytes: int
bits: int
# Exactly one of these two is true for any format.
is_planar: bool
is_packed: bool
# Planar / packed variant of this format (the format itself when it already is one).
planar: AudioFormat
packed: AudioFormat
# Reading this raises ValueError for planar formats.
container_name: str
def __init__(self, name: str | AudioFormat) -> None: ...

View File

@@ -0,0 +1,31 @@
cimport libav as lib
from libc.stdint cimport uint8_t, uint64_t
from av.audio.format cimport AudioFormat
from av.audio.layout cimport AudioLayout
from av.frame cimport Frame
# C-level declarations for audio frames.
cdef class AudioFrame(Frame):
# For raw storage of the frame's data; don't ever touch this.
# Allocated in _init() and released in __dealloc__ by the implementation.
cdef uint8_t *_buffer
cdef size_t _buffer_size
cdef readonly AudioLayout layout
"""
The audio channel layout.
:type: AudioLayout
"""
cdef readonly AudioFormat format
"""
The audio sample format.
:type: AudioFormat
"""
# (Re)initialise the frame for the given format, layout and sample count.
cdef _init(self, lib.AVSampleFormat format, lib.AVChannelLayout layout, unsigned int nb_samples, unsigned int align)
# Refresh ``layout`` and ``format`` from the underlying frame.
cdef _init_user_attributes(self)
# Factory for a bare frame that skips the normal constructor setup.
cdef AudioFrame alloc_audio_frame()

View File

@@ -0,0 +1,205 @@
import cython
from cython.cimports.av.audio.format import get_audio_format
from cython.cimports.av.audio.layout import get_audio_layout
from cython.cimports.av.audio.plane import AudioPlane
from cython.cimports.av.error import err_check
from cython.cimports.av.utils import check_ndarray
# Private marker: AudioFrame.__cinit__ returns early when it receives this
# object as ``format``.
_cinit_bypass_sentinel = object()


@cython.cfunc
def alloc_audio_frame() -> AudioFrame:
    """Create an AudioFrame whose ``__cinit__`` setup is bypassed."""
    bare_frame: AudioFrame = AudioFrame(_cinit_bypass_sentinel)
    return bare_frame
# Sample format name -> numpy dtype string, used by AudioFrame.from_ndarray
# and AudioFrame.to_ndarray. A format and its "p"-suffixed variant share a
# dtype. Formats missing from this table make both conversions raise ValueError.
format_dtypes = {
"dbl": "f8",
"dblp": "f8",
"flt": "f4",
"fltp": "f4",
"s16": "i2",
"s16p": "i2",
"s32": "i4",
"s32p": "i4",
"u8": "u1",
"u8p": "u1",
}
@cython.cclass
class AudioFrame(Frame):
    """A frame of audio."""

    def __cinit__(self, format="s16", layout="stereo", samples=0, align=1):
        # alloc_audio_frame() passes the sentinel to obtain a bare frame.
        if format is _cinit_bypass_sentinel:
            return

        parsed_format: AudioFormat = AudioFormat(format)
        parsed_layout: AudioLayout = AudioLayout(layout)
        self._init(parsed_format.sample_fmt, parsed_layout.layout, samples, align)

    @cython.cfunc
    def _init(
        self,
        format: lib.AVSampleFormat,
        layout: lib.AVChannelLayout,
        nb_samples: cython.uint,
        align: cython.uint,
    ):
        # Record the parameters on the frame; a data buffer is only attached
        # when there is at least one channel and at least one sample.
        self.ptr.format = format
        self.ptr.ch_layout = layout
        self.ptr.nb_samples = nb_samples

        # Sometimes this is called twice. Oh well.
        self._init_user_attributes()

        if self.layout.nb_channels == 0 or not nb_samples:
            return

        # Cleanup the old buffer.
        lib.av_freep(cython.address(self._buffer))

        # Get a new one.
        self._buffer_size = err_check(
            lib.av_samples_get_buffer_size(
                cython.NULL, self.layout.nb_channels, nb_samples, format, align
            )
        )
        self._buffer = cython.cast(
            cython.pointer[uint8_t], lib.av_malloc(self._buffer_size)
        )
        if not self._buffer:
            raise MemoryError("cannot allocate AudioFrame buffer")

        # Connect the data pointers to the buffer.
        err_check(
            lib.avcodec_fill_audio_frame(
                self.ptr,
                self.layout.nb_channels,
                cython.cast(lib.AVSampleFormat, self.ptr.format),
                self._buffer,
                self._buffer_size,
                align,
            )
        )

    def __dealloc__(self):
        # Release the buffer allocated by _init().
        lib.av_freep(cython.address(self._buffer))

    @cython.cfunc
    def _init_user_attributes(self):
        # Rebuild the Python-visible wrappers from the underlying frame.
        self.format = get_audio_format(cython.cast(lib.AVSampleFormat, self.ptr.format))
        self.layout = get_audio_layout(self.ptr.ch_layout)

    def __repr__(self):
        return (
            f"<av.{self.__class__.__name__} pts={self.pts}, {self.samples} "
            f"samples at {self.rate}Hz, {self.layout.name}, {self.format.name} at 0x{id(self):x}>"
        )

    @staticmethod
    def from_ndarray(array, format="s16", layout="stereo"):
        """
        Build a frame from a two-dimensional numpy array.

        Planar formats expect one row per channel; packed formats expect a
        single row holding the interleaved samples of all channels.

        :raises ValueError: if the format has no numpy dtype mapping or the
            array's first dimension does not fit the format.
        """
        import numpy as np

        if isinstance(format, AudioFormat):
            py_format = format
        else:
            py_format = AudioFormat(format)

        if isinstance(layout, AudioLayout):
            py_layout = layout
        else:
            py_layout = AudioLayout(layout)

        format = py_format.name

        # map avcodec type to numpy type
        try:
            dtype = np.dtype(format_dtypes[format])
        except KeyError:
            raise ValueError(
                f"Conversion from numpy array with format `{format}` is not yet supported"
            )

        # check input format
        nb_channels = py_layout.nb_channels
        check_ndarray(array, dtype, 2)

        if not py_format.is_planar:
            if array.shape[0] != 1:
                raise ValueError(
                    f"Expected packed `array.shape[0]` to equal `1` but got `{array.shape[0]}`"
                )
            # The single row interleaves every channel.
            samples = array.shape[1] // nb_channels
        else:
            if array.shape[0] != nb_channels:
                raise ValueError(
                    f"Expected planar `array.shape[0]` to equal `{nb_channels}` but got `{array.shape[0]}`"
                )
            samples = array.shape[1]

        frame = AudioFrame(format=py_format, layout=py_layout, samples=samples)
        # Copy one array row into each plane of the new frame.
        for row, plane in enumerate(frame.planes):
            plane.update(array[row, :])
        return frame

    @property
    def planes(self):
        """
        The :class:`~av.audio.plane.AudioPlane` objects of this frame, as a tuple.

        :type: tuple
        """
        # Count the leading non-null data pointers.
        plane_count: cython.int = 0
        while self.ptr.extended_data[plane_count]:
            plane_count += 1

        collected = []
        for index in range(plane_count):
            collected.append(AudioPlane(self, index))
        return tuple(collected)

    @property
    def samples(self):
        """
        How many audio samples (per channel) the frame holds.

        :type: int
        """
        return self.ptr.nb_samples

    @property
    def sample_rate(self):
        """
        Audio sample rate, in samples per second.

        :type: int
        """
        return self.ptr.sample_rate

    @sample_rate.setter
    def sample_rate(self, value):
        self.ptr.sample_rate = value

    @property
    def rate(self):
        """Alias of :attr:`sample_rate`."""
        return self.ptr.sample_rate

    @rate.setter
    def rate(self, value):
        self.ptr.sample_rate = value

    def to_ndarray(self):
        """Return the frame's samples as a numpy array with one row per plane.

        .. note:: Numpy must be installed.

        :raises ValueError: if the frame's format has no numpy dtype mapping.
        """
        import numpy as np

        try:
            dtype = np.dtype(format_dtypes[self.format.name])
        except KeyError:
            raise ValueError(
                f"Conversion from {self.format.name!r} format to numpy array is not supported."
            )

        # A packed plane interleaves all channels, so it holds
        # nb_channels times as many values as a planar one.
        count = (
            self.samples
            if self.format.is_planar
            else self.samples * self.layout.nb_channels
        )

        rows = []
        for plane in self.planes:
            rows.append(np.frombuffer(plane, dtype=dtype, count=count))
        return np.vstack(rows)

View File

@@ -0,0 +1,49 @@
from typing import Any, Union
import numpy as np
from av.frame import Frame
from .format import AudioFormat
from .layout import AudioLayout
from .plane import AudioPlane
# Sample format name -> numpy dtype string.
format_dtypes: dict[str, str]
# Array types accepted by AudioFrame.from_ndarray and returned by
# AudioFrame.to_ndarray; one alternative per dtype code noted on each line.
_SupportedNDarray = Union[
np.ndarray[Any, np.dtype[np.float64]], # f8
np.ndarray[Any, np.dtype[np.float32]], # f4
np.ndarray[Any, np.dtype[np.int32]], # i4
np.ndarray[Any, np.dtype[np.int16]], # i2
np.ndarray[Any, np.dtype[np.uint8]], # u1
]
# Descriptor typing for ``format``: reading yields an AudioFormat, while
# assignment is typed to accept either an AudioFormat or a string.
class _Format:
def __get__(self, i: object | None, owner: type | None = None) -> AudioFormat: ...
def __set__(self, instance: object, value: AudioFormat | str) -> None: ...
# Descriptor typing for ``layout``: reading yields an AudioLayout, while
# assignment is typed to accept either an AudioLayout or a string.
class _Layout:
def __get__(self, i: object | None, owner: type | None = None) -> AudioLayout: ...
def __set__(self, instance: object, value: AudioLayout | str) -> None: ...
# Typing stub for audio frames.
class AudioFrame(Frame):
planes: tuple[AudioPlane, ...]
# Number of audio samples per channel.
samples: int
# Samples per second; ``rate`` is another name for the same value.
sample_rate: int
rate: int
format: _Format
layout: _Layout
def __init__(
self,
format: AudioFormat | str = "s16",
layout: AudioLayout | str = "stereo",
samples: int = 0,
align: int = 1,
) -> None: ...
# Builds a frame from a two-dimensional array.
@staticmethod
def from_ndarray(
array: _SupportedNDarray,
format: AudioFormat | str = "s16",
layout: AudioLayout | str = "stereo",
) -> AudioFrame: ...
def to_ndarray(self) -> _SupportedNDarray: ...

View File

@@ -0,0 +1,8 @@
cimport libav as lib
# C-level declarations for the channel layout wrapper.
cdef class AudioLayout:
# The wrapped libav channel layout struct.
cdef lib.AVChannelLayout layout
# Stores the given struct on the instance.
cdef _init(self, lib.AVChannelLayout layout)
# Builds an AudioLayout straight from a C channel layout.
cdef AudioLayout get_audio_layout(lib.AVChannelLayout c_layout)

View File

@@ -0,0 +1,12 @@
from dataclasses import dataclass
# Typing stub for the audio channel layout.
class AudioLayout:
# Canonical name of the layout.
name: str
nb_channels: int
# One AudioChannel per channel of the layout.
channels: tuple[AudioChannel, ...]
def __init__(self, layout: str | AudioLayout) -> None: ...
# Name and description of a single channel of an AudioLayout.
@dataclass
class AudioChannel:
name: str
description: str

View File

@@ -0,0 +1,82 @@
cimport libav as lib
from cpython.bytes cimport PyBytes_FromStringAndSize
from dataclasses import dataclass
@dataclass
class AudioChannel:
    """Name and description of a single channel of an audio layout."""

    name: str
    description: str

    def __repr__(self):
        """Return ``<av.AudioChannel 'NAME' (DESCRIPTION)>``."""
        return f"<av.AudioChannel '{self.name}' ({self.description})>"
# Marker compared against in AudioLayout.__init__ to skip initialisation.
# NOTE(review): no assignment to this name is visible in this section — confirm
# what value it holds at runtime.
cdef object _cinit_bypass_sentinel
cdef AudioLayout get_audio_layout(lib.AVChannelLayout c_layout):
"""Get an AudioLayout from Cython land."""
# __new__ is called directly, so AudioLayout.__init__ does not run here;
# the C struct is installed through _init() instead.
cdef AudioLayout layout = AudioLayout.__new__(AudioLayout, _cinit_bypass_sentinel)
layout._init(c_layout)
return layout
# Python-visible wrapper around a libav channel layout.
cdef class AudioLayout:
# Accepts a layout string or another AudioLayout.
# Raises ValueError for a string that cannot be parsed and TypeError for
# any other argument type.
def __init__(self, layout):
if layout is _cinit_bypass_sentinel:
return
# Exact ``str`` only: subclasses of str fall through to the TypeError below.
if type(layout) is str:
ret = lib.av_channel_layout_from_string(&c_layout, layout)
if ret != 0:
raise ValueError(f"Invalid layout: {layout}")
elif isinstance(layout, AudioLayout):
# Copy the C struct from the other wrapper.
c_layout = (<AudioLayout>layout).layout
else:
raise TypeError(f"layout must be of type: string | av.AudioLayout, got {type(layout)}")
self._init(c_layout)
cdef _init(self, lib.AVChannelLayout layout):
self.layout = layout
def __repr__(self):
return f"<av.{self.__class__.__name__} {self.name!r}>"
# Two layouts are equal when both their name and channel count match.
def __eq__(self, other):
return isinstance(other, AudioLayout) and self.name == other.name and self.nb_channels == other.nb_channels
# Number of channels in the layout.
@property
def nb_channels(self):
return self.layout.nb_channels
# Tuple of AudioChannel(name, description), one per channel, in index order.
@property
def channels(self):
cdef char buf[16]
cdef char buf2[128]
results = []
for index in range(self.layout.nb_channels):
# One is subtracted from each returned size before building the bytes
# objects — presumably to drop a trailing NUL; TODO confirm against
# the libav documentation.
size = lib.av_channel_name(buf, sizeof(buf), lib.av_channel_layout_channel_from_index(&self.layout, index)) - 1
size2 = lib.av_channel_description(buf2, sizeof(buf2), lib.av_channel_layout_channel_from_index(&self.layout, index)) - 1
results.append(
AudioChannel(
PyBytes_FromStringAndSize(buf, size).decode("utf-8"),
PyBytes_FromStringAndSize(buf2, size2).decode("utf-8"),
)
)
return tuple(results)
@property
def name(self) -> str:
"""The canonical name of the audio layout."""
cdef char layout_name[128]
cdef int ret
ret = lib.av_channel_layout_describe(&self.layout, layout_name, sizeof(layout_name))
# A negative return value is treated as failure.
if ret < 0:
raise RuntimeError(f"Failed to get layout name: {ret}")
return layout_name

View File

@@ -0,0 +1,6 @@
from av.plane cimport Plane
# C-level declarations for a plane of audio data.
cdef class AudioPlane(Plane):
# Size of the plane's buffer, captured when the plane is constructed.
cdef readonly size_t buffer_size
# Returns ``buffer_size``.
cdef size_t _buffer_size(self)

View File

@@ -0,0 +1,13 @@
import cython
from cython.cimports.av.audio.frame import AudioFrame
@cython.cclass
class AudioPlane(Plane):
    """A single plane of an :class:`AudioFrame`."""

    def __cinit__(self, frame: AudioFrame, index: cython.int):
        # Only the first linesize is ever populated, but it applies to every plane.
        # The value is captured once, at construction time.
        self.buffer_size = self.frame.ptr.linesize[0]

    @cython.cfunc
    def _buffer_size(self) -> cython.size_t:
        # Report the size captured in __cinit__.
        return self.buffer_size

View File

@@ -0,0 +1,4 @@
from av.plane import Plane
# Typing stub for a plane of audio data.
class AudioPlane(Plane):
# Size of the plane's buffer.
buffer_size: int

View File

@@ -0,0 +1,18 @@
from av.audio.format cimport AudioFormat
from av.audio.frame cimport AudioFrame
from av.audio.layout cimport AudioLayout
from av.filter.graph cimport Graph
# C-level declarations for the audio resampler.
cdef class AudioResampler:
# True once the first frame was found to need no conversion at all.
cdef readonly bint is_passthrough
# The first frame seen; later frames are validated against it.
cdef AudioFrame template
# Destination descriptors
cdef readonly AudioFormat format
cdef readonly AudioLayout layout
cdef readonly int rate
# 0 means no fixed output frame size is requested.
cdef readonly unsigned int frame_size
# Filter graph doing the conversion; built on the first non-passthrough frame.
cdef Graph graph
cpdef list resample(self, AudioFrame)

View File

@@ -0,0 +1,122 @@
from errno import EAGAIN
import cython
from cython.cimports.av.filter.context import FilterContext
from cython.cimports.av.filter.graph import Graph
from av.error import FFmpegError
@cython.cclass
class AudioResampler:
    """AudioResampler(format=None, layout=None, rate=None)

    :param AudioFormat format: The target format, or a string that parses to one
        (e.g. ``"s16"``). Defaults to the format of the first frame.
    :param AudioLayout layout: The target layout, or a string that parses to one
        (e.g. ``"stereo"``). Defaults to the layout of the first frame.
    :param int rate: The target sample rate. Defaults to the rate of the first frame.
    :param int frame_size: When greater than zero, passed to the filter graph
        as the audio frame size.
    """

    def __cinit__(self, format=None, layout=None, rate=None, frame_size=None):
        if format is not None:
            if isinstance(format, AudioFormat):
                self.format = format
            else:
                self.format = AudioFormat(format)

        if layout is not None:
            self.layout = AudioLayout(layout)

        # Falsy values (None, 0) are stored as 0, meaning "not set".
        self.rate = int(rate) if rate else 0
        self.frame_size = int(frame_size) if frame_size else 0

        self.graph = None

    @cython.ccall
    def resample(self, frame: AudioFrame | None) -> list:
        """resample(frame)

        Convert the ``sample_rate``, ``channel_layout`` and/or ``format`` of
        a :class:`~.AudioFrame`.

        :param AudioFrame frame: The frame to convert or `None` to flush.
        :returns: A list of :class:`AudioFrame` in new parameters. If nothing
            needs to be done, the same frame is returned as a single element list.
        :raises ValueError: if a frame's format, layout or sample rate differs
            from those of the first frame.
        """
        # We don't have any input, so don't bother even setting up.
        if frame is None and not self.graph:
            return []

        # Shortcut for passthrough.
        if self.is_passthrough:
            return [frame]

        # Take source settings from the first frame.
        if not self.graph:
            self.template = frame

            # Set some default descriptors.
            self.format = self.format or frame.format
            self.layout = self.layout or frame.layout
            self.rate = self.rate or frame.sample_rate

            # Check if we can passthrough or if there is actually work to do.
            nothing_to_convert = (
                frame.format.sample_fmt == self.format.sample_fmt
                and frame.layout == self.layout
                and frame.sample_rate == self.rate
                and self.frame_size == 0
            )
            if nothing_to_convert:
                self.is_passthrough = True
                return [frame]

            # handle resampling with aformat filter
            # (similar to configure_output_audio_filter from ffmpeg)
            self.graph = Graph()

            # Source description; the time base is only passed when known.
            source_args = {
                "sample_rate": f"{frame.sample_rate}",
                "sample_fmt": AudioFormat(frame.format).name,
                "channel_layout": frame.layout.name,
            }
            if frame.time_base is not None:
                source_args["time_base"] = f"{frame.time_base}"

            abuffer = self.graph.add("abuffer", **source_args)
            aformat = self.graph.add(
                "aformat",
                sample_rates=f"{self.rate}",
                sample_fmts=self.format.name,
                channel_layouts=self.layout.name,
            )
            abuffersink = self.graph.add("abuffersink")

            # abuffer -> aformat -> abuffersink
            abuffer.link_to(aformat)
            aformat.link_to(abuffersink)
            self.graph.configure()

            if self.frame_size > 0:
                self.graph.set_audio_frame_size(self.frame_size)

        # Every real frame must look like the first one.
        if frame is not None and (
            frame.format.sample_fmt != self.template.format.sample_fmt
            or frame.layout != self.template.layout
            or frame.sample_rate != self.template.rate
        ):
            raise ValueError("Frame does not match AudioResampler setup.")

        self.graph.push(frame)

        # Drain the graph until it reports end-of-file or "try again".
        resampled: list = []
        while True:
            try:
                resampled.append(self.graph.pull())
            except EOFError:
                break
            except FFmpegError as err:
                if err.errno == EAGAIN:
                    break
                raise

        return resampled

View File

@@ -0,0 +1,20 @@
from av.filter.graph import Graph
from .format import AudioFormat
from .frame import AudioFrame
from .layout import AudioLayout
class AudioResampler:
    # Type stub for the audio resampler: attribute types and signatures only.
    rate: int
    frame_size: int
    format: AudioFormat
    # The implementation treats a missing graph as "not configured yet".
    graph: Graph | None
    # Every constructor argument is optional and defaults to None.
    def __init__(
        self,
        format: str | int | AudioFormat | None = None,
        layout: str | int | AudioLayout | None = None,
        rate: int | None = None,
        frame_size: int | None = None,
    ) -> None: ...
    # Takes one frame (or None, documented as "flush") and returns a list of frames.
    def resample(self, frame: AudioFrame | None) -> list[AudioFrame]: ...

View File

@@ -0,0 +1,9 @@
from av.packet cimport Packet
from av.stream cimport Stream
from .frame cimport AudioFrame
# Declaration of the AudioStream extension type (a Stream subclass).
# Both methods are cpdef and take one optional argument.
cdef class AudioStream(Stream):
    cpdef encode(self, AudioFrame frame=?)
    cpdef decode(self, Packet packet=?)

View File

@@ -0,0 +1,46 @@
import cython
from cython.cimports.av.audio.frame import AudioFrame
from cython.cimports.av.packet import Packet
@cython.cclass
class AudioStream(Stream):
    """Audio stream whose unknown attributes are looked up on ``codec_context``."""

    def __repr__(self):
        if self.format:
            form = self.format.name
        else:
            form = None
        head = f"<av.AudioStream #{self.index} {self.name} at {self.rate}Hz,"
        tail = f" {self.layout.name}, {form} at 0x{id(self):x}>"
        return head + tail

    def __getattr__(self, name):
        # Only reached when normal lookup fails; delegate to the codec context.
        context = self.codec_context
        return getattr(context, name)

    @cython.ccall
    def encode(self, frame: AudioFrame | None = None):
        """
        Encode an :class:`.AudioFrame` into a list of :class:`.Packet`.

        Each returned packet is tagged with this stream and its index.

        :rtype: list[Packet]

        .. seealso:: This is mostly a passthrough to :meth:`.CodecContext.encode`.
        """
        packet: Packet
        encoded = self.codec_context.encode(frame)
        for packet in encoded:
            packet.ptr.stream_index = self.ptr.index
            packet._stream = self
        return encoded

    @cython.ccall
    def decode(self, packet: Packet | None = None):
        """
        Decode a :class:`.Packet` into a list of :class:`.AudioFrame`.

        :rtype: list[AudioFrame]

        .. seealso:: This is a passthrough to :meth:`.CodecContext.decode`.
        """
        context = self.codec_context
        return context.decode(packet)

View File

@@ -0,0 +1,32 @@
from typing import Literal
from av.packet import Packet
from av.stream import Stream
from .codeccontext import AudioCodecContext
from .format import AudioFormat
from .frame import AudioFrame
from .layout import AudioLayout
class _Format:
    # Typing-only descriptor: reads yield an AudioFormat,
    # writes accept an AudioFormat or a str.
    def __get__(self, i: object | None, owner: type | None = None) -> AudioFormat: ...
    def __set__(self, instance: object, value: AudioFormat | str) -> None: ...
class _Layout:
    # Typing-only descriptor: reads yield an AudioLayout,
    # writes accept an AudioLayout or a str.
    def __get__(self, i: object | None, owner: type | None = None) -> AudioLayout: ...
    def __set__(self, instance: object, value: AudioLayout | str) -> None: ...
class AudioStream(Stream):
    # Type stub for the audio stream.
    codec_context: AudioCodecContext
    # Both methods accept None as their argument (the default).
    def encode(self, frame: AudioFrame | None = None) -> list[Packet]: ...
    def decode(self, packet: Packet | None = None) -> list[AudioFrame]: ...
    # From codec context
    frame_size: int
    sample_rate: int
    bit_rate: int
    rate: int
    channels: int
    type: Literal["audio"]
    # Declared through the descriptor helpers so assignment may also take a str.
    format: _Format
    layout: _Layout

View File

@@ -0,0 +1,11 @@
cimport libav as lib
from av.packet cimport Packet
# Declaration of the bitstream filter wrapper around an AVBSFContext pointer.
cdef class BitStreamFilterContext:
    # The underlying FFmpeg bitstream filter context.
    cdef lib.AVBSFContext *ptr
    cpdef filter(self, Packet packet=?)
    cpdef flush(self)

View File

@@ -0,0 +1,14 @@
from .packet import Packet
from .stream import Stream
class BitStreamFilterContext:
    # Type stub for the bitstream filter wrapper.
    def __init__(
        self,
        filter_description: str | bytes,
        in_stream: Stream | None = None,
        out_stream: Stream | None = None,
    ): ...
    # `packet` defaults to None, matching the implementation's
    # `cpdef filter(self, Packet packet=None)`.
    def filter(self, packet: Packet | None = None) -> list[Packet]: ...
    def flush(self) -> None: ...

# Names of the bitstream filters collected when the module is imported.
bitstream_filters_available: set[str]

View File

@@ -0,0 +1,95 @@
cimport libav as lib
from libc.errno cimport EAGAIN
from av.error cimport err_check
from av.packet cimport Packet
from av.stream cimport Stream
cdef class BitStreamFilterContext:
    """
    Initializes a bitstream filter: a way to directly modify packet data.

    Wraps :ffmpeg:`AVBSFContext`

    :param filter_description: The filter list description handed to
        ``av_bsf_list_parse_str``.
    :param Stream in_stream: A stream that defines the input codec for the bitfilter.
    :param Stream out_stream: A stream whose codec is overwritten using the output parameters from the bitfilter.
    """
    def __cinit__(self, filter_description, Stream in_stream=None, Stream out_stream=None):
        cdef int res
        cdef char *filter_str = filter_description

        with nogil:
            res = lib.av_bsf_list_parse_str(filter_str, &self.ptr)
        err_check(res)

        # Input parameters must be in place before the filter is initialised.
        if in_stream is not None:
            with nogil:
                res = lib.avcodec_parameters_copy(self.ptr.par_in, in_stream.ptr.codecpar)
            err_check(res)

        with nogil:
            res = lib.av_bsf_init(self.ptr)
        err_check(res)

        # Output parameters are read back only after initialisation.
        if out_stream is not None:
            with nogil:
                res = lib.avcodec_parameters_copy(out_stream.ptr.codecpar, self.ptr.par_out)
            err_check(res)
            # Propagate failures instead of silently leaving the codec
            # context out of sync with the stream parameters.
            res = lib.avcodec_parameters_to_context(out_stream.codec_context.ptr, out_stream.ptr.codecpar)
            err_check(res)

    def __dealloc__(self):
        if self.ptr:
            lib.av_bsf_free(&self.ptr)

    cpdef filter(self, Packet packet=None):
        """
        Processes a packet based on the filter_description set during initialization.
        Multiple packets may be created.

        Passing ``None`` sends a NULL packet to the filter.

        :type: list[Packet]
        """
        cdef int res
        cdef Packet new_packet

        with nogil:
            res = lib.av_bsf_send_packet(self.ptr, packet.ptr if packet is not None else NULL)
        err_check(res)

        output = []
        while True:
            new_packet = Packet()
            with nogil:
                res = lib.av_bsf_receive_packet(self.ptr, new_packet.ptr)

            # "Needs more input" and end-of-stream both end this call normally.
            if res == -EAGAIN or res == lib.AVERROR_EOF:
                return output

            err_check(res)
            if res:
                return output

            output.append(new_packet)

    cpdef flush(self):
        """
        Reset the internal state of the filter.
        Should be called e.g. when seeking.
        Can be used to make the filter usable again after draining it with EOF marker packet.
        """
        lib.av_bsf_flush(self.ptr)
cdef get_filter_names():
    """Return the set of names yielded by ``av_bsf_iterate``."""
    cdef const lib.AVBitStreamFilter *bsf
    cdef void *iter_state = NULL

    found = set()
    bsf = lib.av_bsf_iterate(&iter_state)
    # The iterator hands back NULL once every filter has been visited.
    while bsf:
        found.add(bsf.name)
        bsf = lib.av_bsf_iterate(&iter_state)
    return found


bitstream_filters_available = get_filter_names()

View File

@@ -0,0 +1,6 @@
# Declaration of the Buffer base type: three C-level hooks that report
# the size, address and writability of the exposed memory.
cdef class Buffer:
    cdef size_t _buffer_size(self)
    cdef void* _buffer_ptr(self)
    cdef bint _buffer_writable(self)

View File

@@ -0,0 +1,9 @@
# When Python 3.12 becomes our lowest supported version, we could make this
# class inherit `collections.abc.Buffer`.
class Buffer:
    # Type stub for the buffer-protocol base class.
    # Size of the buffer in bytes.
    buffer_size: int
    # Memory address of the buffer, exposed as an integer.
    buffer_ptr: int
    def update(self, input: bytes) -> None: ...
    def __buffer__(self, flags: int) -> memoryview: ...
    def __bytes__(self) -> bytes: ...

View File

@@ -0,0 +1,54 @@
from cpython cimport PyBUF_WRITABLE, PyBuffer_FillInfo
from libc.string cimport memcpy
from av.bytesource cimport ByteSource, bytesource
cdef class Buffer:
    """A base class for PyAV objects which support the buffer protocol, such
    as :class:`.Packet` and :class:`.Plane`.

    The three ``_buffer_*`` hooks below are the defaults defined here:
    an empty, NULL, writable buffer.
    """

    cdef size_t _buffer_size(self):
        return 0

    cdef void* _buffer_ptr(self):
        return NULL

    cdef bint _buffer_writable(self):
        return True

    def __getbuffer__(self, Py_buffer *view, int flags):
        cdef bint writable = self._buffer_writable()
        if flags & PyBUF_WRITABLE and not writable:
            raise ValueError("buffer is not writable")

        # Pass the real read-only state: with a hard-coded 0 a consumer that
        # asked for a read-only view of a non-writable buffer would receive a
        # view marked writable and could modify the memory through it.
        PyBuffer_FillInfo(view, self, self._buffer_ptr(), self._buffer_size(), not writable, flags)

    @property
    def buffer_size(self):
        """The size of the buffer in bytes."""
        return self._buffer_size()

    @property
    def buffer_ptr(self):
        """The memory address of the buffer."""
        return <size_t>self._buffer_ptr()

    def update(self, input):
        """Replace the data in this object with the given buffer.

        Accepts anything that supports the `buffer protocol <https://docs.python.org/3/c-api/buffer.html>`_,
        e.g. bytes, Numpy arrays, other :class:`Buffer` objects, etc..

        :raises ValueError: if this buffer is not writable, or if the input
            length differs from this buffer's size.
        """
        if not self._buffer_writable():
            raise ValueError("buffer is not writable")

        cdef ByteSource source = bytesource(input)
        cdef size_t size = self._buffer_size()

        # Sizes must match exactly; this never resizes the underlying memory.
        if source.length != size:
            raise ValueError(f"got {source.length} bytes; need {size} bytes")

        memcpy(self._buffer_ptr(), source.ptr, size)

View File

@@ -0,0 +1,14 @@
from cpython.buffer cimport Py_buffer
# Declaration of ByteSource: a (pointer, length) view over a Python object.
cdef class ByteSource:
    # The Python object the bytes were taken from.
    cdef object owner
    # True when `view` holds an acquired buffer that must be released.
    cdef bint has_view
    cdef Py_buffer view
    cdef unsigned char *ptr
    cdef size_t length

# Factory helper; `allow_none` is optional.
cdef ByteSource bytesource(object, bint allow_none=*)

View File

@@ -0,0 +1,43 @@
from cpython.buffer cimport (
PyBUF_SIMPLE,
PyBuffer_Release,
PyObject_CheckBuffer,
PyObject_GetBuffer,
)
cdef class ByteSource:
    """Expose the bytes of ``owner`` as a raw pointer plus a length.

    Direct conversion to a C pointer is tried first; objects that do not
    support it fall back to the buffer protocol.
    """

    def __cinit__(self, owner):
        self.owner = owner

        converted = True
        try:
            self.ptr = owner
        except TypeError:
            converted = False

        if converted:
            self.length = len(owner)
            return

        # Can very likely use PyBUF_ND instead of PyBUF_SIMPLE
        if PyObject_CheckBuffer(owner) and not PyObject_GetBuffer(owner, &self.view, PyBUF_SIMPLE):
            self.has_view = True
            self.ptr = <unsigned char *>self.view.buf
            self.length = self.view.len
            return

        raise TypeError("expected bytes, bytearray or memoryview")

    def __dealloc__(self):
        # Only release a view that was actually acquired in __cinit__.
        if self.has_view:
            PyBuffer_Release(&self.view)
cdef ByteSource bytesource(obj, bint allow_none=False):
    """Return ``obj`` as a ByteSource.

    An existing ByteSource is returned unchanged. ``None`` is passed through
    only when ``allow_none`` is set; otherwise it goes to the constructor
    like any other object.
    """
    if obj is None and allow_none:
        return None
    if isinstance(obj, ByteSource):
        return obj
    return ByteSource(obj)

View File

@@ -0,0 +1,11 @@
from .codec import Capabilities, Codec, Properties, codec_descriptor, codecs_available
from .context import CodecContext
# Public names of this package; each is imported above from `.codec` or `.context`.
__all__ = (
    "Capabilities",
    "Codec",
    "Properties",
    "codec_descriptor",
    "codecs_available",
    "CodecContext",
)

View File

@@ -0,0 +1,15 @@
cimport libav as lib
# Declaration of the Codec extension type.
cdef class Codec:
    # The wrapped FFmpeg codec and its descriptor.
    cdef const lib.AVCodec *ptr
    cdef const lib.AVCodecDescriptor *desc
    # Readable from Python, not assignable.
    cdef readonly bint is_encoder
    # Cache filled by the `hardware_configs` property.
    cdef tuple _hardware_configs
    cdef _init(self, name=?)

# Builds a Codec around an existing AVCodec pointer.
cdef Codec wrap_codec(const lib.AVCodec *ptr)

View File

@@ -0,0 +1,115 @@
from enum import Flag, IntEnum
from fractions import Fraction
from typing import ClassVar, Literal, cast, overload
from av.audio.codeccontext import AudioCodecContext
from av.audio.format import AudioFormat
from av.descriptor import Descriptor
from av.subtitles.codeccontext import SubtitleCodecContext
from av.video.codeccontext import VideoCodecContext
from av.video.format import VideoFormat
from .context import CodecContext
class Properties(Flag):
    # Stub for the codec property flags; the values are placeholders,
    # only the member names and types matter here.
    NONE = cast(ClassVar[Properties], ...)
    INTRA_ONLY = cast(ClassVar[Properties], ...)
    LOSSY = cast(ClassVar[Properties], ...)
    LOSSLESS = cast(ClassVar[Properties], ...)
    REORDER = cast(ClassVar[Properties], ...)
    BITMAP_SUB = cast(ClassVar[Properties], ...)
    TEXT_SUB = cast(ClassVar[Properties], ...)
class Capabilities(IntEnum):
    # Stub for the codec capability bits; the values are placeholders,
    # only the member names and the int type matter here.
    none = cast(int, ...)
    draw_horiz_band = cast(int, ...)
    dr1 = cast(int, ...)
    hwaccel = cast(int, ...)
    delay = cast(int, ...)
    small_last_frame = cast(int, ...)
    hwaccel_vdpau = cast(int, ...)
    subframes = cast(int, ...)
    experimental = cast(int, ...)
    channel_conf = cast(int, ...)
    neg_linesizes = cast(int, ...)
    frame_threads = cast(int, ...)
    slice_threads = cast(int, ...)
    param_change = cast(int, ...)
    auto_threads = cast(int, ...)
    variable_frame_size = cast(int, ...)
    avoid_probing = cast(int, ...)
    hardware = cast(int, ...)
    hybrid = cast(int, ...)
    encoder_reordered_opaque = cast(int, ...)
    encoder_flush = cast(int, ...)
    encoder_recon_frame = cast(int, ...)
# A ValueError subclass, so `except ValueError` also catches it.
class UnknownCodecError(ValueError): ...
class Codec:
    # Type stub for the codec wrapper.
    @property
    def is_encoder(self) -> bool: ...
    @property
    def is_decoder(self) -> bool: ...
    # "w" for encoders, "r" for decoders.
    @property
    def mode(self) -> Literal["r", "w"]: ...
    descriptor: Descriptor
    @property
    def name(self) -> str: ...
    @property
    def canonical_name(self) -> str: ...
    @property
    def long_name(self) -> str: ...
    @property
    def type(self) -> Literal["video", "audio", "data", "subtitle", "attachment"]: ...
    @property
    def id(self) -> int: ...
    # Each of these is None when the codec does not list supported values.
    frame_rates: list[Fraction] | None
    audio_rates: list[int] | None
    video_formats: list[VideoFormat] | None
    audio_formats: list[AudioFormat] | None
    @property
    def properties(self) -> int: ...
    @property
    def intra_only(self) -> bool: ...
    @property
    def lossy(self) -> bool: ...
    @property
    def lossless(self) -> bool: ...
    @property
    def reorder(self) -> bool: ...
    @property
    def bitmap_sub(self) -> bool: ...
    @property
    def text_sub(self) -> bool: ...
    @property
    def capabilities(self) -> int: ...
    @property
    def experimental(self) -> bool: ...
    @property
    def delay(self) -> bool: ...
    def __init__(self, name: str, mode: Literal["r", "w"] = "r") -> None: ...
    # `kind` only selects the static return type; see the overloads below.
    @overload
    def create(self, kind: Literal["video"]) -> VideoCodecContext: ...
    @overload
    def create(self, kind: Literal["audio"]) -> AudioCodecContext: ...
    @overload
    def create(self, kind: Literal["subtitle"]) -> SubtitleCodecContext: ...
    @overload
    def create(self, kind: None = None) -> CodecContext: ...
    @overload
    def create(
        self, kind: Literal["video", "audio", "subtitle"] | None = None
    ) -> (
        VideoCodecContext | AudioCodecContext | SubtitleCodecContext | CodecContext
    ): ...
# Stub describing the attributes of the module-level `codec_descriptor` object.
class codec_descriptor:
    name: str
    options: tuple[int, ...]
# Set of codec names available in this build.
codecs_available: set[str]

# Both helpers print to stdout and return nothing.
def dump_codecs() -> None: ...
def dump_hwconfigs() -> None: ...

View File

@@ -0,0 +1,389 @@
cimport libav as lib
from av.audio.format cimport get_audio_format
from av.codec.hwaccel cimport wrap_hwconfig
from av.descriptor cimport wrap_avclass
from av.utils cimport avrational_to_fraction
from av.video.format cimport get_video_format
from enum import Flag, IntEnum
cdef object _cinit_sentinel = object()
cdef Codec wrap_codec(const lib.AVCodec *ptr):
    """Build a :class:`Codec` around an existing ``AVCodec`` pointer.

    The sentinel argument makes ``Codec.__cinit__`` skip its name lookup.
    """
    cdef Codec wrapped = Codec(_cinit_sentinel)
    wrapped.is_encoder = lib.av_codec_is_encoder(ptr)
    wrapped.ptr = ptr
    wrapped._init()
    return wrapped
class Properties(Flag):
    """Codec property flags, taken from FFmpeg's ``AV_CODEC_PROP_*`` constants."""
    NONE = 0
    INTRA_ONLY = lib.AV_CODEC_PROP_INTRA_ONLY
    LOSSY = lib.AV_CODEC_PROP_LOSSY
    LOSSLESS = lib.AV_CODEC_PROP_LOSSLESS
    REORDER = lib.AV_CODEC_PROP_REORDER
    BITMAP_SUB = lib.AV_CODEC_PROP_BITMAP_SUB
    TEXT_SUB = lib.AV_CODEC_PROP_TEXT_SUB
class Capabilities(IntEnum):
    """Codec capability bits.

    Most members come from FFmpeg's ``AV_CODEC_CAP_*`` constants; the ones
    written as literal shifts are spelled out as plain bit positions rather
    than read from ``lib``.
    """
    none = 0
    draw_horiz_band = lib.AV_CODEC_CAP_DRAW_HORIZ_BAND
    dr1 = lib.AV_CODEC_CAP_DR1
    hwaccel = 1 << 4
    delay = lib.AV_CODEC_CAP_DELAY
    small_last_frame = lib.AV_CODEC_CAP_SMALL_LAST_FRAME
    hwaccel_vdpau = 1 << 7
    # Declared by the type stub but previously missing here, so
    # `Capabilities.subframes` raised AttributeError at runtime.
    # Bit 8 is FFmpeg's former AV_CODEC_CAP_SUBFRAMES.
    subframes = 1 << 8
    experimental = lib.AV_CODEC_CAP_EXPERIMENTAL
    channel_conf = lib.AV_CODEC_CAP_CHANNEL_CONF
    neg_linesizes = 1 << 11
    frame_threads = lib.AV_CODEC_CAP_FRAME_THREADS
    slice_threads = lib.AV_CODEC_CAP_SLICE_THREADS
    param_change = lib.AV_CODEC_CAP_PARAM_CHANGE
    auto_threads = lib.AV_CODEC_CAP_OTHER_THREADS
    variable_frame_size = lib.AV_CODEC_CAP_VARIABLE_FRAME_SIZE
    avoid_probing = lib.AV_CODEC_CAP_AVOID_PROBING
    hardware = lib.AV_CODEC_CAP_HARDWARE
    hybrid = lib.AV_CODEC_CAP_HYBRID
    encoder_reordered_opaque = 1 << 20
    encoder_flush = 1 << 21
    encoder_recon_frame = 1 << 22
class UnknownCodecError(ValueError):
    """Raised by ``Codec._init`` when no matching codec pointer was found."""
    pass
cdef class Codec:
    """Codec(name, mode='r')

    :param str name: The codec name.
    :param str mode: ``'r'`` for decoding or ``'w'`` for encoding.

    This object exposes information about an available codec, and an avenue to
    create a :class:`.CodecContext` to encode/decode directly.

    ::

        >>> codec = Codec('mpeg4', 'r')
        >>> codec.name
        'mpeg4'
        >>> codec.type
        'video'
        >>> codec.is_encoder
        False

    """

    def __cinit__(self, name, mode="r"):
        # wrap_codec() passes the sentinel and fills in the pointer itself.
        if name is _cinit_sentinel:
            return

        # Try the exact encoder/decoder name first, then fall back to
        # resolving the name as a codec descriptor and looking up by id.
        if mode == "w":
            self.ptr = lib.avcodec_find_encoder_by_name(name)
            if not self.ptr:
                self.desc = lib.avcodec_descriptor_get_by_name(name)
                if self.desc:
                    self.ptr = lib.avcodec_find_encoder(self.desc.id)

        elif mode == "r":
            self.ptr = lib.avcodec_find_decoder_by_name(name)
            if not self.ptr:
                self.desc = lib.avcodec_descriptor_get_by_name(name)
                if self.desc:
                    self.ptr = lib.avcodec_find_decoder(self.desc.id)

        else:
            raise ValueError('Invalid mode; must be "r" or "w".', mode)

        self._init(name)

        # Sanity check.
        if (mode == "w") != self.is_encoder:
            raise RuntimeError("Found codec does not match mode.", name, mode)

    cdef _init(self, name=None):
        """Validate the codec pointer and fill in the descriptor and direction.

        :raises UnknownCodecError: if no codec pointer has been set.
        :raises RuntimeError: if the codec has no descriptor, or reports
            itself as both encoder and decoder.
        """
        if not self.ptr:
            raise UnknownCodecError(name)

        if not self.desc:
            self.desc = lib.avcodec_descriptor_get(self.ptr.id)
            if not self.desc:
                raise RuntimeError("No codec descriptor for %r." % name)

        self.is_encoder = lib.av_codec_is_encoder(self.ptr)

        # Sanity check.
        if self.is_encoder and lib.av_codec_is_decoder(self.ptr):
            # The message used to be raised with its "%s" placeholder unfilled.
            raise RuntimeError("%s is both encoder and decoder." % self.name)

    def __repr__(self):
        mode = self.mode
        return f"<av.{self.__class__.__name__} {self.name} {mode=}>"

    def create(self, kind = None):
        """Create a :class:`.CodecContext` for this codec.

        :param str kind: Gives a hint to static type checkers for what exact CodecContext is used.
        """
        # `kind` is unused at runtime. The import is done lazily, inside the call.
        from .context import CodecContext
        return CodecContext.create(self)

    @property
    def mode(self):
        """``"w"`` for an encoder, ``"r"`` otherwise."""
        return "w" if self.is_encoder else "r"

    @property
    def is_decoder(self):
        return not self.is_encoder

    @property
    def descriptor(self):
        return wrap_avclass(self.ptr.priv_class)

    @property
    def name(self):
        return self.ptr.name or ""

    @property
    def canonical_name(self):
        """
        Returns the name of the codec, not a specific encoder.
        """
        return lib.avcodec_get_name(self.ptr.id)

    @property
    def long_name(self):
        return self.ptr.long_name or ""

    @property
    def type(self):
        """
        The media type of this codec.

        E.g: ``'audio'``, ``'video'``, ``'subtitle'``.

        """
        return lib.av_get_media_type_string(self.ptr.type)

    @property
    def id(self):
        return self.ptr.id

    @property
    def frame_rates(self):
        """A list of supported frame rates (:class:`fractions.Fraction`), or ``None``."""
        if not self.ptr.supported_framerates:
            return

        ret = []
        cdef int i = 0
        # The array ends at the first entry whose denominator is zero.
        while self.ptr.supported_framerates[i].denum:
            ret.append(avrational_to_fraction(&self.ptr.supported_framerates[i]))
            i += 1
        return ret

    @property
    def audio_rates(self):
        """A list of supported audio sample rates (``int``), or ``None``."""
        if not self.ptr.supported_samplerates:
            return

        ret = []
        cdef int i = 0
        # Zero-terminated array.
        while self.ptr.supported_samplerates[i]:
            ret.append(self.ptr.supported_samplerates[i])
            i += 1
        return ret

    @property
    def video_formats(self):
        """A list of supported :class:`.VideoFormat`, or ``None``."""
        if not self.ptr.pix_fmts:
            return

        ret = []
        cdef int i = 0
        # The array is terminated by -1.
        while self.ptr.pix_fmts[i] != -1:
            ret.append(get_video_format(self.ptr.pix_fmts[i], 0, 0))
            i += 1
        return ret

    @property
    def audio_formats(self):
        """A list of supported :class:`.AudioFormat`, or ``None``."""
        if not self.ptr.sample_fmts:
            return

        ret = []
        cdef int i = 0
        # The array is terminated by -1.
        while self.ptr.sample_fmts[i] != -1:
            ret.append(get_audio_format(self.ptr.sample_fmts[i]))
            i += 1
        return ret

    @property
    def hardware_configs(self):
        """A tuple of the hardware configurations reported for this codec.

        The tuple is built on first access and cached.
        """
        # Test against None rather than truthiness so that an empty result
        # is cached too instead of being recomputed on every access.
        if self._hardware_configs is not None:
            return self._hardware_configs

        ret = []
        cdef int i = 0
        cdef const lib.AVCodecHWConfig *ptr
        while True:
            ptr = lib.avcodec_get_hw_config(self.ptr, i)
            if not ptr:
                break
            ret.append(wrap_hwconfig(ptr))
            i += 1

        ret = tuple(ret)
        self._hardware_configs = ret
        return ret

    @property
    def properties(self):
        """The raw ``props`` bitmask of the codec descriptor."""
        return self.desc.props

    @property
    def intra_only(self):
        return bool(self.desc.props & lib.AV_CODEC_PROP_INTRA_ONLY)

    @property
    def lossy(self):
        return bool(self.desc.props & lib.AV_CODEC_PROP_LOSSY)

    @property
    def lossless(self):
        return bool(self.desc.props & lib.AV_CODEC_PROP_LOSSLESS)

    @property
    def reorder(self):
        return bool(self.desc.props & lib.AV_CODEC_PROP_REORDER)

    @property
    def bitmap_sub(self):
        return bool(self.desc.props & lib.AV_CODEC_PROP_BITMAP_SUB)

    @property
    def text_sub(self):
        return bool(self.desc.props & lib.AV_CODEC_PROP_TEXT_SUB)

    @property
    def capabilities(self):
        """
        Get the capabilities bitmask of the codec.

        This method returns an integer representing the codec capabilities bitmask,
        which can be used to check specific codec features by performing bitwise
        operations with the Capabilities enum values.

        :example:

        .. code-block:: python

            from av.codec import Codec, Capabilities

            codec = Codec("h264", "w")

            # Check if the codec can be fed a final frame with a smaller size.
            # This can be used to prevent truncation of the last audio samples.
            small_last_frame = bool(codec.capabilities & Capabilities.small_last_frame)

        :rtype: int
        """
        return self.ptr.capabilities

    @property
    def experimental(self):
        """
        Check if codec is experimental and is thus avoided in favor of non experimental encoders.

        :rtype: bool
        """
        return bool(self.ptr.capabilities & lib.AV_CODEC_CAP_EXPERIMENTAL)

    @property
    def delay(self):
        """
        If true, encoder or decoder requires flushing with `None` at the end in order to give the complete and correct output.

        :rtype: bool
        """
        return bool(self.ptr.capabilities & lib.AV_CODEC_CAP_DELAY)
cdef get_codec_names():
    """Return the set of names yielded by ``av_codec_iterate``."""
    cdef const lib.AVCodec *codec_ptr
    cdef void *iter_state = NULL

    found = set()
    codec_ptr = lib.av_codec_iterate(&iter_state)
    # The iterator hands back NULL once every codec has been visited.
    while codec_ptr:
        found.add(codec_ptr.name)
        codec_ptr = lib.av_codec_iterate(&iter_state)
    return found


codecs_available = get_codec_names()
codec_descriptor = wrap_avclass(lib.avcodec_get_class())
def dump_codecs():
    """Print information about available codecs.

    One line is printed per name in ``codecs_available``. A name that
    cannot be described is reported with an ``ERROR`` line instead of
    aborting the whole listing.
    """
    print(
        """Codecs:
D..... = Decoding supported
.E.... = Encoding supported
..V... = Video codec
..A... = Audio codec
..S... = Subtitle codec
...I.. = Intra frame-only codec
....L. = Lossy compression
.....S = Lossless compression
------"""
    )

    for name in sorted(codecs_available):
        try:
            e_codec = Codec(name, "w")
        except ValueError:
            e_codec = None

        try:
            d_codec = Codec(name, "r")
        except ValueError:
            d_codec = None

        # TODO: Assert these always have the same properties.
        codec = e_codec or d_codec

        try:
            print(
                " %s%s%s%s%s%s %-18s %s"
                % (
                    ".D"[bool(d_codec)],
                    ".E"[bool(e_codec)],
                    codec.type[0].upper(),
                    ".I"[codec.intra_only],
                    ".L"[codec.lossy],
                    ".S"[codec.lossless],
                    codec.name,
                    codec.long_name,
                )
            )
        except Exception as e:
            # `codec` is None when neither lookup succeeded; fall back to the
            # requested name so the error report itself cannot raise.
            shown_name = codec.name if codec is not None else name
            print(f"...... {shown_name:<18} ERROR: {e}")
def dump_hwconfigs():
    """Print the hardware configs of every decoder that reports any."""
    print("Hardware configs:")
    for codec_name in sorted(codecs_available):
        try:
            decoder = Codec(codec_name, "r")
        except ValueError:
            # No decoder under this name; nothing to list.
            continue

        hw_configs = decoder.hardware_configs
        if hw_configs:
            print(" ", decoder.name)
            for hw_config in hw_configs:
                print(" ", hw_config)

View File

@@ -0,0 +1,64 @@
cimport libav as lib
from libc.stdint cimport int64_t
from av.bytesource cimport ByteSource
from av.codec.codec cimport Codec
from av.codec.hwaccel cimport HWAccel
from av.frame cimport Frame
from av.packet cimport Packet
# Declaration of the CodecContext extension type wrapping an AVCodecContext.
cdef class CodecContext:
    cdef lib.AVCodecContext *ptr

    # Whether AVCodecContext.extradata should be de-allocated upon destruction.
    cdef bint extradata_set

    # Used as a signal that this is within a stream, and also for us to access that
    # stream. This is set "manually" by the stream after constructing this object.
    cdef int stream_index

    # Parser state used by `parse`; created on first use.
    cdef lib.AVCodecParserContext *parser
    cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec, HWAccel hwaccel)

    # Public API.
    cdef readonly bint is_open
    cdef readonly Codec codec
    cdef readonly HWAccel hwaccel
    cdef public dict options
    cpdef open(self, bint strict=?)

    # Wraps both versions of the transcode API, returning lists.
    cpdef encode(self, Frame frame=?)
    cpdef decode(self, Packet packet=?)
    cpdef flush_buffers(self)

    # Used by hardware-accelerated decode.
    cdef HWAccel hwaccel_ctx

    # Used by both transcode APIs to setup user-land objects.
    # TODO: Remove the `Packet` from `_setup_decoded_frame` (because flushing packets
    # are bogus). It should take all info it needs from the context and/or stream.
    cdef _prepare_and_time_rebase_frames_for_encode(self, Frame frame)
    cdef _prepare_frames_for_encode(self, Frame frame)
    cdef _setup_encoded_packet(self, Packet)
    cdef _setup_decoded_frame(self, Frame, Packet)

    # Implemented by base for the generic send/recv API.
    # Note that the user cannot send without receiving. This is because
    # `_prepare_frames_for_encode` may expand a frame into multiple (e.g. when
    # resampling audio to a higher rate but with fixed size frames), and the
    # send/recv buffer may be limited to a single frame. Ergo, we need to flush
    # the buffer as often as possible.
    cdef _recv_packet(self)
    cdef _send_packet_and_recv(self, Packet packet)
    cdef _recv_frame(self)

    cdef _transfer_hwframe(self, Frame frame)

    # Implemented by children for the generic send/recv API, so we have the
    # correct subclass of Frame.
    cdef Frame _next_frame
    cdef Frame _alloc_next_frame(self)

# Builds the Python wrapper for an existing AVCodecContext.
cdef CodecContext wrap_codec_context(lib.AVCodecContext*, const lib.AVCodec*, HWAccel hwaccel)

View File

@@ -0,0 +1,117 @@
from enum import Flag, IntEnum
from fractions import Fraction
from typing import ClassVar, Literal, cast, overload
from av.audio import _AudioCodecName
from av.audio.codeccontext import AudioCodecContext
from av.packet import Packet
from av.video import _VideoCodecName
from av.video.codeccontext import VideoCodecContext
from .codec import Codec
from .hwaccel import HWAccel
class ThreadType(Flag):
    # Stub for the threading-mode flags; the values are placeholders.
    NONE = cast(ClassVar[ThreadType], ...)
    FRAME = cast(ClassVar[ThreadType], ...)
    SLICE = cast(ClassVar[ThreadType], ...)
    AUTO = cast(ClassVar[ThreadType], ...)
    # Descriptor signatures so that an attribute typed as ThreadType
    # accepts int, str or ThreadType on assignment.
    def __get__(self, i: object | None, owner: type | None = None) -> ThreadType: ...
    def __set__(self, instance: object, value: int | str | ThreadType) -> None: ...
class Flags(IntEnum):
    # Stub for the codec-context `flags` bits; the values are placeholders.
    unaligned = cast(int, ...)
    qscale = cast(int, ...)
    four_mv = cast(int, ...)
    output_corrupt = cast(int, ...)
    qpel = cast(int, ...)
    recon_frame = cast(int, ...)
    copy_opaque = cast(int, ...)
    frame_duration = cast(int, ...)
    pass1 = cast(int, ...)
    pass2 = cast(int, ...)
    loop_filter = cast(int, ...)
    gray = cast(int, ...)
    psnr = cast(int, ...)
    interlaced_dct = cast(int, ...)
    low_delay = cast(int, ...)
    global_header = cast(int, ...)
    bitexact = cast(int, ...)
    ac_pred = cast(int, ...)
    interlaced_me = cast(int, ...)
    closed_gop = cast(int, ...)
class Flags2(IntEnum):
    # Stub for the codec-context `flags2` bits; the values are placeholders.
    fast = cast(int, ...)
    no_output = cast(int, ...)
    local_header = cast(int, ...)
    chunks = cast(int, ...)
    ignore_crop = cast(int, ...)
    show_all = cast(int, ...)
    export_mvs = cast(int, ...)
    skip_manual = cast(int, ...)
    ro_flush_noop = cast(int, ...)
class CodecContext:
    # Type stub for the codec context wrapper.
    name: str
    type: Literal["video", "audio", "data", "subtitle", "attachment"]
    options: dict[str, str]
    profile: str | None
    @property
    def profiles(self) -> list[str]: ...
    # None when no extradata is set.
    extradata: bytes | None
    time_base: Fraction
    codec_tag: str
    bit_rate: int | None
    bit_rate_tolerance: int
    thread_count: int
    thread_type: ThreadType
    skip_frame: Literal[
        "NONE", "DEFAULT", "NONREF", "BIDIR", "NONINTRA", "NONKEY", "ALL"
    ]
    # Raw bitmasks; `qscale` and `copy_opaque` expose single bits of `flags`.
    flags: int
    qscale: bool
    copy_opaque: bool
    flags2: int
    @property
    def is_open(self) -> bool: ...
    @property
    def is_encoder(self) -> bool: ...
    @property
    def is_decoder(self) -> bool: ...
    @property
    def codec(self) -> Codec: ...
    @property
    def max_bit_rate(self) -> int | None: ...
    @property
    def delay(self) -> bool: ...
    @property
    def extradata_size(self) -> int: ...
    @property
    def is_hwaccel(self) -> bool: ...
    def open(self, strict: bool = True) -> None: ...
    # The overloads narrow the return type from the codec name literal.
    @overload
    @staticmethod
    def create(
        codec: _AudioCodecName,
        mode: Literal["r", "w"] | None = None,
        hwaccel: HWAccel | None = None,
    ) -> AudioCodecContext: ...
    @overload
    @staticmethod
    def create(
        codec: _VideoCodecName,
        mode: Literal["r", "w"] | None = None,
        hwaccel: HWAccel | None = None,
    ) -> VideoCodecContext: ...
    @overload
    @staticmethod
    def create(
        codec: str | Codec,
        mode: Literal["r", "w"] | None = None,
        hwaccel: HWAccel | None = None,
    ) -> CodecContext: ...
    # `raw_input` may be None (the default).
    def parse(
        self, raw_input: bytes | bytearray | memoryview | None = None
    ) -> list[Packet]: ...
    def flush_buffers(self) -> None: ...

View File

@@ -0,0 +1,671 @@
cimport libav as lib
from libc.errno cimport EAGAIN
from libc.stdint cimport uint8_t
from libc.string cimport memcpy
from av.bytesource cimport ByteSource, bytesource
from av.codec.codec cimport Codec, wrap_codec
from av.dictionary cimport _Dictionary
from av.error cimport err_check
from av.packet cimport Packet
from av.utils cimport avrational_to_fraction, to_avrational
from enum import Flag, IntEnum
from av.dictionary import Dictionary
cdef object _cinit_sentinel = object()
cdef CodecContext wrap_codec_context(lib.AVCodecContext *c_ctx, const lib.AVCodec *c_codec, HWAccel hwaccel):
    """Build an av.CodecContext for an existing AVCodecContext.

    The wrapper class is chosen from the context's media type; anything that
    is not video, audio or subtitle gets the base :class:`CodecContext`.
    """
    cdef CodecContext wrapper

    # The subclasses are imported lazily, only in the branch that needs them.
    if c_ctx.codec_type == lib.AVMEDIA_TYPE_VIDEO:
        from av.video.codeccontext import VideoCodecContext as wrapper_cls
    elif c_ctx.codec_type == lib.AVMEDIA_TYPE_AUDIO:
        from av.audio.codeccontext import AudioCodecContext as wrapper_cls
    elif c_ctx.codec_type == lib.AVMEDIA_TYPE_SUBTITLE:
        from av.subtitles.codeccontext import SubtitleCodecContext as wrapper_cls
    else:
        wrapper_cls = CodecContext

    wrapper = wrapper_cls(_cinit_sentinel)
    wrapper._init(c_ctx, c_codec, hwaccel)
    return wrapper
class ThreadType(Flag):
    """Threading modes; the string annotations on the members act as their descriptions."""
    NONE = 0
    FRAME: "Decode more than one frame at once" = lib.FF_THREAD_FRAME
    SLICE: "Decode more than one part of a single frame at once" = lib.FF_THREAD_SLICE
    AUTO: "Decode using both FRAME and SLICE methods." = lib.FF_THREAD_SLICE | lib.FF_THREAD_FRAME
class Flags(IntEnum):
    """Bits of ``CodecContext.flags``, taken from FFmpeg's ``AV_CODEC_FLAG_*`` constants."""
    unaligned = lib.AV_CODEC_FLAG_UNALIGNED
    qscale = lib.AV_CODEC_FLAG_QSCALE
    four_mv = lib.AV_CODEC_FLAG_4MV
    output_corrupt = lib.AV_CODEC_FLAG_OUTPUT_CORRUPT
    qpel = lib.AV_CODEC_FLAG_QPEL
    recon_frame = lib.AV_CODEC_FLAG_RECON_FRAME
    copy_opaque = lib.AV_CODEC_FLAG_COPY_OPAQUE
    frame_duration = lib.AV_CODEC_FLAG_FRAME_DURATION
    pass1 = lib.AV_CODEC_FLAG_PASS1
    pass2 = lib.AV_CODEC_FLAG_PASS2
    loop_filter = lib.AV_CODEC_FLAG_LOOP_FILTER
    gray = lib.AV_CODEC_FLAG_GRAY
    psnr = lib.AV_CODEC_FLAG_PSNR
    interlaced_dct = lib.AV_CODEC_FLAG_INTERLACED_DCT
    low_delay = lib.AV_CODEC_FLAG_LOW_DELAY
    global_header = lib.AV_CODEC_FLAG_GLOBAL_HEADER
    bitexact = lib.AV_CODEC_FLAG_BITEXACT
    ac_pred = lib.AV_CODEC_FLAG_AC_PRED
    interlaced_me = lib.AV_CODEC_FLAG_INTERLACED_ME
    closed_gop = lib.AV_CODEC_FLAG_CLOSED_GOP
class Flags2(IntEnum):
    """Bits of ``CodecContext.flags2``, taken from FFmpeg's ``AV_CODEC_FLAG2_*`` constants."""
    fast = lib.AV_CODEC_FLAG2_FAST
    no_output = lib.AV_CODEC_FLAG2_NO_OUTPUT
    local_header = lib.AV_CODEC_FLAG2_LOCAL_HEADER
    chunks = lib.AV_CODEC_FLAG2_CHUNKS
    ignore_crop = lib.AV_CODEC_FLAG2_IGNORE_CROP
    show_all = lib.AV_CODEC_FLAG2_SHOW_ALL
    export_mvs = lib.AV_CODEC_FLAG2_EXPORT_MVS
    skip_manual = lib.AV_CODEC_FLAG2_SKIP_MANUAL
    ro_flush_noop = lib.AV_CODEC_FLAG2_RO_FLUSH_NOOP
cdef class CodecContext:
@staticmethod
def create(codec, mode=None, hwaccel=None):
    """Create a codec context for ``codec``.

    :param codec: A :class:`.Codec`, or a codec name to look up.
    :param mode: ``"r"`` or ``"w"``; used only when ``codec`` is a name.
        ``None`` means ``"r"``, which is also ``Codec``'s own default.
    :param hwaccel: Passed through to the new context.
    :raises MemoryError: if FFmpeg cannot allocate the context.
    """
    cdef Codec cy_codec
    cdef lib.AVCodecContext *c_ctx

    if isinstance(codec, Codec):
        cy_codec = codec
    else:
        # Forwarding None would make Codec reject the call even though None
        # is this function's default.
        cy_codec = Codec(codec, "r" if mode is None else mode)

    c_ctx = lib.avcodec_alloc_context3(cy_codec.ptr)
    # wrap_codec_context() dereferences the pointer immediately.
    if c_ctx == NULL:
        raise MemoryError("Cannot allocate AVCodecContext")

    return wrap_codec_context(c_ctx, cy_codec.ptr, hwaccel)
def __cinit__(self, sentinel=None, *args, **kwargs):
    # Direct construction is refused; only callers holding the module-private
    # sentinel may create instances.
    if sentinel is not _cinit_sentinel:
        raise RuntimeError("Cannot instantiate CodecContext")

    self.is_open = False
    self.stream_index = -1  # This is set by the container immediately.
    self.options = {}
cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec, HWAccel hwaccel):
    """Attach the C context, wrap its codec and apply threading defaults.

    :raises RuntimeError: if the context already has a codec that differs
        from the one passed in.
    """
    cdef const lib.AVCodec *chosen

    self.ptr = ptr
    if self.ptr.codec and codec and self.ptr.codec != codec:
        raise RuntimeError("Wrapping CodecContext with mismatched codec.")

    # Prefer the explicitly passed codec; otherwise use the context's own.
    if codec != NULL:
        chosen = codec
    else:
        chosen = self.ptr.codec
    self.codec = wrap_codec(chosen)
    self.hwaccel = hwaccel

    # Set reasonable threading defaults.
    self.ptr.thread_count = 0  # use as many threads as there are CPUs.
    self.ptr.thread_type = 0x02  # thread within a frame. Does not change the API.
@property
def flags(self):
    """
    Get and set the flags bitmask of CodecContext.

    The value is read from and written to the underlying context directly.

    :rtype: int
    """
    return self.ptr.flags

@flags.setter
def flags(self, int value):
    # Replaces the whole bitmask, including bits set via `qscale` or `copy_opaque`.
    self.ptr.flags = value
@property
def qscale(self):
    """
    Whether the fixed-qscale bit is set in ``flags``.

    :rtype: bool
    """
    return (self.ptr.flags & lib.AV_CODEC_FLAG_QSCALE) != 0

@qscale.setter
def qscale(self, value):
    # Set or clear only this bit; every other flag is left untouched.
    if not value:
        self.ptr.flags &= ~lib.AV_CODEC_FLAG_QSCALE
    else:
        self.ptr.flags |= lib.AV_CODEC_FLAG_QSCALE
@property
def copy_opaque(self):
    """Whether the ``AV_CODEC_FLAG_COPY_OPAQUE`` bit is set in ``flags``."""
    return (self.ptr.flags & lib.AV_CODEC_FLAG_COPY_OPAQUE) != 0

@copy_opaque.setter
def copy_opaque(self, value):
    # Set or clear only this bit; every other flag is left untouched.
    if not value:
        self.ptr.flags &= ~lib.AV_CODEC_FLAG_COPY_OPAQUE
    else:
        self.ptr.flags |= lib.AV_CODEC_FLAG_COPY_OPAQUE
@property
def flags2(self):
    """
    Get and set the flags2 bitmask of CodecContext.

    The value is read from and written to the underlying context directly.

    :rtype: int
    """
    return self.ptr.flags2

@flags2.setter
def flags2(self, int value):
    # Replaces the whole bitmask.
    self.ptr.flags2 = value
@property
def extradata(self):
    """The codec extradata as ``bytes``, or ``None`` when there is none.

    Assigning ``None`` frees the current extradata; assigning a bytes-like
    object copies it into a buffer owned by the context.
    """
    if self.ptr is NULL:
        return None
    if self.ptr.extradata_size > 0:
        return <bytes>(<uint8_t*>self.ptr.extradata)[:self.ptr.extradata_size]
    return None

@extradata.setter
def extradata(self, data):
    cdef ByteSource source
    cdef uint8_t *buf
    cdef size_t i

    if data is None:
        lib.av_freep(&self.ptr.extradata)
        self.ptr.extradata_size = 0
    else:
        source = bytesource(data)
        # Reallocate into a temporary: on failure the old buffer and its
        # size stay intact instead of being replaced by NULL.
        buf = <uint8_t*>lib.av_realloc(self.ptr.extradata, source.length + lib.AV_INPUT_BUFFER_PADDING_SIZE)
        if not buf:
            raise MemoryError("Cannot allocate extradata")
        self.ptr.extradata = buf
        memcpy(buf, source.ptr, source.length)
        # av_realloc does not initialise the new memory; zero the padding so
        # readers that run past the payload see defined bytes.
        for i in range(lib.AV_INPUT_BUFFER_PADDING_SIZE):
            buf[source.length + i] = 0
        self.ptr.extradata_size = source.length
    # Tells __dealloc__ that the extradata buffer is ours to free.
    self.extradata_set = True
@property
def extradata_size(self):
    """The ``extradata_size`` field of the underlying context."""
    return self.ptr.extradata_size
@property
def is_encoder(self):
    """Whether the context's codec is an encoder; ``False`` without a C context."""
    if self.ptr is not NULL:
        return lib.av_codec_is_encoder(self.ptr.codec)
    return False
@property
def is_decoder(self):
    """Whether the context's codec is a decoder; ``False`` without a C context."""
    if self.ptr is not NULL:
        return lib.av_codec_is_decoder(self.ptr.codec)
    return False
# Open the codec context via avcodec_open2().
#
# strict: when the context is already open, raise ValueError if strict is
#     true, otherwise return without doing anything.
#
# self.options is copied into a Dictionary that is handed to avcodec_open2();
# afterwards self.options is replaced with the contents of that dictionary.
cpdef open(self, bint strict=True):
if self.is_open:
if strict:
raise ValueError("CodecContext is already open.")
return
cdef _Dictionary options = Dictionary()
options.update(self.options or {})
# Encoders with no time_base numerator get a default one before opening:
# video uses the inverse of the frame rate, audio uses 1/sample_rate and
# everything else falls back to 1/AV_TIME_BASE.
if not self.ptr.time_base.num and self.is_encoder:
if self.type == "video":
self.ptr.time_base.num = self.ptr.framerate.den or 1
self.ptr.time_base.den = self.ptr.framerate.num or lib.AV_TIME_BASE
elif self.type == "audio":
self.ptr.time_base.num = 1
self.ptr.time_base.den = self.ptr.sample_rate
else:
self.ptr.time_base.num = 1
self.ptr.time_base.den = lib.AV_TIME_BASE
err_check(lib.avcodec_open2(self.ptr, self.codec.ptr, &options.ptr), "avcodec_open2(" + self.codec.name + ")")
self.is_open = True
self.options = dict(options)
def __dealloc__(self):
    # Release native resources owned by this wrapper.
    if self.ptr:
        # Extradata is only freed here when it was assigned through the
        # ``extradata`` setter (tracked by ``extradata_set``).
        if self.extradata_set:
            lib.av_freep(&self.ptr.extradata)
        lib.avcodec_free_context(&self.ptr)
    if self.parser:
        lib.av_parser_close(self.parser)
def __repr__(self):
    # Fall back to placeholders when the codec type or name is empty.
    kind = self.type or "<notype>"
    codec_name = self.name or "<nocodec>"
    return f"<av.{self.__class__.__name__} {kind}/{codec_name} at 0x{id(self):x}>"
def parse(self, raw_input=None):
"""Split up a byte stream into list of :class:`.Packet`.
This is only effectively splitting up a byte stream, and does no
actual interpretation of the data.
It will return all packets that are fully contained within the given
input, and will buffer partial packets until they are complete.
:param ByteSource raw_input: A chunk of a byte-stream to process.
Anything that can be turned into a :class:`.ByteSource` is fine.
``None`` or empty inputs will flush the parser's buffers.
:return: ``list`` of :class:`.Packet` newly available.
:raises ValueError: if no parser can be created for this codec.
"""
# Create the parser for this codec lazily, on first use.
if not self.parser:
self.parser = lib.av_parser_init(self.codec.ptr.id)
if not self.parser:
raise ValueError(f"No parser for {self.codec.name}")
cdef ByteSource source = bytesource(raw_input, allow_none=True)
# A missing source is passed to the parser as NULL data of size 0.
cdef unsigned char *in_data = source.ptr if source is not None else NULL
cdef int in_size = source.length if source is not None else 0
cdef unsigned char *out_data
cdef int out_size
cdef int consumed
cdef Packet packet = None
packets = []
while True:
with nogil:
consumed = lib.av_parser_parse2(
self.parser,
self.ptr,
&out_data, &out_size,
in_data, in_size,
lib.AV_NOPTS_VALUE, lib.AV_NOPTS_VALUE,
0
)
err_check(consumed)
if out_size:
# We copy the data immediately, as we have yet to figure out
# the expected lifetime of the buffer we get back. All of the
# examples decode it immediately.
#
# We've also tried:
# packet = Packet()
# packet.data = out_data
# packet.size = out_size
# packet.source = source
#
# ... but this results in corruption.
packet = Packet(out_size)
memcpy(packet.ptr.data, out_data, out_size)
packets.append(packet)
if not in_size:
# This was a flush. Only one packet should ever be returned.
break
# Advance the input window past the bytes the parser consumed.
in_data += consumed
in_size -= consumed
if not in_size:
break
return packets
@property
def is_hwaccel(self):
    """
    ``True`` when this codec context has a hardware acceleration context
    attached (``hwaccel_ctx`` is not ``None``), ``False`` otherwise.
    """
    return self.hwaccel_ctx is not None
def _send_frame_and_recv(self, Frame frame):
    # Generator: submit one frame (or NULL when ``frame`` is None) to the
    # encoder, then yield packets until ``_recv_packet`` returns a falsy value.
    cdef Packet pkt
    cdef int ret
    with nogil:
        ret = lib.avcodec_send_frame(self.ptr, frame.ptr if frame is not None else NULL)
    err_check(ret, "avcodec_send_frame()")

    while True:
        pkt = self._recv_packet()
        if not pkt:
            break
        yield pkt
cdef _send_packet_and_recv(self, Packet packet):
    # Submit one packet (or NULL when ``packet`` is None) to the decoder and
    # collect frames until ``_recv_frame`` returns a falsy value.
    cdef Frame frame
    cdef int ret
    with nogil:
        ret = lib.avcodec_send_packet(self.ptr, packet.ptr if packet is not None else NULL)
    err_check(ret, "avcodec_send_packet()")

    frames = []
    frame = self._recv_frame()
    while frame:
        frames.append(frame)
        frame = self._recv_frame()
    return frames
cdef _prepare_frames_for_encode(self, Frame frame):
    # Base implementation: hand the frame back unchanged as a one-item list.
    prepared = [frame]
    return prepared
cdef Frame _alloc_next_frame(self):
    # The base class has no frame type to allocate, so it always raises.
    raise NotImplementedError("Base CodecContext cannot decode.")
# Try to receive one decoded frame via avcodec_receive_frame().
# Returns None when the call reports -EAGAIN or AVERROR_EOF; other results
# are passed through err_check().
cdef _recv_frame(self):
# Reuse the cached frame object if one is pending, otherwise allocate one.
if not self._next_frame:
self._next_frame = self._alloc_next_frame()
cdef Frame frame = self._next_frame
cdef int res
with nogil:
res = lib.avcodec_receive_frame(self.ptr, frame.ptr)
if res == -EAGAIN or res == lib.AVERROR_EOF:
return
err_check(res, "avcodec_receive_frame()")
frame = self._transfer_hwframe(frame)
# The frame is handed to the caller, so drop the cached reference; a new
# frame is allocated on the next call.
if not res:
self._next_frame = None
return frame
cdef _transfer_hwframe(self, Frame frame):
    # Base implementation: return the frame untouched.
    return frame
cdef _recv_packet(self):
    # Try to receive one encoded packet via avcodec_receive_packet().
    # Returns None when the call reports -EAGAIN or AVERROR_EOF.
    cdef Packet pkt = Packet()
    cdef int ret
    with nogil:
        ret = lib.avcodec_receive_packet(self.ptr, pkt.ptr)
    if ret == -EAGAIN or ret == lib.AVERROR_EOF:
        return None
    err_check(ret, "avcodec_receive_packet()")
    if ret:
        return None
    return pkt
cdef _prepare_and_time_rebase_frames_for_encode(self, Frame frame):
    # Only audio and video contexts can encode.
    if (
        self.ptr.codec_type != lib.AVMEDIA_TYPE_VIDEO
        and self.ptr.codec_type != lib.AVMEDIA_TYPE_AUDIO
    ):
        raise NotImplementedError("Encoding is only supported for audio and video.")

    # Make sure the codec is open; an already-open context is fine.
    self.open(strict=False)

    prepared = self._prepare_frames_for_encode(frame)

    # Bring every real frame into this context's time base.
    # TODO: Don't mutate time.
    for frame in prepared:
        if frame is None:
            continue
        frame._rebase_time(self.ptr.time_base)

    return prepared
cpdef encode(self, Frame frame=None):
    """Encode the given :class:`.Frame` and return the resulting list of :class:`.Packet`."""
    packets = []
    prepared = self._prepare_and_time_rebase_frames_for_encode(frame)
    for frame in prepared:
        for packet in self._send_frame_and_recv(frame):
            # Stamp the packet with this context's time base before returning it.
            self._setup_encoded_packet(packet)
            packets.append(packet)
    return packets
def encode_lazy(self, Frame frame=None):
    """Generator variant of :meth:`encode`: yield each :class:`.Packet` as it is produced."""
    prepared = self._prepare_and_time_rebase_frames_for_encode(frame)
    for frame in prepared:
        for packet in self._send_frame_and_recv(frame):
            # Stamp the packet with this context's time base before yielding it.
            self._setup_encoded_packet(packet)
            yield packet
cdef _setup_encoded_packet(self, Packet packet):
    # Frames were rebased to the CodecContext's time_base before encoding and
    # FFmpeg carried their pts/dts over to the packet. Record that time_base
    # on the packet so it can later be muxed into a container that uses a
    # different one.
    #
    # NOTE: this is only valid while the CodecContext's time_base is left
    # unchanged during encoding.
    packet.ptr.time_base = self.ptr.time_base
cpdef decode(self, Packet packet=None):
"""Decode a list of :class:`.Frame` from the given :class:`.Packet`.
If the packet is None, the buffers will be flushed. This is useful if
you do not want the library to automatically re-order frames for you
(if they are encoded with a codec that has B-frames).
:raises ValueError: if the context has no known codec.
"""
if not self.codec.ptr:
raise ValueError("cannot decode unknown codec")
# Make sure the codec is open; an already-open context is fine.
self.open(strict=False)
res = []
for frame in self._send_packet_and_recv(packet):
if isinstance(frame, Frame):
# Copy timing information from the source packet onto the frame.
self._setup_decoded_frame(frame, packet)
res.append(frame)
return res
cpdef flush_buffers(self):
    """Reset the internal codec state and discard all internal buffers.

    Call this before decoding from a new position, e.g. after seeking or
    when switching to a different stream. Does nothing when the context
    is not open.
    """
    if not self.is_open:
        return
    with nogil:
        lib.avcodec_flush_buffers(self.ptr)
cdef _setup_decoded_frame(self, Frame frame, Packet packet):
    # Propagate our manually tracked times: while decoding, frame times are
    # in the stream time_base that PyAV carries around on the packet.
    # TODO: Somehow get this from the stream so we can not pass the
    #       packet here (because flushing packets are bogus).
    if packet is None:
        return
    frame._time_base = packet.ptr.time_base
@property
def name(self):
    """Name of the attached codec (``self.codec.name``)."""
    return self.codec.name
@property
def type(self):
    """Media type of the attached codec (``self.codec.type``)."""
    return self.codec.type
@property
def profiles(self):
    """
    Names of the profiles listed in the codec descriptor.

    Returns an empty list when there is no codec, no descriptor or no
    profile table.

    :type: list[str]
    """
    names = []
    if not self.ptr.codec or not self.codec.desc or not self.codec.desc.profiles:
        return names

    # The descriptor always lists the profiles; the codec itself may not,
    # so read them from the descriptor.
    desc = self.codec.desc
    cdef int idx = 0
    # The table is terminated by an AV_PROFILE_UNKNOWN entry.
    while desc.profiles[idx].profile != lib.AV_PROFILE_UNKNOWN:
        names.append(desc.profiles[idx].name)
        idx += 1

    return names
@property
def profile(self):
    """
    Name of the descriptor profile matching the context's ``profile`` value.

    ``None`` when there is no codec, descriptor or profile table, or when
    no table entry matches.
    """
    if not self.ptr.codec or not self.codec.desc or not self.codec.desc.profiles:
        return

    # The descriptor always lists the profiles; the codec itself may not,
    # so read them from the descriptor.
    desc = self.codec.desc
    cdef int idx = 0
    # The table is terminated by an AV_PROFILE_UNKNOWN entry.
    while desc.profiles[idx].profile != lib.AV_PROFILE_UNKNOWN:
        if desc.profiles[idx].profile == self.ptr.profile:
            return desc.profiles[idx].name
        idx += 1

@profile.setter
def profile(self, value):
    # Silently does nothing when there is no codec, descriptor or profile
    # table, or when ``value`` matches no profile name.
    if not self.codec or not self.codec.desc or not self.codec.desc.profiles:
        return

    # The descriptor always lists the profiles; the codec itself may not,
    # so look the name up in the descriptor.
    desc = self.codec.desc
    cdef int idx = 0
    while desc.profiles[idx].profile != lib.AV_PROFILE_UNKNOWN:
        if desc.profiles[idx].name == value:
            self.ptr.profile = desc.profiles[idx].profile
            return
        idx += 1
@property
def time_base(self):
    """
    The context's ``time_base`` as a fraction.

    :raises RuntimeError: when this context is a decoder.
    """
    if self.is_decoder:
        raise RuntimeError("Cannot access 'time_base' as a decoder")
    return avrational_to_fraction(&self.ptr.time_base)

@time_base.setter
def time_base(self, value):
    # Decoders reject both reads and writes of time_base.
    if self.is_decoder:
        raise RuntimeError("Cannot access 'time_base' as a decoder")
    to_avrational(value, &self.ptr.time_base)
@property
def codec_tag(self):
    """
    The ``codec_tag`` integer rendered as a 4 character ASCII string
    (little-endian byte order).
    """
    raw = self.ptr.codec_tag.to_bytes(4, byteorder="little", signed=False)
    return raw.decode(encoding="ascii")

@codec_tag.setter
def codec_tag(self, value):
    # Only 4 character strings are accepted.
    if not (isinstance(value, str) and len(value) == 4):
        raise ValueError("Codec tag should be a 4 character string.")
    self.ptr.codec_tag = int.from_bytes(
        value.encode(encoding="ascii"), byteorder="little", signed=False
    )
@property
def bit_rate(self):
    """The context's ``bit_rate`` when it is positive, otherwise ``None``."""
    if self.ptr.bit_rate > 0:
        return self.ptr.bit_rate
    return None

@bit_rate.setter
def bit_rate(self, int value):
    self.ptr.bit_rate = value
@property
def max_bit_rate(self):
    """The context's ``rc_max_rate`` when it is positive, otherwise ``None``."""
    return self.ptr.rc_max_rate if self.ptr.rc_max_rate > 0 else None
@property
def bit_rate_tolerance(self):
    """
    The ``bit_rate_tolerance`` value stored on the wrapped codec context.

    :rtype: int
    """
    # The value must be returned explicitly; a bare expression statement
    # would make the property always evaluate to None.
    return self.ptr.bit_rate_tolerance
@bit_rate_tolerance.setter
def bit_rate_tolerance(self, int value):
    # Store the integer on the wrapped codec context.
    self.ptr.bit_rate_tolerance = value
@property
def thread_count(self):
    """Number of threads to use; 0 means auto.

    Wraps :ffmpeg:`AVCodecContext.thread_count`.
    """
    return self.ptr.thread_count

@thread_count.setter
def thread_count(self, int value):
    # The value can only be changed before the codec is opened.
    if self.is_open:
        raise RuntimeError("Cannot change thread_count after codec is open.")
    self.ptr.thread_count = value
@property
def thread_type(self):
    """One of :class:`.ThreadType`.

    Wraps :ffmpeg:`AVCodecContext.thread_type`.
    """
    return ThreadType(self.ptr.thread_type)

@thread_type.setter
def thread_type(self, value):
    # The value can only be changed before the codec is opened.
    if self.is_open:
        raise RuntimeError("Cannot change thread_type after codec is open.")

    # Accept a plain int, a ThreadType member name, or any object exposing
    # ``.value`` (such as a ThreadType member).
    if type(value) is int:
        raw = value
    elif type(value) is str:
        raw = ThreadType[value].value
    else:
        raw = value.value
    self.ptr.thread_type = raw
@property
def skip_frame(self):
"""Returns one of the following str literals:
"NONE" Discard nothing
"DEFAULT" Discard useless packets like 0 size packets in AVI
"NONREF" Discard all non reference
"BIDIR" Discard all bidirectional frames
"NONINTRA" Discard all non intra frames
"NONKEY" Discard all frames except keyframes
"ALL" Discard all
Any other underlying value is returned as its decimal string.
Wraps :ffmpeg:`AVCodecContext.skip_frame`.
"""
value = self.ptr.skip_frame
if value == lib.AVDISCARD_NONE:
return "NONE"
if value == lib.AVDISCARD_DEFAULT:
return "DEFAULT"
if value == lib.AVDISCARD_NONREF:
return "NONREF"
if value == lib.AVDISCARD_BIDIR:
return "BIDIR"
if value == lib.AVDISCARD_NONINTRA:
return "NONINTRA"
if value == lib.AVDISCARD_NONKEY:
return "NONKEY"
if value == lib.AVDISCARD_ALL:
return "ALL"
# Unrecognised discard level: fall back to its numeric representation.
return f"{value}"
# Setter: accepts exactly the seven names listed in the getter docstring and
# raises ValueError for anything else.
@skip_frame.setter
def skip_frame(self, value):
if value == "NONE":
self.ptr.skip_frame = lib.AVDISCARD_NONE
elif value == "DEFAULT":
self.ptr.skip_frame = lib.AVDISCARD_DEFAULT
elif value == "NONREF":
self.ptr.skip_frame = lib.AVDISCARD_NONREF
elif value == "BIDIR":
self.ptr.skip_frame = lib.AVDISCARD_BIDIR
elif value == "NONINTRA":
self.ptr.skip_frame = lib.AVDISCARD_NONINTRA
elif value == "NONKEY":
self.ptr.skip_frame = lib.AVDISCARD_NONKEY
elif value == "ALL":
self.ptr.skip_frame = lib.AVDISCARD_ALL
else:
raise ValueError("Invalid skip_frame type")
@property
def delay(self):
    """The codec delay stored on the wrapped context.

    Wraps :ffmpeg:`AVCodecContext.delay`.
    """
    return self.ptr.delay

View File

@@ -0,0 +1,21 @@
cimport libav as lib
from av.codec.codec cimport Codec
# Declaration of the wrapper around a single lib.AVCodecHWConfig entry.
cdef class HWConfig:
# Allows instances to be weakly referenced.
cdef object __weakref__
# The wrapped hardware configuration struct.
cdef lib.AVCodecHWConfig *ptr
# Stores the given pointer on the instance.
cdef void _init(self, lib.AVCodecHWConfig *ptr)
# Returns the HWConfig wrapper for the given lib.AVCodecHWConfig pointer.
cdef HWConfig wrap_hwconfig(lib.AVCodecHWConfig *ptr)
# Declaration of the hardware acceleration settings/context holder.
cdef class HWAccel:
# Requested device type, stored as an integer.
cdef int _device_type
# Optional device name/identifier string.
cdef str _device
cdef readonly Codec codec
# The HWConfig selected for this accelerator, readable from Python.
cdef readonly HWConfig config
# Reference to the native buffer holding the hardware device context.
cdef lib.AVBufferRef *ptr
# The attributes below are readable and writable from Python.
cdef public bint allow_software_fallback
cdef public dict options
cdef public int flags

View File

@@ -0,0 +1,50 @@
from enum import IntEnum
from typing import cast
from av.codec.codec import Codec
from av.video.format import VideoFormat
class HWDeviceType(IntEnum):
    """Type stub for the hardware device type enumeration."""

    none = cast(int, ...)
    vdpau = cast(int, ...)
    cuda = cast(int, ...)
    vaapi = cast(int, ...)
    dxva2 = cast(int, ...)
    qsv = cast(int, ...)
    videotoolbox = cast(int, ...)
    d3d11va = cast(int, ...)
    drm = cast(int, ...)
    opencl = cast(int, ...)
    mediacodec = cast(int, ...)
    vulkan = cast(int, ...)
    d3d12va = cast(int, ...)
    # The implementation enum also defines these two members.
    amf = cast(int, ...)
    ohcodec = cast(int, ...)
# Type stub for the hardware configuration method enumeration.
class HWConfigMethod(IntEnum):
none = cast(int, ...)
hw_device_ctx = cast(int, ...)
hw_frame_ctx = cast(int, ...)
internal = cast(int, ...)
ad_hoc = cast(int, ...)
# Type stub for the read-only view of one codec hardware configuration.
class HWConfig:
@property
def device_type(self) -> HWDeviceType: ...
@property
def format(self) -> VideoFormat: ...
@property
def methods(self) -> HWConfigMethod: ...
@property
def is_supported(self) -> bool: ...
class HWAccel:
    """Type stub for the hardware accelerator settings object."""

    def __init__(
        self,
        device_type: str | HWDeviceType,
        device: str | None = None,
        # The implementation's __init__ defaults this to True.
        allow_software_fallback: bool = True,
        options: dict[str, object] | None = None,
        flags: int | None = None,
    ) -> None: ...
    def create(self, codec: Codec) -> HWAccel: ...
# Returns the names of the hardware device types reported by the library.
def hwdevices_available() -> list[str]: ...

View File

@@ -0,0 +1,159 @@
import weakref
from enum import IntEnum
cimport libav as lib
from av.codec.codec cimport Codec
from av.dictionary cimport _Dictionary
from av.error cimport err_check
from av.video.format cimport get_video_format
from av.dictionary import Dictionary
# Python-level mirror of the library's hardware device type constants.
class HWDeviceType(IntEnum):
none = lib.AV_HWDEVICE_TYPE_NONE
vdpau = lib.AV_HWDEVICE_TYPE_VDPAU
cuda = lib.AV_HWDEVICE_TYPE_CUDA
vaapi = lib.AV_HWDEVICE_TYPE_VAAPI
dxva2 = lib.AV_HWDEVICE_TYPE_DXVA2
qsv = lib.AV_HWDEVICE_TYPE_QSV
videotoolbox = lib.AV_HWDEVICE_TYPE_VIDEOTOOLBOX
d3d11va = lib.AV_HWDEVICE_TYPE_D3D11VA
drm = lib.AV_HWDEVICE_TYPE_DRM
opencl = lib.AV_HWDEVICE_TYPE_OPENCL
mediacodec = lib.AV_HWDEVICE_TYPE_MEDIACODEC
vulkan = lib.AV_HWDEVICE_TYPE_VULKAN
d3d12va = lib.AV_HWDEVICE_TYPE_D3D12VA
# The two members below are hard-coded numbers rather than lib constants.
amf = 13 # FFmpeg >=8
ohcodec = 14
# TODO: When ffmpeg major is changed, check this enum.
# Python-level mirror of the AV_CODEC_HW_CONFIG_METHOD_* constants.
# NOTE(review): HWConfig.is_supported tests ptr.methods with "&", which suggests
# the field is a bitmask; converting a combined value through this IntEnum
# (as HWConfig.methods does) may raise ValueError - confirm.
class HWConfigMethod(IntEnum):
none = 0
hw_device_ctx = lib.AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX # This is the only one we support.
hw_frame_ctx = lib.AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX
internal = lib.AV_CODEC_HW_CONFIG_METHOD_INTERNAL
ad_hoc = lib.AV_CODEC_HW_CONFIG_METHOD_AD_HOC
# Private token that HWConfig.__init__ requires, so that only code in this
# module can create instances.
cdef object _cinit_sentinel = object()
# Cache of HWConfig wrappers keyed by pointer value; entries disappear once
# the wrapper is no longer referenced elsewhere.
cdef object _singletons = weakref.WeakValueDictionary()
cdef HWConfig wrap_hwconfig(lib.AVCodecHWConfig *ptr):
    """Return the cached :class:`HWConfig` for ``ptr``, creating it on first use."""
    # Key the cache on the full pointer value. Casting a pointer to a C
    # ``int`` can truncate it on 64-bit platforms, which would let two
    # different configs share one cache entry.
    cdef size_t key = <size_t>ptr
    try:
        return _singletons[key]
    except KeyError:
        pass
    cdef HWConfig config = HWConfig(_cinit_sentinel)
    config._init(ptr)
    _singletons[key] = config
    return config
cdef class HWConfig:
    """Read-only wrapper around one ``lib.AVCodecHWConfig`` entry.

    Instances cannot be created directly from Python; the constructor
    requires the module-private sentinel.
    """

    def __init__(self, sentinel):
        if sentinel is not _cinit_sentinel:
            # Name the class that is actually being instantiated.
            raise RuntimeError("Cannot instantiate HWConfig")

    cdef void _init(self, lib.AVCodecHWConfig *ptr):
        # Attach the wrapped native struct.
        self.ptr = ptr

    def __repr__(self):
        # ``size_t`` keeps the whole pointer value; a C ``int`` cast can
        # truncate the address on 64-bit platforms.
        return (
            f"<av.{self.__class__.__name__} "
            f"device_type={lib.av_hwdevice_get_type_name(self.device_type)} "
            f"format={self.format.name if self.format else None} "
            f"is_supported={self.is_supported} at 0x{<size_t>self.ptr:x}>"
        )

    @property
    def device_type(self):
        """The config's device type as a :class:`HWDeviceType`."""
        return HWDeviceType(self.ptr.device_type)

    @property
    def format(self):
        """Video format built from the config's ``pix_fmt`` with zero width and height."""
        return get_video_format(self.ptr.pix_fmt, 0, 0)

    @property
    def methods(self):
        """The config's ``methods`` value converted to :class:`HWConfigMethod`."""
        # NOTE(review): ``methods`` is tested with ``&`` in ``is_supported``,
        # so it looks like a bitmask; a combined value may not map onto a
        # single IntEnum member - confirm.
        return HWConfigMethod(self.ptr.methods)

    @property
    def is_supported(self):
        """``True`` when the config offers the ``HW_DEVICE_CTX`` method."""
        return bool(self.ptr.methods & lib.AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX)
cpdef hwdevices_available():
    """Return the names of all hardware device types the library iterates over."""
    result = []
    cdef lib.AVHWDeviceType x = lib.AV_HWDEVICE_TYPE_NONE
    while True:
        x = lib.av_hwdevice_iterate_types(x)
        # Iteration ends when the library hands back the NONE type again.
        if x == lib.AV_HWDEVICE_TYPE_NONE:
            break
        # Pass the native enum value straight through: converting it to
        # HWDeviceType first would raise ValueError for any device type the
        # Python enum does not list.
        result.append(lib.av_hwdevice_get_type_name(x))
    return result
cdef class HWAccel:
    """Hardware acceleration settings, plus the device context once created.

    :param device_type: a :class:`HWDeviceType`, a device type name, or an int.
    :param device: optional device identifier passed to the library.
    :param allow_software_fallback: stored on the instance as given.
    :param options: optional mapping copied into a dict.
    :param flags: optional integer flags; a falsy value is stored as 0.
    :raises ValueError: if ``device_type`` is none of the accepted kinds.
    """

    def __init__(self, device_type, device=None, allow_software_fallback=True, options=None, flags=None):
        if isinstance(device_type, HWDeviceType):
            self._device_type = device_type
        elif isinstance(device_type, str):
            self._device_type = int(lib.av_hwdevice_find_type_by_name(device_type))
        elif isinstance(device_type, int):
            self._device_type = device_type
        else:
            raise ValueError("Unknown type for device_type")

        self._device = device
        self.allow_software_fallback = allow_software_fallback
        self.options = {} if not options else dict(options)
        self.flags = 0 if not flags else flags
        # No device context exists until _initialize_hw_context() runs.
        self.ptr = NULL
        self.config = None

    def _initialize_hw_context(self, Codec codec not None):
        """Pick a matching hardware config of ``codec`` and create the device context.

        :raises NotImplementedError: if the codec has no usable hardware config.
        """
        cdef HWConfig config
        for config in codec.hardware_configs:
            # Only configs offering the HW_DEVICE_CTX method are usable.
            if not (config.ptr.methods & lib.AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX):
                continue
            # A zero device type matches any config.
            if self._device_type and config.device_type != self._device_type:
                continue
            break
        else:
            raise NotImplementedError(f"No supported hardware config for {codec}")

        self.config = config

        cdef char *c_device = NULL
        if self._device:
            # Keep the encoded bytes in a local so the C pointer stays valid.
            device_bytes = self._device.encode()
            c_device = device_bytes
        cdef _Dictionary c_options = Dictionary(self.options)

        err_check(
            lib.av_hwdevice_ctx_create(
                &self.ptr, config.ptr.device_type, c_device, c_options.ptr, self.flags
            )
        )

    def create(self, Codec codec not None):
        """Create a new hardware accelerator context with the given codec"""
        if self.ptr:
            raise RuntimeError("Hardware context already initialized")

        # Carry every setting over to the copy, including ``flags``, which
        # the new context passes to av_hwdevice_ctx_create().
        ret = HWAccel(
            device_type=self._device_type,
            device=self._device,
            allow_software_fallback=self.allow_software_fallback,
            options=self.options,
            flags=self.flags,
        )
        ret._initialize_hw_context(codec)
        return ret

    def __dealloc__(self):
        # Drop our reference to the device context, if one was created.
        if self.ptr:
            lib.av_buffer_unref(&self.ptr)

View File

@@ -0,0 +1,3 @@
from .core import Container, Flags, open
from .input import InputContainer
from .output import OutputContainer

View File

@@ -0,0 +1,3 @@
from .core import *
from .input import *
from .output import *

View File

@@ -0,0 +1,51 @@
cimport libav as lib
from av.codec.hwaccel cimport HWAccel
from av.container.pyio cimport PyIOFile
from av.container.streams cimport StreamContainer
from av.dictionary cimport _Dictionary
from av.format cimport ContainerFormat
from av.stream cimport Stream
# Interrupt callback information, times are in seconds.
# start_time is compared against the clock reading in the interrupt callback;
# a negative timeout means "no timeout".
ctypedef struct timeout_info:
double start_time
double timeout
# Declaration of the base container class wrapping a lib.AVFormatContext.
cdef class Container:
# True for output containers.
cdef readonly bint writeable
# The wrapped native format context.
cdef lib.AVFormatContext *ptr
cdef readonly object name
# Encoding and error policy used when converting dictionaries to Python.
cdef readonly str metadata_encoding
cdef readonly str metadata_errors
cdef readonly PyIOFile file
cdef int buffer_size
cdef bint input_was_opened
# Optional user callback used to open additional files, and the files it opened.
cdef readonly object io_open
cdef readonly object open_files
cdef readonly ContainerFormat format
cdef readonly dict options
cdef readonly dict container_options
cdef readonly list stream_options
cdef HWAccel hwaccel
cdef readonly StreamContainer streams
cdef readonly dict metadata
# Private API.
cdef _assert_open(self)
cdef int err_check(self, int value) except -1
# Timeouts
cdef readonly object open_timeout
cdef readonly object read_timeout
# State read by the interrupt callback.
cdef timeout_info interrupt_callback_info
cdef set_timeout(self, object)
cdef start_timeout(self)

View File

@@ -0,0 +1,167 @@
from enum import Flag, IntEnum
from fractions import Fraction
from pathlib import Path
from types import TracebackType
from typing import Any, Callable, ClassVar, Literal, Type, TypedDict, cast, overload
from av.codec.hwaccel import HWAccel
from av.format import ContainerFormat
from .input import InputContainer
from .output import OutputContainer
from .streams import StreamContainer
# Numeric types accepted for timeout values in this stub.
Real = int | float | Fraction
# Type stub for the container flags enumeration.
class Flags(Flag):
gen_pts = cast(ClassVar[Flags], ...)
ign_idx = cast(ClassVar[Flags], ...)
non_block = cast(ClassVar[Flags], ...)
ign_dts = cast(ClassVar[Flags], ...)
no_fillin = cast(ClassVar[Flags], ...)
no_parse = cast(ClassVar[Flags], ...)
no_buffer = cast(ClassVar[Flags], ...)
custom_io = cast(ClassVar[Flags], ...)
discard_corrupt = cast(ClassVar[Flags], ...)
flush_packets = cast(ClassVar[Flags], ...)
bitexact = cast(ClassVar[Flags], ...)
sort_dts = cast(ClassVar[Flags], ...)
fast_seek = cast(ClassVar[Flags], ...)
# NOTE(review): the implementation's Flags enum does not define `shortest` -
# confirm whether this member should still be declared here.
shortest = cast(ClassVar[Flags], ...)
auto_bsf = cast(ClassVar[Flags], ...)
# Type stub for the PCM audio codec ID enumeration.
class AudioCodec(IntEnum):
none = cast(int, ...)
pcm_alaw = cast(int, ...)
pcm_bluray = cast(int, ...)
pcm_dvd = cast(int, ...)
pcm_f16le = cast(int, ...)
pcm_f24le = cast(int, ...)
pcm_f32be = cast(int, ...)
pcm_f32le = cast(int, ...)
pcm_f64be = cast(int, ...)
pcm_f64le = cast(int, ...)
pcm_lxf = cast(int, ...)
pcm_mulaw = cast(int, ...)
pcm_s16be = cast(int, ...)
pcm_s16be_planar = cast(int, ...)
pcm_s16le = cast(int, ...)
pcm_s16le_planar = cast(int, ...)
pcm_s24be = cast(int, ...)
pcm_s24daud = cast(int, ...)
pcm_s24le = cast(int, ...)
pcm_s24le_planar = cast(int, ...)
pcm_s32be = cast(int, ...)
pcm_s32le = cast(int, ...)
pcm_s32le_planar = cast(int, ...)
pcm_s64be = cast(int, ...)
pcm_s64le = cast(int, ...)
pcm_s8 = cast(int, ...)
pcm_s8_planar = cast(int, ...)
pcm_u16be = cast(int, ...)
pcm_u16le = cast(int, ...)
pcm_u24be = cast(int, ...)
pcm_u24le = cast(int, ...)
pcm_u32be = cast(int, ...)
pcm_u32le = cast(int, ...)
pcm_u8 = cast(int, ...)
pcm_vidc = cast(int, ...)
# Dictionary shape used by Container.chapters() and Container.set_chapters().
class Chapter(TypedDict):
id: int
start: int
end: int
# May be None.
time_base: Fraction | None
metadata: dict[str, str]
# Type stub for the base container class shared by input and output containers.
class Container:
writeable: bool
name: str
metadata_encoding: str
metadata_errors: str
file: Any
buffer_size: int
input_was_opened: bool
io_open: Any
open_files: Any
format: ContainerFormat
options: dict[str, str]
container_options: dict[str, str]
# One options dict per stream.
stream_options: list[dict[str, str]]
streams: StreamContainer
metadata: dict[str, str]
open_timeout: Real | None
read_timeout: Real | None
flags: int
# Context-manager protocol.
def __enter__(self) -> Container: ...
def __exit__(
self,
exc_type: Type[BaseException] | None,
exc_val: BaseException | None,
exc_tb: TracebackType | None,
) -> bool: ...
def set_timeout(self, timeout: Real | None) -> None: ...
def start_timeout(self) -> None: ...
def chapters(self) -> list[Chapter]: ...
def set_chapters(self, chapters: list[Chapter]) -> None: ...
# Overloads of ``open``: the return type follows ``mode``.
# ``stream_options`` is a list of per-stream option dicts, matching the
# ``Container.stream_options`` attribute (each item is passed to ``dict()``
# by the implementation, so plain strings are not valid items).
@overload
def open(
    file: Any,
    mode: Literal["r"],
    format: str | None = None,
    options: dict[str, str] | None = None,
    container_options: dict[str, str] | None = None,
    stream_options: list[dict[str, str]] | None = None,
    metadata_encoding: str = "utf-8",
    metadata_errors: str = "strict",
    buffer_size: int = 32768,
    timeout: Real | None | tuple[Real | None, Real | None] = None,
    io_open: Callable[..., Any] | None = None,
    hwaccel: HWAccel | None = None,
) -> InputContainer: ...
@overload
def open(
    file: str | Path,
    mode: Literal["r"] | None = None,
    format: str | None = None,
    options: dict[str, str] | None = None,
    container_options: dict[str, str] | None = None,
    stream_options: list[dict[str, str]] | None = None,
    metadata_encoding: str = "utf-8",
    metadata_errors: str = "strict",
    buffer_size: int = 32768,
    timeout: Real | None | tuple[Real | None, Real | None] = None,
    io_open: Callable[..., Any] | None = None,
    hwaccel: HWAccel | None = None,
) -> InputContainer: ...
@overload
def open(
    file: Any,
    mode: Literal["w"],
    format: str | None = None,
    options: dict[str, str] | None = None,
    container_options: dict[str, str] | None = None,
    stream_options: list[dict[str, str]] | None = None,
    metadata_encoding: str = "utf-8",
    metadata_errors: str = "strict",
    buffer_size: int = 32768,
    timeout: Real | None | tuple[Real | None, Real | None] = None,
    io_open: Callable[..., Any] | None = None,
    hwaccel: HWAccel | None = None,
) -> OutputContainer: ...
@overload
def open(
    file: Any,
    mode: Literal["r", "w"] | None = None,
    format: str | None = None,
    options: dict[str, str] | None = None,
    container_options: dict[str, str] | None = None,
    stream_options: list[dict[str, str]] | None = None,
    metadata_encoding: str = "utf-8",
    metadata_errors: str = "strict",
    buffer_size: int = 32768,
    timeout: Real | None | tuple[Real | None, Real | None] = None,
    io_open: Callable[..., Any] | None = None,
    hwaccel: HWAccel | None = None,
) -> InputContainer | OutputContainer: ...

View File

@@ -0,0 +1,494 @@
from cython.operator cimport dereference
from libc.stdint cimport int64_t
import os
import time
from enum import Flag, IntEnum
from pathlib import Path
cimport libav as lib
from av.codec.hwaccel cimport HWAccel
from av.container.core cimport timeout_info
from av.container.input cimport InputContainer
from av.container.output cimport OutputContainer
from av.container.pyio cimport pyio_close_custom_gil, pyio_close_gil
from av.error cimport err_check, stash_exception
from av.format cimport build_container_format
from av.utils cimport (
avdict_to_dict,
avrational_to_fraction,
dict_to_avdict,
to_avrational,
)
from av.dictionary import Dictionary
from av.logging import Capture as LogCapture
# Private token that Container.__cinit__ requires as its first argument.
cdef object _cinit_sentinel = object()
# We want to use the monotonic clock if it is available.
# Falls back to time.time when the time module has no "monotonic" attribute.
cdef object clock = getattr(time, "monotonic", time.time)
# Interrupt callback handed to the format context.
# p points at a timeout_info struct. Returns 1 to request an interrupt and
# 0 to let the operation continue.
cdef int interrupt_cb (void *p) noexcept nogil:
cdef timeout_info info = dereference(<timeout_info*> p)
if info.timeout < 0: # timeout < 0 means no timeout
return 0
cdef double current_time
# Reading the Python-level clock requires the GIL.
with gil:
current_time = clock()
# Check if the clock has been changed.
if current_time < info.start_time:
# Raise this when we get back to Python.
stash_exception((RuntimeError, RuntimeError("Clock has been changed to before timeout start"), None))
return 1
# The timeout has elapsed.
if current_time > info.start_time + info.timeout:
return 1
return 0
cdef int pyav_io_open(lib.AVFormatContext *s,
                      lib.AVIOContext **pb,
                      const char *url,
                      int flags,
                      lib.AVDictionary **options) noexcept nogil:
    # nogil entry point: acquire the GIL and delegate to the
    # GIL-holding implementation, returning its status code.
    cdef int status
    with gil:
        status = pyav_io_open_gil(s, pb, url, flags, options)
    return status
# GIL-holding implementation of the custom "open" I/O callback.
# Calls the owning container's io_open callable for url, wraps the result in
# a PyIOFile and stores its AVIOContext in pb[0].
# Returns 0 on success; on any Exception the result of stash_exception() is
# returned instead.
cdef int pyav_io_open_gil(lib.AVFormatContext *s,
lib.AVIOContext **pb,
const char *url,
int flags,
lib.AVDictionary **options) noexcept:
cdef Container container
cdef object file
cdef PyIOFile pyio_file
try:
# The format context's opaque field holds the owning Container.
container = <Container>dereference(s).opaque
if options is not NULL:
options_dict = avdict_to_dict(
dereference(<lib.AVDictionary**>options),
encoding=container.metadata_encoding,
errors=container.metadata_errors
)
else:
options_dict = {}
# A NULL url is passed to the callback as an empty string.
file = container.io_open(
<str>url if url is not NULL else "",
flags,
options_dict
)
# The third argument tells PyIOFile whether the file is opened for writing.
pyio_file = PyIOFile(
file,
container.buffer_size,
(flags & lib.AVIO_FLAG_WRITE) != 0
)
# Add it to the container to avoid it being deallocated
container.open_files[<int64_t>pyio_file.iocontext.opaque] = pyio_file
pb[0] = pyio_file.iocontext
return 0
except Exception:
return stash_exception()
cdef int pyav_io_close(lib.AVFormatContext *s, lib.AVIOContext *pb) noexcept nogil:
    # nogil entry point: acquire the GIL and delegate to the
    # GIL-holding implementation, returning its status code.
    cdef int status
    with gil:
        status = pyav_io_close_gil(s, pb)
    return status
# GIL-holding implementation of the custom "close" I/O callback.
# Returns the result of the close helper that was used, or
# lib.AVERROR_UNKNOWN when an Exception was raised (the exception is stashed).
cdef int pyav_io_close_gil(lib.AVFormatContext *s, lib.AVIOContext *pb) noexcept:
cdef Container container
cdef int result = 0
try:
# The format context's opaque field holds the owning Container.
container = <Container>dereference(s).opaque
# Files registered in open_files are closed through the custom path.
if container.open_files is not None and <int64_t>pb.opaque in container.open_files:
result = pyio_close_custom_gil(pb)
# Remove it from the container so that it can be deallocated
del container.open_files[<int64_t>pb.opaque]
else:
result = pyio_close_gil(pb)
except Exception:
stash_exception()
result = lib.AVERROR_UNKNOWN # Or another appropriate error code
return result
# Free every chapter of the format context (including each chapter's metadata
# dictionary), then the chapter array itself, and reset nb_chapters to 0.
cdef void _free_chapters(lib.AVFormatContext *ctx) noexcept nogil:
cdef int i
if ctx.chapters != NULL:
for i in range(ctx.nb_chapters):
if ctx.chapters[i] != NULL:
if ctx.chapters[i].metadata != NULL:
lib.av_dict_free(&ctx.chapters[i].metadata)
lib.av_freep(<void **>&ctx.chapters[i])
lib.av_freep(<void **>&ctx.chapters)
ctx.nb_chapters = 0
# Python-level mirror of the AVFMT_FLAG_* constants.
# Each member's description is attached as a string annotation on the member.
class Flags(Flag):
gen_pts: "Generate missing pts even if it requires parsing future frames." = lib.AVFMT_FLAG_GENPTS
ign_idx: "Ignore index." = lib.AVFMT_FLAG_IGNIDX
non_block: "Do not block when reading packets from input." = lib.AVFMT_FLAG_NONBLOCK
ign_dts: "Ignore DTS on frames that contain both DTS & PTS." = lib.AVFMT_FLAG_IGNDTS
no_fillin: "Do not infer any values from other values, just return what is stored in the container." = lib.AVFMT_FLAG_NOFILLIN
no_parse: "Do not use AVParsers, you also must set AVFMT_FLAG_NOFILLIN as the fill in code works on frames and no parsing -> no frames. Also seeking to frames can not work if parsing to find frame boundaries has been disabled." = lib.AVFMT_FLAG_NOPARSE
no_buffer: "Do not buffer frames when possible." = lib.AVFMT_FLAG_NOBUFFER
custom_io: "The caller has supplied a custom AVIOContext, don't avio_close() it." = lib.AVFMT_FLAG_CUSTOM_IO
discard_corrupt: "Discard frames marked corrupted." = lib.AVFMT_FLAG_DISCARD_CORRUPT
flush_packets: "Flush the AVIOContext every packet." = lib.AVFMT_FLAG_FLUSH_PACKETS
bitexact: "When muxing, try to avoid writing any random/volatile data to the output. This includes any random IDs, real-time timestamps/dates, muxer version, etc. This flag is mainly intended for testing." = lib.AVFMT_FLAG_BITEXACT
sort_dts: "Try to interleave outputted packets by dts (using this flag can slow demuxing down)." = lib.AVFMT_FLAG_SORT_DTS
fast_seek: "Enable fast, but inaccurate seeks for some formats." = lib.AVFMT_FLAG_FAST_SEEK
auto_bsf: "Add bitstream filters as requested by the muxer." = lib.AVFMT_FLAG_AUTO_BSF
# Only the "none" entry and PCM codec IDs are listed here.
class AudioCodec(IntEnum):
"""Enumeration for audio codec IDs."""
none = lib.AV_CODEC_ID_NONE # No codec.
pcm_alaw = lib.AV_CODEC_ID_PCM_ALAW # PCM A-law.
pcm_bluray = lib.AV_CODEC_ID_PCM_BLURAY # PCM Blu-ray.
pcm_dvd = lib.AV_CODEC_ID_PCM_DVD # PCM DVD.
pcm_f16le = lib.AV_CODEC_ID_PCM_F16LE # PCM F16 little-endian.
pcm_f24le = lib.AV_CODEC_ID_PCM_F24LE # PCM F24 little-endian.
pcm_f32be = lib.AV_CODEC_ID_PCM_F32BE # PCM F32 big-endian.
pcm_f32le = lib.AV_CODEC_ID_PCM_F32LE # PCM F32 little-endian.
pcm_f64be = lib.AV_CODEC_ID_PCM_F64BE # PCM F64 big-endian.
pcm_f64le = lib.AV_CODEC_ID_PCM_F64LE # PCM F64 little-endian.
pcm_lxf = lib.AV_CODEC_ID_PCM_LXF # PCM LXF.
pcm_mulaw = lib.AV_CODEC_ID_PCM_MULAW # PCM μ-law.
pcm_s16be = lib.AV_CODEC_ID_PCM_S16BE # PCM signed 16-bit big-endian.
pcm_s16be_planar = lib.AV_CODEC_ID_PCM_S16BE_PLANAR # PCM signed 16-bit big-endian planar.
pcm_s16le = lib.AV_CODEC_ID_PCM_S16LE # PCM signed 16-bit little-endian.
pcm_s16le_planar = lib.AV_CODEC_ID_PCM_S16LE_PLANAR # PCM signed 16-bit little-endian planar.
pcm_s24be = lib.AV_CODEC_ID_PCM_S24BE # PCM signed 24-bit big-endian.
pcm_s24daud = lib.AV_CODEC_ID_PCM_S24DAUD # PCM signed 24-bit D-Cinema audio.
pcm_s24le = lib.AV_CODEC_ID_PCM_S24LE # PCM signed 24-bit little-endian.
pcm_s24le_planar = lib.AV_CODEC_ID_PCM_S24LE_PLANAR # PCM signed 24-bit little-endian planar.
pcm_s32be = lib.AV_CODEC_ID_PCM_S32BE # PCM signed 32-bit big-endian.
pcm_s32le = lib.AV_CODEC_ID_PCM_S32LE # PCM signed 32-bit little-endian.
pcm_s32le_planar = lib.AV_CODEC_ID_PCM_S32LE_PLANAR # PCM signed 32-bit little-endian planar.
pcm_s64be = lib.AV_CODEC_ID_PCM_S64BE # PCM signed 64-bit big-endian.
pcm_s64le = lib.AV_CODEC_ID_PCM_S64LE # PCM signed 64-bit little-endian.
pcm_s8 = lib.AV_CODEC_ID_PCM_S8 # PCM signed 8-bit.
pcm_s8_planar = lib.AV_CODEC_ID_PCM_S8_PLANAR # PCM signed 8-bit planar.
pcm_u16be = lib.AV_CODEC_ID_PCM_U16BE # PCM unsigned 16-bit big-endian.
pcm_u16le = lib.AV_CODEC_ID_PCM_U16LE # PCM unsigned 16-bit little-endian.
pcm_u24be = lib.AV_CODEC_ID_PCM_U24BE # PCM unsigned 24-bit big-endian.
pcm_u24le = lib.AV_CODEC_ID_PCM_U24LE # PCM unsigned 24-bit little-endian.
pcm_u32be = lib.AV_CODEC_ID_PCM_U32BE # PCM unsigned 32-bit big-endian.
pcm_u32le = lib.AV_CODEC_ID_PCM_U32LE # PCM unsigned 32-bit little-endian.
pcm_u8 = lib.AV_CODEC_ID_PCM_U8 # PCM unsigned 8-bit.
pcm_vidc = lib.AV_CODEC_ID_PCM_VIDC # PCM VIDC.
cdef class Container:
    """Common base of :class:`InputContainer` and :class:`OutputContainer`.

    Wraps a libav ``AVFormatContext`` (``self.ptr``), which is released in
    ``__dealloc__``.  Instances cannot be constructed directly: ``__cinit__``
    requires the module-private sentinel that :func:`open` supplies, and only
    the two container subclasses are accepted.
    """

    def __cinit__(self, sentinel, file_, format_name, options,
                  container_options, stream_options, hwaccel,
                  metadata_encoding, metadata_errors,
                  buffer_size, open_timeout, read_timeout,
                  io_open):
        # Allocate the format context and, for inputs, open the source.
        #
        # ``file_`` is either a path string or a file-like object; the latter
        # is wrapped in a PyIOFile and plugged into the context as custom I/O.
        # ``format_name`` may be "format" or "format:audio_codec".
        if sentinel is not _cinit_sentinel:
            raise RuntimeError("cannot construct base Container")

        self.writeable = isinstance(self, OutputContainer)
        if not self.writeable and not isinstance(self, InputContainer):
            raise RuntimeError("Container cannot be directly extended.")

        if isinstance(file_, str):
            self.name = file_
        else:
            self.name = str(getattr(file_, "name", "<none>"))

        # Take private copies so later mutation by the caller has no effect.
        self.options = dict(options or ())
        self.container_options = dict(container_options or ())
        self.stream_options = [dict(x) for x in stream_options or ()]

        self.hwaccel = hwaccel

        self.metadata_encoding = metadata_encoding
        self.metadata_errors = metadata_errors

        self.open_timeout = open_timeout
        self.read_timeout = read_timeout

        self.buffer_size = buffer_size
        self.io_open = io_open

        acodec = None  # no audio codec specified
        if format_name is not None:
            if ":" in format_name:
                format_name, acodec = format_name.split(":")
            self.format = ContainerFormat(format_name)

        self.input_was_opened = False
        cdef int res

        cdef bytes name_obj = os.fsencode(self.name)
        cdef char *name = name_obj

        cdef lib.AVOutputFormat *ofmt
        if self.writeable:
            ofmt = self.format.optr if self.format else lib.av_guess_format(NULL, name, NULL)
            if ofmt == NULL:
                raise ValueError("Could not determine output format")

            with nogil:
                # This does not actually open the file.
                res = lib.avformat_alloc_output_context2(
                    &self.ptr,
                    ofmt,
                    NULL,
                    name,
                )
            self.err_check(res)

        else:
            # We need the context before we open the input AND setup Python IO.
            self.ptr = lib.avformat_alloc_context()
            if self.ptr == NULL:
                # Everything below dereferences self.ptr.
                raise MemoryError("Could not allocate format context")

            # Setup interrupt callback
            if self.open_timeout is not None or self.read_timeout is not None:
                self.ptr.interrupt_callback.callback = interrupt_cb
                self.ptr.interrupt_callback.opaque = &self.interrupt_callback_info

            if acodec is not None:
                self.ptr.audio_codec_id = getattr(AudioCodec, acodec)

        self.ptr.flags |= lib.AVFMT_FLAG_GENPTS
        self.ptr.opaque = <void*>self

        # Setup Python IO.
        self.open_files = {}
        if not isinstance(file_, str):
            self.file = PyIOFile(file_, buffer_size, self.writeable)
            self.ptr.pb = self.file.iocontext

        if io_open is not None:
            self.ptr.io_open = pyav_io_open
            self.ptr.io_close2 = pyav_io_close
            self.ptr.flags |= lib.AVFMT_FLAG_CUSTOM_IO

        cdef lib.AVInputFormat *ifmt
        cdef _Dictionary c_options
        if not self.writeable:
            ifmt = self.format.iptr if self.format else NULL

            c_options = Dictionary(self.options, self.container_options)

            # The open timeout only applies while avformat_open_input runs.
            self.set_timeout(self.open_timeout)
            self.start_timeout()
            with nogil:
                res = lib.avformat_open_input(&self.ptr, name, ifmt, &c_options.ptr)
            self.set_timeout(None)
            self.err_check(res)
            self.input_was_opened = True

        if format_name is None:
            self.format = build_container_format(self.ptr.iformat, self.ptr.oformat)

    def __dealloc__(self):
        with nogil:
            lib.avformat_free_context(self.ptr)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def __repr__(self):
        return f"<av.{self.__class__.__name__} {self.file or self.name!r}>"

    cdef int err_check(self, int value) except -1:
        # Same as the module-level err_check, but tags errors with our name.
        return err_check(value, filename=self.name)

    def dumps_format(self):
        """Return the text that ``av_dump_format`` logs for this container."""
        self._assert_open()
        with LogCapture() as logs:
            lib.av_dump_format(self.ptr, 0, "", isinstance(self, OutputContainer))
        return "".join(log[2] for log in logs)

    cdef set_timeout(self, timeout):
        # A timeout of None is stored as -1.0 in the interrupt callback info.
        if timeout is None:
            self.interrupt_callback_info.timeout = -1.0
        else:
            self.interrupt_callback_info.timeout = timeout

    cdef start_timeout(self):
        # Record "now" as the reference point for the interrupt callback.
        self.interrupt_callback_info.start_time = clock()

    cdef _assert_open(self):
        if self.ptr == NULL:
            raise AssertionError("Container is not open")

    @property
    def flags(self):
        """The raw ``AVFormatContext.flags`` bit field."""
        self._assert_open()
        return self.ptr.flags

    @flags.setter
    def flags(self, int value):
        self._assert_open()
        self.ptr.flags = value

    def chapters(self):
        """Return the container's chapters as a list of dicts.

        Each dict has the keys ``id``, ``start``, ``end``, ``time_base`` and
        ``metadata``.
        """
        self._assert_open()
        cdef list result = []
        cdef int i

        for i in range(self.ptr.nb_chapters):
            ch = self.ptr.chapters[i]
            result.append({
                "id": ch.id,
                "start": ch.start,
                "end": ch.end,
                "time_base": avrational_to_fraction(&ch.time_base),
                "metadata": avdict_to_dict(ch.metadata, self.metadata_encoding, self.metadata_errors),
            })
        return result

    def set_chapters(self, chapters):
        """Replace the container's chapters.

        :param chapters: Indexable sequence of dicts with the keys ``id``,
            ``start``, ``end`` and ``time_base``, plus an optional ``metadata``
            dict (the shape returned by :meth:`chapters`).
        :raises MemoryError: if libav cannot allocate the chapter storage.

        If any entry is rejected, every chapter built so far is released and
        the container is left with no chapters.
        """
        self._assert_open()

        cdef int count = len(chapters)
        cdef int i
        cdef lib.AVChapter **ch_array
        cdef lib.AVChapter *ch
        cdef dict entry

        with nogil:
            _free_chapters(self.ptr)
        # Never leave the context pointing at the array that was just freed,
        # whatever happens below.
        self.ptr.nb_chapters = 0
        self.ptr.chapters = NULL

        ch_array = <lib.AVChapter **>lib.av_malloc(count * sizeof(lib.AVChapter *))
        if ch_array == NULL:
            raise MemoryError("av_malloc failed for chapters")

        # Hand the array to the context straight away and grow nb_chapters one
        # chapter at a time, so a failure part-way through can be cleaned up
        # with _free_chapters() instead of leaking.
        self.ptr.chapters = ch_array
        try:
            for i in range(count):
                entry = chapters[i]
                ch = <lib.AVChapter *>lib.av_malloc(sizeof(lib.AVChapter))
                if ch == NULL:
                    raise MemoryError("av_malloc failed for chapter")
                # av_malloc does not zero memory; metadata must be NULL before
                # the chapter becomes reachable from the context.
                ch.metadata = NULL
                ch_array[i] = ch
                self.ptr.nb_chapters = i + 1

                ch.id = entry["id"]
                ch.start = <int64_t>entry["start"]
                ch.end = <int64_t>entry["end"]
                to_avrational(entry["time_base"], &ch.time_base)
                if "metadata" in entry:
                    dict_to_avdict(&ch.metadata, entry["metadata"], self.metadata_encoding, self.metadata_errors)
        except BaseException:
            with nogil:
                _free_chapters(self.ptr)
            self.ptr.nb_chapters = 0
            self.ptr.chapters = NULL
            raise
def open(
    file,
    mode=None,
    format=None,
    options=None,
    container_options=None,
    stream_options=None,
    metadata_encoding="utf-8",
    metadata_errors="strict",
    buffer_size=32768,
    timeout=None,
    io_open=None,
    hwaccel=None
):
    """open(file, mode='r', **kwargs)

    Main entrypoint to opening files/streams.

    :param str file: The file to open, which can be either a string or a file-like object.
    :param str mode: ``"r"`` for reading and ``"w"`` for writing.
    :param str format: Specific format to use. Defaults to autodetect.
    :param dict options: Options to pass to the container and all streams.
    :param dict container_options: Options to pass to the container.
    :param list stream_options: Options to pass to each stream.
    :param str metadata_encoding: Encoding to use when reading or writing file metadata.
        Defaults to ``"utf-8"``.
    :param str metadata_errors: Specifies how to handle encoding errors; behaves like
        ``str.encode`` parameter. Defaults to ``"strict"``.
    :param int buffer_size: Size of buffer for Python input/output operations in bytes.
        Honored only when ``file`` is a file-like object. Defaults to 32768 (32k).
    :param timeout: How many seconds to wait for data before giving up, as a float, or a
        ``(open timeout, read timeout)`` tuple.
    :param callable io_open: Custom I/O callable for opening files/streams.
        This option is intended for formats that need to open additional
        file-like objects to ``file`` using custom I/O.
        The callable signature is ``io_open(url: str, flags: int, options: dict)``, where
        ``url`` is the url to open, ``flags`` is a combination of AVIO_FLAG_* and
        ``options`` is a dictionary of additional options. The callable should return a
        file-like object.
    :param HWAccel hwaccel: Optional settings for hardware-accelerated decoding.
    :rtype: Container
    :raises ValueError: if ``mode`` is not ``"r"``, ``"w"`` or ``None``; if a ``timeout``
        tuple does not have exactly two items; or if ``stream_options`` is given when
        opening for writing.

    For devices (via ``libavdevice``), pass the name of the device to ``format``,
    e.g.::

        >>> # Open webcam on MacOS.
        >>> av.open('0', format='avfoundation') # doctest: +SKIP

    For DASH and custom I/O using ``io_open``, add a protocol prefix to the ``file`` to
    prevent the DASH encoder defaulting to the file protocol and using temporary files.
    The custom I/O callable can be used to remove the protocol prefix to reveal the actual
    name for creating the file-like object. E.g.::

        >>> av.open("customprotocol://manifest.mpd", "w", io_open=custom_io) # doctest: +SKIP

    .. seealso:: :ref:`garbage_collection`

    More information on using input and output devices is available on the
    `FFmpeg website <https://www.ffmpeg.org/ffmpeg-devices.html>`_.
    """
    # Validate the caller-supplied mode before anything else is touched.
    if mode not in (None, "r", "w"):
        raise ValueError(f"mode must be 'r', 'w', or None, got: {mode}")

    if isinstance(file, Path):
        file = f"{file}"
    elif not isinstance(file, str) and mode is None:
        # File-like object: fall back to its own mode (e.g. "rb", "wb").
        mode = getattr(file, "mode", None)

    if mode is None:
        mode = "r"

    if isinstance(timeout, tuple):
        if len(timeout) != 2:
            raise ValueError("timeout must be `float` or `tuple[float, float]`")
        open_timeout, read_timeout = timeout
    else:
        open_timeout = read_timeout = timeout

    # A mode taken from a file object may carry extra flags, hence startswith.
    if mode.startswith("r"):
        return InputContainer(_cinit_sentinel, file, format, options,
            container_options, stream_options, hwaccel, metadata_encoding, metadata_errors,
            buffer_size, open_timeout, read_timeout, io_open,
        )

    if stream_options:
        raise ValueError(
            "Provide stream options via Container.add_stream(..., options={})."
        )
    return OutputContainer(_cinit_sentinel, file, format, options,
        container_options, stream_options, None, metadata_encoding, metadata_errors,
        buffer_size, open_timeout, read_timeout, io_open,
    )

View File

@@ -0,0 +1,9 @@
cimport libav as lib
from av.container.core cimport Container
from av.stream cimport Stream
# Cython declaration of InputContainer, a Container subclass, so that other
# Cython modules can cimport it and call its C-level methods.
cdef class InputContainer(Container):
# C-level method; the implementation asks every stream's codec context to
# flush its buffers.
cdef flush_buffers(self)

View File

@@ -0,0 +1,49 @@
from typing import Any, Iterator, overload
from av.audio.frame import AudioFrame
from av.audio.stream import AudioStream
from av.packet import Packet
from av.stream import Stream
from av.subtitles.stream import SubtitleStream
from av.subtitles.subtitle import SubtitleSet
from av.video.frame import VideoFrame
from av.video.stream import VideoStream
from .core import Container
class InputContainer(Container):
    # Type stub for the Cython InputContainer.

    # Both timestamps are None when libav reports AV_NOPTS_VALUE.
    start_time: int | None
    duration: int | None
    bit_rate: int
    size: int

    def __enter__(self) -> InputContainer: ...
    def close(self) -> None: ...
    def demux(self, *args: Any, **kwargs: Any) -> Iterator[Packet]: ...

    # decode() yields frames whose type depends on the streams selected.
    @overload
    def decode(self, video: int) -> Iterator[VideoFrame]: ...
    @overload
    def decode(self, audio: int) -> Iterator[AudioFrame]: ...
    @overload
    def decode(self, subtitles: int) -> Iterator[SubtitleSet]: ...
    @overload
    def decode(self, *args: VideoStream) -> Iterator[VideoFrame]: ...
    @overload
    def decode(self, *args: AudioStream) -> Iterator[AudioFrame]: ...
    @overload
    def decode(self, *args: SubtitleStream) -> Iterator[SubtitleSet]: ...
    @overload
    def decode(
        self, *args: Any, **kwargs: Any
    ) -> Iterator[VideoFrame | AudioFrame | SubtitleSet]: ...
    def seek(
        self,
        offset: int,
        *,
        backward: bool = True,
        any_frame: bool = False,
        stream: Stream | VideoStream | AudioStream | None = None,
        unsupported_frame_offset: bool = False,
        unsupported_byte_offset: bool = False,
    ) -> None: ...
    def flush_buffers(self) -> None: ...

View File

@@ -0,0 +1,290 @@
from libc.stdint cimport int64_t
from libc.stdlib cimport free, malloc
from av.codec.context cimport CodecContext, wrap_codec_context
from av.container.streams cimport StreamContainer
from av.dictionary cimport _Dictionary
from av.error cimport err_check
from av.packet cimport Packet
from av.stream cimport Stream, wrap_stream
from av.utils cimport avdict_to_dict
from av.dictionary import Dictionary
cdef close_input(InputContainer self):
    # Drop every stream wrapper first; the streams belong to the format
    # context that is about to be closed.
    self.streams = StreamContainer()

    if not self.input_was_opened:
        return

    with nogil:
        # This causes `self.ptr` to be set to NULL.
        lib.avformat_close_input(&self.ptr)
    self.input_was_opened = False
cdef class InputContainer(Container):
    """A container opened for reading: probes its streams and demuxes packets."""

    def __cinit__(self, *args, **kwargs):
        # Runs after Container.__cinit__, so the input is already open here.
        # Probes the streams and wraps each one, with a decoder when libav
        # knows one for the stream's codec id.
        cdef CodecContext py_codec_context
        cdef unsigned int i
        cdef unsigned int nb_option_dicts = 0
        cdef int ret
        cdef lib.AVStream *stream
        cdef lib.AVCodec *codec
        cdef lib.AVCodecContext *codec_context

        # If we have either the global `options`, or a `stream_options`, prepare
        # a mashup of those options for each stream.
        cdef lib.AVDictionary **c_options = NULL
        cdef _Dictionary base_dict, stream_dict
        if (self.options or self.stream_options) and self.ptr.nb_streams > 0:
            base_dict = Dictionary(self.options)
            # Remember how many dictionaries we allocate: probing below may add
            # streams, and the cleanup must not walk past this array.
            nb_option_dicts = self.ptr.nb_streams
            c_options = <lib.AVDictionary**>malloc(nb_option_dicts * sizeof(void*))
            if c_options == NULL:
                raise MemoryError("Could not allocate per-stream options")
            for i in range(nb_option_dicts):
                c_options[i] = NULL

        try:
            for i in range(nb_option_dicts):
                if i < len(self.stream_options):
                    stream_dict = base_dict.copy()
                    stream_dict.update(self.stream_options[i])
                    lib.av_dict_copy(&c_options[i], stream_dict.ptr, 0)
                else:
                    lib.av_dict_copy(&c_options[i], base_dict.ptr, 0)

            self.set_timeout(self.open_timeout)
            self.start_timeout()
            with nogil:
                # This peeks are the first few frames to:
                #   - set stream.disposition from codec.audio_service_type (not exposed);
                #   - set stream.codec.bits_per_coded_sample;
                #   - set stream.duration;
                #   - set stream.start_time;
                #   - set stream.r_frame_rate to average value;
                #   - open and closes codecs with the options provided.
                ret = lib.avformat_find_stream_info(
                    self.ptr,
                    c_options
                )
            self.set_timeout(None)
            self.err_check(ret)
        finally:
            # Cleanup all of our options, even when probing failed.
            if c_options != NULL:
                for i in range(nb_option_dicts):
                    lib.av_dict_free(&c_options[i])
                free(c_options)

        at_least_one_accelerated_context = False

        self.streams = StreamContainer()
        for i in range(self.ptr.nb_streams):
            stream = self.ptr.streams[i]
            codec = lib.avcodec_find_decoder(stream.codecpar.codec_id)
            if codec != NULL:
                # allocate and initialise decoder
                codec_context = lib.avcodec_alloc_context3(codec)
                if codec_context == NULL:
                    raise MemoryError("Could not allocate codec context")
                err_check(lib.avcodec_parameters_to_context(codec_context, stream.codecpar))
                codec_context.pkt_timebase = stream.time_base
                py_codec_context = wrap_codec_context(codec_context, codec, self.hwaccel)
                if py_codec_context.is_hwaccel:
                    at_least_one_accelerated_context = True
            else:
                # no decoder is available
                py_codec_context = None
            self.streams.add_stream(wrap_stream(self, stream, py_codec_context))

        if self.hwaccel and not self.hwaccel.allow_software_fallback and not at_least_one_accelerated_context:
            raise RuntimeError("Hardware accelerated decode requested but no stream is compatible")

        self.metadata = avdict_to_dict(self.ptr.metadata, self.metadata_encoding, self.metadata_errors)

    def __dealloc__(self):
        close_input(self)

    @property
    def start_time(self):
        """The context's ``start_time``, or ``None`` when it is ``AV_NOPTS_VALUE``."""
        self._assert_open()
        if self.ptr.start_time != lib.AV_NOPTS_VALUE:
            return self.ptr.start_time
        return None

    @property
    def duration(self):
        """The context's ``duration``, or ``None`` when it is ``AV_NOPTS_VALUE``."""
        self._assert_open()
        if self.ptr.duration != lib.AV_NOPTS_VALUE:
            return self.ptr.duration
        return None

    @property
    def bit_rate(self):
        """The context's ``bit_rate`` field."""
        self._assert_open()
        return self.ptr.bit_rate

    @property
    def size(self):
        """The value of ``avio_size`` for the context's I/O handle."""
        self._assert_open()
        return lib.avio_size(self.ptr.pb)

    def close(self):
        """Close the input and drop all stream wrappers."""
        close_input(self)

    def demux(self, *args, **kwargs):
        """demux(streams=None, video=None, audio=None, subtitles=None, data=None)

        Yields a series of :class:`.Packet` from the given set of :class:`.Stream`::

            for packet in container.demux():
                # Do something with `packet`, often:
                for frame in packet.decode():
                    # Do something with `frame`.

        .. seealso:: :meth:`.StreamContainer.get` for the interpretation of
            the arguments.

        .. note:: The last packets are dummy packets that when decoded will flush the buffers.

        """
        self._assert_open()

        # For whatever reason, Cython does not like us directly passing kwargs
        # from one method to another. Without kwargs, it ends up passing a
        # NULL reference, which segfaults. So we force it to do something with it.
        # This is likely a bug in Cython; see https://github.com/cython/cython/issues/2166
        # (and others).
        id(kwargs)

        streams = self.streams.get(*args, **kwargs)

        # Size of the selection table. It is captured once because the context
        # may gain streams while reading (AVFMTCTX_NOHEADER), and the table
        # must never be indexed past what was allocated.
        cdef unsigned int nb_streams = self.ptr.nb_streams
        # malloc(0) may legitimately return NULL; always ask for one slot.
        cdef size_t alloc_count = nb_streams if nb_streams > 0 else 1
        cdef bint *include_stream = <bint*>malloc(alloc_count * sizeof(bint))
        if include_stream == NULL:
            raise MemoryError()

        cdef unsigned int i
        cdef int stream_index
        cdef Packet packet
        cdef int ret

        self.set_timeout(self.read_timeout)
        try:
            for i in range(nb_streams):
                include_stream[i] = False
            for stream in streams:
                i = stream.index
                if i >= nb_streams:
                    raise ValueError(f"stream index {i} out of range")
                include_stream[i] = True

            while True:
                packet = Packet()
                try:
                    self.start_timeout()
                    with nogil:
                        ret = lib.av_read_frame(self.ptr, packet.ptr)
                    self.err_check(ret)
                except EOFError:
                    break

                # If AVFMTCTX_NOHEADER is set in ctx_flags, then new streams
                # may also appear in av_read_frame().
                # http://ffmpeg.org/doxygen/trunk/structAVFormatContext.html
                # Such streams cannot have been selected, so skip their packets
                # rather than reading past the end of the selection table.
                stream_index = packet.ptr.stream_index
                if stream_index < 0 or <unsigned int>stream_index >= nb_streams:
                    continue
                if not include_stream[stream_index]:
                    continue

                # TODO: find better way to handle this
                if stream_index < len(self.streams):
                    packet._stream = self.streams[stream_index]
                    # Keep track of this so that remuxing is easier.
                    packet.ptr.time_base = packet._stream.ptr.time_base
                    yield packet

            # Flush!
            for i in range(nb_streams):
                if include_stream[i]:
                    packet = Packet()
                    packet._stream = self.streams[i]
                    packet.ptr.time_base = packet._stream.ptr.time_base
                    yield packet

        finally:
            self.set_timeout(None)
            free(include_stream)

    def decode(self, *args, **kwargs):
        """decode(streams=None, video=None, audio=None, subtitles=None, data=None)

        Yields a series of :class:`.Frame` from the given set of streams::

            for frame in container.decode():
                # Do something with `frame`.

        .. seealso:: :meth:`.StreamContainer.get` for the interpretation of
            the arguments.

        """
        self._assert_open()
        id(kwargs)  # Avoid Cython bug; see demux().
        for packet in self.demux(*args, **kwargs):
            for frame in packet.decode():
                yield frame

    def seek(
        self, offset, *, bint backward=True, bint any_frame=False, Stream stream=None,
        bint unsupported_frame_offset=False, bint unsupported_byte_offset=False
    ):
        """seek(offset, *, backward=True, any_frame=False, stream=None)

        Seek to a (key)frame nearest to the given timestamp.

        :param int offset: Time to seek to, expressed in ``stream.time_base`` if ``stream``
            is given, otherwise in :data:`av.time_base`.
        :param bool backward: If there is not a (key)frame at the given offset,
            look backwards for it.
        :param bool any_frame: Seek to any frame, not just a keyframe.
        :param Stream stream: The stream whose ``time_base`` the ``offset`` is in.

        :param bool unsupported_frame_offset: ``offset`` is a frame
            index instead of a time; not supported by any known format.
        :param bool unsupported_byte_offset: ``offset`` is a byte
            location in the file; not supported by any known format.
        :raises TypeError: if ``offset`` is not an ``int``.

        After seeking, packets that you demux should correspond (roughly) to
        the position you requested.

        In most cases, the defaults of ``backwards = True`` and ``any_frame = False``
        are the best course of action, followed by you demuxing/decoding to
        the position that you want. This is because to properly decode video frames
        you need to start from the previous keyframe.

        .. seealso:: :ffmpeg:`avformat_seek_file` for discussion of the flags.

        """
        self._assert_open()

        # We used to take floats here and assume they were in seconds. This
        # was super confusing, so lets go in the complete opposite direction
        # and reject non-ints.
        if not isinstance(offset, int):
            raise TypeError("Container.seek only accepts integer offset.", type(offset))

        cdef int64_t c_offset = offset

        cdef int flags = 0
        cdef int ret

        if backward:
            flags |= lib.AVSEEK_FLAG_BACKWARD
        if any_frame:
            flags |= lib.AVSEEK_FLAG_ANY

        # If someone really wants (and to experiment), expose these.
        if unsupported_frame_offset:
            flags |= lib.AVSEEK_FLAG_FRAME
        if unsupported_byte_offset:
            flags |= lib.AVSEEK_FLAG_BYTE

        # -1 selects no particular stream.
        cdef int stream_index = stream.index if stream else -1
        with nogil:
            ret = lib.av_seek_frame(self.ptr, stream_index, c_offset, flags)
        err_check(ret)

        # Decoders still hold data from before the seek.
        self.flush_buffers()

    cdef flush_buffers(self):
        # Flush the buffers of every stream that has a codec context.
        self._assert_open()

        cdef Stream stream
        cdef CodecContext codec_context

        for stream in self.streams:
            codec_context = stream.codec_context
            if codec_context:
                codec_context.flush_buffers()

View File

@@ -0,0 +1,12 @@
cimport libav as lib
from av.container.core cimport Container
from av.stream cimport Stream
# Cython declaration of OutputContainer, a Container subclass, so that other
# Cython modules can cimport it.
cdef class OutputContainer(Container):
# Set once start_encoding() has written the file header.
cdef bint _started
# Set once closing has attempted to write the trailer; guards against a
# second av_write_trailer call.
cdef bint _done
# Packet allocated when the container is created and freed when it is
# deallocated; mux_one() uses it for the reference it hands to libav.
cdef lib.AVPacket *packet_ptr
# cpdef: callable from both Cython and Python code.
cpdef start_encoding(self)

View File

@@ -0,0 +1,474 @@
import os
from fractions import Fraction
import cython
from cython.cimports import libav as lib
from cython.cimports.av.codec.codec import Codec
from cython.cimports.av.codec.context import CodecContext, wrap_codec_context
from cython.cimports.av.container.streams import StreamContainer
from cython.cimports.av.dictionary import _Dictionary
from cython.cimports.av.error import err_check
from cython.cimports.av.packet import Packet
from cython.cimports.av.stream import Stream, wrap_stream
from cython.cimports.av.utils import dict_to_avdict, to_avrational
from av.dictionary import Dictionary
@cython.cfunc
def close_output(self: OutputContainer):
    # Finish an output container: write the trailer (at most once) and close
    # the file handle when libav opened it for us.
    self.streams = StreamContainer()

    if not self._started or self._done:
        # Nothing was written yet, or the trailer was already attempted.
        return

    # We must only ever call av_write_trailer *once*, otherwise we get a
    # segmentation fault. Therefore no matter whether it succeeds or not
    # we must absolutely set self._done.
    try:
        self.err_check(lib.av_write_trailer(self.ptr))
    finally:
        if self.file is None and not (self.ptr.oformat.flags & lib.AVFMT_NOFILE):
            lib.avio_closep(cython.address(self.ptr.pb))
        self._done = True
@cython.cclass
class OutputContainer(Container):
def __cinit__(self, *args, **kwargs):
self.streams = StreamContainer()
self.metadata = {}
with cython.nogil:
self.packet_ptr = lib.av_packet_alloc()
def __dealloc__(self):
close_output(self)
with cython.nogil:
lib.av_packet_free(cython.address(self.packet_ptr))
def add_stream(self, codec_name, rate=None, options: dict | None = None, **kwargs):
"""add_stream(codec_name, rate=None)
Creates a new stream from a codec name and returns it.
Supports video, audio, and subtitle streams.
:param codec_name: The name of a codec.
:type codec_name: str
:param dict options: Stream options.
:param \\**kwargs: Set attributes for the stream.
:rtype: The new :class:`~av.stream.Stream`.
"""
codec_obj: Codec = Codec(codec_name, "w")
codec: cython.pointer[cython.const[lib.AVCodec]] = codec_obj.ptr
# Assert that this format supports the requested codec.
if not lib.avformat_query_codec(
self.ptr.oformat, codec.id, lib.FF_COMPLIANCE_NORMAL
):
raise ValueError(
f"{self.format.name!r} format does not support {codec_obj.name!r} codec"
)
# Create new stream in the AVFormatContext, set AVCodecContext values.
stream: cython.pointer[lib.AVStream] = lib.avformat_new_stream(self.ptr, codec)
ctx: cython.pointer[lib.AVCodecContext] = lib.avcodec_alloc_context3(codec)
# Now lets set some more sane video defaults
if codec.type == lib.AVMEDIA_TYPE_VIDEO:
ctx.pix_fmt = lib.AV_PIX_FMT_YUV420P
ctx.width = kwargs.pop("width", 640)
ctx.height = kwargs.pop("height", 480)
ctx.bit_rate = kwargs.pop("bit_rate", 0)
ctx.bit_rate_tolerance = kwargs.pop("bit_rate_tolerance", 128000)
try:
to_avrational(kwargs.pop("time_base"), cython.address(ctx.time_base))
except KeyError:
pass
to_avrational(rate or 24, cython.address(ctx.framerate))
stream.avg_frame_rate = ctx.framerate
stream.time_base = ctx.time_base
# Some sane audio defaults
elif codec.type == lib.AVMEDIA_TYPE_AUDIO:
ctx.sample_fmt = codec.sample_fmts[0]
ctx.bit_rate = kwargs.pop("bit_rate", 0)
ctx.bit_rate_tolerance = kwargs.pop("bit_rate_tolerance", 32000)
try:
to_avrational(kwargs.pop("time_base"), cython.address(ctx.time_base))
except KeyError:
pass
if rate is None:
ctx.sample_rate = 48000
elif type(rate) is int:
ctx.sample_rate = rate
else:
raise TypeError("audio stream `rate` must be: int | None")
stream.time_base = ctx.time_base
lib.av_channel_layout_default(cython.address(ctx.ch_layout), 2)
# Some formats want stream headers to be separate
if self.ptr.oformat.flags & lib.AVFMT_GLOBALHEADER:
ctx.flags |= lib.AV_CODEC_FLAG_GLOBAL_HEADER
# Initialise stream codec parameters to populate the codec type.
#
# Subsequent changes to the codec context will be applied just before
# encoding starts in `start_encoding()`.
err_check(lib.avcodec_parameters_from_context(stream.codecpar, ctx))
# Construct the user-land stream
py_codec_context: CodecContext = wrap_codec_context(ctx, codec, None)
py_stream: Stream = wrap_stream(self, stream, py_codec_context)
self.streams.add_stream(py_stream)
if options:
py_stream.options.update(options)
for k, v in kwargs.items():
setattr(py_stream, k, v)
return py_stream
def add_stream_from_template(
self, template: Stream, opaque: bool | None = None, **kwargs
):
"""
Creates a new stream from a template. Supports video, audio, subtitle, data and attachment streams.
:param template: Copy codec from another :class:`~av.stream.Stream` instance.
:param opaque: If True, copy opaque data from the template's codec context.
:param \\**kwargs: Set attributes for the stream.
:rtype: The new :class:`~av.stream.Stream`.
"""
if opaque is None:
opaque = template.type != "video"
if template.codec_context is None:
return self._add_stream_without_codec_from_template(template, **kwargs)
codec_obj: Codec
if opaque: # Copy ctx from template.
codec_obj = template.codec_context.codec
else: # Construct new codec object.
codec_obj = Codec(template.codec_context.codec.name, "w")
codec: cython.pointer[cython.const[lib.AVCodec]] = codec_obj.ptr
# Assert that this format supports the requested codec.
if not lib.avformat_query_codec(
self.ptr.oformat, codec.id, lib.FF_COMPLIANCE_NORMAL
):
raise ValueError(
f"{self.format.name!r} format does not support {codec_obj.name!r} codec"
)
# Create new stream in the AVFormatContext, set AVCodecContext values.
stream: cython.pointer[lib.AVStream] = lib.avformat_new_stream(self.ptr, codec)
ctx: cython.pointer[lib.AVCodecContext] = lib.avcodec_alloc_context3(codec)
err_check(lib.avcodec_parameters_to_context(ctx, template.ptr.codecpar))
# Reset the codec tag assuming we are remuxing.
ctx.codec_tag = 0
# Some formats want stream headers to be separate
if self.ptr.oformat.flags & lib.AVFMT_GLOBALHEADER:
ctx.flags |= lib.AV_CODEC_FLAG_GLOBAL_HEADER
# Copy flags If we're creating a new codec object. This fixes some muxing issues.
# Overwriting `ctx.flags |= lib.AV_CODEC_FLAG_GLOBAL_HEADER` is intentional.
if not opaque:
ctx.flags = template.codec_context.flags
# Initialize stream codec parameters to populate the codec type. Subsequent changes to
# the codec context will be applied just before encoding starts in `start_encoding()`.
err_check(lib.avcodec_parameters_from_context(stream.codecpar, ctx))
# Construct the user-land stream
py_codec_context: CodecContext = wrap_codec_context(ctx, codec, None)
py_stream: Stream = wrap_stream(self, stream, py_codec_context)
self.streams.add_stream(py_stream)
for k, v in kwargs.items():
setattr(py_stream, k, v)
return py_stream
def _add_stream_without_codec_from_template(
self, template: Stream, **kwargs
) -> Stream:
codec_type: cython.int = template.ptr.codecpar.codec_type
if codec_type not in {lib.AVMEDIA_TYPE_ATTACHMENT, lib.AVMEDIA_TYPE_DATA}:
raise ValueError(
f"template stream of type {template.type} has no codec context"
)
stream: cython.pointer[lib.AVStream] = lib.avformat_new_stream(
self.ptr, cython.NULL
)
if stream == cython.NULL:
raise MemoryError("Could not allocate stream")
err_check(lib.avcodec_parameters_copy(stream.codecpar, template.ptr.codecpar))
# Mirror basic properties that are not derived from a codec context.
stream.time_base = template.ptr.time_base
stream.start_time = template.ptr.start_time
stream.duration = template.ptr.duration
stream.disposition = template.ptr.disposition
py_stream: Stream = wrap_stream(self, stream, None)
self.streams.add_stream(py_stream)
py_stream.metadata = dict(template.metadata)
for k, v in kwargs.items():
setattr(py_stream, k, v)
return py_stream
def add_attachment(self, name: str, mimetype: str, data: bytes):
"""
Create an attachment stream and embed its payload into the container header.
- Only supported by formats that support attachments (e.g. Matroska).
- No per-packet muxing is required; attachments are written at header time.
"""
# Create stream with no codec (attachments are codec-less).
stream: cython.pointer[lib.AVStream] = lib.avformat_new_stream(
self.ptr, cython.NULL
)
if stream == cython.NULL:
raise MemoryError("Could not allocate stream")
stream.codecpar.codec_type = lib.AVMEDIA_TYPE_ATTACHMENT
stream.codecpar.codec_id = lib.AV_CODEC_ID_NONE
# Allocate and copy payload into codecpar.extradata.
payload_size: cython.size_t = len(data)
if payload_size:
buf = cython.cast(cython.p_uchar, lib.av_malloc(payload_size + 1))
if buf == cython.NULL:
raise MemoryError("Could not allocate attachment data")
# Copy bytes.
for i in range(payload_size):
buf[i] = data[i]
buf[payload_size] = 0
stream.codecpar.extradata = cython.cast(cython.p_uchar, buf)
stream.codecpar.extradata_size = payload_size
# Wrap as user-land stream.
meta_ptr = cython.address(stream.metadata)
err_check(lib.av_dict_set(meta_ptr, b"filename", name.encode(), 0))
mime_bytes = mimetype.encode()
err_check(lib.av_dict_set(meta_ptr, b"mimetype", mime_bytes, 0))
py_stream: Stream = wrap_stream(self, stream, None)
self.streams.add_stream(py_stream)
return py_stream
def add_data_stream(self, codec_name=None, options: dict | None = None):
"""add_data_stream(codec_name=None)
Creates a new data stream and returns it.
:param codec_name: Optional name of the data codec (e.g. 'klv')
:type codec_name: str | None
:param dict options: Stream options.
:rtype: The new :class:`~av.data.stream.DataStream`.
"""
codec: cython.pointer[cython.const[lib.AVCodec]] = cython.NULL
if codec_name is not None:
codec = lib.avcodec_find_encoder_by_name(codec_name.encode())
if codec == cython.NULL:
raise ValueError(f"Unknown data codec: {codec_name}")
# Assert that this format supports the requested codec
if not lib.avformat_query_codec(
self.ptr.oformat, codec.id, lib.FF_COMPLIANCE_NORMAL
):
raise ValueError(
f"{self.format.name!r} format does not support {codec_name!r} codec"
)
# Create new stream in the AVFormatContext
stream: cython.pointer[lib.AVStream] = lib.avformat_new_stream(self.ptr, codec)
if stream == cython.NULL:
raise MemoryError("Could not allocate stream")
# Set up codec context if we have a codec
ctx: cython.pointer[lib.AVCodecContext] = cython.NULL
if codec != cython.NULL:
ctx = lib.avcodec_alloc_context3(codec)
if ctx == cython.NULL:
raise MemoryError("Could not allocate codec context")
# Some formats want stream headers to be separate
if self.ptr.oformat.flags & lib.AVFMT_GLOBALHEADER:
ctx.flags |= lib.AV_CODEC_FLAG_GLOBAL_HEADER
# Initialize stream codec parameters
err_check(lib.avcodec_parameters_from_context(stream.codecpar, ctx))
else:
# For raw data streams, just set the codec type
stream.codecpar.codec_type = lib.AVMEDIA_TYPE_DATA
# Construct the user-land stream
py_codec_context: CodecContext | None = None
if ctx != cython.NULL:
py_codec_context = wrap_codec_context(ctx, codec, None)
py_stream: Stream = wrap_stream(self, stream, py_codec_context)
self.streams.add_stream(py_stream)
if options:
py_stream.options.update(options)
return py_stream
@cython.ccall
def start_encoding(self):
"""Write the file header! Called automatically."""
if self._started:
return
# TODO: This does NOT handle options coming from 3 sources.
# This is only a rough approximation of what would be cool to do.
used_options: set = set()
stream: Stream
# Finalize and open all streams.
for stream in self.streams:
ctx = stream.codec_context
# Skip codec context handling for streams without codecs (e.g. data/attachments).
if ctx is None:
if stream.type not in {"data", "attachment"}:
raise ValueError(f"Stream {stream.index} has no codec context")
else:
if not ctx.is_open:
for k, v in self.options.items():
ctx.options.setdefault(k, v)
ctx.open()
# Track option consumption.
for k in self.options:
if k not in ctx.options:
used_options.add(k)
stream._finalize_for_output()
# Open the output file, if needed.
name_obj: bytes = os.fsencode(self.name if self.file is None else "")
name: cython.p_char = name_obj
if self.ptr.pb == cython.NULL and not self.ptr.oformat.flags & lib.AVFMT_NOFILE:
err_check(
lib.avio_open(cython.address(self.ptr.pb), name, lib.AVIO_FLAG_WRITE)
)
# Copy the metadata dict.
dict_to_avdict(
cython.address(self.ptr.metadata),
self.metadata,
encoding=self.metadata_encoding,
errors=self.metadata_errors,
)
all_options: _Dictionary = Dictionary(self.options, self.container_options)
options: _Dictionary = all_options.copy()
self.err_check(lib.avformat_write_header(self.ptr, cython.address(options.ptr)))
# Track option usage...
for k in all_options:
if k not in options:
used_options.add(k)
# ... and warn if any weren't used.
unused_options = {
k: v for k, v in self.options.items() if k not in used_options
}
if unused_options:
import logging
log = logging.getLogger(__name__)
log.warning("Some options were not used: %s" % unused_options)
self._started = True
@property
def supported_codecs(self):
    """
    The set of codec names that this container's output format can hold.

    Every codec yielded by ``av_codec_iterate`` is tested with
    ``avformat_query_codec`` at ``FF_COMPLIANCE_NORMAL``; only codecs for
    which that query returns exactly ``1`` are included.
    """
    names: set = set()
    # Opaque cursor that av_codec_iterate advances on each call.
    cursor: cython.p_void = cython.NULL
    candidate: cython.pointer[cython.const[lib.AVCodec]] = lib.av_codec_iterate(
        cython.address(cursor)
    )
    verdict: cython.int

    # A NULL codec marks the end of the iteration.
    while candidate != cython.NULL:
        verdict = lib.avformat_query_codec(
            self.ptr.oformat, candidate.id, lib.FF_COMPLIANCE_NORMAL
        )
        if verdict == 1:
            names.add(candidate.name)
        candidate = lib.av_codec_iterate(cython.address(cursor))

    return names
@property
def default_video_codec(self):
    """
    Name of the video codec recommended by default for this container's format.
    """
    codec_id = self.format.optr.video_codec
    return lib.avcodec_get_name(codec_id)
@property
def default_audio_codec(self):
    """
    Name of the audio codec recommended by default for this container's format.
    """
    codec_id = self.format.optr.audio_codec
    return lib.avcodec_get_name(codec_id)
@property
def default_subtitle_codec(self):
    """
    Name of the subtitle codec recommended by default for this container's format.
    """
    codec_id = self.format.optr.subtitle_codec
    return lib.avcodec_get_name(codec_id)
def close(self):
    """Close this output container by delegating to ``close_output``."""
    close_output(self)
def mux(self, packets):
    """
    Mux a single packet, or each packet of an iterable, in order.

    :param packets: one ``Packet``, or an iterable of ``Packet`` objects.
        Every packet is passed to :meth:`mux_one`.
    """
    # Both a lone Packet and a batch are accepted, which eases the move to the
    # newer encode API that hands back a sequence of packets. A lone packet is
    # wrapped so that a single loop serves both cases.
    batch = (packets,) if isinstance(packets, Packet) else packets
    for item in batch:
        self.mux_one(item)
def mux_one(self, packet: Packet):
    """
    Write one packet to the output through ``av_interleaved_write_frame``.

    Encoding is started first (a no-op once it has been started), and the
    packet's timing is rebased onto the time base of its destination stream.

    :param packet: the packet to write; its stream index must refer to one
        of this container's streams.
    :raises ValueError: if the packet's stream index is negative or not less
        than the number of streams in the container.
    """
    self.start_encoding()

    # The stream index must name one of this container's streams.
    if not (
        packet.ptr.stream_index >= 0
        and cython.cast(cython.uint, packet.ptr.stream_index) < self.ptr.nb_streams
    ):
        raise ValueError("Bad Packet stream_index.")

    # Rebase the packet's timing onto the destination stream's time base.
    target: cython.pointer[lib.AVStream] = self.ptr.streams[packet.ptr.stream_index]
    packet._rebase_time(target.time_base)

    # `av_interleaved_write_frame()` takes ownership of the reference it is
    # given, so it gets a fresh reference rather than the caller's packet.
    self.err_check(lib.av_packet_ref(self.packet_ptr, packet.ptr))

    status: cython.int
    with cython.nogil:
        status = lib.av_interleaved_write_frame(self.ptr, self.packet_ptr)
    self.err_check(status)

View File

@@ -0,0 +1,62 @@
from fractions import Fraction
from typing import Iterable, Sequence, TypeVar, Union, overload

from av.audio import _AudioCodecName
from av.audio.stream import AudioStream
from av.packet import Packet
from av.stream import AttachmentStream, DataStream, Stream
from av.subtitles.stream import SubtitleStream
from av.video import _VideoCodecName
from av.video.stream import VideoStream

from .core import Container
# Lets add_stream_from_template return the same Stream subtype as its template.
_StreamT = TypeVar("_StreamT", bound=Stream)


class OutputContainer(Container):
    # Typing stub for the output container.

    def __enter__(self) -> OutputContainer: ...

    # add_stream overloads: the type of `codec_name` selects the returned
    # stream type. The plain-`str` overload is the fallback for names that
    # match neither the audio nor the video codec-name type.
    @overload
    def add_stream(
        self,
        codec_name: _AudioCodecName,
        rate: int | None = None,
        options: dict[str, str] | None = None,
        **kwargs,
    ) -> AudioStream: ...
    @overload
    def add_stream(
        self,
        codec_name: _VideoCodecName,
        rate: Fraction | int | None = None,
        options: dict[str, str] | None = None,
        **kwargs,
    ) -> VideoStream: ...
    @overload
    def add_stream(
        self,
        codec_name: str,
        rate: Fraction | int | None = None,
        options: dict[str, str] | None = None,
        **kwargs,
    ) -> VideoStream | AudioStream | SubtitleStream: ...

    def add_stream_from_template(
        self,
        template: _StreamT,
        opaque: bool | None = None,
        **kwargs,
    ) -> _StreamT: ...
    def add_attachment(
        self,
        name: str,
        mimetype: str,
        data: bytes,
    ) -> AttachmentStream: ...
    def add_data_stream(
        self,
        codec_name: str | None = None,
        options: dict[str, str] | None = None,
    ) -> DataStream: ...

    def start_encoding(self) -> None: ...
    def close(self) -> None: ...

    # mux() only iterates over a non-Packet argument, so any iterable of
    # packets is accepted, not just a Sequence.
    def mux(self, packets: Packet | Iterable[Packet]) -> None: ...
    def mux_one(self, packet: Packet) -> None: ...

    # Codec names recommended by the container's format, one per media type.
    @property
    def default_video_codec(self) -> str: ...
    @property
    def default_audio_codec(self) -> str: ...
    @property
    def default_subtitle_codec(self) -> str: ...

    # Names of all codecs the container's output format supports.
    @property
    def supported_codecs(self) -> set[str]: ...

Some files were not shown because too many files have changed in this diff Show More