add read me

This commit is contained in:
2026-01-09 10:28:44 +11:00
commit edaf914b73
13417 changed files with 2952119 additions and 0 deletions

View File

@@ -0,0 +1,2 @@
from .frame import VideoFrame
from .stream import VideoStream

View File

@@ -0,0 +1,17 @@
from typing import Literal
from .frame import VideoFrame
from .stream import VideoStream
# Literal type listing the codec-name strings this stub spells out explicitly.
_VideoCodecName = Literal[
    "gif",
    "h264",
    "hevc",
    "libvpx",
    "libx264",
    "mpeg4",
    "png",
    "qtrle",
]
# Names exported by `from av.video import *`.
__all__ = ("VideoFrame", "VideoStream")

View File

@@ -0,0 +1,33 @@
cimport libav as lib
from av.codec.context cimport CodecContext
from av.video.format cimport VideoFormat
from av.video.frame cimport VideoFrame
from av.video.reformatter cimport VideoReformatter
# The get_format callback in AVCodecContext is called by the decoder to pick a format out of a list.
# When we want accelerated decoding, we need to figure out ahead of time what the format should be,
# and find a way to pass that into our callback so we can return it to the decoder. We use the 'opaque'
# user data field in AVCodecContext for that. This is the struct we store a pointer to in that field.
cdef struct AVCodecPrivateData:
    # Pixel format the get_format callback looks for in the decoder's candidate list.
    lib.AVPixelFormat hardware_pix_fmt
    # When true, the callback returns the context's software format if the
    # hardware format is not offered; otherwise it returns AV_PIX_FMT_NONE.
    bint allow_software_fallback
cdef class VideoCodecContext(CodecContext):
    # Backing storage for the struct whose address is stored in AVCodecContext.opaque.
    cdef AVCodecPrivateData _private_data
    # Cached VideoFormat; rebuilt by _build_format() from pix_fmt/width/height.
    cdef VideoFormat _format
    cdef _build_format(self)
    # Both are reset to 0 in __cinit__; no other use is visible in this package chunk.
    cdef int last_w
    cdef int last_h
    # Created on demand when a frame must be converted before encoding.
    cdef readonly VideoReformatter reformatter
    # For encoding.
    cdef readonly int encoded_frame_count
    # For decoding.
    cdef VideoFrame next_frame

View File

@@ -0,0 +1,35 @@
from fractions import Fraction
from typing import Iterator, Literal
from av.codec.context import CodecContext
from av.packet import Packet
from .format import VideoFormat
from .frame import VideoFrame
class VideoCodecContext(CodecContext):
    """Type stub for the video codec context implemented in ``codeccontext.pyx``."""

    format: VideoFormat | None
    width: int
    height: int
    bits_per_coded_sample: int
    pix_fmt: str | None
    framerate: Fraction
    rate: Fraction
    gop_size: int
    sample_aspect_ratio: Fraction | None
    display_aspect_ratio: Fraction | None
    has_b_frames: bool
    max_b_frames: int
    coded_width: int
    coded_height: int
    color_range: int
    color_primaries: int
    color_trc: int
    colorspace: int
    qmin: int
    qmax: int
    type: Literal["video"]

    # Declared `cdef readonly int` in the .pxd, so it is readable (but not
    # writable) from Python; exposed here as a read-only property.
    @property
    def encoded_frame_count(self) -> int: ...

    def encode(self, frame: VideoFrame | None = None) -> list[Packet]: ...
    def encode_lazy(self, frame: VideoFrame | None = None) -> Iterator[Packet]: ...
    def decode(self, packet: Packet | None = None) -> list[VideoFrame]: ...

View File

@@ -0,0 +1,370 @@
cimport libav as lib
from libc.stdint cimport int64_t
from av.codec.context cimport CodecContext
from av.codec.hwaccel cimport HWAccel, HWConfig
from av.error cimport err_check
from av.frame cimport Frame
from av.packet cimport Packet
from av.utils cimport avrational_to_fraction, to_avrational
from av.video.format cimport VideoFormat, get_pix_fmt, get_video_format
from av.video.frame cimport VideoFrame, alloc_video_frame
from av.video.reformatter cimport VideoReformatter
cdef lib.AVPixelFormat _get_hw_format(lib.AVCodecContext *ctx, const lib.AVPixelFormat *pix_fmts) noexcept:
    # In the case where we requested accelerated decoding, the decoder first calls this function
    # with a list that includes both the hardware format and software formats.
    # First we try to pick the hardware format if it's in the list.
    # However, if the decoder fails to initialize the hardware, it will call this function again,
    # with only software formats in pix_fmts. We return ctx->sw_pix_fmt regardless in this case,
    # because that should be in the candidate list. If not, we are out of ideas anyways.
    cdef AVCodecPrivateData* private_data = <AVCodecPrivateData*>ctx.opaque

    # Typed as a C int: this is a `noexcept` callback invoked by the decoder, so the
    # scan must not create or manipulate Python objects for the loop counter.
    cdef int i = 0

    # The candidate list is terminated by AV_PIX_FMT_NONE.
    while pix_fmts[i] != lib.AV_PIX_FMT_NONE:
        if pix_fmts[i] == private_data.hardware_pix_fmt:
            return pix_fmts[i]
        i += 1

    return ctx.sw_pix_fmt if private_data.allow_software_fallback else lib.AV_PIX_FMT_NONE
cdef class VideoCodecContext(CodecContext):
    """Codec context specialised for video.

    Keeps a cached :class:`VideoFormat` in sync with the pixel format and
    dimensions of the underlying context, optionally configures hardware
    decoding, and exposes video-related fields of the context as properties.
    """

    def __cinit__(self, *args, **kwargs):
        self.last_w = 0
        self.last_h = 0

    cdef _init(self, lib.AVCodecContext *ptr, const lib.AVCodec *codec, HWAccel hwaccel):
        # Base-class setup first; the hardware-decoding wiring below relies on
        # self.ptr / self.codec / self.hwaccel being available afterwards.
        CodecContext._init(self, ptr, codec, hwaccel) # TODO: Can this be `super`?

        if hwaccel is not None:
            try:
                self.hwaccel_ctx = hwaccel.create(self.codec)
                self.ptr.hw_device_ctx = lib.av_buffer_ref(self.hwaccel_ctx.ptr)
                self.ptr.pix_fmt = self.hwaccel_ctx.config.ptr.pix_fmt
                self.ptr.get_format = _get_hw_format
                # The callback reads these two fields back through ctx.opaque.
                self._private_data.hardware_pix_fmt = self.hwaccel_ctx.config.ptr.pix_fmt
                self._private_data.allow_software_fallback = self.hwaccel.allow_software_fallback
                self.ptr.opaque = &self._private_data
            except NotImplementedError:
                # Some streams may not have a hardware decoder. For example, many action
                # cam videos have a low resolution mjpeg stream, which is usually not
                # compatible with hardware decoders.
                # The user may have passed in a hwaccel because they want to decode the main
                # stream with it, so we shouldn't abort even if we find a stream that can't
                # be HW decoded.
                # If the user wants to make sure hwaccel is actually used, they can check with the
                # is_hwaccel() function on each stream's codec context.
                self.hwaccel_ctx = None

        self._build_format()
        self.encoded_frame_count = 0

    cdef _prepare_frames_for_encode(self, Frame input):
        # Returns a one-element list: [None] when no frame is given, otherwise the
        # frame converted (if needed) to this context's format and dimensions.
        if not input:
            return [None]

        cdef VideoFrame vframe = input

        if self._format is None:
            raise ValueError("self._format is None, cannot encode")

        # Reformat if it doesn't match.
        if (
            vframe.format.pix_fmt != self._format.pix_fmt or
            vframe.width != self.ptr.width or
            vframe.height != self.ptr.height
        ):
            if not self.reformatter:
                self.reformatter = VideoReformatter()

            vframe = self.reformatter.reformat(
                vframe, self.ptr.width, self.ptr.height, self._format
            )

        # There is no pts, so create one.
        if vframe.ptr.pts == lib.AV_NOPTS_VALUE:
            vframe.ptr.pts = <int64_t>self.encoded_frame_count

        self.encoded_frame_count += 1

        return [vframe]

    cdef Frame _alloc_next_frame(self):
        return alloc_video_frame()

    cdef _setup_decoded_frame(self, Frame frame, Packet packet):
        CodecContext._setup_decoded_frame(self, frame, packet)
        cdef VideoFrame vframe = frame
        vframe._init_user_attributes()

    cdef _transfer_hwframe(self, Frame frame):
        # Returns `frame` unchanged unless it is in the hardware pixel format, in
        # which case its data is transferred into a newly allocated frame.
        if self.hwaccel_ctx is None:
            return frame

        if frame.ptr.format != self.hwaccel_ctx.config.ptr.pix_fmt:
            # If we get a software frame, that means we are in software fallback mode, and don't actually
            # need to transfer.
            return frame

        cdef Frame frame_sw

        frame_sw = self._alloc_next_frame()

        err_check(lib.av_hwframe_transfer_data(frame_sw.ptr, frame.ptr, 0))

        # TODO: Is there anything else to transfer?!
        frame_sw.pts = frame.pts

        return frame_sw

    cdef _build_format(self):
        # Refresh the cached VideoFormat from the context's current pix_fmt and size.
        self._format = get_video_format(<lib.AVPixelFormat>self.ptr.pix_fmt, self.ptr.width, self.ptr.height)

    @property
    def format(self):
        """
        The cached :class:`VideoFormat` of this context, or ``None`` when no pixel format is set.

        :type: VideoFormat | None
        """
        return self._format

    @format.setter
    def format(self, VideoFormat format):
        self.ptr.pix_fmt = format.pix_fmt
        self.ptr.width = format.width
        self.ptr.height = format.height
        self._build_format() # Kinda wasteful.

    @property
    def width(self):
        """
        The width of the video, or ``0`` when the context pointer is unset.

        :type: int
        """
        if self.ptr is NULL:
            return 0
        return self.ptr.width

    @width.setter
    def width(self, unsigned int value):
        self.ptr.width = value
        self._build_format()

    @property
    def height(self):
        """
        The height of the video, or ``0`` when the context pointer is unset.

        :type: int
        """
        if self.ptr is NULL:
            return 0
        return self.ptr.height

    @height.setter
    def height(self, unsigned int value):
        self.ptr.height = value
        self._build_format()

    @property
    def bits_per_coded_sample(self):
        """
        The number of bits per sample in the coded words. It's mandatory for this to be set for some formats to decode properly.

        Wraps :ffmpeg:`AVCodecContext.bits_per_coded_sample`.

        :type: int
        """
        return self.ptr.bits_per_coded_sample

    @bits_per_coded_sample.setter
    def bits_per_coded_sample(self, int value):
        if self.is_encoder:
            raise ValueError("Not supported for encoders")

        self.ptr.bits_per_coded_sample = value
        self._build_format()

    @property
    def pix_fmt(self):
        """
        The pixel format's name.

        :type: str | None
        """
        return getattr(self._format, "name", None)

    @pix_fmt.setter
    def pix_fmt(self, value):
        self.ptr.pix_fmt = get_pix_fmt(value)
        self._build_format()

    @property
    def framerate(self):
        """
        The frame rate, in frames per second.

        :type: fractions.Fraction
        """
        return avrational_to_fraction(&self.ptr.framerate)

    @framerate.setter
    def framerate(self, value):
        to_avrational(value, &self.ptr.framerate)

    @property
    def rate(self):
        """Another name for :attr:`framerate`."""
        return self.framerate

    @rate.setter
    def rate(self, value):
        self.framerate = value

    @property
    def gop_size(self):
        """
        Sets the number of frames between keyframes. Used only for encoding.

        Reading or writing this on a decoder raises :class:`RuntimeError`.

        :type: int
        """
        if self.is_decoder:
            raise RuntimeError("Cannot access 'gop_size' as a decoder")
        return self.ptr.gop_size

    @gop_size.setter
    def gop_size(self, int value):
        if self.is_decoder:
            raise RuntimeError("Cannot access 'gop_size' as a decoder")
        self.ptr.gop_size = value

    @property
    def sample_aspect_ratio(self):
        """
        The sample aspect ratio stored on the codec context.

        :type: fractions.Fraction | None
        """
        return avrational_to_fraction(&self.ptr.sample_aspect_ratio)

    @sample_aspect_ratio.setter
    def sample_aspect_ratio(self, value):
        to_avrational(value, &self.ptr.sample_aspect_ratio)

    @property
    def display_aspect_ratio(self):
        """
        The display aspect ratio: width and height scaled by the sample aspect ratio, then reduced.

        :type: fractions.Fraction | None
        """
        cdef lib.AVRational dar

        lib.av_reduce(
            &dar.num, &dar.den,
            self.ptr.width * self.ptr.sample_aspect_ratio.num,
            self.ptr.height * self.ptr.sample_aspect_ratio.den, 1024*1024)

        return avrational_to_fraction(&dar)

    @property
    def has_b_frames(self):
        """
        :type: bool
        """
        return bool(self.ptr.has_b_frames)

    @property
    def coded_width(self):
        """
        :type: int
        """
        return self.ptr.coded_width

    @property
    def coded_height(self):
        """
        :type: int
        """
        return self.ptr.coded_height

    @property
    def color_range(self):
        """
        Describes the signal range of the colorspace.

        Wraps :ffmpeg:`AVCodecContext.color_range`.

        :type: int
        """
        return self.ptr.color_range

    @color_range.setter
    def color_range(self, value):
        self.ptr.color_range = value

    @property
    def color_primaries(self):
        """
        Describes the RGB/XYZ matrix of the colorspace.

        Wraps :ffmpeg:`AVCodecContext.color_primaries`.

        :type: int
        """
        return self.ptr.color_primaries

    @color_primaries.setter
    def color_primaries(self, value):
        self.ptr.color_primaries = value

    @property
    def color_trc(self):
        """
        Describes the linearization function (a.k.a. transformation characteristics) of the colorspace.

        Wraps :ffmpeg:`AVCodecContext.color_trc`.

        :type: int
        """
        return self.ptr.color_trc

    @color_trc.setter
    def color_trc(self, value):
        self.ptr.color_trc = value

    @property
    def colorspace(self):
        """
        Describes the YUV/RGB transformation matrix of the colorspace.

        Wraps :ffmpeg:`AVCodecContext.colorspace`.

        :type: int
        """
        return self.ptr.colorspace

    @colorspace.setter
    def colorspace(self, value):
        self.ptr.colorspace = value

    @property
    def max_b_frames(self):
        """
        The maximum run of consecutive B frames when encoding a video.

        :type: int
        """
        return self.ptr.max_b_frames

    @max_b_frames.setter
    def max_b_frames(self, value):
        self.ptr.max_b_frames = value

    @property
    def qmin(self):
        """
        The minimum quantiser value of an encoded stream.

        Wraps :ffmpeg:`AVCodecContext.qmin`.

        :type: int
        """
        return self.ptr.qmin

    @qmin.setter
    def qmin(self, value):
        self.ptr.qmin = value

    @property
    def qmax(self):
        """
        The maximum quantiser value of an encoded stream.

        Wraps :ffmpeg:`AVCodecContext.qmax`.

        :type: int
        """
        return self.ptr.qmax

    @qmax.setter
    def qmax(self, value):
        self.ptr.qmax = value

View File

@@ -0,0 +1,27 @@
cimport libav as lib
cdef class VideoFormat:
    # The C pixel format value and its descriptor as returned by av_pix_fmt_desc_get().
    cdef lib.AVPixelFormat pix_fmt
    cdef const lib.AVPixFmtDescriptor *ptr
    # Dimensions attached to this format; 0 means "not specified" (see __repr__).
    cdef readonly unsigned int width, height
    # One VideoFormatComponent per component in the descriptor.
    cdef readonly tuple components
    cdef _init(self, lib.AVPixelFormat pix_fmt, unsigned int width, unsigned int height)
    # `=?` marks an optional argument whose default is given in the implementation.
    cpdef chroma_width(self, int luma_width=?)
    cpdef chroma_height(self, int luma_height=?)
cdef class VideoFormatComponent:
    # Parent format this component belongs to.
    cdef VideoFormat format
    # Position of this component within the parent descriptor's `comp` array.
    cdef readonly unsigned int index
    # Points at the parent descriptor's `comp[index]` entry.
    cdef const lib.AVComponentDescriptor *ptr
# Builds a VideoFormat for a C pixel format and dimensions; the implementation
# returns None for AV_PIX_FMT_NONE.
cdef VideoFormat get_video_format(lib.AVPixelFormat c_format, unsigned int width, unsigned int height)
# Looks a pixel format up by name. The `except` clause declares AV_PIX_FMT_NONE
# as the return value that signals a raised Python exception.
cdef lib.AVPixelFormat get_pix_fmt(const char *name) except lib.AV_PIX_FMT_NONE

View File

@@ -0,0 +1,30 @@
class VideoFormat:
    """Type stub for the pixel-format description implemented in ``format.pyx``."""

    name: str
    bits_per_pixel: int
    padded_bits_per_pixel: int
    is_big_endian: bool
    has_palette: bool
    is_bit_stream: bool
    is_planar: bool
    @property
    def is_rgb(self) -> bool: ...
    @property
    def is_bayer(self) -> bool: ...
    width: int
    height: int
    components: tuple[VideoFormatComponent, ...]
    # The implementation also accepts an existing VideoFormat as `name` and
    # copies its pixel format (and its dimensions when width/height are 0).
    def __init__(
        self, name: str | VideoFormat, width: int = 0, height: int = 0
    ) -> None: ...
    # The implementation defines __int__, returning the C pixel format value.
    def __int__(self) -> int: ...
    def chroma_width(self, luma_width: int = 0) -> int: ...
    def chroma_height(self, luma_height: int = 0) -> int: ...
class VideoFormatComponent:
    """Type stub for a single component (channel) of a :class:`VideoFormat`."""

    # Declared `cdef readonly unsigned int index` in the .pxd, so it is readable
    # from Python; exposed here as a read-only property.
    @property
    def index(self) -> int: ...
    plane: int
    bits: int
    is_alpha: bool
    is_luma: bool
    is_chroma: bool
    width: int
    height: int
    def __init__(self, format: VideoFormat, index: int) -> None: ...

View File

@@ -0,0 +1,198 @@
# Unique marker passed to VideoFormat.__cinit__ to skip its normal initialisation.
cdef object _cinit_bypass_sentinel = object()


cdef VideoFormat get_video_format(lib.AVPixelFormat c_format, unsigned int width, unsigned int height):
    """Return a VideoFormat for ``c_format`` and the given size, or None for AV_PIX_FMT_NONE."""
    cdef VideoFormat video_format

    if c_format != lib.AV_PIX_FMT_NONE:
        # Bypass __cinit__'s name lookup and initialise straight from the C value.
        video_format = VideoFormat.__new__(VideoFormat, _cinit_bypass_sentinel)
        video_format._init(c_format, width, height)
        return video_format

    return None
cdef lib.AVPixelFormat get_pix_fmt(const char *name) except lib.AV_PIX_FMT_NONE:
    """Look up a pixel format by name with lib.av_get_pix_fmt; raise ValueError if it is unknown."""
    cdef lib.AVPixelFormat resolved = lib.av_get_pix_fmt(name)

    if resolved != lib.AV_PIX_FMT_NONE:
        return resolved

    raise ValueError("not a pixel format: %r" % name)
cdef class VideoFormat:
    """Description of a pixel format, optionally with a width and height.

    ``name`` may be a pixel format name or another :class:`VideoFormat`; in the
    latter case its pixel format is copied and its dimensions are used wherever
    ``width`` / ``height`` are 0.

        >>> format = VideoFormat('rgb24')
        >>> format.name
        'rgb24'

    """

    def __cinit__(self, name, width=0, height=0):
        # Internal construction path: get_video_format() calls _init() itself.
        if name is _cinit_bypass_sentinel:
            return

        cdef VideoFormat other
        if isinstance(name, VideoFormat):
            other = <VideoFormat>name
            self._init(other.pix_fmt, width or other.width, height or other.height)
            return

        cdef lib.AVPixelFormat pix_fmt = get_pix_fmt(name)
        self._init(pix_fmt, width, height)

    cdef _init(self, lib.AVPixelFormat pix_fmt, unsigned int width, unsigned int height):
        # Populate all fields from a C pixel format value and dimensions.
        # Raises ValueError when the pixel format has no descriptor.
        self.pix_fmt = pix_fmt
        self.ptr = lib.av_pix_fmt_desc_get(pix_fmt)
        # The descriptor is dereferenced below and by every property, so refuse
        # to continue without one instead of reading through a NULL pointer.
        if self.ptr == NULL:
            raise ValueError("no descriptor for pixel format %d" % <int>pix_fmt)
        self.width = width
        self.height = height
        self.components = tuple(
            VideoFormatComponent(self, i)
            for i in range(self.ptr.nb_components)
        )

    def __repr__(self):
        if self.width or self.height:
            return f"<av.{self.__class__.__name__} {self.name}, {self.width}x{self.height}>"
        else:
            return f"<av.{self.__class__.__name__} {self.name}>"

    def __int__(self):
        return int(self.pix_fmt)

    @property
    def name(self):
        """Canonical name of the pixel format."""
        return <str>self.ptr.name

    @property
    def bits_per_pixel(self):
        """Result of ``av_get_bits_per_pixel`` for this format's descriptor."""
        return lib.av_get_bits_per_pixel(self.ptr)

    @property
    def padded_bits_per_pixel(self):
        """Result of ``av_get_padded_bits_per_pixel`` for this format's descriptor."""
        return lib.av_get_padded_bits_per_pixel(self.ptr)

    @property
    def is_big_endian(self):
        """Pixel format is big-endian."""
        return bool(self.ptr.flags & lib.AV_PIX_FMT_FLAG_BE)

    @property
    def has_palette(self):
        """Pixel format has a palette in data[1], values are indexes in this palette."""
        return bool(self.ptr.flags & lib.AV_PIX_FMT_FLAG_PAL)

    @property
    def is_bit_stream(self):
        """All values of a component are bit-wise packed end to end."""
        return bool(self.ptr.flags & lib.AV_PIX_FMT_FLAG_BITSTREAM)

    # Skipping PIX_FMT_HWACCEL
    # """Pixel format is an HW accelerated format."""

    @property
    def is_planar(self):
        """At least one pixel component is not in the first data plane."""
        return bool(self.ptr.flags & lib.AV_PIX_FMT_FLAG_PLANAR)

    @property
    def is_rgb(self):
        """The pixel format contains RGB-like data (as opposed to YUV/grayscale)."""
        return bool(self.ptr.flags & lib.AV_PIX_FMT_FLAG_RGB)

    @property
    def is_bayer(self):
        """The pixel format contains Bayer data."""
        return bool(self.ptr.flags & lib.AV_PIX_FMT_FLAG_BAYER)

    cpdef chroma_width(self, int luma_width=0):
        """chroma_width(luma_width=0)

        Width of a chroma plane relative to a luma plane.

        :param int luma_width: Width of the luma plane; defaults to ``self.width``.

        """
        luma_width = luma_width or self.width
        # Negate, shift, negate: divides by 2**log2_chroma_w rounding up.
        return -((-luma_width) >> self.ptr.log2_chroma_w) if luma_width else 0

    cpdef chroma_height(self, int luma_height=0):
        """chroma_height(luma_height=0)

        Height of a chroma plane relative to a luma plane.

        :param int luma_height: Height of the luma plane; defaults to ``self.height``.

        """
        luma_height = luma_height or self.height
        # Negate, shift, negate: divides by 2**log2_chroma_h rounding up.
        return -((-luma_height) >> self.ptr.log2_chroma_h) if luma_height else 0
cdef class VideoFormatComponent:
    """A single component (channel) of a :class:`VideoFormat`.

    :param VideoFormat format: The format this component belongs to.
    :param int index: Position of the component in the format's descriptor.
    """

    def __cinit__(self, VideoFormat format, size_t index):
        self.format = format
        self.index = index
        self.ptr = &format.ptr.comp[index]

    @property
    def plane(self):
        """The index of the plane which contains this component."""
        return self.ptr.plane

    @property
    def bits(self):
        """Number of bits in the component."""
        return self.ptr.depth

    @property
    def is_alpha(self):
        """Is this component an alpha channel?"""
        # Alpha is taken to be the last component of 2- and 4-component formats.
        return ((self.index == 1 and self.format.ptr.nb_components == 2) or
                (self.index == 3 and self.format.ptr.nb_components == 4))

    @property
    def is_luma(self):
        """Is this component a luma channel?"""
        return self.index == 0 and (
            self.format.ptr.nb_components == 1 or
            self.format.ptr.nb_components == 2 or
            not self.format.is_rgb
        )

    @property
    def is_chroma(self):
        """Is this component a chroma channel?

        :type: bool
        """
        # Normalise to a real bool, like the other ``is_*`` flags; the bare
        # expression could evaluate to the log2 shift value itself.
        return bool(
            (self.index == 1 or self.index == 2) and
            (self.format.ptr.log2_chroma_w or self.format.ptr.log2_chroma_h)
        )

    @property
    def width(self):
        """The width of this component's plane.

        Requires the parent :class:`VideoFormat` to have a width.

        """
        return self.format.chroma_width() if self.is_chroma else self.format.width

    @property
    def height(self):
        """The height of this component's plane.

        Requires the parent :class:`VideoFormat` to have a height.

        """
        return self.format.chroma_height() if self.is_chroma else self.format.height
# Collect the name of every pixel format descriptor the library enumerates.
names = set()
cdef const lib.AVPixFmtDescriptor *desc = lib.av_pix_fmt_desc_next(NULL)
while desc != NULL:
    names.add(desc.name)
    # Passing the previous descriptor yields the next one; NULL ends the walk.
    desc = lib.av_pix_fmt_desc_next(desc)

View File

@@ -0,0 +1,22 @@
cimport libav as lib
from libc.stdint cimport uint8_t
from av.frame cimport Frame
from av.video.format cimport VideoFormat
from av.video.reformatter cimport VideoReformatter
cdef class VideoFrame(Frame):
    # This is the buffer that is used to back everything in the AVFrame.
    # We don't ever actually access it directly.
    cdef uint8_t *_buffer
    # NOTE(review): presumably keeps a NumPy-backed buffer alive; the
    # implementation is not shown in this view -- confirm.
    cdef object _np_buffer
    cdef VideoReformatter reformatter
    # The VideoFormat of this frame, readable from Python.
    cdef readonly VideoFormat format
    cdef _init(self, lib.AVPixelFormat format, unsigned int width, unsigned int height)
    cdef _init_user_attributes(self)
    # cpdef: callable from both Cython and Python.
    cpdef save(self, object filepath)
# Module-level factory for VideoFrame objects; used by the codec context and the
# reformatter to obtain a frame whose storage is then set up via _init().
cdef VideoFrame alloc_video_frame()

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,86 @@
from enum import IntEnum
from pathlib import Path
from typing import Any, ClassVar, Union
import numpy as np
from av.frame import Frame
from .format import VideoFormat
from .plane import VideoPlane
# Union of the ndarray element types used in the array conversion signatures
# below: uint8, uint16, float16 and float32.
_SupportedNDarray = Union[
    np.ndarray[Any, np.dtype[np.uint8]],
    np.ndarray[Any, np.dtype[np.uint16]],
    np.ndarray[Any, np.dtype[np.float16]],
    np.ndarray[Any, np.dtype[np.float32]],
]
# NOTE(review): presumably the pixel format names supported by the ndarray
# conversions; the implementation is not shown in this view -- confirm.
supported_np_pix_fmts: set[str]
class PictureType(IntEnum):
    """Integer codes for picture (frame) types.

    NOTE(review): the member names look like FFmpeg's ``AVPictureType``
    values (I/P/B frames, etc.); confirm against the implementation.
    """

    NONE = 0
    I = 1
    P = 2
    B = 3
    S = 4
    SI = 5
    SP = 6
    BI = 7
class VideoFrame(Frame):
    """Type stub for a single video frame.

    The implementation is not visible alongside this stub, so the comments
    below only restate what the signatures themselves declare.
    """

    format: VideoFormat
    planes: tuple[VideoPlane, ...]
    pict_type: int
    colorspace: int
    color_range: int

    # Read-only attributes.
    @property
    def time(self) -> float: ...
    @property
    def width(self) -> int: ...
    @property
    def height(self) -> int: ...
    @property
    def interlaced_frame(self) -> bool: ...
    @property
    def rotation(self) -> int: ...
    def __init__(
        self, width: int = 0, height: int = 0, format: str = "yuv420p"
    ) -> None: ...

    # Every argument is optional; colorspace and color-range arguments accept
    # either an integer value or a name.
    def reformat(
        self,
        width: int | None = None,
        height: int | None = None,
        format: str | None = None,
        src_colorspace: str | int | None = None,
        dst_colorspace: str | int | None = None,
        interpolation: int | str | None = None,
        src_color_range: int | str | None = None,
        dst_color_range: int | str | None = None,
    ) -> VideoFrame: ...
    def to_rgb(self, **kwargs: Any) -> VideoFrame: ...
    def save(self, filepath: str | Path) -> None: ...
    def to_image(self, **kwargs: Any): ...
    def to_ndarray(
        self, channel_last: bool = False, **kwargs: Any
    ) -> _SupportedNDarray: ...

    # Alternate constructors.
    @staticmethod
    def from_image(img): ...
    @staticmethod
    def from_numpy_buffer(
        array: _SupportedNDarray, format: str = "rgb24", width: int = 0
    ) -> VideoFrame: ...
    @staticmethod
    def from_ndarray(
        array: _SupportedNDarray, format: str = "rgb24", channel_last: bool = False
    ) -> VideoFrame: ...
    @staticmethod
    def from_bytes(
        data: bytes,
        width: int,
        height: int,
        format: str = "rgba",
        flip_horizontal: bool = False,
        flip_vertical: bool = False,
    ) -> VideoFrame: ...

View File

@@ -0,0 +1,8 @@
from av.plane cimport Plane
from av.video.format cimport VideoFormatComponent
cdef class VideoPlane(Plane):
    # Size of the plane's buffer in bytes, computed once in __cinit__.
    cdef readonly size_t buffer_size
    # Dimensions of this plane, taken from the matching format component.
    cdef readonly unsigned int width, height

View File

@@ -0,0 +1,11 @@
from av.plane import Plane
from .frame import VideoFrame
class VideoPlane(Plane):
    """Type stub for one data plane of a :class:`VideoFrame`."""

    # Bytes per horizontal line in this plane.
    line_size: int
    width: int
    height: int
    # Size of the plane's buffer in bytes.
    buffer_size: int
    def __init__(self, frame: VideoFrame, index: int) -> None: ...

View File

@@ -0,0 +1,37 @@
from av.video.frame cimport VideoFrame
cdef class VideoPlane(Plane):
    """One data plane of a :class:`VideoFrame`, with its dimensions and buffer size."""

    def __cinit__(self, VideoFrame frame, int index):
        # The palette plane has no associated component or linesize; set fields manually
        if index == 1 and frame.format.name == "pal8":
            self.width = 256
            self.height = 1
            self.buffer_size = 256 * 4
            return

        # Find the format component stored in the requested plane.
        matched = None
        for comp_idx in range(frame.format.ptr.nb_components):
            if frame.format.ptr.comp[comp_idx].plane == index:
                matched = frame.format.components[comp_idx]
                break

        if matched is None:
            raise RuntimeError(f"could not find plane {index} of {frame.format!r}")

        self.width = matched.width
        self.height = matched.height

        # Sometimes, linesize is negative (and that is meaningful). We are only
        # insisting that the buffer size be based on the extent of linesize, and
        # ignore its direction.
        line_extent = abs(self.frame.ptr.linesize[self.index])
        self.buffer_size = line_extent * self.height

    cdef size_t _buffer_size(self):
        return self.buffer_size

    @property
    def line_size(self):
        """
        Bytes per horizontal line in this plane.

        :type: int
        """
        return self.frame.ptr.linesize[self.index]

View File

@@ -0,0 +1,13 @@
cimport libav as lib
from av.video.frame cimport VideoFrame
cdef class VideoReformatter:
    # The swscale context; (re)obtained through sws_getCachedContext on each
    # conversion and freed in __dealloc__.
    cdef lib.SwsContext *ptr
    # C-level worker behind reformat(): every argument is already resolved to
    # a C value.
    cdef _reformat(self, VideoFrame frame, int width, int height,
                   lib.AVPixelFormat format, int src_colorspace,
                   int dst_colorspace, int interpolation,
                   int src_color_range, int dst_color_range)

View File

@@ -0,0 +1,53 @@
from enum import IntEnum
from typing import cast
from .frame import VideoFrame
class Interpolation(IntEnum):
    """Type stub for the scaling algorithms accepted by the reformatter.

    The runtime values come from the swscale constants, so they are typed as
    ``int`` placeholders here.
    """

    # Spelling matches the runtime member defined in reformatter.pyx.
    FAST_BILINEAR = cast(int, ...)
    BILINEAR = cast(int, ...)
    BICUBIC = cast(int, ...)
    X = cast(int, ...)
    POINT = cast(int, ...)
    AREA = cast(int, ...)
    BICUBLIN = cast(int, ...)
    GAUSS = cast(int, ...)
    SINC = cast(int, ...)
    LANCZOS = cast(int, ...)
    SPLINE = cast(int, ...)
class Colorspace(IntEnum):
    """Type stub for the colorspaces accepted by the reformatter.

    Each colorspace is available under an uppercase and a lowercase name.
    """

    ITU709 = cast(int, ...)
    FCC = cast(int, ...)
    ITU601 = cast(int, ...)
    ITU624 = cast(int, ...)
    SMPTE170M = cast(int, ...)
    SMPTE240M = cast(int, ...)
    DEFAULT = cast(int, ...)
    # Lowercase spellings of the members above.
    itu709 = cast(int, ...)
    fcc = cast(int, ...)
    itu601 = cast(int, ...)
    itu624 = cast(int, ...)
    smpte170m = cast(int, ...)
    smpte240m = cast(int, ...)
    default = cast(int, ...)
class ColorRange(IntEnum):
    """Type stub for the color ranges accepted by the reformatter."""

    UNSPECIFIED = 0
    # MPEG is the limited YUV range, JPEG the full one.
    MPEG = 1
    JPEG = 2
    # Count marker, not a real range.
    NB = 3
class VideoReformatter:
    """Type stub for the object that converts frames between sizes and formats."""

    # Colorspaces, like interpolation and color ranges, are resolved from an
    # enum member, an integer or a member name, so ``str`` is accepted too --
    # matching the signature of ``VideoFrame.reformat``.
    def reformat(
        self,
        frame: VideoFrame,
        width: int | None = None,
        height: int | None = None,
        format: str | None = None,
        src_colorspace: int | str | None = None,
        dst_colorspace: int | str | None = None,
        interpolation: int | str | None = None,
        src_color_range: int | str | None = None,
        dst_color_range: int | str | None = None,
    ) -> VideoFrame: ...

View File

@@ -0,0 +1,222 @@
cimport libav as lib
from libc.stdint cimport uint8_t
from av.error cimport err_check
from av.video.format cimport VideoFormat
from av.video.frame cimport alloc_video_frame
from enum import IntEnum
class Interpolation(IntEnum):
    """Scaling algorithms understood by swscale.

    The annotation on each member is a human-readable description.
    """

    FAST_BILINEAR: "Fast bilinear" = lib.SWS_FAST_BILINEAR
    BILINEAR: "Bilinear" = lib.SWS_BILINEAR
    BICUBIC: "Bicubic" = lib.SWS_BICUBIC
    X: "Experimental" = lib.SWS_X
    POINT: "Nearest neighbor / point" = lib.SWS_POINT
    AREA: "Area averaging" = lib.SWS_AREA
    BICUBLIN: "Luma bicubic / chroma bilinear" = lib.SWS_BICUBLIN
    GAUSS: "Gaussian" = lib.SWS_GAUSS
    SINC: "Sinc" = lib.SWS_SINC
    LANCZOS: "Lanczos" = lib.SWS_LANCZOS
    # The type stub declares SPLINE as well; "Bicubic spline" describes this
    # algorithm, not Lanczos.
    # NOTE(review): assumes SWS_SPLINE is declared in the libav .pxd -- confirm.
    SPLINE: "Bicubic spline" = lib.SWS_SPLINE
class Colorspace(IntEnum):
    """Colorspaces understood by swscale (the ``SWS_CS_*`` constants).

    Each colorspace is available under an uppercase and a lowercase name that
    share one value.
    """

    ITU709 = lib.SWS_CS_ITU709
    FCC = lib.SWS_CS_FCC
    ITU601 = lib.SWS_CS_ITU601
    ITU624 = lib.SWS_CS_ITU624
    SMPTE170M = lib.SWS_CS_SMPTE170M
    SMPTE240M = lib.SWS_CS_SMPTE240M
    DEFAULT = lib.SWS_CS_DEFAULT
    # Lowercase for b/c.
    itu709 = lib.SWS_CS_ITU709
    fcc = lib.SWS_CS_FCC
    itu601 = lib.SWS_CS_ITU601
    itu624 = lib.SWS_CS_ITU624
    smpte170m = lib.SWS_CS_SMPTE170M
    smpte240m = lib.SWS_CS_SMPTE240M
    default = lib.SWS_CS_DEFAULT
class ColorRange(IntEnum):
    """Color ranges (the ``AVCOL_RANGE_*`` constants).

    The annotation on each member is a human-readable description.
    """

    UNSPECIFIED: "Unspecified" = lib.AVCOL_RANGE_UNSPECIFIED
    MPEG: "MPEG (limited) YUV range, 219*2^(n-8)" = lib.AVCOL_RANGE_MPEG
    JPEG: "JPEG (full) YUV range, 2^n-1" = lib.AVCOL_RANGE_JPEG
    NB: "Not part of ABI" = lib.AVCOL_RANGE_NB
def _resolve_enum_value(value, enum_class, default):
# Helper function to resolve enum values from different input types.
if value is None:
return default
if isinstance(value, enum_class):
return value.value
if isinstance(value, int):
return value
if isinstance(value, str):
return enum_class[value].value
raise ValueError(f"Cannot convert {value} to {enum_class.__name__}")
cdef class VideoReformatter:
    """An object for reformatting size and pixel format of :class:`.VideoFrame`.

    It is most efficient to have a reformatter object for each set of parameters
    you will use as calling :meth:`reformat` will reconfigure the internal object.

    """

    def __dealloc__(self):
        with nogil:
            lib.sws_freeContext(self.ptr)

    def reformat(self, VideoFrame frame not None, width=None, height=None,
                 format=None, src_colorspace=None, dst_colorspace=None,
                 interpolation=None, src_color_range=None,
                 dst_color_range=None):
        """Create a new :class:`VideoFrame` with the given width/height/format/colorspace.

        Returns the same frame untouched if nothing needs to be done to it.

        :param int width: New width, or ``None`` for the same width.
        :param int height: New height, or ``None`` for the same height.
        :param format: New format, or ``None`` for the same format.
        :type format: :class:`.VideoFormat` or ``str``
        :param src_colorspace: Current colorspace, or ``None`` for the frame colorspace.
        :type src_colorspace: :class:`Colorspace` or ``str``
        :param dst_colorspace: Desired colorspace, or ``None`` for the frame colorspace.
        :type dst_colorspace: :class:`Colorspace` or ``str``
        :param interpolation: The interpolation method to use, or ``None`` for ``BILINEAR``.
        :type interpolation: :class:`Interpolation` or ``str``
        :param src_color_range: Current color range, or ``None`` for ``UNSPECIFIED``.
        :type src_color_range: :class:`ColorRange` or ``str``
        :param dst_color_range: Desired color range, or ``None`` for ``UNSPECIFIED``.
        :type dst_color_range: :class:`ColorRange` or ``str``
        :raises ValueError: If the frame has no format set, or if no scaling
            context can be created for the requested conversion.

        Enum-typed arguments also accept the plain integer value.

        """

        cdef VideoFormat video_format = VideoFormat(format if format is not None else frame.format)

        cdef int c_src_colorspace = _resolve_enum_value(src_colorspace, Colorspace, frame.colorspace)
        cdef int c_dst_colorspace = _resolve_enum_value(dst_colorspace, Colorspace, frame.colorspace)
        cdef int c_interpolation = _resolve_enum_value(interpolation, Interpolation, int(Interpolation.BILINEAR))
        cdef int c_src_color_range = _resolve_enum_value(src_color_range, ColorRange, 0)
        cdef int c_dst_color_range = _resolve_enum_value(dst_color_range, ColorRange, 0)

        return self._reformat(
            frame,
            width or frame.ptr.width,
            height or frame.ptr.height,
            video_format.pix_fmt,
            c_src_colorspace,
            c_dst_colorspace,
            c_interpolation,
            c_src_color_range,
            c_dst_color_range,
        )

    cdef _reformat(self, VideoFrame frame, int width, int height,
                   lib.AVPixelFormat dst_format, int src_colorspace,
                   int dst_colorspace, int interpolation,
                   int src_color_range, int dst_color_range):
        # C-level worker for reformat(): returns `frame` itself when nothing
        # changes, otherwise a newly allocated frame holding the scaled image.

        if frame.ptr.format < 0:
            raise ValueError("Frame does not have format set.")

        # The definition of color range in pixfmt.h and swscale.h is different.
        src_color_range = 1 if src_color_range == ColorRange.JPEG.value else 0
        dst_color_range = 1 if dst_color_range == ColorRange.JPEG.value else 0

        cdef lib.AVPixelFormat src_format = <lib.AVPixelFormat> frame.ptr.format

        # Shortcut!
        if (
            dst_format == src_format and
            width == frame.ptr.width and
            height == frame.ptr.height and
            dst_colorspace == src_colorspace and
            src_color_range == dst_color_range
        ):
            return frame

        with nogil:
            self.ptr = lib.sws_getCachedContext(
                self.ptr,
                frame.ptr.width,
                frame.ptr.height,
                src_format,
                width,
                height,
                dst_format,
                interpolation,
                NULL,
                NULL,
                NULL
            )

        # No context means the conversion cannot be set up; every call below
        # would otherwise be handed a NULL context.
        if self.ptr == NULL:
            raise ValueError("Could not create a scaling context for the requested conversion.")

        # We want to change the colorspace/color_range transforms.
        # We do that by grabbing all of the current settings, changing a
        # couple, and setting them all. We need a lot of state here.
        cdef const int *inv_tbl
        cdef const int *tbl
        cdef int src_colorspace_range, dst_colorspace_range
        cdef int brightness, contrast, saturation
        cdef int ret

        if src_colorspace != dst_colorspace or src_color_range != dst_color_range:
            with nogil:
                # Casts for const-ness, because Cython isn't expressive enough.
                ret = lib.sws_getColorspaceDetails(
                    self.ptr,
                    <int**>&inv_tbl,
                    &src_colorspace_range,
                    <int**>&tbl,
                    &dst_colorspace_range,
                    &brightness,
                    &contrast,
                    &saturation
                )

            err_check(ret)

            with nogil:
                # Grab the coefficients for the requested transforms.
                # The inv_table brings us to linear, and `tbl` to the new space.
                if src_colorspace != lib.SWS_CS_DEFAULT:
                    inv_tbl = lib.sws_getCoefficients(src_colorspace)
                if dst_colorspace != lib.SWS_CS_DEFAULT:
                    tbl = lib.sws_getCoefficients(dst_colorspace)

                # Apply!
                ret = lib.sws_setColorspaceDetails(
                    self.ptr,
                    inv_tbl,
                    src_color_range,
                    tbl,
                    dst_color_range,
                    brightness,
                    contrast,
                    saturation
                )

            err_check(ret)

        # Create a new VideoFrame.
        cdef VideoFrame new_frame = alloc_video_frame()
        new_frame._copy_internal_attributes(frame)
        new_frame._init(dst_format, width, height)

        # Finally, scale the image.
        with nogil:
            lib.sws_scale(
                self.ptr,
                # Cast for const-ness, because Cython isn't expressive enough.
                <const uint8_t**>frame.ptr.data,
                frame.ptr.linesize,
                0, # slice Y
                frame.ptr.height,
                new_frame.ptr.data,
                new_frame.ptr.linesize,
            )

        return new_frame

View File

@@ -0,0 +1,9 @@
from av.packet cimport Packet
from av.stream cimport Stream
from .frame cimport VideoFrame
cdef class VideoStream(Stream):
    # cpdef: callable from both Cython and Python. `=?` marks an optional
    # argument whose default value is given in the implementation.
    cpdef encode(self, VideoFrame frame=?)
    cpdef decode(self, Packet packet=?)

View File

@@ -0,0 +1,123 @@
import cython
from cython.cimports import libav as lib
from cython.cimports.av.packet import Packet
from cython.cimports.av.utils import avrational_to_fraction, to_avrational
from cython.cimports.av.video.frame import VideoFrame
@cython.cclass
class VideoStream(Stream):
    """A video stream of a container.

    Attributes that are not defined here are looked up on the stream's codec
    context, except ``framerate`` and ``rate``.
    """

    def __repr__(self):
        return (
            f"<av.VideoStream #{self.index} {self.name}, "
            f"{self.format.name if self.format else None} {self.codec_context.width}x"
            f"{self.codec_context.height} at 0x{id(self):x}>"
        )

    def __getattr__(self, name):
        # Only reached when normal lookup fails. "framerate" and "rate" are
        # deliberately not forwarded to the codec context.
        if name in ("framerate", "rate"):
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{name}'"
            )

        return getattr(self.codec_context, name)

    @cython.ccall
    def encode(self, frame: VideoFrame | None = None):
        """
        Encode an :class:`.VideoFrame` and return a list of :class:`.Packet`.

        :rtype: list[Packet]

        .. seealso:: This is mostly a passthrough to :meth:`.CodecContext.encode`.
        """

        packets = self.codec_context.encode(frame)
        packet: Packet
        # Tag every packet with this stream so it can be routed to the right
        # stream later.
        for packet in packets:
            packet._stream = self
            packet.ptr.stream_index = self.ptr.index

        return packets

    @cython.ccall
    def decode(self, packet: Packet | None = None):
        """
        Decode a :class:`.Packet` and return a list of :class:`.VideoFrame`.

        :rtype: list[VideoFrame]

        .. seealso:: This is a passthrough to :meth:`.CodecContext.decode`.
        """

        return self.codec_context.decode(packet)

    @property
    def average_rate(self):
        """
        The average frame rate of this video stream.

        This is calculated when the file is opened by looking at the first
        few frames and averaging their rate.

        :type: fractions.Fraction | None
        """
        return avrational_to_fraction(cython.address(self.ptr.avg_frame_rate))

    @property
    def base_rate(self):
        """
        The base frame rate of this stream.

        This is calculated as the lowest framerate at which the timestamps of
        frames can be represented accurately. See :ffmpeg:`AVStream.r_frame_rate`
        for more.

        :type: fractions.Fraction | None
        """
        return avrational_to_fraction(cython.address(self.ptr.r_frame_rate))

    @property
    def guessed_rate(self):
        """The guessed frame rate of this stream.

        This is a wrapper around :ffmpeg:`av_guess_frame_rate`, and uses multiple
        heuristics to decide what is "the" frame rate.

        :type: fractions.Fraction | None
        """
        val: lib.AVRational = lib.av_guess_frame_rate(
            cython.NULL, self.ptr, cython.NULL
        )
        return avrational_to_fraction(cython.address(val))

    @property
    def sample_aspect_ratio(self):
        """The guessed sample aspect ratio (SAR) of this stream.

        This is a wrapper around :ffmpeg:`av_guess_sample_aspect_ratio`, and uses multiple
        heuristics to decide what is "the" sample aspect ratio.

        :type: fractions.Fraction | None
        """
        sar: lib.AVRational = lib.av_guess_sample_aspect_ratio(
            self.container.ptr, self.ptr, cython.NULL
        )
        return avrational_to_fraction(cython.address(sar))

    @property
    def display_aspect_ratio(self):
        """The guessed display aspect ratio (DAR) of this stream.

        This is calculated from the codec context's width and height and the
        same guessed sample aspect ratio that :attr:`sample_aspect_ratio`
        reports.

        :type: fractions.Fraction | None
        """
        # Work with the raw AVRational: the `sample_aspect_ratio` property
        # returns a Fraction (or None), which has no `.num` / `.den` fields.
        sar: lib.AVRational = lib.av_guess_sample_aspect_ratio(
            self.container.ptr, self.ptr, cython.NULL
        )
        dar = cython.declare(lib.AVRational)

        # Width/height are read from the codec context (as in __repr__), so
        # this also works when no pixel format -- hence no `format` -- is set.
        lib.av_reduce(
            cython.address(dar.num),
            cython.address(dar.den),
            self.codec_context.width * sar.num,
            self.codec_context.height * sar.den,
            1024 * 1024,
        )

        return avrational_to_fraction(cython.address(dar))

View File

@@ -0,0 +1,43 @@
from fractions import Fraction
from typing import Iterator, Literal
from av.codec.context import ThreadType
from av.packet import Packet
from av.stream import Stream
from .codeccontext import VideoCodecContext
from .format import VideoFormat
from .frame import VideoFrame
class VideoStream(Stream):
    """Type stub for the video stream implemented in ``stream.py``."""

    bit_rate: int | None
    max_bit_rate: int | None
    bit_rate_tolerance: int
    sample_aspect_ratio: Fraction | None
    display_aspect_ratio: Fraction | None
    codec_context: VideoCodecContext

    # Read-only frame-rate properties defined on the stream itself.
    @property
    def average_rate(self) -> Fraction | None: ...
    @property
    def base_rate(self) -> Fraction | None: ...
    @property
    def guessed_rate(self) -> Fraction | None: ...

    def encode(self, frame: VideoFrame | None = None) -> list[Packet]: ...
    def encode_lazy(self, frame: VideoFrame | None = None) -> Iterator[Packet]: ...
    def decode(self, packet: Packet | None = None) -> list[VideoFrame]: ...

    # from codec context
    format: VideoFormat
    thread_count: int
    thread_type: ThreadType
    width: int
    height: int
    bits_per_coded_sample: int
    pix_fmt: str | None
    # NOTE(review): VideoStream.__getattr__ raises AttributeError for
    # "framerate" and "rate", so these two look unreachable on a stream at
    # runtime -- confirm whether they should stay declared here.
    framerate: Fraction
    rate: Fraction
    gop_size: int
    has_b_frames: bool
    max_b_frames: int
    coded_width: int
    coded_height: int
    color_range: int
    color_primaries: int
    color_trc: int
    colorspace: int
    type: Literal["video"]