From 3ea321f8b7e7873ff04882a9734c9f25186b4771 Mon Sep 17 00:00:00 2001 From: Ruwen Hahn Date: Mon, 20 Oct 2025 15:09:40 +0200 Subject: [PATCH] Add support for hevc and av1 to WebVTT-in-video-stream (#204) * Squashed 'deps/c-webvtt-in-video-stream/' changes from 5579ca6..3d44cbd 3d44cbd Add h265 support 7f96bcf Extract various bits of h264 functionality to prepare for h265 support 698793a Prepare `h264::NalUnitWrite` for reuse with h265 git-subtree-dir: deps/c-webvtt-in-video-stream git-subtree-split: 3d44cbd5039e3ea247972f44f73a66e08cea6e71 * Add h265 support * Squashed 'deps/c-webvtt-in-video-stream/' changes from 3d44cbd5..d599f6f0 d599f6f0 Add av1 support 11693f6a Extract header writing semantics e7eb1894 Convert h264/h265 header writing to bitwriters git-subtree-dir: deps/c-webvtt-in-video-stream git-subtree-split: d599f6f0142e24ba9a7daeee252da9f055aa39c4 * Add av1 support * Fix subtitle track name lookup --- deps/c-webvtt-in-video-stream/Cargo.lock | 9 +- deps/c-webvtt-in-video-stream/src/lib.rs | 51 +- .../video-bytestream-tools/Cargo.toml | 1 + .../video-bytestream-tools/src/av1.rs | 337 ++++++++++++++ .../video-bytestream-tools/src/h264.rs | 209 +++------ .../src/h264/annex_b.rs | 66 +-- .../video-bytestream-tools/src/h264/avcc.rs | 36 +- .../video-bytestream-tools/src/h265.rs | 437 ++++++++++++++++++ .../src/h265/annex_b.rs | 78 ++++ .../video-bytestream-tools/src/h26x.rs | 130 ++++++ .../src/h26x/annex_b.rs | 92 ++++ .../video-bytestream-tools/src/lib.rs | 3 + .../video-bytestream-tools/src/webvtt.rs | 33 +- src/transcription-filter-callbacks.cpp | 6 +- 14 files changed, 1264 insertions(+), 224 deletions(-) create mode 100644 deps/c-webvtt-in-video-stream/video-bytestream-tools/src/av1.rs create mode 100644 deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h265.rs create mode 100644 deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h265/annex_b.rs create mode 100644 deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h26x.rs create mode 
100644 deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h26x/annex_b.rs diff --git a/deps/c-webvtt-in-video-stream/Cargo.lock b/deps/c-webvtt-in-video-stream/Cargo.lock index b16183c..274fe23 100644 --- a/deps/c-webvtt-in-video-stream/Cargo.lock +++ b/deps/c-webvtt-in-video-stream/Cargo.lock @@ -63,6 +63,12 @@ version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e445576659fd04a57b44cbd00aa37aaa815ebefa0aa3cb677a6b5e63d883074f" +[[package]] +name = "bitstream-io" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6099cdc01846bc367c4e7dd630dc5966dccf36b652fae7a74e17b640411a91b2" + [[package]] name = "byteorder" version = "1.5.0" @@ -172,7 +178,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd118dcc322cc71cfc33254a19ebece92cfaaf6d4b4793fec3f7f44fbc4150df" dependencies = [ - "bitstream-io", + "bitstream-io 1.10.0", "hex-slice", "log", "memchr", @@ -484,6 +490,7 @@ checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" name = "video-bytestream-tools" version = "0.1.0" dependencies = [ + "bitstream-io 2.6.0", "byteorder", "h264-reader", "thiserror", diff --git a/deps/c-webvtt-in-video-stream/src/lib.rs b/deps/c-webvtt-in-video-stream/src/lib.rs index 07ad373..0665ed0 100644 --- a/deps/c-webvtt-in-video-stream/src/lib.rs +++ b/deps/c-webvtt-in-video-stream/src/lib.rs @@ -5,7 +5,10 @@ use std::{ }; use strum_macros::FromRepr; use video_bytestream_tools::{ - h264::{self, H264ByteStreamWrite, NalHeader, NalUnitWrite, RbspWrite}, + av1, + h264::{self, H264ByteStreamWrite, H264NalHeader}, + h265::{self, H265ByteStreamWrite, H265NalHeader}, + h26x::{NalUnitWrite, RbspWrite}, webvtt::WebvttWrite, }; use webvtt_in_video_stream::{WebvttMuxer, WebvttMuxerBuilder, WebvttString}; @@ -103,6 +106,8 @@ enum CodecFlavor { H264Avcc2, H264Avcc4, H264AnnexB, + H265AnnexB, + AV1OBUs, } impl CodecFlavor { @@ -112,6 +117,8 
@@ impl CodecFlavor { CodecFlavor::H264Avcc2 => CodecFlavorInternal::H264(CodecFlavorH264::Avcc(2)), CodecFlavor::H264Avcc4 => CodecFlavorInternal::H264(CodecFlavorH264::Avcc(4)), CodecFlavor::H264AnnexB => CodecFlavorInternal::H264(CodecFlavorH264::AnnexB), + CodecFlavor::H265AnnexB => CodecFlavorInternal::H265(CodecFlavorH265::AnnexB), + CodecFlavor::AV1OBUs => CodecFlavorInternal::AV1, } } } @@ -121,8 +128,14 @@ enum CodecFlavorH264 { AnnexB, } +enum CodecFlavorH265 { + AnnexB, +} + enum CodecFlavorInternal { H264(CodecFlavorH264), + H265(CodecFlavorH265), + AV1, } pub struct WebvttBuffer(Vec); @@ -150,8 +163,13 @@ pub extern "C" fn webvtt_muxer_try_mux_into_bytestream( Ok(true) } - fn create_nal_header() -> NalHeader { - NalHeader::from_nal_unit_type_and_nal_ref_idc(h264_reader::nal::UnitType::SEI, 0).unwrap() + fn create_nal_header() -> H264NalHeader { + H264NalHeader::from_nal_unit_type_and_nal_ref_idc(h264_reader::nal::UnitType::SEI, 0) + .unwrap() + } + + fn create_h265_nal_header() -> H265NalHeader { + H265NalHeader::from_nal_unit_type_and_nuh_ids(h265::UnitType::PrefixSeiNut, 0, 0).unwrap() } fn inner( @@ -197,6 +215,33 @@ pub extern "C" fn webvtt_muxer_try_mux_into_bytestream( }, ) .ok()?, + + CodecFlavorInternal::H265(CodecFlavorH265::AnnexB) => mux_into_bytestream( + muxer, + video_timestamp, + add_header, + &mut buffer, + |buffer| -> Result, Box> { + Ok(h265::annex_b::AnnexBWriter::new(buffer) + .start_write_nal_unit()? + .write_nal_header(create_h265_nal_header())?) 
+ }, + |write| { + write.finish_rbsp()?; + Ok(()) + }, + ) + .ok()?, + + CodecFlavorInternal::AV1 => mux_into_bytestream( + muxer, + video_timestamp, + add_header, + &mut buffer, + |buffer| Ok(av1::OBUWriter::new(buffer)), + |_write| Ok(()), + ) + .ok()?, }; if !data_written { return None; diff --git a/deps/c-webvtt-in-video-stream/video-bytestream-tools/Cargo.toml b/deps/c-webvtt-in-video-stream/video-bytestream-tools/Cargo.toml index 1ebc56e..c3592b2 100644 --- a/deps/c-webvtt-in-video-stream/video-bytestream-tools/Cargo.toml +++ b/deps/c-webvtt-in-video-stream/video-bytestream-tools/Cargo.toml @@ -4,6 +4,7 @@ name = "video-bytestream-tools" version = "0.1.0" [dependencies] +bitstream-io = "2.6.0" byteorder = "1.5.0" h264-reader = "0.7.0" thiserror = "2.0.4" diff --git a/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/av1.rs b/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/av1.rs new file mode 100644 index 0000000..6e4d791 --- /dev/null +++ b/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/av1.rs @@ -0,0 +1,337 @@ +use crate::webvtt::{ + write_webvtt_header, write_webvtt_payload, CountingSink, WebvttTrack, WebvttWrite, +}; +use bitstream_io::{BigEndian, BitWrite, BitWriter}; +use byteorder::WriteBytesExt; +use std::{ + io::{Cursor, Write}, + time::Duration, +}; + +type Result = std::result::Result; + +pub trait WriteLeb128Ext: BitWrite { + fn write_leb128(&mut self, mut val: u32) -> std::io::Result<()> { + loop { + let bits = u8::try_from(val & 0b111_1111).unwrap(); + val >>= 7; + self.write_bit(val != 0)?; + self.write(7, bits)?; + if val == 0 { + break; + } + } + Ok(()) + } +} + +impl WriteLeb128Ext for W {} + +#[derive(Debug, Clone, Copy)] +pub struct OBUHeaderWithSize { + obu_type: OBUType, + obu_size: Option, + obu_extension_header: Option, +} + +#[derive(Debug, Clone, Copy)] +pub enum OBUType { + Reserved0, + SequenceHeader, + TemporalDelimiter, + FrameHeader, + TileGroup, + Metadata, + Frame, + RedundantFrameHeader, + TileList, 
+ Reserved9, + Reserved10, + Reserved11, + Reserved12, + Reserved13, + Reserved14, + Padding, +} + +#[derive(Debug, Clone, Copy)] +pub enum MetadataType { + ReservedForAOMUse, + HdrCll, + HdrMdcv, + Scalability, + ItutT35, + Timecode, + UnregisteredPrivate6, + UnregisteredPrivate7, + UnregisteredPrivate8, + UnregisteredPrivate9, + UnregisteredPrivate10, + UnregisteredPrivate11, + UnregisteredPrivate12, + UnregisteredPrivate13, + UnregisteredPrivate14, + UnregisteredPrivate15, + UnregisteredPrivate16, + UnregisteredPrivate17, + UnregisteredPrivate18, + UnregisteredPrivate19, + UnregisteredPrivate20, + UnregisteredPrivate21, + UnregisteredPrivate22, + UnregisteredPrivate23, + UnregisteredPrivate24, + UnregisteredPrivate25, + UnregisteredPrivate26, + UnregisteredPrivate27, + UnregisteredPrivate28, + UnregisteredPrivate29, + UnregisteredPrivate30, + UnregisteredPrivate31, +} + +#[derive(Debug, Clone, Copy)] +pub struct OBUExtensionHeader { + temporal_id: u8, + spatial_id: u8, +} + +impl OBUHeaderWithSize { + pub fn new( + obu_type: OBUType, + obu_size: Option, + obu_extension_header: Option, + ) -> Self { + Self { + obu_type, + obu_size, + obu_extension_header, + } + } + + fn as_header_bytes(self, buffer: &mut [u8; 10]) -> Result<&[u8]> { + let mut cursor = Cursor::new(&mut buffer[..]); + let mut writer = BitWriter::endian(&mut cursor, BigEndian); + writer.write(1, 0)?; + writer.write(4, self.obu_type.id())?; + writer.write_bit(self.obu_extension_header.is_some())?; + writer.write_bit(self.obu_size.is_some())?; + writer.write(1, 0)?; + if let Some(extension_header) = self.obu_extension_header { + writer.write(3, extension_header.temporal_id)?; + writer.write(2, extension_header.spatial_id)?; + writer.write(3, 0)?; + } + if let Some(size) = self.obu_size { + writer.write_leb128(size)?; + } + assert!(writer.into_unwritten() == (0, 0)); + let written = usize::try_from(cursor.position()).unwrap(); + Ok(&buffer[..written]) + } +} + +impl OBUType { + pub fn id(self) -> u8 { 
+ match self { + OBUType::Reserved0 => 0, + OBUType::SequenceHeader => 1, + OBUType::TemporalDelimiter => 2, + OBUType::FrameHeader => 3, + OBUType::TileGroup => 4, + OBUType::Metadata => 5, + OBUType::Frame => 6, + OBUType::RedundantFrameHeader => 7, + OBUType::TileList => 8, + OBUType::Reserved9 => 9, + OBUType::Reserved10 => 10, + OBUType::Reserved11 => 11, + OBUType::Reserved12 => 12, + OBUType::Reserved13 => 13, + OBUType::Reserved14 => 14, + OBUType::Padding => 15, + } + } +} + +impl MetadataType { + fn id(self) -> u32 { + match self { + MetadataType::ReservedForAOMUse => 0, + MetadataType::HdrCll => 1, + MetadataType::HdrMdcv => 2, + MetadataType::Scalability => 3, + MetadataType::ItutT35 => 4, + MetadataType::Timecode => 5, + MetadataType::UnregisteredPrivate6 => 6, + MetadataType::UnregisteredPrivate7 => 7, + MetadataType::UnregisteredPrivate8 => 8, + MetadataType::UnregisteredPrivate9 => 9, + MetadataType::UnregisteredPrivate10 => 10, + MetadataType::UnregisteredPrivate11 => 11, + MetadataType::UnregisteredPrivate12 => 12, + MetadataType::UnregisteredPrivate13 => 13, + MetadataType::UnregisteredPrivate14 => 14, + MetadataType::UnregisteredPrivate15 => 15, + MetadataType::UnregisteredPrivate16 => 16, + MetadataType::UnregisteredPrivate17 => 17, + MetadataType::UnregisteredPrivate18 => 18, + MetadataType::UnregisteredPrivate19 => 19, + MetadataType::UnregisteredPrivate20 => 20, + MetadataType::UnregisteredPrivate21 => 21, + MetadataType::UnregisteredPrivate22 => 22, + MetadataType::UnregisteredPrivate23 => 23, + MetadataType::UnregisteredPrivate24 => 24, + MetadataType::UnregisteredPrivate25 => 25, + MetadataType::UnregisteredPrivate26 => 26, + MetadataType::UnregisteredPrivate27 => 27, + MetadataType::UnregisteredPrivate28 => 28, + MetadataType::UnregisteredPrivate29 => 29, + MetadataType::UnregisteredPrivate30 => 30, + MetadataType::UnregisteredPrivate31 => 31, + } + } +} + +#[derive(Debug, Clone, Copy)] +pub enum OBUExtensionHeaderError { + 
TemporalIdOutOfRange(u8), + SpatialIdOutOfRange(u8), +} + +impl OBUExtensionHeader { + pub fn new(temporal_id: u8, spatial_id: u8) -> Result { + if temporal_id > 0b111 { + return Err(OBUExtensionHeaderError::TemporalIdOutOfRange(temporal_id)); + } + if spatial_id > 0b11 { + return Err(OBUExtensionHeaderError::SpatialIdOutOfRange(spatial_id)); + } + Ok(Self { + temporal_id, + spatial_id, + }) + } +} + +pub struct OBUWriter(W); + +impl OBUWriter { + pub fn new(inner: W) -> Self { + Self(inner) + } +} + +impl OBUWriter { + fn write_obu_header(&mut self, obu_header: OBUHeaderWithSize) -> Result<()> { + let mut buffer = [0u8; 10]; + let header_bytes = obu_header.as_header_bytes(&mut buffer)?; + self.0.write_all(header_bytes) + } + + fn finish_payload(&mut self) -> Result<()> { + self.0.write_u8(0b1000_0000) + } +} + +impl WebvttWrite for OBUWriter { + fn write_webvtt_header( + &mut self, + max_latency_to_video: Duration, + send_frequency_hz: u8, + subtitle_tracks: &[WebvttTrack], + ) -> std::io::Result<()> { + fn inner( + writer: &mut W, + max_latency_to_video: Duration, + send_frequency_hz: u8, + subtitle_tracks: &[WebvttTrack], + ) -> std::io::Result<()> { + write_webvtt_header( + writer, + max_latency_to_video, + send_frequency_hz, + subtitle_tracks, + |write, _size| { + let mut writer = BitWriter::endian(write, BigEndian); + writer.write_leb128(MetadataType::UnregisteredPrivate6.id()) + }, + ) + } + let mut count = CountingSink::new(); + inner( + &mut count, + max_latency_to_video, + send_frequency_hz, + subtitle_tracks, + )?; + let header = OBUHeaderWithSize::new( + OBUType::Metadata, + Some(u32::try_from(count.count()).unwrap() + 1), + None, + ); + self.write_obu_header(header)?; + inner( + &mut self.0, + max_latency_to_video, + send_frequency_hz, + subtitle_tracks, + )?; + self.finish_payload() + } + + fn write_webvtt_payload( + &mut self, + track_index: u8, + chunk_number: u64, + chunk_version: u8, + video_offset: Duration, + webvtt_payload: &str, // TODO: 
replace with string type that checks for interior NULs + ) -> std::io::Result<()> { + fn inner( + writer: &mut W, + track_index: u8, + chunk_number: u64, + chunk_version: u8, + video_offset: Duration, + webvtt_payload: &str, + ) -> std::io::Result<()> { + write_webvtt_payload( + writer, + track_index, + chunk_number, + chunk_version, + video_offset, + webvtt_payload, + |write, _size| { + let mut writer = BitWriter::endian(write, BigEndian); + writer.write_leb128(MetadataType::UnregisteredPrivate6.id()) + }, + ) + } + let mut count = CountingSink::new(); + inner( + &mut count, + track_index, + chunk_number, + chunk_version, + video_offset, + webvtt_payload, + )?; + let header = OBUHeaderWithSize::new( + OBUType::Metadata, + Some(u32::try_from(count.count()).unwrap() + 1), + None, + ); + self.write_obu_header(header)?; + inner( + &mut self.0, + track_index, + chunk_number, + chunk_version, + video_offset, + webvtt_payload, + )?; + self.finish_payload() + } +} diff --git a/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h264.rs b/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h264.rs index 91f33c0..f0c2672 100644 --- a/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h264.rs +++ b/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h264.rs @@ -1,34 +1,24 @@ -use crate::webvtt::{write_webvtt_header, write_webvtt_payload, WebvttTrack, WebvttWrite}; -use byteorder::WriteBytesExt; +use crate::{ + h26x::{annex_b::WriteNalHeader, NalUnitWrite, NalUnitWriter, RbspWrite, RbspWriter}, + webvtt::{WebvttTrack, WebvttWrite}, +}; +use bitstream_io::{BigEndian, BitWrite, BitWriter}; use h264_reader::nal::UnitType; -use std::{collections::VecDeque, io::Write, time::Duration}; +use std::{io::Write, time::Duration}; type Result = std::result::Result; pub mod annex_b; pub mod avcc; -pub trait H264ByteStreamWrite { - type Writer: NalUnitWrite; - fn start_write_nal_unit(self) -> Result; -} - -impl H264ByteStreamWrite for W { - type Writer = NalUnitWriter; 
- - fn start_write_nal_unit(self) -> Result { - Ok(NalUnitWriter::new(self)) - } -} - #[derive(Debug, Clone, Copy)] -pub struct NalHeader { +pub struct H264NalHeader { nal_unit_type: UnitType, nal_ref_idc: u8, } #[derive(Debug, Clone, Copy)] -pub enum NalHeaderError { +pub enum H264NalHeaderError { NalRefIdcOutOfRange(u8), InvalidNalRefIdcForNalUnitType { nal_unit_type: UnitType, @@ -37,24 +27,24 @@ pub enum NalHeaderError { NalUnitTypeOutOfRange(UnitType), } -impl NalHeader { +impl H264NalHeader { pub fn from_nal_unit_type_and_nal_ref_idc( nal_unit_type: UnitType, nal_ref_idc: u8, - ) -> Result { + ) -> Result { if nal_ref_idc >= 4 { - return Err(NalHeaderError::NalRefIdcOutOfRange(nal_ref_idc)); + return Err(H264NalHeaderError::NalRefIdcOutOfRange(nal_ref_idc)); } match nal_unit_type.id() { - 0 => Err(NalHeaderError::NalUnitTypeOutOfRange(nal_unit_type)), + 0 => Err(H264NalHeaderError::NalUnitTypeOutOfRange(nal_unit_type)), 6 | 9 | 10 | 11 | 12 => { if nal_ref_idc == 0 { - Ok(NalHeader { + Ok(Self { nal_unit_type, nal_ref_idc, }) } else { - Err(NalHeaderError::InvalidNalRefIdcForNalUnitType { + Err(H264NalHeaderError::InvalidNalRefIdcForNalUnitType { nal_unit_type, nal_ref_idc, }) @@ -62,164 +52,85 @@ impl NalHeader { } 5 => { if nal_ref_idc != 0 { - Ok(NalHeader { + Ok(Self { nal_unit_type, nal_ref_idc, }) } else { - Err(NalHeaderError::InvalidNalRefIdcForNalUnitType { + Err(H264NalHeaderError::InvalidNalRefIdcForNalUnitType { nal_unit_type, nal_ref_idc, }) } } - 32.. => Err(NalHeaderError::NalUnitTypeOutOfRange(nal_unit_type)), - _ => Ok(NalHeader { + 32.. 
=> Err(H264NalHeaderError::NalUnitTypeOutOfRange(nal_unit_type)), + _ => Ok(Self { nal_unit_type, nal_ref_idc, }), } } - fn as_header_byte(&self) -> u8 { - self.nal_ref_idc << 5 | self.nal_unit_type.id() + fn as_header_bytes(&self) -> Result<[u8; 1]> { + let mut output = [0u8]; + let mut writer = BitWriter::endian(&mut output[..], BigEndian); + writer.write(1, 0)?; + writer.write(2, self.nal_ref_idc)?; + writer.write(5, self.nal_unit_type.id())?; + assert!(writer.into_unwritten() == (0, 0)); + Ok(output) } } -pub struct NalUnitWriter { - inner: W, -} - -pub trait NalUnitWrite { - type Writer: RbspWrite; - fn write_nal_header(self, nal_header: NalHeader) -> Result; -} - -impl NalUnitWriter { - fn new(inner: W) -> Self { - Self { inner } +impl WriteNalHeader for H264NalHeader { + fn write_to(self, writer: &mut W) -> crate::h26x::Result<()> { + writer.write_all(&self.as_header_bytes()?[..]) } } -impl NalUnitWrite for NalUnitWriter { - type Writer = RbspWriter; +pub trait H264ByteStreamWrite { + type Writer: NalUnitWrite; + fn start_write_nal_unit(self) -> Result; +} - fn write_nal_header(mut self, nal_header: NalHeader) -> Result> { - self.inner.write_u8(nal_header.as_header_byte())?; - Ok(RbspWriter::new(self.inner)) +impl H264ByteStreamWrite for W { + type Writer = H264NalUnitWriter; + + fn start_write_nal_unit(self) -> Result { + Ok(H264NalUnitWriter(NalUnitWriter::new(self))) } } -pub struct RbspWriter { - last_written: VecDeque, - inner: W, -} +pub struct H264NalUnitWriter(NalUnitWriter); +pub struct H264RbspWriter(RbspWriter); -pub trait RbspWrite { - type Writer: H264ByteStreamWrite; - fn finish_rbsp(self) -> Result; -} +impl NalUnitWrite for H264NalUnitWriter { + type Writer = H264RbspWriter; + type NalHeader = H264NalHeader; -impl RbspWriter { - pub fn new(inner: W) -> Self { - Self { - last_written: VecDeque::with_capacity(3), - inner, - } + fn write_nal_header(mut self, nal_header: H264NalHeader) -> Result> { + 
self.0.inner.write_all(&nal_header.as_header_bytes()?[..])?; + Ok(H264RbspWriter(RbspWriter::new(self.0.inner))) } } -impl RbspWrite for RbspWriter { +impl RbspWrite for H264RbspWriter { type Writer = W; - fn finish_rbsp(mut self) -> Result { - self.write_u8(0x80)?; - Ok(self.inner) + + fn finish_rbsp(self) -> crate::h26x::Result { + self.0.finish_rbsp() } } -impl Write for RbspWriter { - fn write(&mut self, buf: &[u8]) -> Result { - let mut written = 0; - for &byte in buf { - let mut last_written_iter = self.last_written.iter(); - if last_written_iter.next() == Some(&0) - && last_written_iter.next() == Some(&0) - && (byte == 0 || byte == 1 || byte == 2 || byte == 3) - { - self.inner.write_u8(3)?; - self.last_written.clear(); - } - self.inner.write_u8(byte)?; - written += 1; - self.last_written.push_back(byte); - if self.last_written.len() > 2 { - self.last_written.pop_front(); - } - } - Ok(written) - } - - fn flush(&mut self) -> Result<()> { - self.inner.flush() - } -} - -pub(crate) struct CountingSink { - count: usize, -} - -impl CountingSink { - pub fn new() -> Self { - Self { count: 0 } - } - - pub fn count(&self) -> usize { - self.count - } -} - -impl Write for CountingSink { - fn write(&mut self, buf: &[u8]) -> Result { - self.count += buf.len(); - Ok(buf.len()) - } - - fn flush(&mut self) -> Result<()> { - Ok(()) - } -} - -pub(crate) fn write_sei_header( - writer: &mut W, - mut payload_type: usize, - mut payload_size: usize, -) -> std::io::Result<()> { - while payload_type >= 255 { - writer.write_u8(255)?; - payload_type -= 255; - } - writer.write_u8(payload_type.try_into().unwrap())?; - while payload_size >= 255 { - writer.write_u8(255)?; - payload_size -= 255; - } - writer.write_u8(payload_size.try_into().unwrap())?; - Ok(()) -} - -impl WebvttWrite for RbspWriter { +impl WebvttWrite for H264RbspWriter { fn write_webvtt_header( &mut self, max_latency_to_video: Duration, send_frequency_hz: u8, subtitle_tracks: &[WebvttTrack], ) -> std::io::Result<()> { - 
write_webvtt_header( - self, - max_latency_to_video, - send_frequency_hz, - subtitle_tracks, - ) + self.0 + .write_webvtt_header(max_latency_to_video, send_frequency_hz, subtitle_tracks) } fn write_webvtt_payload( @@ -230,8 +141,7 @@ impl WebvttWrite for RbspWriter { video_offset: Duration, webvtt_payload: &str, // TODO: replace with string type that checks for interior NULs ) -> std::io::Result<()> { - write_webvtt_payload( - self, + self.0.write_webvtt_payload( track_index, chunk_number, chunk_version, @@ -244,7 +154,8 @@ impl WebvttWrite for RbspWriter { #[cfg(test)] mod tests { use crate::{ - h264::{NalHeader, NalUnitWrite, NalUnitWriter, RbspWrite}, + h264::{H264NalHeader, H264NalUnitWriter}, + h26x::{NalUnitWrite, NalUnitWriter, RbspWrite}, webvtt::{WebvttWrite, PAYLOAD_GUID, USER_DATA_UNREGISTERED}, }; use byteorder::{BigEndian, ReadBytesExt}; @@ -255,11 +166,11 @@ mod tests { fn check_webvtt_sei() { let mut writer = vec![]; - let nalu_writer = NalUnitWriter::new(&mut writer); + let nalu_writer = H264NalUnitWriter(NalUnitWriter::new(&mut writer)); let nal_unit_type = h264_reader::nal::UnitType::SEI; let nal_ref_idc = 0; let nal_header = - NalHeader::from_nal_unit_type_and_nal_ref_idc(nal_unit_type, nal_ref_idc).unwrap(); + H264NalHeader::from_nal_unit_type_and_nal_ref_idc(nal_unit_type, nal_ref_idc).unwrap(); let mut payload_writer = nalu_writer.write_nal_header(nal_header).unwrap(); let track_index = 0; let chunk_number = 1; @@ -308,11 +219,11 @@ mod tests { fn check_webvtt_multi_sei() { let mut writer = vec![]; - let nalu_writer = NalUnitWriter::new(&mut writer); + let nalu_writer = H264NalUnitWriter(NalUnitWriter::new(&mut writer)); let nal_unit_type = h264_reader::nal::UnitType::SEI; let nal_ref_idc = 0; let nal_header = - NalHeader::from_nal_unit_type_and_nal_ref_idc(nal_unit_type, nal_ref_idc).unwrap(); + H264NalHeader::from_nal_unit_type_and_nal_ref_idc(nal_unit_type, nal_ref_idc).unwrap(); let mut payload_writer = 
nalu_writer.write_nal_header(nal_header).unwrap(); let track_index = 0; let chunk_number = 1; diff --git a/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h264/annex_b.rs b/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h264/annex_b.rs index 09b0e66..d33f72a 100644 --- a/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h264/annex_b.rs +++ b/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h264/annex_b.rs @@ -1,72 +1,50 @@ -use super::{ - H264ByteStreamWrite, NalHeader, NalUnitWrite, NalUnitWriter, RbspWrite, RbspWriter, Result, +use crate::{ + h264::{H264ByteStreamWrite, H264NalHeader}, + h26x::{ + annex_b::{ + AnnexBNalUnitWriter as AnnexBNalUnitWriterImpl, + AnnexBRbspWriter as AnnexBRbspWriterImpl, AnnexBWriter as AnnexBWriterImpl, + }, + NalUnitWrite, RbspWrite, Result, + }, + webvtt::{WebvttTrack, WebvttWrite}, }; -use crate::webvtt::{WebvttTrack, WebvttWrite}; -use byteorder::WriteBytesExt; use std::{io::Write, time::Duration}; -pub struct AnnexBWriter { - leading_zero_8bits_written: bool, - inner: W, -} +pub struct AnnexBWriter(AnnexBWriterImpl); impl AnnexBWriter { pub fn new(inner: W) -> Self { - Self { - leading_zero_8bits_written: false, - inner, - } + Self(AnnexBWriterImpl::new(inner)) } } impl H264ByteStreamWrite for AnnexBWriter { type Writer = AnnexBNalUnitWriter; - fn start_write_nal_unit(mut self) -> Result> { - if !self.leading_zero_8bits_written { - self.inner.write_u8(0)?; - self.leading_zero_8bits_written = true; - } - self.inner.write_all(&[0, 0, 1])?; - Ok(AnnexBNalUnitWriter { - inner: NalUnitWriter::new(self.inner), - }) + fn start_write_nal_unit(self) -> Result> { + self.0.start_write_nal_unit().map(AnnexBNalUnitWriter) } } -pub struct AnnexBNalUnitWriter { - inner: NalUnitWriter, -} - -impl AnnexBNalUnitWriter { - fn _nal_unit_writer(&mut self) -> &mut NalUnitWriter { - &mut self.inner - } -} +pub struct AnnexBNalUnitWriter(AnnexBNalUnitWriterImpl); impl NalUnitWrite for AnnexBNalUnitWriter { type Writer 
= AnnexBRbspWriter; + type NalHeader = H264NalHeader; - fn write_nal_header(self, nal_header: NalHeader) -> Result> { - self.inner - .write_nal_header(nal_header) - .map(|inner| AnnexBRbspWriter { inner }) + fn write_nal_header(self, nal_header: Self::NalHeader) -> Result> { + self.0.write_nal_header(nal_header).map(AnnexBRbspWriter) } } -pub struct AnnexBRbspWriter { - inner: RbspWriter, -} - -impl AnnexBRbspWriter {} +pub struct AnnexBRbspWriter(AnnexBRbspWriterImpl); impl RbspWrite for AnnexBRbspWriter { type Writer = AnnexBWriter; fn finish_rbsp(self) -> Result { - self.inner - .finish_rbsp() - .map(|writer| AnnexBWriter::new(writer)) + self.0.finish_rbsp().map(AnnexBWriter) } } @@ -77,7 +55,7 @@ impl WebvttWrite for AnnexBRbspWriter { send_frequency_hz: u8, subtitle_tracks: &[WebvttTrack], ) -> std::io::Result<()> { - self.inner + self.0 .write_webvtt_header(max_latency_to_video, send_frequency_hz, subtitle_tracks) } @@ -89,7 +67,7 @@ impl WebvttWrite for AnnexBRbspWriter { video_offset: Duration, webvtt_payload: &str, // TODO: replace with string type that checks for interior NULs ) -> std::io::Result<()> { - self.inner.write_webvtt_payload( + self.0.write_webvtt_payload( track_index, chunk_number, chunk_version, diff --git a/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h264/avcc.rs b/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h264/avcc.rs index 672b2e4..a6f4716 100644 --- a/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h264/avcc.rs +++ b/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h264/avcc.rs @@ -1,11 +1,14 @@ -use super::{ - H264ByteStreamWrite, NalHeader, NalUnitWrite, NalUnitWriter, RbspWrite, RbspWriter, Result, +use crate::{ + h264::{H264ByteStreamWrite, H264NalHeader}, + h26x::{NalUnitWrite, NalUnitWriter, RbspWrite, Result}, + webvtt::{WebvttTrack, WebvttWrite}, }; -use crate::webvtt::{WebvttTrack, WebvttWrite}; use byteorder::{BigEndian, WriteBytesExt}; use std::{io::Write, time::Duration}; use 
thiserror::Error; +use super::{H264NalUnitWriter, H264RbspWriter}; + const AVCC_MAX_LENGTH: [usize; 4] = [0xff, 0xff_ff, 0, 0xff_ff_ff_ff]; pub struct AVCCWriter { @@ -38,7 +41,7 @@ impl H264ByteStreamWrite for AVCCWriter { fn start_write_nal_unit(self) -> Result>> { Ok(AVCCNalUnitWriter { - inner: NalUnitWriter::new(AVCCWriterBuffer::new(self)), + inner: H264NalUnitWriter(NalUnitWriter::new(AVCCWriterBuffer::new(self))), }) } } @@ -88,37 +91,28 @@ impl Write for AVCCWriterBuffer { } pub struct AVCCNalUnitWriter { - inner: NalUnitWriter, -} - -impl AVCCNalUnitWriter { - fn _nal_unit_writer(&mut self) -> &mut NalUnitWriter { - &mut self.inner - } + inner: H264NalUnitWriter, } impl NalUnitWrite for AVCCNalUnitWriter> { type Writer = AVCCRbspWriter>; + type NalHeader = H264NalHeader; fn write_nal_header( self, - nal_header: NalHeader, + nal_header: Self::NalHeader, ) -> Result>> { - self.inner - .write_nal_header(nal_header) - .map(|inner| AVCCRbspWriter { inner }) + self.inner.write_nal_header(nal_header).map(AVCCRbspWriter) } } -pub struct AVCCRbspWriter { - inner: RbspWriter, -} +pub struct AVCCRbspWriter(H264RbspWriter); impl RbspWrite for AVCCRbspWriter> { type Writer = AVCCWriter; fn finish_rbsp(self) -> Result { - let buffer = self.inner.finish_rbsp()?; + let buffer = self.0.finish_rbsp()?; buffer.finish() } } @@ -130,7 +124,7 @@ impl WebvttWrite for AVCCRbspWriter { send_frequency_hz: u8, subtitle_tracks: &[WebvttTrack], ) -> std::io::Result<()> { - self.inner + self.0 .write_webvtt_header(max_latency_to_video, send_frequency_hz, subtitle_tracks) } @@ -142,7 +136,7 @@ impl WebvttWrite for AVCCRbspWriter { video_offset: Duration, webvtt_payload: &str, // TODO: replace with string type that checks for interior NULs ) -> std::io::Result<()> { - self.inner.write_webvtt_payload( + self.0.write_webvtt_payload( track_index, chunk_number, chunk_version, diff --git a/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h265.rs 
b/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h265.rs new file mode 100644 index 0000000..65b620a --- /dev/null +++ b/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h265.rs @@ -0,0 +1,437 @@ +use crate::{ + h26x::{annex_b::WriteNalHeader, NalUnitWrite, NalUnitWriter, RbspWrite, RbspWriter}, + webvtt::{WebvttTrack, WebvttWrite}, +}; +use bitstream_io::{BigEndian, BitWrite, BitWriter}; +use std::{io::Write, time::Duration}; + +type Result = std::result::Result; + +pub mod annex_b; + +#[derive(Debug, Clone, Copy)] +pub enum UnitType { + TrailN, + TrailR, + TsaN, + TsaR, + StsaN, + StsaR, + RadlN, + RadlR, + RaslN, + RaslR, + RsvVclN10, + RsvVclN12, + RsvVclN14, + RsvVclR11, + RsvVclR13, + RsvVclR15, + BlaWLp, + BlaWRadl, + BlaNLp, + IdrWRadl, + IdrNLp, + CraNut, + RsvIrapVcl22, + RsvIrapVcl23, + RsvVcl24, + RsvVcl25, + RsvVcl26, + RsvVcl27, + RsvVcl28, + RsvVcl29, + RsvVcl30, + RsvVcl31, + VpsNut, + SpsNut, + PpsNut, + AudNut, + EosNut, + EobNut, + FdNut, + PrefixSeiNut, + SuffixSeiNut, + RsvNvcl41, + RsvNvcl42, + RsvNvcl43, + RsvNvcl44, + RsvNvcl45, + RsvNvcl46, + RsvNvcl47, + Unspec48, + Unspec49, + Unspec50, + Unspec51, + Unspec52, + Unspec53, + Unspec54, + Unspec55, + Unspec56, + Unspec57, + Unspec58, + Unspec59, + Unspec60, + Unspec61, + Unspec62, + Unspec63, +} + +impl UnitType { + fn id(self) -> u8 { + match self { + UnitType::TrailN => 0, + UnitType::TrailR => 1, + UnitType::TsaN => 2, + UnitType::TsaR => 3, + UnitType::StsaN => 4, + UnitType::StsaR => 5, + UnitType::RadlN => 6, + UnitType::RadlR => 7, + UnitType::RaslN => 8, + UnitType::RaslR => 9, + UnitType::RsvVclN10 => 10, + UnitType::RsvVclN12 => 12, + UnitType::RsvVclN14 => 14, + UnitType::RsvVclR11 => 11, + UnitType::RsvVclR13 => 13, + UnitType::RsvVclR15 => 15, + UnitType::BlaWLp => 16, + UnitType::BlaWRadl => 17, + UnitType::BlaNLp => 18, + UnitType::IdrWRadl => 19, + UnitType::IdrNLp => 20, + UnitType::CraNut => 21, + UnitType::RsvIrapVcl22 => 22, + UnitType::RsvIrapVcl23 =>
23, + UnitType::RsvVcl24 => 24, + UnitType::RsvVcl25 => 25, + UnitType::RsvVcl26 => 26, + UnitType::RsvVcl27 => 27, + UnitType::RsvVcl28 => 28, + UnitType::RsvVcl29 => 29, + UnitType::RsvVcl30 => 30, + UnitType::RsvVcl31 => 31, + UnitType::VpsNut => 32, + UnitType::SpsNut => 33, + UnitType::PpsNut => 34, + UnitType::AudNut => 35, + UnitType::EosNut => 36, + UnitType::EobNut => 37, + UnitType::FdNut => 38, + UnitType::PrefixSeiNut => 39, + UnitType::SuffixSeiNut => 40, + UnitType::RsvNvcl41 => 41, + UnitType::RsvNvcl42 => 42, + UnitType::RsvNvcl43 => 43, + UnitType::RsvNvcl44 => 44, + UnitType::RsvNvcl45 => 45, + UnitType::RsvNvcl46 => 46, + UnitType::RsvNvcl47 => 47, + UnitType::Unspec48 => 48, + UnitType::Unspec49 => 49, + UnitType::Unspec50 => 50, + UnitType::Unspec51 => 51, + UnitType::Unspec52 => 52, + UnitType::Unspec53 => 53, + UnitType::Unspec54 => 54, + UnitType::Unspec55 => 55, + UnitType::Unspec56 => 56, + UnitType::Unspec57 => 57, + UnitType::Unspec58 => 58, + UnitType::Unspec59 => 59, + UnitType::Unspec60 => 60, + UnitType::Unspec61 => 61, + UnitType::Unspec62 => 62, + UnitType::Unspec63 => 63, + } + } +} + +#[derive(Debug, Clone, Copy)] +pub struct H265NalHeader { + nal_unit_type: UnitType, + nuh_layer_id: u8, + nuh_temporal_id: u8, +} + +#[derive(Debug, Clone, Copy)] +pub enum H265NalHeaderError { + NuhLayerIdOutOfRange(u8), + NuhTemporalIdOutOfRange(u8), +} + +impl H265NalHeader { + pub fn from_nal_unit_type_and_nuh_ids( + nal_unit_type: UnitType, + nuh_layer_id: u8, + nuh_temporal_id: u8, + ) -> Result { + if nuh_layer_id >= 0b100_0000 { + return Err(H265NalHeaderError::NuhLayerIdOutOfRange(nuh_layer_id)); + } + if nuh_temporal_id >= (0b1000 - 1) { + return Err(H265NalHeaderError::NuhTemporalIdOutOfRange(nuh_temporal_id)); + } + Ok(Self { + nal_unit_type, + nuh_layer_id, + nuh_temporal_id, + }) + } + + fn as_header_bytes(&self) -> Result<[u8; 2]> { + let mut output = [0u8; 2]; + let mut writer = BitWriter::endian(&mut output[..], BigEndian); + 
writer.write(1, 0)?; + writer.write(6, self.nal_unit_type.id())?; + writer.write(6, self.nuh_layer_id)?; + writer.write(3, self.nuh_temporal_id + 1)?; + assert!(writer.into_unwritten() == (0, 0)); + Ok(output) + } +} + +impl WriteNalHeader for H265NalHeader { + fn write_to(self, writer: &mut W) -> crate::h26x::Result<()> { + writer.write_all(&self.as_header_bytes()?[..]) + } +} + +pub trait H265ByteStreamWrite { + type Writer: NalUnitWrite; + fn start_write_nal_unit(self) -> Result; +} + +impl H265ByteStreamWrite for W { + type Writer = H265NalUnitWriter; + + fn start_write_nal_unit(self) -> Result { + Ok(H265NalUnitWriter(NalUnitWriter::new(self))) + } +} + +pub struct H265NalUnitWriter(NalUnitWriter); +pub struct H265RbspWriter(RbspWriter); + +impl NalUnitWrite for H265NalUnitWriter { + type Writer = H265RbspWriter; + type NalHeader = H265NalHeader; + + fn write_nal_header(mut self, nal_header: H265NalHeader) -> Result> { + self.0.inner.write_all(&nal_header.as_header_bytes()?[..])?; + Ok(H265RbspWriter(RbspWriter::new(self.0.inner))) + } +} + +impl RbspWrite for H265RbspWriter { + type Writer = W; + + fn finish_rbsp(self) -> crate::h26x::Result { + self.0.finish_rbsp() + } +} + +impl WebvttWrite for H265RbspWriter { + fn write_webvtt_header( + &mut self, + max_latency_to_video: Duration, + send_frequency_hz: u8, + subtitle_tracks: &[WebvttTrack], + ) -> std::io::Result<()> { + self.0 + .write_webvtt_header(max_latency_to_video, send_frequency_hz, subtitle_tracks) + } + + fn write_webvtt_payload( + &mut self, + track_index: u8, + chunk_number: u64, + chunk_version: u8, + video_offset: Duration, + webvtt_payload: &str, // TODO: replace with string type that checks for interior NULs + ) -> std::io::Result<()> { + self.0.write_webvtt_payload( + track_index, + chunk_number, + chunk_version, + video_offset, + webvtt_payload, + ) + } +} + +#[cfg(test)] +mod tests { + use crate::{ + h265::{H265NalHeader, H265NalUnitWriter, UnitType}, + h26x::{NalUnitWrite, 
NalUnitWriter, RbspWrite}, + webvtt::{WebvttWrite, PAYLOAD_GUID, USER_DATA_UNREGISTERED}, + }; + use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; + use std::{ + collections::VecDeque, + io::{ErrorKind, Read}, + time::Duration, + }; + + #[derive(Clone)] + pub(crate) struct RbspReader { + last_read: VecDeque, + inner: R, + } + + impl RbspReader { + pub fn new(inner: R) -> Self { + RbspReader { + last_read: VecDeque::new(), + inner, + } + } + } + + impl Read for RbspReader { + fn read(&mut self, mut buf: &mut [u8]) -> std::io::Result { + let mut read = 0; + while !buf.is_empty() { + let res = self.inner.read_u8(); + let byte = match res { + Ok(byte) => byte, + Err(err) if err.kind() == ErrorKind::UnexpectedEof => return Ok(read), + Err(err) => return Err(err), + }; + let mut last_read_iter = self.last_read.iter(); + if last_read_iter.next() == Some(&0) + && last_read_iter.next() == Some(&0) + && byte == 3 + { + self.last_read.clear(); + continue; + } + if self.last_read.len() > 1 { + self.last_read.pop_front(); + } + read += 1; + self.last_read.push_back(byte); + buf.write_u8(byte).unwrap(); + } + Ok(read) + } + } + + #[test] + fn check_webvtt_sei() { + let mut writer = vec![]; + + let nalu_writer = H265NalUnitWriter(NalUnitWriter::new(&mut writer)); + let nal_unit_type = UnitType::PrefixSeiNut; + let nuh_layer_id = 0; + let nuh_temporal_id = 0; + let nal_header = H265NalHeader::from_nal_unit_type_and_nuh_ids( + nal_unit_type, + nuh_layer_id, + nuh_temporal_id, + ) + .unwrap(); + let mut payload_writer = nalu_writer.write_nal_header(nal_header).unwrap(); + let track_index = 0; + let chunk_number = 1; + let chunk_version = 0; + let video_offset = Duration::from_millis(200); + let webvtt_payload = "Some unverified data"; + payload_writer + .write_webvtt_payload( + track_index, + chunk_number, + chunk_version, + video_offset, + webvtt_payload, + ) + .unwrap(); + payload_writer.finish_rbsp().unwrap(); + + assert!(&writer[4..20] == PAYLOAD_GUID.as_bytes()); +
assert!(writer[0] == nal_unit_type.id() << 1); + + let mut reader = RbspReader::new(&writer[2..]); + + assert!(usize::from(reader.read_u8().unwrap()) == USER_DATA_UNREGISTERED); + let mut length = 0; + loop { + let byte = reader.read_u8().unwrap(); + length += usize::from(byte); + if byte != 255 { + break; + } + } + assert!(dbg!(length + 1) == dbg!(reader.clone().bytes().count())); + reader.read_u128::().unwrap(); + assert!(track_index == reader.read_u8().unwrap()); + assert!(chunk_number == reader.read_u64::().unwrap()); + assert!(chunk_version == reader.read_u8().unwrap()); + assert!( + u16::try_from(video_offset.as_millis()).unwrap() + == reader.read_u16::().unwrap() + ); + println!("{writer:02x?}"); + } + + #[test] + fn check_webvtt_multi_sei() { + let mut writer = vec![]; + + let nalu_writer = H265NalUnitWriter(NalUnitWriter::new(&mut writer)); + let nal_unit_type = UnitType::PrefixSeiNut; + let nuh_layer_id = 0; + let nuh_temporal_id = 0; + let nal_header = H265NalHeader::from_nal_unit_type_and_nuh_ids( + nal_unit_type, + nuh_layer_id, + nuh_temporal_id, + ) + .unwrap(); + let mut payload_writer = nalu_writer.write_nal_header(nal_header).unwrap(); + let track_index = 0; + let chunk_number = 1; + let chunk_version = 0; + let video_offset = Duration::from_millis(200); + let webvtt_payload = "Some unverified data"; + payload_writer + .write_webvtt_payload( + track_index, + chunk_number, + chunk_version, + video_offset, + webvtt_payload, + ) + .unwrap(); + payload_writer + .write_webvtt_payload(1, 1, 0, video_offset, "Something else") + .unwrap(); + payload_writer.finish_rbsp().unwrap(); + + assert!(&writer[4..20] == PAYLOAD_GUID.as_bytes()); + assert!(writer[0] == nal_unit_type.id() << 1); + + let mut reader = RbspReader::new(&writer[2..]); + + assert!(usize::from(reader.read_u8().unwrap()) == USER_DATA_UNREGISTERED); + let mut _length = 0; + loop { + let byte = reader.read_u8().unwrap(); + _length += usize::from(byte); + if byte != 255 { + break; + } + } + 
reader.read_u128::().unwrap(); + assert!(track_index == reader.read_u8().unwrap()); + assert!(chunk_number == reader.read_u64::().unwrap()); + assert!(chunk_version == reader.read_u8().unwrap()); + assert!( + u16::try_from(video_offset.as_millis()).unwrap() + == reader.read_u16::().unwrap() + ); + println!("{writer:02x?}"); + } +} diff --git a/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h265/annex_b.rs b/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h265/annex_b.rs new file mode 100644 index 0000000..48b50ab --- /dev/null +++ b/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h265/annex_b.rs @@ -0,0 +1,78 @@ +use crate::{ + h265::{H265ByteStreamWrite, H265NalHeader}, + h26x::{ + annex_b::{ + AnnexBNalUnitWriter as AnnexBNalUnitWriterImpl, + AnnexBRbspWriter as AnnexBRbspWriterImpl, AnnexBWriter as AnnexBWriterImpl, + }, + NalUnitWrite, RbspWrite, Result, + }, + webvtt::{WebvttTrack, WebvttWrite}, +}; +use std::{io::Write, time::Duration}; + +pub struct AnnexBWriter(AnnexBWriterImpl); + +impl AnnexBWriter { + pub fn new(inner: W) -> Self { + Self(AnnexBWriterImpl::new(inner)) + } +} + +impl H265ByteStreamWrite for AnnexBWriter { + type Writer = AnnexBNalUnitWriter; + + fn start_write_nal_unit(self) -> Result> { + self.0.start_write_nal_unit().map(AnnexBNalUnitWriter) + } +} + +pub struct AnnexBNalUnitWriter(AnnexBNalUnitWriterImpl); + +impl NalUnitWrite for AnnexBNalUnitWriter { + type Writer = AnnexBRbspWriter; + type NalHeader = H265NalHeader; + + fn write_nal_header(self, nal_header: Self::NalHeader) -> Result> { + self.0.write_nal_header(nal_header).map(AnnexBRbspWriter) + } +} + +pub struct AnnexBRbspWriter(AnnexBRbspWriterImpl); + +impl RbspWrite for AnnexBRbspWriter { + type Writer = AnnexBWriter; + + fn finish_rbsp(self) -> Result { + self.0.finish_rbsp().map(AnnexBWriter) + } +} + +impl WebvttWrite for AnnexBRbspWriter { + fn write_webvtt_header( + &mut self, + max_latency_to_video: Duration, + send_frequency_hz: u8, + 
subtitle_tracks: &[WebvttTrack], + ) -> std::io::Result<()> { + self.0 + .write_webvtt_header(max_latency_to_video, send_frequency_hz, subtitle_tracks) + } + + fn write_webvtt_payload( + &mut self, + track_index: u8, + chunk_number: u64, + chunk_version: u8, + video_offset: Duration, + webvtt_payload: &str, // TODO: replace with string type that checks for interior NULs + ) -> std::io::Result<()> { + self.0.write_webvtt_payload( + track_index, + chunk_number, + chunk_version, + video_offset, + webvtt_payload, + ) + } +} diff --git a/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h26x.rs b/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h26x.rs new file mode 100644 index 0000000..4b812b6 --- /dev/null +++ b/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h26x.rs @@ -0,0 +1,130 @@ +use crate::webvtt::{ + write_webvtt_header, write_webvtt_payload, WebvttTrack, WebvttWrite, USER_DATA_UNREGISTERED, +}; +use byteorder::WriteBytesExt; +use std::{collections::VecDeque, io::Write, time::Duration}; + +pub(crate) mod annex_b; + +pub(crate) type Result = std::result::Result; + +pub(crate) struct NalUnitWriter { + pub(crate) inner: W, +} + +pub trait NalUnitWrite { + type Writer: RbspWrite; + type NalHeader; + fn write_nal_header(self, nal_header: Self::NalHeader) -> Result; +} + +impl NalUnitWriter { + pub(crate) fn new(inner: W) -> Self { + Self { inner } + } +} + +pub(crate) struct RbspWriter { + last_written: VecDeque, + inner: W, +} + +pub trait RbspWrite { + type Writer; + fn finish_rbsp(self) -> Result; +} + +impl RbspWriter { + pub fn new(inner: W) -> Self { + Self { + last_written: VecDeque::with_capacity(3), + inner, + } + } + + pub fn finish_rbsp(mut self) -> Result { + self.write_u8(0x80)?; + Ok(self.inner) + } +} + +impl Write for RbspWriter { + fn write(&mut self, buf: &[u8]) -> Result { + let mut written = 0; + for &byte in buf { + let mut last_written_iter = self.last_written.iter(); + if last_written_iter.next() == Some(&0) + && 
last_written_iter.next() == Some(&0) + && (byte == 0 || byte == 1 || byte == 2 || byte == 3) + { + self.inner.write_u8(3)?; + self.last_written.clear(); + } + self.inner.write_u8(byte)?; + written += 1; + self.last_written.push_back(byte); + if self.last_written.len() > 2 { + self.last_written.pop_front(); + } + } + Ok(written) + } + + fn flush(&mut self) -> Result<()> { + self.inner.flush() + } +} + +pub(crate) fn write_sei_header( + writer: &mut W, + mut payload_type: usize, + mut payload_size: usize, +) -> std::io::Result<()> { + while payload_type >= 255 { + writer.write_u8(255)?; + payload_type -= 255; + } + writer.write_u8(payload_type.try_into().unwrap())?; + while payload_size >= 255 { + writer.write_u8(255)?; + payload_size -= 255; + } + writer.write_u8(payload_size.try_into().unwrap())?; + Ok(()) +} + +impl WebvttWrite for RbspWriter { + fn write_webvtt_header( + &mut self, + max_latency_to_video: Duration, + send_frequency_hz: u8, + subtitle_tracks: &[WebvttTrack], + ) -> std::io::Result<()> { + write_webvtt_header( + self, + max_latency_to_video, + send_frequency_hz, + subtitle_tracks, + |writer, size| write_sei_header(writer, USER_DATA_UNREGISTERED, size), + ) + } + + fn write_webvtt_payload( + &mut self, + track_index: u8, + chunk_number: u64, + chunk_version: u8, + video_offset: Duration, + webvtt_payload: &str, // TODO: replace with string type that checks for interior NULs + ) -> std::io::Result<()> { + write_webvtt_payload( + self, + track_index, + chunk_number, + chunk_version, + video_offset, + webvtt_payload, + |writer, size| write_sei_header(writer, USER_DATA_UNREGISTERED, size), + ) + } +} diff --git a/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h26x/annex_b.rs b/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h26x/annex_b.rs new file mode 100644 index 0000000..8866c71 --- /dev/null +++ b/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/h26x/annex_b.rs @@ -0,0 +1,92 @@ +use crate::{ + h26x::{NalUnitWriter, 
RbspWriter, Result}, + webvtt::{WebvttTrack, WebvttWrite}, +}; +use byteorder::WriteBytesExt; +use std::{io::Write, time::Duration}; + +pub(crate) struct AnnexBWriter { + leading_zero_8bits_written: bool, + inner: W, +} + +impl AnnexBWriter { + pub fn new(inner: W) -> Self { + Self { + leading_zero_8bits_written: false, + inner, + } + } + + pub fn start_write_nal_unit(mut self) -> Result> { + if !self.leading_zero_8bits_written { + self.inner.write_u8(0)?; + self.leading_zero_8bits_written = true; + } + self.inner.write_all(&[0, 0, 1])?; + Ok(AnnexBNalUnitWriter { + inner: NalUnitWriter::new(self.inner), + }) + } +} + +pub(crate) trait WriteNalHeader { + fn write_to(self, writer: &mut W) -> Result<()>; +} + +pub(crate) struct AnnexBNalUnitWriter { + inner: NalUnitWriter, +} + +impl AnnexBNalUnitWriter { + pub fn write_nal_header( + mut self, + header: impl WriteNalHeader, + ) -> Result> { + header.write_to(&mut self.inner.inner)?; + Ok(AnnexBRbspWriter { + inner: RbspWriter::new(self.inner.inner), + }) + } +} + +pub(crate) struct AnnexBRbspWriter { + inner: RbspWriter, +} + +impl AnnexBRbspWriter { + pub fn finish_rbsp(self) -> Result> { + self.inner + .finish_rbsp() + .map(|writer| AnnexBWriter::new(writer)) + } +} + +impl WebvttWrite for AnnexBRbspWriter { + fn write_webvtt_header( + &mut self, + max_latency_to_video: Duration, + send_frequency_hz: u8, + subtitle_tracks: &[WebvttTrack], + ) -> std::io::Result<()> { + self.inner + .write_webvtt_header(max_latency_to_video, send_frequency_hz, subtitle_tracks) + } + + fn write_webvtt_payload( + &mut self, + track_index: u8, + chunk_number: u64, + chunk_version: u8, + video_offset: Duration, + webvtt_payload: &str, // TODO: replace with string type that checks for interior NULs + ) -> std::io::Result<()> { + self.inner.write_webvtt_payload( + track_index, + chunk_number, + chunk_version, + video_offset, + webvtt_payload, + ) + } +} diff --git a/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/lib.rs 
b/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/lib.rs index 874d072..81d3b01 100644 --- a/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/lib.rs +++ b/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/lib.rs @@ -1,2 +1,5 @@ +pub mod av1; pub mod h264; +pub mod h265; +pub mod h26x; pub mod webvtt; diff --git a/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/webvtt.rs b/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/webvtt.rs index ac692a1..e3cefcd 100644 --- a/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/webvtt.rs +++ b/deps/c-webvtt-in-video-stream/video-bytestream-tools/src/webvtt.rs @@ -1,4 +1,4 @@ -use crate::h264::{write_sei_header, CountingSink}; +use crate::h26x::Result; use byteorder::{BigEndian, WriteBytesExt}; use std::{io::Write, time::Duration}; use uuid::{uuid, Uuid}; @@ -17,6 +17,31 @@ trait WriteCStrExt: Write { impl WriteCStrExt for W {} +pub(crate) struct CountingSink { + count: usize, +} + +impl CountingSink { + pub fn new() -> Self { + Self { count: 0 } + } + + pub fn count(&self) -> usize { + self.count + } +} + +impl Write for CountingSink { + fn write(&mut self, buf: &[u8]) -> Result { + self.count += buf.len(); + Ok(buf.len()) + } + + fn flush(&mut self) -> Result<()> { + Ok(()) + } +} + pub struct WebvttTrack<'a> { pub default: bool, pub autoselect: bool, @@ -32,6 +57,7 @@ pub(crate) fn write_webvtt_header( max_latency_to_video: Duration, send_frequency_hz: u8, subtitle_tracks: &[WebvttTrack], + write_format_header: impl FnOnce(&mut W, usize) -> std::io::Result<()>, ) -> std::io::Result<()> { fn inner( writer: &mut W, @@ -82,7 +108,7 @@ pub(crate) fn write_webvtt_header( send_frequency_hz, subtitle_tracks, )?; - write_sei_header(writer, USER_DATA_UNREGISTERED, count.count())?; + write_format_header(writer, count.count())?; inner( writer, max_latency_to_video, @@ -98,6 +124,7 @@ pub(crate) fn write_webvtt_payload( chunk_version: u8, video_offset: Duration, webvtt_payload: &str, // 
TODO: replace with string type that checks for interior NULs + write_format_header: impl FnOnce(&mut W, usize) -> std::io::Result<()>, ) -> std::io::Result<()> { fn inner( writer: &mut W, @@ -125,7 +152,7 @@ pub(crate) fn write_webvtt_payload( video_offset, webvtt_payload, )?; - write_sei_header(writer, USER_DATA_UNREGISTERED, count.count())?; + write_format_header(writer, count.count())?; inner( writer, track_index, diff --git a/src/transcription-filter-callbacks.cpp b/src/transcription-filter-callbacks.cpp index 118cdfd..8bdd4b0 100644 --- a/src/transcription-filter-callbacks.cpp +++ b/src/transcription-filter-callbacks.cpp @@ -463,9 +463,9 @@ void output_packet_added_callback(obs_output_t *output, struct encoder_packet *p if (strcmp(obs_encoder_get_codec(encoder), "h264") == 0) { codec_flavor = H264AnnexB; } else if (strcmp(obs_encoder_get_codec(encoder), "av1") == 0) { - continue; + codec_flavor = AV1OBUs; } else if (strcmp(obs_encoder_get_codec(encoder), "hevc") == 0) { - continue; + codec_flavor = H265AnnexB; } else { continue; } @@ -479,7 +479,7 @@ void output_packet_added_callback(obs_output_t *output, struct encoder_packet *p uint8_t track_index = 0; // FIXME: this may be too lazy, i.e. languages should probably be locked in the signal handler instead for (auto &lang : gf.active_languages) { - auto lang_it = whisper_available_lang_reverse.find(lang); + auto lang_it = whisper_available_lang.find(lang); if (lang_it == whisper_available_lang.end()) { obs_log(LOG_WARNING, "requested language '%s' unknown, track not added",