Explorar el Código

LibAudio: Add spec comments to the FlacLoader

This way the FlacLoader can be more easily understood by someone that
doesn't already know the format inside out.
kleines Filmröllchen hace 3 años
padre
commit
cb8e37d436

+ 50 - 6
Userland/Libraries/LibAudio/FlacLoader.cpp

@@ -59,6 +59,7 @@ MaybeLoaderError FlacLoaderPlugin::initialize()
     return {};
 }
 
+// 11.5 STREAM
 MaybeLoaderError FlacLoaderPlugin::parse_header()
 {
     auto bit_input = LOADER_TRY(BigEndianInputBitStream::construct(*m_stream));
@@ -82,7 +83,7 @@ MaybeLoaderError FlacLoaderPlugin::parse_header()
     auto streaminfo_data_memory = LOADER_TRY(Core::Stream::MemoryStream::construct(streaminfo.data.bytes()));
     auto streaminfo_data = LOADER_TRY(BigEndianInputBitStream::construct(*streaminfo_data_memory));
 
-    // STREAMINFO block
+    // 11.10 METADATA_BLOCK_STREAMINFO
     m_min_block_size = LOADER_TRY(streaminfo_data->read_bits<u16>(16));
     FLAC_VERIFY(m_min_block_size >= 16, LoaderError::Category::Format, "Minimum block size must be 16");
     m_max_block_size = LOADER_TRY(streaminfo_data->read_bits<u16>(16));
@@ -139,11 +140,13 @@ MaybeLoaderError FlacLoaderPlugin::parse_header()
     return {};
 }
 
+// 11.13. METADATA_BLOCK_SEEKTABLE
 MaybeLoaderError FlacLoaderPlugin::load_seektable(FlacRawMetadataBlock& block)
 {
     auto memory_stream = LOADER_TRY(Core::Stream::MemoryStream::construct(block.data.bytes()));
     auto seektable_bytes = LOADER_TRY(BigEndianInputBitStream::construct(*memory_stream));
     for (size_t i = 0; i < block.length / 18; ++i) {
+        // 11.14. SEEKPOINT
         FlacSeekPoint seekpoint {
             .sample_index = LOADER_TRY(seektable_bytes->read_bits<u64>(64)),
             .byte_offset = LOADER_TRY(seektable_bytes->read_bits<u64>(64)),
@@ -155,9 +158,10 @@ MaybeLoaderError FlacLoaderPlugin::load_seektable(FlacRawMetadataBlock& block)
     return {};
 }
 
+// 11.6 METADATA_BLOCK
 ErrorOr<FlacRawMetadataBlock, LoaderError> FlacLoaderPlugin::next_meta_block(BigEndianInputBitStream& bit_input)
 {
-
+    // 11.7 METADATA_BLOCK_HEADER
     bool is_last_block = LOADER_TRY(bit_input.read_bit());
     // The block type enum constants agree with the specification
     FlacMetadataBlockType type = (FlacMetadataBlockType)LOADER_TRY(bit_input.read_bits<u8>(7));
@@ -270,6 +274,7 @@ LoaderSamples FlacLoaderPlugin::get_more_samples(size_t max_bytes_to_read_from_i
     return samples;
 }
 
+// 11.21. FRAME
 MaybeLoaderError FlacLoaderPlugin::next_frame(Span<Sample> target_vector)
 {
 #define FLAC_VERIFY(check, category, msg)                                                                                               \
@@ -283,11 +288,12 @@ MaybeLoaderError FlacLoaderPlugin::next_frame(Span<Sample> target_vector)
 
     // TODO: Check the CRC-16 checksum (and others) by keeping track of read data
 
-    // FLAC frame sync code starts header
+    // 11.22. FRAME_HEADER
     u16 sync_code = LOADER_TRY(bit_stream->read_bits<u16>(14));
     FLAC_VERIFY(sync_code == 0b11111111111110, LoaderError::Category::Format, "Sync code");
     bool reserved_bit = LOADER_TRY(bit_stream->read_bit());
     FLAC_VERIFY(reserved_bit == 0, LoaderError::Category::Format, "Reserved frame header bit");
+    // 11.22.2. BLOCKING STRATEGY
     [[maybe_unused]] bool blocking_strategy = LOADER_TRY(bit_stream->read_bit());
 
     u32 sample_count = TRY(convert_sample_count_code(LOADER_TRY(bit_stream->read_bits<u8>(4))));
@@ -303,16 +309,19 @@ MaybeLoaderError FlacLoaderPlugin::next_frame(Span<Sample> target_vector)
     reserved_bit = LOADER_TRY(bit_stream->read_bit());
     FLAC_VERIFY(reserved_bit == 0, LoaderError::Category::Format, "Reserved frame header end bit");
 
+    // 11.22.8. CODED NUMBER
     // FIXME: sample number can be 8-56 bits, frame number can be 8-48 bits
     m_current_sample_or_frame = LOADER_TRY(read_utf8_char(*bit_stream));
 
     // Conditional header variables
+    // 11.22.9. BLOCK SIZE INT
     if (sample_count == FLAC_BLOCKSIZE_AT_END_OF_HEADER_8) {
         sample_count = LOADER_TRY(bit_stream->read_bits<u32>(8)) + 1;
     } else if (sample_count == FLAC_BLOCKSIZE_AT_END_OF_HEADER_16) {
         sample_count = LOADER_TRY(bit_stream->read_bits<u32>(16)) + 1;
     }
 
+    // 11.22.10. SAMPLE RATE INT
     if (frame_sample_rate == FLAC_SAMPLERATE_AT_END_OF_HEADER_8) {
         frame_sample_rate = LOADER_TRY(bit_stream->read_bits<u32>(8)) * 1000;
     } else if (frame_sample_rate == FLAC_SAMPLERATE_AT_END_OF_HEADER_16) {
@@ -321,6 +330,7 @@ MaybeLoaderError FlacLoaderPlugin::next_frame(Span<Sample> target_vector)
         frame_sample_rate = LOADER_TRY(bit_stream->read_bits<u32>(16)) * 10;
     }
 
+    // 11.22.11. FRAME CRC
     // TODO: check header checksum, see above
     [[maybe_unused]] u8 checksum = LOADER_TRY(bit_stream->read_bits<u8>(8));
 
@@ -343,8 +353,10 @@ MaybeLoaderError FlacLoaderPlugin::next_frame(Span<Sample> target_vector)
         current_subframes.unchecked_append(move(subframe_samples));
     }
 
+    // 11.2. Overview ("The audio data is composed of...")
     bit_stream->align_to_byte_boundary();
 
+    // 11.23. FRAME_FOOTER
     // TODO: check checksum, see above
     [[maybe_unused]] u16 footer_checksum = LOADER_TRY(bit_stream->read_bits<u16>(16));
     dbgln_if(AFLACLOADER_DEBUG, "Subframe footer checksum: {}", footer_checksum);
@@ -425,6 +437,7 @@ MaybeLoaderError FlacLoaderPlugin::next_frame(Span<Sample> target_vector)
 #undef FLAC_VERIFY
 }
 
+// 11.22.3. INTERCHANNEL SAMPLE BLOCK SIZE
 ErrorOr<u32, LoaderError> FlacLoaderPlugin::convert_sample_count_code(u8 sample_count_code)
 {
     // single codes
@@ -444,6 +457,7 @@ ErrorOr<u32, LoaderError> FlacLoaderPlugin::convert_sample_count_code(u8 sample_
     return 256 * AK::exp2(sample_count_code - 8);
 }
 
+// 11.22.4. SAMPLE RATE
 ErrorOr<u32, LoaderError> FlacLoaderPlugin::convert_sample_rate_code(u8 sample_rate_code)
 {
     switch (sample_rate_code) {
@@ -482,6 +496,7 @@ ErrorOr<u32, LoaderError> FlacLoaderPlugin::convert_sample_rate_code(u8 sample_r
     }
 }
 
+// 11.22.6. SAMPLE SIZE
 ErrorOr<PcmSampleFormat, LoaderError> FlacLoaderPlugin::convert_bit_depth_code(u8 bit_depth_code)
 {
     switch (bit_depth_code) {
@@ -501,6 +516,7 @@ ErrorOr<PcmSampleFormat, LoaderError> FlacLoaderPlugin::convert_bit_depth_code(u
     }
 }
 
+// 11.22.5. CHANNEL ASSIGNMENT
 u8 frame_channel_type_to_channel_count(FlacFrameChannelType channel_type)
 {
     if (channel_type <= FlacFrameChannelType::Surround7p1)
@@ -508,6 +524,7 @@ u8 frame_channel_type_to_channel_count(FlacFrameChannelType channel_type)
     return 2;
 }
 
+// 11.25. SUBFRAME_HEADER
 ErrorOr<FlacSubframeHeader, LoaderError> FlacLoaderPlugin::next_subframe_header(BigEndianInputBitStream& bit_stream, u8 channel_index)
 {
     u8 bits_per_sample = static_cast<u16>(pcm_bits_per_sample(m_current_frame->bit_depth));
@@ -534,7 +551,7 @@ ErrorOr<FlacSubframeHeader, LoaderError> FlacLoaderPlugin::next_subframe_header(
     if (LOADER_TRY(bit_stream.read_bit()) != 0)
         return LoaderError { LoaderError::Category::Format, static_cast<size_t>(m_current_sample_or_frame), "Zero bit padding" };
 
-    // subframe type (encoding)
+    // 11.25.1. SUBFRAME TYPE
     u8 subframe_code = LOADER_TRY(bit_stream.read_bits<u8>(6));
     if ((subframe_code >= 0b000010 && subframe_code <= 0b000111) || (subframe_code > 0b001100 && subframe_code < 0b100000))
         return LoaderError { LoaderError::Category::Format, static_cast<size_t>(m_current_sample_or_frame), "Subframe type" };
@@ -553,7 +570,7 @@ ErrorOr<FlacSubframeHeader, LoaderError> FlacLoaderPlugin::next_subframe_header(
         subframe_type = (FlacSubframeType)subframe_code;
     }
 
-    // wasted bits per sample (unary encoding)
+    // 11.25.2. WASTED BITS PER SAMPLE FLAG
     bool has_wasted_bits = LOADER_TRY(bit_stream.read_bit());
     u8 k = 0;
     if (has_wasted_bits) {
@@ -578,6 +595,7 @@ ErrorOr<Vector<i32>, LoaderError> FlacLoaderPlugin::parse_subframe(FlacSubframeH
 
     switch (subframe_header.type) {
     case FlacSubframeType::Constant: {
+        // 11.26. SUBFRAME_CONSTANT
         u64 constant_value = LOADER_TRY(bit_input.read_bits<u64>(subframe_header.bits_per_sample - subframe_header.wasted_bits_per_sample));
         dbgln_if(AFLACLOADER_DEBUG, "Constant subframe: {}", constant_value);
 
@@ -616,6 +634,7 @@ ErrorOr<Vector<i32>, LoaderError> FlacLoaderPlugin::parse_subframe(FlacSubframeH
     return resampler.resample(samples);
 }
 
+// 11.29. SUBFRAME_VERBATIM
 // Decode a subframe that isn't actually encoded, usually seen in random data
 ErrorOr<Vector<i32>, LoaderError> FlacLoaderPlugin::decode_verbatim(FlacSubframeHeader& subframe, BigEndianInputBitStream& bit_input)
 {
@@ -632,6 +651,7 @@ ErrorOr<Vector<i32>, LoaderError> FlacLoaderPlugin::decode_verbatim(FlacSubframe
     return decoded;
 }
 
+// 11.28. SUBFRAME_LPC
 // Decode a subframe encoded with a custom linear predictor coding, i.e. the subframe provides the polynomial order and coefficients
 ErrorOr<Vector<i32>, LoaderError> FlacLoaderPlugin::decode_custom_lpc(FlacSubframeHeader& subframe, BigEndianInputBitStream& bit_input)
 {
@@ -677,6 +697,7 @@ ErrorOr<Vector<i32>, LoaderError> FlacLoaderPlugin::decode_custom_lpc(FlacSubfra
             // Even though FLAC operates at a maximum bit depth of 32 bits, modern encoders use super-large coefficients for maximum compression.
             // These will easily overflow 32 bits and cause strange white noise that abruptly stops intermittently (at the end of a frame).
             // The simple fix of course is to do intermediate computations in 64 bits.
+            // These considerations are not in the original FLAC spec, but have been added to the IETF standard: https://datatracker.ietf.org/doc/html/draft-ietf-cellar-flac-03#appendix-A.3
             sample += static_cast<i64>(coefficients[t]) * static_cast<i64>(decoded[i - t - 1]);
         }
         decoded[i] += sample >> lpc_shift;
@@ -685,6 +706,7 @@ ErrorOr<Vector<i32>, LoaderError> FlacLoaderPlugin::decode_custom_lpc(FlacSubfra
     return decoded;
 }
 
+// 11.27. SUBFRAME_FIXED
 // Decode a subframe encoded with one of the fixed linear predictor codings
 ErrorOr<Vector<i32>, LoaderError> FlacLoaderPlugin::decode_fixed_lpc(FlacSubframeHeader& subframe, BigEndianInputBitStream& bit_input)
 {
@@ -703,6 +725,23 @@ ErrorOr<Vector<i32>, LoaderError> FlacLoaderPlugin::decode_fixed_lpc(FlacSubfram
 
     dbgln_if(AFLACLOADER_DEBUG, "decoded length {}, {} order predictor", decoded.size(), subframe.order);
 
+    // Skip these comments if you don't care about the neat math behind fixed LPC :^)
+    // These coefficients for the recursive prediction formula are the only ones that can be resolved to polynomial predictor functions.
+    // The order equals the degree of the polynomial - 1, so the second-order predictor has an underlying polynomial of degree 1, a straight line.
+    // More specifically, the closest approximation to a polynomial is used, and the degree depends on how many previous values are available.
+    // This makes use of a very neat property of polynomials, which is that they are entirely characterized by their finitely many derivatives.
+    // (Mathematically speaking, the infinite Taylor series of any polynomial equals the polynomial itself.)
+    // Now remember that derivation is just the slope of the function, which is the same as the difference of two close-by values.
+    // Therefore, with two samples we can calculate the first derivative at a sample via the difference, which gives us a polynomial of degree 1.
+    // With three samples, we can do the same but also calculate the second derivative via the difference in the first derivatives.
+    // This gives us a polynomial of degree 2, as it has two "proper" (non-constant) derivatives.
+    // This can be continued for higher-order derivatives when we have more coefficients, giving us higher-order polynomials.
+    // In essence, it's akin to a Lagrangian polynomial interpolation for every sample (but already pre-solved).
+
+    // The coefficients for orders 0-3 originate from the SHORTEN codec:
+    // http://mi.eng.cam.ac.uk/reports/svr-ftp/auto-pdf/robinson_tr156.pdf page 4
+    // The coefficients for order 4 are undocumented in the original FLAC specification(s), but can now be found in
+    // https://datatracker.ietf.org/doc/html/draft-ietf-cellar-flac-03#section-10.2.5
     switch (subframe.order) {
     case 0:
         // s_0(t) = 0
@@ -735,20 +774,24 @@ ErrorOr<Vector<i32>, LoaderError> FlacLoaderPlugin::decode_fixed_lpc(FlacSubfram
     return decoded;
 }
 
+// 11.30. RESIDUAL
 // Decode the residual, the "error" between the function approximation and the actual audio data
 MaybeLoaderError FlacLoaderPlugin::decode_residual(Vector<i32>& decoded, FlacSubframeHeader& subframe, BigEndianInputBitStream& bit_input)
 {
+    // 11.30.1. RESIDUAL_CODING_METHOD
     auto residual_mode = static_cast<FlacResidualMode>(LOADER_TRY(bit_input.read_bits<u8>(2)));
     u8 partition_order = LOADER_TRY(bit_input.read_bits<u8>(4));
     size_t partitions = 1 << partition_order;
 
     if (residual_mode == FlacResidualMode::Rice4Bit) {
+        // 11.30.2. RESIDUAL_CODING_METHOD_PARTITIONED_EXP_GOLOMB
         // decode a single Rice partition with four bits for the order k
         for (size_t i = 0; i < partitions; ++i) {
             auto rice_partition = TRY(decode_rice_partition(4, partitions, i, subframe, bit_input));
             decoded.extend(move(rice_partition));
         }
     } else if (residual_mode == FlacResidualMode::Rice5Bit) {
+        // 11.30.3. RESIDUAL_CODING_METHOD_PARTITIONED_EXP_GOLOMB2
         // five bits equivalent
         for (size_t i = 0; i < partitions; ++i) {
             auto rice_partition = TRY(decode_rice_partition(5, partitions, i, subframe, bit_input));
@@ -760,10 +803,11 @@ MaybeLoaderError FlacLoaderPlugin::decode_residual(Vector<i32>& decoded, FlacSub
     return {};
 }
 
+// 11.30.2.1. EXP_GOLOMB_PARTITION and 11.30.3.1. EXP_GOLOMB2_PARTITION
 // Decode a single Rice partition as part of the residual, every partition can have its own Rice parameter k
 ALWAYS_INLINE ErrorOr<Vector<i32>, LoaderError> FlacLoaderPlugin::decode_rice_partition(u8 partition_type, u32 partitions, u32 partition_index, FlacSubframeHeader& subframe, BigEndianInputBitStream& bit_input)
 {
-    // Rice parameter / Exp-Golomb order
+    // 11.30.2.2. EXP GOLOMB PARTITION ENCODING PARAMETER and 11.30.3.2. EXP-GOLOMB2 PARTITION ENCODING PARAMETER
     u8 k = LOADER_TRY(bit_input.read_bits<u8>(partition_type));
 
     u32 residual_sample_count;

+ 7 - 0
Userland/Libraries/LibAudio/FlacLoader.h

@@ -38,6 +38,13 @@ ALWAYS_INLINE ErrorOr<u64> read_utf8_char(BigEndianInputBitStream& input);
 // decode a single number encoded with exponential golomb encoding of the specified order
 ALWAYS_INLINE ErrorOr<i32> decode_unsigned_exp_golomb(u8 order, BigEndianInputBitStream& bit_input);
 
+// Loader for the Free Lossless Audio Codec (FLAC)
+// This loader supports all audio features of FLAC, although audio from more than two channels is discarded.
+// The loader currently supports the STREAMINFO, PADDING, and SEEKTABLE metadata blocks.
+// See: https://xiph.org/flac/documentation_format_overview.html
+//      https://xiph.org/flac/format.html (identical to IETF draft version 2)
+//      https://datatracker.ietf.org/doc/html/draft-ietf-cellar-flac-02 (all section numbers refer to this specification)
+//      https://datatracker.ietf.org/doc/html/draft-ietf-cellar-flac-03 (newer IETF draft that uses incompatible numberings and names)
 class FlacLoaderPlugin : public LoaderPlugin {
 public:
     explicit FlacLoaderPlugin(StringView path);

+ 11 - 7
Userland/Libraries/LibAudio/FlacTypes.h

@@ -14,14 +14,16 @@
 
 namespace Audio {
 
-// Temporary constants for header blocksize/sample rate spec
+// These are not the actual values stored in the file! They are marker constants instead, only used temporarily in the decoder.
+// 11.22.3. INTERCHANNEL SAMPLE BLOCK SIZE
 #define FLAC_BLOCKSIZE_AT_END_OF_HEADER_8 0xffffffff
 #define FLAC_BLOCKSIZE_AT_END_OF_HEADER_16 0xfffffffe
+// 11.22.4. SAMPLE RATE
 #define FLAC_SAMPLERATE_AT_END_OF_HEADER_8 0xffffffff
 #define FLAC_SAMPLERATE_AT_END_OF_HEADER_16 0xfffffffe
 #define FLAC_SAMPLERATE_AT_END_OF_HEADER_16X10 0xfffffffd
 
-// Metadata block type, 7 bits.
+// 11.8 BLOCK_TYPE (7 bits)
 enum class FlacMetadataBlockType : u8 {
     STREAMINFO = 0,     // Important data about the audio format
     PADDING = 1,        // Non-data block to be ignored
@@ -33,7 +35,7 @@ enum class FlacMetadataBlockType : u8 {
     INVALID = 127,      // Error
 };
 
-// follows FLAC codes
+// 11.22.5. CHANNEL ASSIGNMENT
 enum class FlacFrameChannelType : u8 {
     Mono = 0,
     Stereo = 1,
@@ -49,7 +51,7 @@ enum class FlacFrameChannelType : u8 {
     // others are reserved
 };
 
-// follows FLAC codes
+// 11.25.1. SUBFRAME TYPE
 enum class FlacSubframeType : u8 {
     Constant = 0,
     Verbatim = 1,
@@ -58,13 +60,13 @@ enum class FlacSubframeType : u8 {
     // others are reserved
 };
 
-// follows FLAC codes
+// 11.30.1. RESIDUAL_CODING_METHOD
 enum class FlacResidualMode : u8 {
     Rice4Bit = 0,
     Rice5Bit = 1,
 };
 
-// Simple wrapper around any kind of metadata block
+// 11.6. METADATA_BLOCK
 struct FlacRawMetadataBlock {
     bool is_last_block;
     FlacMetadataBlockType type;
@@ -72,7 +74,7 @@ struct FlacRawMetadataBlock {
     ByteBuffer data;
 };
 
-// An abstract, parsed and validated FLAC frame
+// 11.22. FRAME_HEADER
 struct FlacFrameHeader {
     u32 sample_count;
     u32 sample_rate;
@@ -80,6 +82,7 @@ struct FlacFrameHeader {
     PcmSampleFormat bit_depth;
 };
 
+// 11.25. SUBFRAME_HEADER
 struct FlacSubframeHeader {
     FlacSubframeType type;
     // order for fixed and LPC subframes
@@ -88,6 +91,7 @@ struct FlacSubframeHeader {
     u8 bits_per_sample;
 };
 
+// 11.14. SEEKPOINT
 struct FlacSeekPoint {
     u64 sample_index;
     u64 byte_offset;