diff --git a/fuzz/fuzz_targets/differential.rs b/fuzz/fuzz_targets/differential.rs index 330c3a6..9b708c7 100644 --- a/fuzz/fuzz_targets/differential.rs +++ b/fuzz/fuzz_targets/differential.rs @@ -16,11 +16,26 @@ //! - **Malformed PAX records**: tar-core propagates PAX parse errors //! (malformed record format, non-UTF-8 keys). tar-rs silently skips //! malformed PAX records via `.flatten()`. +//! +//! - **Invalid/reserved base-256 numeric fields**: tar-core correctly rejects +//! numeric fields (e.g. size, mtime) whose leading byte is neither valid +//! octal ASCII nor a spec-defined base-256 marker (0x80 positive, 0xff +//! negative). The original tar-rs used `checked_shl(8)` which never +//! detected overflow, silently wrapping reserved leading bytes (e.g. 0x8e) +//! to garbage u64 values and continuing to parse. tar-core correctly +//! returns InvalidOctal for these malformed fields. +//! +//! - **Non-zero size on header-only entry types**: tar-core rejects entries +//! whose type byte indicates they carry no content (FIFOs, directories, +//! character/block devices, symbolic links, hard links) but whose `size` +//! field is non-zero. tar-rs silently accepts such archives and treats the +//! non-zero size as content bytes, which can lead to stream desynchronisation. #![no_main] use libfuzzer_sys::fuzz_target; use tar_core::parse::{Limits, ParseError}; +use tar_core::{HeaderError}; use tar_core_testutil::{parse_tar_core_detailed, parse_tar_rs, OwnedEntry}; /// Dump the raw 512-byte headers from the (post-fixup) data to stderr. @@ -52,6 +67,14 @@ fn is_allowlisted_divergence(err: &ParseError) -> bool { err, // tar-core rejects malformed PAX records; tar-rs silently skips them. ParseError::Pax(_) | ParseError::InvalidUtf8(_) + // tar-core correctly rejects numeric fields with reserved or overflowing + // base-256 leading bytes. tar-rs used checked_shl(8) which never + // detected overflow, silently wrapping these to garbage u64 values. 
+ | ParseError::Header(HeaderError::InvalidOctal(_)) + // tar-core rejects non-zero size fields on header-only entry types + // (FIFOs, directories, device nodes, symlinks). tar-rs accepts them + // and treats the bytes as file content, risking stream desync. + | ParseError::NonZeroSizeForHeaderOnlyEntry(_) ) } diff --git a/src/lib.rs b/src/lib.rs index 85a7c71..af0c092 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -733,6 +733,26 @@ impl EntryType { self == EntryType::Fifo } + /// Returns true for entry types that carry no data content and must + /// therefore have a zero size field: hardlinks, symlinks, character/block + /// devices, directories, and FIFOs. + /// + /// A non-zero size on one of these entry types is a sign of a malformed or + /// malicious archive — different tar implementations disagree on how to + /// handle such a value, creating an archive confusion vector. + #[must_use] + pub fn is_header_only(self) -> bool { + matches!( + self, + EntryType::Link + | EntryType::Symlink + | EntryType::Char + | EntryType::Block + | EntryType::Directory + | EntryType::Fifo + ) + } + /// Returns true if this is a contiguous file entry. #[must_use] pub fn is_contiguous(self) -> bool { @@ -1222,7 +1242,7 @@ impl Header { /// /// Returns [`HeaderError::FieldOverflow`] if the value cannot be /// represented. For ustar, the octal limit is 0o7777777 (2,097,151). - /// For GNU, the base-256 limit is 2^63 - 1. + /// For GNU, the base-256 limit is 2^56 - 1 (7-byte payload). pub fn set_uid(&mut self, uid: u64) -> Result<()> { self.set_numeric_field(|h| &mut h.uid, uid) } @@ -1236,7 +1256,7 @@ impl Header { /// /// Returns [`HeaderError::FieldOverflow`] if the value cannot be /// represented. For ustar, the octal limit is 0o7777777 (2,097,151). - /// For GNU, the base-256 limit is 2^63 - 1. + /// For GNU, the base-256 limit is 2^56 - 1 (7-byte payload). 
pub fn set_gid(&mut self, gid: u64) -> Result<()> { self.set_numeric_field(|h| &mut h.gid, gid) } @@ -1512,9 +1532,9 @@ pub(crate) fn parse_octal(bytes: &[u8]) -> Result { /// Encode a u64 value to a numeric field. /// -/// Uses octal ASCII if the value fits, otherwise GNU base-256 encoding -/// (high bit set in first byte). This matches tar-rs behavior for -/// compatibility. +/// Uses octal ASCII if the value fits, otherwise GNU base-256 encoding. +/// In base-256, `field[0]` is the pure marker byte (`0x80` for positive +/// values) and `field[1..N]` holds the value as big-endian binary. /// /// # Thresholds /// @@ -1524,7 +1544,9 @@ pub(crate) fn parse_octal(bytes: &[u8]) -> Result { /// # Errors /// /// Returns [`HeaderError::FieldOverflow`] if the value exceeds the field's -/// representable range (e.g., values >= 2^63 in an 8-byte field). +/// representable range: +/// - 8-byte fields: values >= 2^56 (7-byte payload = 56 data bits) +/// - 12-byte fields: any u64 fits (11-byte payload = 88 data bits) pub(crate) fn encode_numeric(field: &mut [u8; N], value: u64) -> Result<()> { const { assert!(N > 0, "encode_numeric requires N > 0") }; @@ -1536,11 +1558,15 @@ pub(crate) fn encode_numeric(field: &mut [u8; N], value: u64) -> }; if use_binary { - // GNU base-256 encoding: high bit of first byte is the indicator, - // leaving N*8-1 data bits. For 8-byte fields that's 63 bits, for - // 12-byte fields it's 95 bits (more than u64 needs). - let data_bits = N * 8 - 1; - if data_bits < 64 && value >= (1u64 << data_bits) { + // GNU base-256 encoding: field[0] is a pure marker byte (0x80 for + // positive values) and field[1..N] holds the value as big-endian. + // This gives N-1 payload bytes = (N-1)*8 data bits. + // + // For N=8: 7 payload bytes = 56 bits (max 2^56-1). + // For N=12: 11 payload bytes = 88 bits (well above u64::MAX; always ok). 
+ let payload_bytes = N - 1; + let payload_bits = payload_bytes * 8; + if payload_bits < 64 && value >= (1u64 << payload_bits) { return Err(HeaderError::FieldOverflow { field_len: N, detail: format!("numeric value {value}"), @@ -1548,16 +1574,17 @@ pub(crate) fn encode_numeric(field: &mut [u8; N], value: u64) -> } field.fill(0); + field[0] = 0x80; - // Write the value in big-endian to the last 8 bytes (or fewer) + // Write the value big-endian into the payload bytes field[1..N]. let value_bytes = value.to_be_bytes(); - if N >= 8 { + if payload_bytes >= 8 { + // payload is wider than u64; value fits in the last 8 bytes of payload. field[N - 8..].copy_from_slice(&value_bytes); } else { - field.copy_from_slice(&value_bytes[8 - N..]); + // payload is narrower than u64; use only the low payload_bytes bytes. + field[1..].copy_from_slice(&value_bytes[8 - payload_bytes..]); } - // Set high bit to indicate base-256 - field[0] |= 0x80; } else { // Standard octal ASCII encoding encode_octal(field, value)?; @@ -1618,13 +1645,31 @@ pub(crate) fn parse_numeric(bytes: &[u8]) -> Result { // Check for GNU base-256 encoding (high bit set) if bytes[0] & 0x80 != 0 { - // Base-256: interpret remaining bytes as big-endian, masking off the - // high bit of the first byte + // Reject negative base-256 values. GNU tar uses 0xff as the leading + // byte for two's complement negatives (e.g. pre-epoch timestamps). + // Bit 6 of the first byte is the sign bit in the 7 remaining payload + // bits; if set, the value is negative and we reject it. + // + // This explicit check is necessary because the overflow-based detection + // below only catches values that exceed 64 bits. For short fields (e.g. + // 8-byte uid/gid/devmajor/devminor), a 0xff-prefixed negative value has + // only 63 payload bits after masking byte[0] to 0x7f, which fits in u64 + // without overflow and would otherwise be silently accepted. 
+ if bytes[0] & 0x40 != 0 { + return Err(HeaderError::InvalidOctal(bytes.to_vec())); + } + + // Positive base-256: decode the field as big-endian, masking off the + // high bit of the first byte. Values too large for u64 are rejected. + // + // Note: we use checked_mul(256) rather than checked_shl(8) because + // checked_shl only guards against an out-of-range shift count, not + // against the result overflowing u64. let mut value: u64 = 0; for (i, &byte) in bytes.iter().enumerate() { let b = if i == 0 { byte & 0x7f } else { byte }; value = value - .checked_shl(8) + .checked_mul(256) .and_then(|v| v.checked_add(u64::from(b))) .ok_or_else(|| HeaderError::InvalidOctal(bytes.to_vec()))?; } @@ -2198,6 +2243,118 @@ mod tests { assert!(!header.is_ustar()); } + #[test] + fn test_base256_negative_is_rejected() { + // GNU tar uses 0xff as the leading byte for negative base-256 values + // (two's complement), primarily for pre-epoch timestamps. These should + // be rejected rather than silently decoded as huge positive values. + + // 0xff followed by zeros: a huge negative value (-2^88) in two's + // complement base-256; would otherwise decode as garbage positive u64. + let neg_one: &[u8] = &[ + 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ]; + assert!( + parse_numeric(neg_one).is_err(), + "0xff... (negative base-256) must be rejected, not decoded as a huge positive u64" + ); + + // 0xff followed by other bytes (a pre-epoch timestamp; encodes -352) + let pre_epoch: &[u8] = &[ + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe, 0xa0, + ]; + assert!( + parse_numeric(pre_epoch).is_err(), + "0xff... 
pre-epoch timestamp must be rejected, not decoded as garbage" + ); + + // Ensure it's specifically an InvalidOctal error + match parse_numeric(neg_one) { + Err(HeaderError::InvalidOctal(_)) => {} // expected + other => panic!("expected InvalidOctal, got {other:?}"), + } + } + + #[test] + fn test_base256_positive_still_works() { + // 0x80 prefix (positive base-256) must continue to work correctly. + let cases: &[(&[u8], u64)] = &[ + (&[0x80, 0x00, 0x00, 0x01], 1), + (&[0x80, 0x01, 0x00], 256), + // Large value that wouldn't fit in octal + ( + &[ + 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, + ], + 1u64 << 40, + ), + // 0x80 followed by 0xff: the payload byte is 0xff = 255 (the 0x80 + // marker byte contributes 0x00 to the value after masking) + (&[0x80, 0xff], 255), + ]; + for (input, expected) in cases { + assert_eq!( + parse_numeric(input).unwrap(), + *expected, + "positive base-256 parse_numeric({input:?}) should succeed" + ); + } + } + + // ------------------------------------------------------------------------- + // Base-256 short-field (8-byte) sign-bit tests + // ------------------------------------------------------------------------- + + #[test] + fn test_base256_negative_short_field_rejected() { + // [0xff; 8] represents -1 in GNU tar's 8-byte base-256 two's complement + // encoding. The 8-byte form has only 63 payload bits after masking, so + // the previous overflow-based detection silently accepted it as i64::MAX. + let neg_one: &[u8] = &[0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]; + match parse_numeric(neg_one) { + Err(HeaderError::InvalidOctal(_)) => {} // expected + other => panic!("expected InvalidOctal for 8-byte 0xff…, got {other:?}"), + } + } + + #[test] + fn test_base256_negative_short_field_large_value_rejected() { + // [0xff, 0x00, ..] is the "most negative" 8-byte base-256 value. 
+ let most_negative: &[u8] = &[0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; + match parse_numeric(most_negative) { + Err(HeaderError::InvalidOctal(_)) => {} // expected + other => panic!("expected InvalidOctal for [0xff, 0x00×7], got {other:?}"), + } + } + + #[test] + fn test_base256_positive_short_field_still_works() { + // [0x80, 0x00, …, 0x01] has bit 7 set (base-256 marker) and bit 6 clear + // (positive), encoding the value 1. + let input: &[u8] = &[0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01]; + assert_eq!( + parse_numeric(input).unwrap(), + 1, + "8-byte positive base-256 value 1 should parse correctly" + ); + } + + #[test] + fn test_base256_max_positive_short_field_works() { + // The decoder accepts any byte[0] with bit 7 set and bit 6 clear as a + // positive base-256 value, for backwards compatibility with archives + // produced by tar-rs (which uses the same OR approach). byte[0]=0xbf + // is the maximum such value (0x80..=0xbf accepted, 0xc0..=0xff rejected + // as negative). This is a decoder leniency test; the encoder now only + // produces byte[0]=0x80 (pure marker). + let input: &[u8] = &[0xbf, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff]; + assert_eq!( + parse_numeric(input).unwrap(), + 4_611_686_018_427_387_903, + "decoder must accept byte[0]=0xbf (0x80..=0xbf range) for compat with tar-rs" + ); + } + // ========================================================================= // PAX Extension Tests // ========================================================================= @@ -2450,6 +2607,65 @@ mod tests { check::<8>(2_097_152, true); } + /// For 8-byte fields (uid/gid/devmajor/devminor), the GNU tar base-256 + /// format writes a pure 0x80 marker in byte[0] and the value in bytes[1..8] + /// (7 bytes = 56 bits). Encoding must produce field[0] == 0x80 exactly, + /// and must reject values >= 2^56 that would overflow the 7-byte payload. 
+ #[test] + fn test_encode_numeric_8_produces_pure_0x80_marker() { + // The marker byte must always be exactly 0x80 (not 0x81..0xbf). + // Values [2^21, 2^56-1] use base-256; byte[0] must be 0x80 exactly. + // Values >= 2^56 would need bits in byte[0] beyond the marker, producing + // 0x81..0xbf — not interoperable with a strict GNU tar decoder. + let cases: &[u64] = &[ + 2_097_152, // 2^21, first base-256 value + 0xFFFF_FFFF, // max uint32 (real-world uid/gid ceiling) + (1u64 << 56) - 1, // 2^56 - 1, maximum encodable value + ]; + for &value in cases { + let mut field = [0u8; 8]; + encode_numeric(&mut field, value).unwrap(); + assert_eq!( + field[0], 0x80, + "N=8 base-256 must use pure 0x80 marker, got {:#04x} for value {value}", + field[0] + ); + assert_eq!( + parse_numeric(&field).unwrap(), + value, + "roundtrip failed for value {value}" + ); + } + + // Verify the value lands in bytes[1..8], not byte[0]. + // For 2^21 = 0x0000_0000_0020_0000, bytes[1..8] = [0,0,0,0,0x20,0,0]. + let mut field = [0u8; 8]; + encode_numeric(&mut field, 2_097_152u64).unwrap(); + assert_eq!( + field, + [0x80, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00], + "value must be encoded in bytes[1..8]" + ); + } + + /// Values >= 2^56 cannot fit in the 7-byte payload of an 8-byte + /// base-256 field; the encoder must reject them. + #[test] + fn test_encode_numeric_8_rejects_over_56_bits() { + let over_56: &[u64] = &[ + 1u64 << 56, // 2^56, first value that doesn't fit + 1u64 << 62, // previous limit — now also rejected + u64::MAX, + ]; + for &value in over_56 { + let mut field = [0u8; 8]; + assert!( + encode_numeric(&mut field, value).is_err(), + "expected error for value {value} (>= 2^56) in N=8 field" + ); + } + } + #[test] fn test_header_format_detection() { // (header, is_ustar, is_gnu) @@ -3307,17 +3523,18 @@ mod tests { prop_assert_eq!(parse_octal(&field).unwrap(), value); } - // 8-byte base-256 has 63 data bits, so values < 2^63 roundtrip. 
+ // 8-byte base-256 uses byte[0]=0x80 as a pure marker and + // bytes[1..8] as the 7-byte payload (56 data bits, max 2^56-1). #[test] - fn test_encode_numeric_8_roundtrip(value in 0u64..=(i64::MAX as u64)) { + fn test_encode_numeric_8_roundtrip(value in 0u64..=(1u64 << 56) - 1) { let mut field = [0u8; 8]; encode_numeric(&mut field, value).unwrap(); prop_assert_eq!(parse_numeric(&field).unwrap(), value); } - // Values >= 2^63 cannot be represented in an 8-byte base-256 field. + // Values >= 2^56 overflow the 7-byte payload of an 8-byte field. #[test] - fn test_encode_numeric_8_rejects_huge(value in (i64::MAX as u64 + 1)..=u64::MAX) { + fn test_encode_numeric_8_rejects_huge(value in (1u64 << 56)..=u64::MAX) { let mut field = [0u8; 8]; prop_assert!(encode_numeric(&mut field, value).is_err()); } diff --git a/src/parse.rs b/src/parse.rs index 4c62b04..4dac36d 100644 --- a/src/parse.rs +++ b/src/parse.rs @@ -291,6 +291,25 @@ pub enum ParseError { /// Position in the stream where EOF occurred. pos: u64, }, + + /// A single all-zero 512-byte block was encountered mid-stream. + /// + /// A valid end-of-archive requires two consecutive zero blocks (POSIX). + /// GNU tar and Go's archive/tar treat a lone zero block as an error or + /// end-of-archive, never silently skipping it. Skipping it would let + /// entries after the zero block be visible to tar-core but hidden from + /// other parsers — an archive confusion vector. + #[error("stray zero block in archive")] + StrayZeroBlock, + + /// A header-only entry type (symlink, hardlink, directory, character/block + /// device, FIFO) had a non-zero size field. These entry types carry no data + /// content; their size must be zero. A non-zero size creates a parser + /// confusion vector: tar-core would report bytes to skip while parsers that + /// enforce size=0 (e.g. Go's archive/tar) would read those bytes as the + /// next header, hiding entries from a tar-core-based scanner. 
+ #[error("non-zero size {0} for header-only entry type")] + NonZeroSizeForHeaderOnlyEntry(u64), } /// Result type for parsing operations. @@ -840,11 +859,11 @@ impl Parser { * HEADER_SIZE; return Ok(ParseEvent::End { consumed }); } - // Not end of archive — single stray zero block; skip it and - // continue with the next block as a header. - return self - .parse_header(&input[HEADER_SIZE..], slices) - .map(|e| e.add_consumed(HEADER_SIZE)); + // Not end of archive — single stray zero block. Silently + // skipping it would let entries after the zero block be visible + // to tar-core but hidden from Go's archive/tar (which errors) + // and GNU tar / Python (which stop). Return an error instead. + return Err(ParseError::StrayZeroBlock); } // Check pending entry limit @@ -1391,6 +1410,11 @@ impl Parser { } PAX_SIZE => { if let Some(v) = parse_pax_u64(&ext, PAX_SIZE)? { + // Guard against values that would cause padded_size() to + // wrap around: next_multiple_of(512) on a value near + // u64::MAX overflows to 0, causing stream desynchronization. + v.checked_next_multiple_of(HEADER_SIZE as u64) + .ok_or(ParseError::InvalidSize(v))?; entry_size = v; } } @@ -1548,6 +1572,16 @@ impl Parser { // Validate final path length self.limits.check_path_len(path.len())?; + // Header-only entry types (symlinks, hardlinks, directories, + // character/block devices, FIFOs) carry no data content — their size + // field must be zero. Enforce this AFTER PAX size overrides so an + // attacker cannot bypass the check by leaving size=0 in the header + // and injecting a non-zero value via a PAX "size" record. 
+ let entry_type = header.entry_type(); + if entry_type.is_header_only() && entry_size != 0 { + return Err(ParseError::NonZeroSizeForHeaderOnlyEntry(entry_size)); + } + let entry = ParsedEntry { header, entry_type: header.entry_type(), @@ -3125,6 +3159,22 @@ mod tests { } } + /// A PAX 'size' field of u64::MAX is valid as a u64, but + /// next_multiple_of(512) would overflow, causing stream + /// desynchronization. The parser must reject it with InvalidSize. + #[test] + fn test_pax_size_overflow_rejected() { + // u64::MAX = 18446744073709551615; next_multiple_of(512) wraps to 0 + let size_str = format!("{}", u64::MAX); + let archive = make_archive_with_pax("size", size_str.as_bytes()); + let mut parser = Parser::new(Limits::default()); + let err = parser.parse(&archive).unwrap_err(); + assert!( + matches!(err, ParseError::InvalidSize(v) if v == u64::MAX), + "expected InvalidSize(u64::MAX), got {err:?}" + ); + } + // ========================================================================= // Sparse entry helpers // ========================================================================= @@ -4270,4 +4320,173 @@ mod tests { ), } } + + // ========================================================================= + // Stray zero block tests + // ========================================================================= + + /// A single all-zero 512-byte block appearing mid-stream (between two valid + /// entries) must be rejected. Go's archive/tar returns ErrHeader; GNU tar + /// and Python stop at the first zero block (treating it as end-of-archive). + /// Silently skipping it creates a confusion window where tar-core sees more + /// entries than other parsers do. 
+ #[test] + fn test_stray_zero_block_is_error() { + let mut archive = Vec::new(); + + // First valid file entry (0 bytes of content) + archive.extend_from_slice(&make_header(b"first.txt", 0, b'0')); + + // Single all-zero 512-byte block — stray, not a valid EOA pair + archive.extend(zeroes(512)); + + // Second valid file entry — would be hidden from Go/GNU/Python parsers + archive.extend_from_slice(&make_header(b"second.txt", 0, b'0')); + + // Two zero blocks for proper EOA + archive.extend(zeroes(1024)); + + let mut parser = Parser::new(Limits::default()); + + // Consume the first entry + let event = parser.parse(&archive).unwrap(); + let consumed = match event { + ParseEvent::Entry { consumed, entry } => { + assert_eq!(entry.path.as_ref(), b"first.txt"); + consumed + } + other => panic!("Expected first Entry, got {:?}", other), + }; + + // The next parse call should encounter the stray zero block and error + let result = parser.parse(&archive[consumed..]); + assert!( + matches!(result, Err(ParseError::StrayZeroBlock)), + "Expected StrayZeroBlock error, got {:?}", + result + ); + } + + /// Two consecutive all-zero 512-byte blocks constitute a valid + /// end-of-archive marker and must still produce ParseEvent::End. + /// This is a regression guard for the normal EOA path. 
+ #[test] + fn test_two_zero_blocks_is_valid_eoa() { + let mut archive = Vec::new(); + + // One valid file entry + archive.extend_from_slice(&make_header(b"file.txt", 0, b'0')); + + // Two consecutive zero blocks = valid EOA + archive.extend(zeroes(1024)); + + let mut parser = Parser::new(Limits::default()); + + // Consume the file entry + let event = parser.parse(&archive).unwrap(); + let consumed = match event { + ParseEvent::Entry { consumed, entry } => { + assert_eq!(entry.path.as_ref(), b"file.txt"); + consumed + } + other => panic!("Expected Entry, got {:?}", other), + }; + + // The two zero blocks should produce End, not an error + let event = parser.parse(&archive[consumed..]).unwrap(); + assert!( + matches!(event, ParseEvent::End { .. }), + "Expected End for two-zero-block EOA, got {:?}", + event + ); + } + + // ========================================================================= + // Header-only entry type size validation tests + // ========================================================================= + + #[test] + fn test_header_only_type_nonzero_size_rejected() { + // A symlink header (type '2') with a non-zero size field must be + // rejected: symlinks carry no data content. + let header = make_header(b"mylink", 512, b'2'); + let mut archive = Vec::new(); + archive.extend_from_slice(&header); + // 512 bytes of "content" (would be the next header in a real attack) + archive.extend(zeroes(512)); + archive.extend(zeroes(1024)); // EOA + + let mut parser = Parser::new(Limits::default()); + let result = parser.parse(&archive); + assert!( + matches!(result, Err(ParseError::NonZeroSizeForHeaderOnlyEntry(512))), + "Expected NonZeroSizeForHeaderOnlyEntry(512), got {:?}", + result + ); + } + + #[test] + fn test_header_only_type_zero_size_ok() { + // A symlink header with size=0 must be accepted (regression guard). 
+ let mut header = make_header(b"mylink", 0, b'2'); + // Set a link target + header[157..164].copy_from_slice(b"target/"); + // Recompute checksum + let hdr = Header::from_bytes(&header); + let checksum = hdr.compute_checksum(); + let checksum_str = format!("{checksum:06o}\0 "); + header[148..156].copy_from_slice(checksum_str.as_bytes()); + + let mut archive = Vec::new(); + archive.extend_from_slice(&header); + archive.extend(zeroes(1024)); // EOA + + let mut parser = Parser::new(Limits::default()); + let event = parser.parse(&archive).unwrap(); + match event { + ParseEvent::Entry { entry, .. } => { + assert!(entry.is_symlink(), "Expected symlink entry"); + assert_eq!(entry.size, 0, "Symlink size must be 0"); + } + other => panic!("Expected Entry, got {:?}", other), + } + } + + #[test] + fn test_hardlink_nonzero_size_rejected() { + // A hardlink header (type '1') with a non-zero size field must be + // rejected. + let header = make_header(b"hardlink", 512, b'1'); + let mut archive = Vec::new(); + archive.extend_from_slice(&header); + archive.extend(zeroes(512)); + archive.extend(zeroes(1024)); // EOA + + let mut parser = Parser::new(Limits::default()); + let result = parser.parse(&archive); + assert!( + matches!(result, Err(ParseError::NonZeroSizeForHeaderOnlyEntry(512))), + "Expected NonZeroSizeForHeaderOnlyEntry(512), got {:?}", + result + ); + } + + #[test] + fn test_directory_nonzero_size_rejected() { + // A directory header (type '5') with a non-zero size field must be + // rejected. + let header = make_header(b"mydir/", 512, b'5'); + let mut archive = Vec::new(); + archive.extend_from_slice(&header); + archive.extend(zeroes(512)); + archive.extend(zeroes(1024)); // EOA + + let mut parser = Parser::new(Limits::default()); + let result = parser.parse(&archive); + assert!( + matches!(result, Err(ParseError::NonZeroSizeForHeaderOnlyEntry(512))), + "Expected NonZeroSizeForHeaderOnlyEntry(512), got {:?}", + result + ); + } }