diff --git a/src/archive.rs b/src/archive.rs index 4d569c63..6ca021e9 100644 --- a/src/archive.rs +++ b/src/archive.rs @@ -7,7 +7,7 @@ use std::io::{self, SeekFrom}; use std::marker; use std::path::Path; -use crate::entry::{EntryFields, EntryIo}; +use crate::entry::{EntryCursor, EntryFields, EntrySegmentKind}; use crate::error::TarError; use crate::header::BLOCK_SIZE; use crate::other; @@ -39,7 +39,7 @@ pub struct Entries<'a, R: 'a + Read> { _ignored: marker::PhantomData<&'a Archive>, } -trait SeekRead: Read + Seek {} +pub(crate) trait SeekRead: Read + Seek {} impl SeekRead for R {} struct EntriesFields<'a> { @@ -345,11 +345,18 @@ impl<'a> EntriesFields<'a> { size = pax_size; } } + let real_size = header.size()?; + + let mut cursor = EntryCursor::default(); + cursor.append_segment(EntrySegmentKind::Data, size, file_pos); + let ret = EntryFields { size: size, + real_size: real_size, header_pos: header_pos, file_pos: file_pos, - data: vec![EntryIo::Data((&self.archive.inner).take(size))], + data: &self.archive.inner, + data_seekable: self.seekable_archive.map(|a| &a.inner), header: header, long_pathname: None, long_linkname: None, @@ -360,6 +367,7 @@ impl<'a> EntriesFields<'a> { preserve_mtime: self.archive.inner.preserve_mtime, overwrite: self.archive.inner.overwrite, preserve_ownerships: self.archive.inner.preserve_ownerships, + cursor: cursor, }; // Store where the next entry is, rounding up by 512 bytes (the size of @@ -470,14 +478,14 @@ impl<'a> EntriesFields<'a> { // the same as the current offset (described by the list of blocks) as // well as the amount of data read equals the size of the entry // (`Header::entry_size`). - entry.data.truncate(0); + entry.cursor.segments.truncate(0); let mut cur = 0; let mut remaining = entry.size; { - let data = &mut entry.data; - let reader = &self.archive.inner; let size = entry.size; + let file_pos = entry.file_pos; + let cursor = &mut entry.cursor; let mut add_block = |block: &GnuSparseHeader| -> io::Result<_> { if block.is_empty() { return Ok(()); @@ -495,8 +503,7 @@ impl<'a> EntriesFields<'a> { blocks", )); } else if cur < off { - let block = io::repeat(0).take(off - cur); - data.push(EntryIo::Pad(block)); + cursor.append_segment(EntrySegmentKind::Pad, off, file_pos); } cur = off .checked_add(len) @@ -507,7 +514,7 @@ impl<'a> EntriesFields<'a> { listed", ) })?; - data.push(EntryIo::Data(reader.take(len))); + cursor.append_segment(EntrySegmentKind::Data, cur, file_pos); Ok(()) }; for block in gnu.sparse.iter() { diff --git a/src/entry.rs b/src/entry.rs index 843719f0..154800f8 100644 --- a/src/entry.rs +++ b/src/entry.rs @@ -9,7 +9,7 @@ use std::path::{Component, Path, PathBuf}; use filetime::{self, FileTime}; -use crate::archive::ArchiveInner; +use crate::archive::{ArchiveInner, SeekRead}; use crate::error::TarError; use crate::header::bytes2path; use crate::other; @@ -18,8 +18,12 @@ use crate::{Archive, Header, PaxExtensions}; /// A read-only view into an entry of an archive. /// /// This structure is a window into a portion of a borrowed archive which can -/// be inspected. It acts as a file handle by implementing the Reader trait. An +/// be inspected. It acts as a file handle by implementing the [Read] trait. An /// entry cannot be rewritten once inserted into an archive. +/// +/// Note that the [Seek] implementation for this type is only valid for values +/// obtained from [`Archive::entries_with_seek`]. Calling [Seek::seek] on a +/// value obtained otherwise will return an error. pub struct Entry<'a, R: 'a + Read> { fields: EntryFields<'a>, _ignored: marker::PhantomData<&'a Archive>, @@ -34,9 +38,12 @@ pub struct EntryFields<'a> { pub mask: u32, pub header: Header, pub size: u64, + pub real_size: u64, pub header_pos: u64, pub file_pos: u64, - pub data: Vec>, + pub data: &'a ArchiveInner, + pub data_seekable: Option<&'a ArchiveInner>, + pub cursor: EntryCursor, pub unpack_xattrs: bool, pub preserve_permissions: bool, pub preserve_ownerships: bool, @@ -44,9 +51,24 @@ pub struct EntryFields<'a> { pub overwrite: bool, } -pub enum EntryIo<'a> { - Pad(io::Take), - Data(io::Take<&'a ArchiveInner>), +#[derive(Default)] +pub struct EntryCursor { + pub pos: u64, + pub segments: Vec, + pub cur_segment: usize, +} + +pub struct EntrySegment { + pub file_off: u64, + pub start: u64, + pub end: u64, + pub kind: EntrySegmentKind, +} + +#[derive(Debug)] +pub enum EntrySegmentKind { + Pad, + Data, } /// When unpacking items the unpacked thing is returned to allow custom @@ -281,6 +303,12 @@ impl<'a, R: Read> Read for Entry<'a, R> { } } +impl<'a, R: Read + Seek> Seek for Entry<'a, R> { + fn seek(&mut self, pos: SeekFrom) -> io::Result { + self.fields.seek(pos) + } +} + impl<'a> EntryFields<'a> { pub fn from(entry: Entry) -> EntryFields { entry.fields @@ -659,21 +687,24 @@ impl<'a> EntryFields<'a> { Err(err) } })?; - for io in self.data.drain(..) { - match io { - EntryIo::Data(mut d) => { - let expected = d.limit(); - if io::copy(&mut d, &mut f)? != expected { + for seg in &self.cursor.segments[self.cursor.cur_segment..] { + let limit = seg.end - self.cursor.pos; + match seg.kind { + EntrySegmentKind::Data => { + let mut d = (&mut self.data).take(limit); + if io::copy(&mut d, &mut f)? != limit { return Err(other("failed to write entire file")); } } - EntryIo::Pad(d) => { + EntrySegmentKind::Pad => { // TODO: checked cast to i64 - let to = SeekFrom::Current(d.limit() as i64); + let to = SeekFrom::Current(limit as i64); let size = f.seek(to)?; f.set_len(size)?; } } + self.cursor.pos += limit; + self.cursor.cur_segment += 1; } Ok(f) })() @@ -951,23 +982,81 @@ impl<'a> EntryFields<'a> { impl<'a> Read for EntryFields<'a> { fn read(&mut self, into: &mut [u8]) -> io::Result { - loop { - match self.data.get_mut(0).map(|io| io.read(into)) { - Some(Ok(0)) => { - self.data.remove(0); - } - Some(r) => return r, - None => return Ok(0), + for seg in &self.cursor.segments[self.cursor.cur_segment..] { + let limit = seg.end - self.cursor.pos; + let n_read = match seg.kind { + EntrySegmentKind::Pad => io::repeat(0).take(limit).read(into), + EntrySegmentKind::Data => self.data.take(limit).read(into), + }?; + if n_read != 0 { + self.cursor.pos += n_read as u64; + return Ok(n_read); } + self.cursor.cur_segment += 1; } + Ok(0) } } -impl<'a> Read for EntryIo<'a> { - fn read(&mut self, into: &mut [u8]) -> io::Result { - match *self { - EntryIo::Pad(ref mut io) => io.read(into), - EntryIo::Data(ref mut io) => io.read(into), +impl<'a> Seek for EntryFields<'a> { + fn seek(&mut self, pos: SeekFrom) -> io::Result { + let mut data = self.data_seekable.ok_or_else(|| { + io::Error::new( + io::ErrorKind::Other, + "seeking only supported on entries produced from Archive::entries_with_seek", + ) + })?; + + let target = match pos { + SeekFrom::Start(n) => Some(n), + SeekFrom::End(n) => self.real_size.checked_add_signed(n), + SeekFrom::Current(n) => self.cursor.pos.checked_add_signed(n), } + .ok_or_else(|| io::Error::new(io::ErrorKind::Other, "seek pos overflow"))?; + + if target == self.cursor.pos { + return Ok(self.cursor.pos); + } + + let cur_segment = self.cursor.segments.partition_point(|s| s.end <= target); + let Some(seg) = self.cursor.segments.get(cur_segment) else { + self.cursor.pos = self.real_size; + self.cursor.cur_segment = cur_segment; + return Ok(self.cursor.pos); + }; + + let pos = match seg.kind { + EntrySegmentKind::Pad => SeekFrom::Start(seg.file_off), + EntrySegmentKind::Data => SeekFrom::Start(seg.file_off + (target - seg.start)), + }; + data.seek(pos)?; + + self.cursor.pos = target; + self.cursor.cur_segment = cur_segment; + Ok(self.cursor.pos) + } +} + +impl EntryCursor { + pub fn append_segment(&mut self, kind: EntrySegmentKind, end: u64, entry_file_pos: u64) { + let (start, file_off) = match self.segments.last() { + Some(prev) => ( + prev.end, + match prev.kind { + EntrySegmentKind::Pad => prev.file_off, + EntrySegmentKind::Data => prev.file_off + (prev.end - prev.start), + }, + ), + None => (0, entry_file_pos), + }; + debug_assert!(end >= start); + + let seg = EntrySegment { + file_off: file_off, + start: start, + end: end, + kind: kind, + }; + self.segments.push(seg); } } diff --git a/tests/all.rs b/tests/all.rs index 0ad67f98..b6e5d728 100644 --- a/tests/all.rs +++ b/tests/all.rs @@ -6,7 +6,7 @@ extern crate xattr; use std::fs::{self, File}; use std::io::prelude::*; -use std::io::{self, BufWriter, Cursor}; +use std::io::{self, BufWriter, Cursor, SeekFrom}; use std::iter::repeat; use std::path::{Path, PathBuf}; @@ -264,6 +264,43 @@ fn reading_entries_with_seek() { reading_entries_common(ar.entries_with_seek().unwrap()); } +#[test] +fn seeking_entries() { + let rdr = Cursor::new(tar!("reading_files.tar")); + let mut ar = Archive::new(rdr); + let mut entries = ar.entries_with_seek().unwrap(); + + let mut a = t!(entries.next().unwrap()); + assert_eq!(&*a.header().path_bytes(), b"a"); + assert_eq!(t!(a.seek(SeekFrom::End(0))), 22); + assert_eq!(t!(a.seek(SeekFrom::Start(2))), 2); + let mut s = String::new(); + t!(a.read_to_string(&mut s)); + assert_eq!(s, "a\na\na\na\na\na\na\na\na\na\n"); + s.truncate(0); + assert!(a.seek(SeekFrom::End(-23)).is_err()); + t!(a.seek(SeekFrom::Current(-5))); + t!(a.read_to_string(&mut s)); + assert_eq!(s, "\na\na\n"); + t!(a.seek(SeekFrom::End(-10))); + + let mut b = t!(entries.next().unwrap()); + assert_eq!(&*b.header().path_bytes(), b"b"); + s.truncate(0); + t!(b.read_to_string(&mut s)); + assert_eq!(s, "b\nb\nb\nb\nb\nb\nb\nb\nb\nb\nb\n"); + s.truncate(0); + t!(b.seek(SeekFrom::Start(0))); + t!(b.read_to_string(&mut s)); + assert_eq!(s, "b\nb\nb\nb\nb\nb\nb\nb\nb\nb\nb\n"); + s.truncate(0); + assert_eq!(t!(b.seek(SeekFrom::End(8))), 22); + t!(b.read_to_string(&mut s)); + assert_eq!(s, ""); + + assert!(entries.next().is_none()); +} + struct LoggingReader { inner: R, read_bytes: u64, @@ -1346,6 +1383,47 @@ fn writing_sparse() { assert!(entries.next().is_none()); } +#[test] +fn seeking_sparse() { + let rdr = Cursor::new(tar!("sparse.tar")); + let mut ar = Archive::new(rdr); + let mut entries = t!(ar.entries_with_seek()); + + let a = t!(entries.next().unwrap()); + assert_eq!(&*a.header().path_bytes(), b"sparse_begin.txt"); + + let a = t!(entries.next().unwrap()); + assert_eq!(&*a.header().path_bytes(), b"sparse_end.txt"); + + let mut a = t!(entries.next().unwrap()); + let mut s = String::new(); + assert_eq!(&*a.header().path_bytes(), b"sparse_ext.txt"); + t!(a.seek(SeekFrom::Start(0xa000))); + t!(a.read_to_string(&mut s)); + assert!(s[..0x1000].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x1000..], "text\n"); + s.truncate(0); + t!(a.seek(SeekFrom::Current(-(0x2000 + 3)))); + t!(a.read_to_string(&mut s)); + assert_eq!(&s[..3], "xt\n"); + assert!(s[3..0x2000 - 2].chars().all(|x| x == '\u{0}')); + assert_eq!(&s[0x2000 - 2..], "text\n"); + s.truncate(0); + t!(a.read_to_string(&mut s)); + assert_eq!(s, ""); + + let mut a = t!(entries.next().unwrap()); + let mut s = String::new(); + assert_eq!(&*a.header().path_bytes(), b"sparse.txt"); + t!(a.seek(SeekFrom::Start(0x2fa0))); + t!(a.read_to_string(&mut s)); + assert_eq!(&s[..6], "world\n"); + assert!(s[6..].chars().all(|x| x == '\u{0}')); + assert_eq!(s.len(), 0x4000 - 0x2fa0); + + assert!(entries.next().is_none()); +} + #[test] fn path_separators() { let mut ar = Builder::new(Vec::new());