Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 35 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

`tar-diff` is a golang library and set of commandline tools to diff and patch tar files.

`pkg/tar-diff` and the `tar-diff` tool take two (optionally compressed) tar files and generate a single file representing the delta between them (a tardiff file).
`pkg/tar-diff` and the `tar-diff` tool take one or more old tar files (optionally compressed) and a new tar file to generate a single file representing the delta between them (a tardiff file).

`pkg/tar-patch` takes a tardiff file and the uncompressed contents (such as an extracted directory) of the first tar file and reconstructs (binary identically) the second tar file (uncompressed).
`pkg/tar-patch` takes a tardiff file and the uncompressed contents (such as an extracted directory) of the old tar file(s) and reconstructs (binary identically) the new tar file (uncompressed).

## Example
```
Expand All @@ -15,6 +15,38 @@ $ zcat new.tar.gz | shasum
$ shasum reconstructed.tar
```

## Multi-file example

It is sometimes useful to have multiple sources for delta information, for example when the
sources are container image layers. In this case, you need to provide the old tar files in
the order in which they will be extracted when applying the delta:

```
$ tar-diff layer1.tar layer2.tar layer3.tar new-layer.tar delta.tardiff
$ tar xf layer1.tar -C extracted/
$ tar xf layer2.tar -C extracted/
$ tar xf layer3.tar -C extracted/
$ tar-patch delta.tardiff extracted/ reconstructed.tar
```

This handles the case where a file in a later tar file overwrites a file from an earlier one.

### Partial extraction with prefix filtering

If you only plan to extract certain directories from the old tar files on the target system,
you can use `--source-prefix` to restrict which files can be used as delta sources:

```
$ tar-diff --source-prefix=blobs/ --source-prefix=config/ old.tar new.tar delta.tardiff
$ tar xf old.tar -C extracted/ blobs/ config/
$ tar-patch delta.tardiff extracted/ reconstructed.tar
```

This ensures the delta only references files that will be available in the extracted directory.

This is particularly useful for bootc images, for example, where only the files in the ostree
repo will be available on the system. In that case you would run tar-diff with
`--source-prefix=sysroot/ostree/repo/objects/`.

## Build requirements

Expand All @@ -40,4 +72,4 @@ The `tar-diff` file format is described in [file-format.md](file-format.md).
## License

`tar-diff` is licensed under the Apache License, Version 2.0. See
[LICENSE](LICENSE) for the full license text.
[LICENSE](LICENSE) for the full license text.
71 changes: 56 additions & 15 deletions cmd/tar-diff/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package main
import (
"flag"
"fmt"
"io"
"log"
"os"
"path"
Expand All @@ -12,14 +13,33 @@ import (
tardiff "github.com/containers/tar-diff/pkg/tar-diff"
)

// prefixList collects the values of a repeatable string flag. Its pointer
// type provides the String and Set methods required by flag.Value, so it can
// be registered with flag.Var.
type prefixList []string

// String renders the collected prefixes in fmt's default slice form, for
// example "[blobs/ config/]".
//
// The flag package documents that it may call String on a zero-valued
// receiver such as a nil pointer, so a nil receiver is reported as an empty
// list instead of being dereferenced.
func (p *prefixList) String() string {
	if p == nil {
		return "[]"
	}
	return fmt.Sprintf("%v", *p)
}

// Set appends value to the list and always returns nil. Values are kept in
// the order in which they were added.
func (p *prefixList) Set(value string) error {
	*p = append(*p, value)
	return nil
}

// Command-line flags. The pointer-valued entries are registered with the
// default flag set when the package is initialised.
var (
	// version requests that the program print its version.
	version = flag.Bool("version", false, "Show version")

	// compressionLevel is the zstd compression level; it defaults to 3.
	compressionLevel = flag.Int("compression-level", 3, "zstd compression level")

	// maxBsdiffSize is the bsdiff size cap in megabytes; 0 means no limit.
	maxBsdiffSize = flag.Int("max-bsdiff-size", 192, "Max file size in megabytes to consider using bsdiff, or 0 for no limit")

	// sourcePrefixes holds the path prefixes given via --source-prefix.
	sourcePrefixes prefixList
)

func main() {
// closeAndWarn closes file and, if closing fails, reports the error through
// the standard logger. The failure is only logged; nothing is returned to
// the caller.
func closeAndWarn(file *os.File) {
	closeErr := file.Close()
	if closeErr == nil {
		return
	}
	log.Printf("Failed to close file: %v", closeErr)
}

func realMain() int {
flag.Var(&sourcePrefixes, "source-prefix", "Only use source files with this path prefix for delta (can be specified multiple times)")

flag.Usage = func() {
_, _ = fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [OPTION] old.tar.gz new.tar.gz result.tardiff\n", path.Base(os.Args[0]))
_, _ = fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [OPTION] old1.tar.gz [old2.tar.gz ...] new.tar.gz result.tardiff\n", path.Base(os.Args[0]))
_, _ = fmt.Fprintf(flag.CommandLine.Output(), "Options:\n")
flag.PrintDefaults()
}
Expand All @@ -28,40 +48,61 @@ func main() {

if *version {
fmt.Printf("%s %s\n", path.Base(os.Args[0]), protocol.VERSION)
return
return 0
}

if flag.NArg() != 3 {
if flag.NArg() < 3 {
flag.Usage()
os.Exit(1)
return 1
}

oldFilename := flag.Arg(0)
newFilename := flag.Arg(1)
deltaFilename := flag.Arg(2)
args := flag.Args()
numOldFiles := len(args) - 2
oldFilenames := args[0:numOldFiles]
newFilename := args[numOldFiles]
deltaFilename := args[numOldFiles+1]

oldFile, err := os.Open(oldFilename)
if err != nil {
log.Fatalf("Error: %s", err)
oldFiles := make([]io.ReadSeeker, numOldFiles)
for i, oldFilename := range oldFilenames {
file, err := os.Open(oldFilename)
if err != nil {
log.Printf("Error: %s", err)
return 1
}
defer closeAndWarn(file)
oldFiles[i] = file
}

newFile, err := os.Open(newFilename)
if err != nil {
log.Fatalf("Error: %s", err)
log.Printf("Error: %s", err)
return 1
}
defer closeAndWarn(newFile)

deltaFile, err := os.Create(deltaFilename)
if err != nil {
log.Fatalf("Error: %s", err)
log.Printf("Error: %s", err)
return 1
}
defer closeAndWarn(deltaFile)

options := tardiff.NewOptions()
options.SetCompressionLevel(*compressionLevel)
options.SetMaxBsdiffFileSize(int64(*maxBsdiffSize) * 1024 * 1024)
if len(sourcePrefixes) > 0 {
options.SetSourcePrefixes(sourcePrefixes)
}

err = tardiff.Diff(oldFile, newFile, deltaFile, options)
err = tardiff.Diff(oldFiles, newFile, deltaFile, options)
if err != nil {
log.Fatalf("Error: %s", err)
log.Printf("Error: %s", err)
return 1
}
return 0
}

// main delegates to realMain and exits with the status it returns. The work
// lives in a separate function because os.Exit terminates the process without
// running deferred calls; by the time os.Exit is reached here, realMain has
// already returned and its deferred calls have run.
func main() {
	exitCode := realMain()
	os.Exit(exitCode)
}
Loading
Loading