Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to preserve file attributes #106

Open
wants to merge 49 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
8dbb570
invented option to preserve filemode
NodyHub Dec 5, 2024
49b513b
added support for updating ownership, file permissions and change time
NodyHub Dec 9, 2024
1605a68
code clean up
NodyHub Dec 9, 2024
0405d3f
Further adjustments
NodyHub Dec 9, 2024
576854a
adjusted cli help output
NodyHub Dec 9, 2024
2160ffe
adjusted build tag specification
NodyHub Dec 10, 2024
4c99ad2
corrected mod time adjustment
NodyHub Dec 10, 2024
fd3a385
removed code comment and fixed timestamp of archive file
NodyHub Dec 10, 2024
1ded861
rely on built-in unix.Timeval conversion
samsalisbury Dec 10, 2024
593b990
Merge pull request #109 from hashicorp/simplify-unix-time-handling
NodyHub Dec 10, 2024
03a7cb6
unexpose internal functions
NodyHub Dec 10, 2024
fb7132a
Update target_memory_test.go
NodyHub Dec 11, 2024
f9b2710
check for differences >= 1 micro second
NodyHub Dec 11, 2024
9e69c9d
simplified test print statement
NodyHub Dec 11, 2024
7fa9d28
invent testing as root to adjust ownership
NodyHub Dec 11, 2024
8498e88
transformed canMaintainSymlinkTimestamps into a function and relocate…
NodyHub Dec 11, 2024
900c4d0
skip sudo test on windows
NodyHub Dec 11, 2024
96659b8
added some error handling
NodyHub Dec 11, 2024
f4bfff1
adjusted comment that rar does not support uig/gid
NodyHub Dec 16, 2024
e9c3941
located chown in platform specific implementation
NodyHub Dec 16, 2024
9ab3635
marked in test case that rar does not support file ownership
NodyHub Dec 16, 2024
00f38eb
separated ownership preservation in an own flag
NodyHub Dec 16, 2024
cb45ca4
relocated test data
NodyHub Dec 16, 2024
2e7c380
added copyright header
NodyHub Dec 16, 2024
dacbd03
updated comments to remark that ownership is only transported by tar …
NodyHub Dec 16, 2024
3fb2d38
verified overwrite behaviour for double-symlinks in archive
NodyHub Dec 16, 2024
668eb6b
adjusted error handling and wrapping
NodyHub Dec 17, 2024
e1ce17d
Return current Uid/Gid if archive does not support carriage of owners…
NodyHub Dec 17, 2024
03a9e84
inverted logic to preserve file attributes (mod/access time and file …
NodyHub Dec 17, 2024
f5b7936
removed windows specifc code
NodyHub Dec 17, 2024
ccf382e
adjusted test cases file split
NodyHub Dec 17, 2024
111a61f
adjusted test output log
NodyHub Dec 17, 2024
b4c67e9
drop write permissions on directory if no access is granted
NodyHub Dec 17, 2024
7671e15
drop permission check for directories on windows, bc/ they create uns…
NodyHub Dec 17, 2024
879edcd
updated test case comment
NodyHub Dec 17, 2024
450aba3
clarified test in comment
NodyHub Dec 17, 2024
3c7c310
updated readme
NodyHub Dec 17, 2024
5ff6279
renamed config.noPreserveFileAttributes to config.dropFileAttributes
NodyHub Dec 18, 2024
668a07e
Update target_disk_unix.go
NodyHub Dec 18, 2024
49171eb
fixed import
NodyHub Dec 18, 2024
9d10a84
adjusted short version of flag PreserveOwner from -o to -p , bc/ it i…
NodyHub Dec 18, 2024
5580e05
adjusted comments
NodyHub Dec 18, 2024
bf3804a
removed unused field from test
NodyHub Jan 6, 2025
f59ea35
Update unpack_unix_test.go
NodyHub Jan 8, 2025
6c8d47a
dropped uid/gid from upper test case level and aligned the test case
NodyHub Jan 8, 2025
edfd38f
simplified chown check
NodyHub Jan 8, 2025
064184a
adjusted the current-user-tar test-case to invalid-uid-tar so that ev…
NodyHub Jan 8, 2025
45cb851
adjusted test for invalid uid/gid
NodyHub Jan 8, 2025
5c74796
adjusted uid casting
NodyHub Jan 8, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 35 additions & 3 deletions 7zip.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"io"
"io/fs"
"os"
"time"

"github.com/bodgit/sevenzip"
)
Expand Down Expand Up @@ -122,9 +123,14 @@ func (z *sevenZipEntry) Mode() os.FileMode {
}

// Linkname returns the link target of the 7zip entry.
// For symlink entries the target is read from the entry's content. An empty
// string is returned if the entry is not a symlink or if its content cannot
// be opened or read completely.
func (z *sevenZipEntry) Linkname() string {
	if !z.IsSymlink() {
		return ""
	}
	f, err := z.f.Open()
	if err != nil {
		// f is not usable here; deferring f.Close() would panic
		return ""
	}
	defer f.Close()
	c, err := io.ReadAll(f)
	if err != nil {
		// never hand out a partially read link target
		return ""
	}
	return string(c)
}

// IsRegular returns true if the 7zip entry is a regular file
Expand All @@ -140,7 +146,7 @@ func (z *sevenZipEntry) IsDir() bool {
// IsSymlink returns true if the 7zip entry is a symlink.
// The decision is based on the symlink bit of the file mode stored for the entry.
func (z *sevenZipEntry) IsSymlink() bool {
	return z.f.FileInfo().Mode()&os.ModeSymlink != 0
}

// Open returns a reader for the 7zip entry
Expand All @@ -152,3 +158,29 @@ func (z *sevenZipEntry) Open() (io.ReadCloser, error) {
func (z *sevenZipEntry) Type() fs.FileMode {
	// take the mode from the entry's file info and hand it out as fs.FileMode
	mode := z.f.FileInfo().Mode()
	return fs.FileMode(mode)
}

// AccessTime reports the access timestamp stored in the 7zip entry header.
func (z *sevenZipEntry) AccessTime() time.Time {
	accessed := z.f.Accessed
	return accessed
}

// ModTime reports the modification timestamp stored in the 7zip entry header.
func (z *sevenZipEntry) ModTime() time.Time {
	modified := z.f.Modified
	return modified
}

// Sys hands out whatever the entry's file info reports via its Sys method.
func (z *sevenZipEntry) Sys() interface{} {
	info := z.f.FileInfo()
	return info.Sys()
}

// Gid returns the group ID to use for the 7zip entry.
// The value is not read from the archive; it is the group ID of the
// current process as reported by os.Getgid.
func (z *sevenZipEntry) Gid() int {
	gid := os.Getgid()
	return gid
}

// Uid returns the user ID to use for the 7zip entry.
// The value is not read from the archive; it is the user ID of the
// current process as reported by os.Getuid.
func (z *sevenZipEntry) Uid() int {
	uid := os.Getuid()
	return uid
}
87 changes: 44 additions & 43 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,18 @@ go install github.com/hashicorp/go-extract/cmd/goextract@latest

These examples demonstrate how to use [hashicorp/go-extract](https://github.com/hashicorp/go-extract) both as a library and as a command-line utility.

### Library

The simplest way to use the library is to call the `extract.Unpack` function with the default configuration. This function extracts the contents from an `io.Reader` to the specified destination on the local filesystem.

```go
// Unpack the archive
if err := extract.Unpack(ctx, dst, archive, config.NewConfig()); err != nil {
// Handle error
log.Fatalf("Failed to unpack archive: %v", err)
}
```

### Command-line Utility

The `goextract` command-line utility offers all available configuration options via dedicated flags.
Expand All @@ -57,54 +69,43 @@ Flags:
--custom-decompress-file-mode=640 File mode for decompressed files. (respecting umask)
-D, --deny-symlinks Deny symlink extraction.
--insecure-traverse-symlinks Traverse symlinks to directories during extraction.
--max-files=1000 Maximum files (including folder and symlinks) that are extracted before stop. (disable check: -1)
--max-files=100000 Maximum files (including folder and symlinks) that are extracted before stop. (disable check: -1)
--max-extraction-size=1073741824 Maximum extraction size that allowed is (in bytes). (disable check: -1)
--max-extraction-time=60 Maximum time that an extraction should take (in seconds). (disable check: -1)
--max-input-size=1073741824 Maximum input size that allowed is (in bytes). (disable check: -1)
-N, --no-untar-after-decompression Disable combined extraction of tar.gz.
-O, --overwrite Overwrite if exist.
-P, --pattern=PATTERN,... Extracted objects need to match shell file name pattern.
-p, --preserve-file-attributes Preserve file attributes from archive (access and modification time, file permissions and owner/group).
-T, --telemetry Print telemetry data to log after extraction.
-t, --type="" Type of archive. (7z, br, bz2, gz, lz4, rar, sz, tar, tgz, xz, zip, zst, zz)
-v, --verbose Verbose logging.
-V, --version Print release version information.
```

### Library

The simplest way to use the library is to call the `extract.Unpack` function with the default configuration. This function extracts the contents from an `io.Reader` to the specified destination on the local filesystem.

```go
// Unpack the archive
if err := extract.Unpack(ctx, dst, archive, config.NewConfig()); err != nil {
// Handle error
log.Fatalf("Failed to unpack archive: %v", err)
}
```

## Configuration

When calling the `extract.Unpack(..)` function, we need to provide a `config` object that contains all available configuration options.

```golang
// process cli params
cfg := config.NewConfig(
config.WithContinueOnError(..),
config.WithContinueOnUnsupportedFiles(..),
config.WithCreateDestination(..),
config.WithCustomCreateDirMode(..),
config.WithCustomDecompressFileMode(..),
config.WithDenySymlinkExtraction(..),
config.WithExtractType(..),
config.WithTraverseSymlinks(..),
config.WithLogger(..),
config.WithMaxExtractionSize(..),
config.WithMaxFiles(..),
config.WithMaxInputSize(..),
config.WithNoUntarAfterDecompression(..),
config.WithOverwrite(..),
config.WithPatterns(..),
config.WithTelemetryHook(..),
cfg := extract.NewConfig(
extract.WithContinueOnError(..),
extract.WithContinueOnUnsupportedFiles(..),
extract.WithCreateDestination(..),
extract.WithCustomCreateDirMode(..),
extract.WithCustomDecompressFileMode(..),
extract.WithDenySymlinkExtraction(..),
extract.WithExtractType(..),
extract.WithInsecureTraverseSymlinks(..),
extract.WithLogger(..),
extract.WithMaxExtractionSize(..),
extract.WithMaxFiles(..),
extract.WithMaxInputSize(..),
extract.WithNoUntarAfterDecompression(..),
extract.WithOverwrite(..),
extract.WithPatterns(..),
extract.WithPreserveFileAttributes(..),
extract.WithTelemetryHook(..),
)

[..]
Expand Down Expand Up @@ -132,18 +133,18 @@ Here is an example collected telemetry data for the extraction of [`terraform-aw

```json
{
"LastExtractionError": "",
"ExtractedDirs": 51,
"ExtractionDuration": 48598584,
"ExtractionErrors": 0,
"ExtractedFiles": 241,
"ExtractionSize": 539085,
"ExtractedSymlinks": 0,
"ExtractedType": "tar+gzip",
"InputSize": 81477,
"PatternMismatches": 0,
"UnsupportedFiles": 0,
"LastUnsupportedFile": ""
"last_extraction_error": "",
"extracted_dirs": 51,
"extraction_duration": 55025584,
"extraction_errors": 0,
"extracted_files": 241,
"extraction_size": 539085,
"extracted_symlinks": 0,
"extracted_type": "tar.gz",
"input_size": 81477,
"pattern_mismatches": 0,
"unsupported_files": 0,
"last_unsupported_file": ""
}
```

Expand Down
18 changes: 12 additions & 6 deletions archive_walker.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package extract
import (
"io"
"io/fs"
"time"
)

// archiveWalker is an interface that represents a file walker in an archive
Expand All @@ -16,13 +17,18 @@ type archiveWalker interface {

// archiveEntry is an interface that represents a file in an archive.
// Besides name, type, size and content it exposes the attributes (timestamps,
// permissions and owner/group IDs) that are applied to an extracted entry
// when file attributes are preserved. Every method is declared exactly once.
type archiveEntry interface {
	// AccessTime returns the access time of the entry.
	AccessTime() time.Time
	// Gid returns the group ID to apply to the extracted entry.
	Gid() int
	// IsRegular returns true if the entry is a regular file.
	IsRegular() bool
	// IsDir returns true if the entry is a directory.
	IsDir() bool
	// IsSymlink returns true if the entry is a symlink.
	IsSymlink() bool
	// Linkname returns the link target of the entry.
	Linkname() string
	// Mode returns the file mode of the entry.
	Mode() fs.FileMode
	// ModTime returns the modification time of the entry.
	ModTime() time.Time
	// Name returns the name of the entry inside the archive.
	Name() string
	// Open returns a reader for the content of the entry.
	Open() (io.ReadCloser, error)
	// Size returns the size of the entry.
	Size() int64
	// Sys returns the underlying, format-specific information of the entry.
	Sys() interface{}
	// Type returns the type of the entry as fs.FileMode.
	Type() fs.FileMode
	// Uid returns the user ID to apply to the extracted entry.
	Uid() int
}
2 changes: 2 additions & 0 deletions cmd/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ type CLI struct {
NoUntarAfterDecompression bool `short:"N" optional:"" default:"false" help:"Disable combined extraction of tar.gz."`
Overwrite bool `short:"O" help:"Overwrite if exist."`
Pattern []string `short:"P" optional:"" name:"pattern" help:"Extracted objects need to match shell file name pattern."`
PreserveFileAttributes bool `short:"p" help:"Preserve file attributes from archive (access and modification time, file permissions and owner/group)."`
Telemetry bool `short:"T" optional:"" default:"false" help:"Print telemetry data to log after extraction."`
Type string `short:"t" optional:"" default:"${default_type}" name:"type" help:"Type of archive. (${valid_types})"`
Verbose bool `short:"v" optional:"" help:"Verbose logging."`
Expand Down Expand Up @@ -97,6 +98,7 @@ func Run(version, commit, date string) {
extract.WithNoUntarAfterDecompression(cli.NoUntarAfterDecompression),
extract.WithOverwrite(cli.Overwrite),
extract.WithPatterns(cli.Pattern...),
extract.WithPreserveFileAttributes(cli.PreserveFileAttributes),
extract.WithTelemetryHook(telemetryDataToLog),
)

Expand Down
19 changes: 18 additions & 1 deletion config.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ type Config struct {

// patterns is a list of file patterns to match files to extract
patterns []string

// preserveFileAttributes is a flag to preserve the file attributes of the extracted files
preserveFileAttributes bool
}

// ContinueOnError returns true if the extraction should continue on error.
Expand Down Expand Up @@ -201,6 +204,11 @@ func (c *Config) Patterns() []string {
return c.patterns
}

// PreserveFileAttributes reports whether the preserveFileAttributes flag is
// set, i.e. whether attributes of extracted files should be preserved.
func (c *Config) PreserveFileAttributes() bool {
	preserve := c.preserveFileAttributes
	return preserve
}

// SetNoUntarAfterDecompression sets the noUntarAfterDecompression flag. If true, tar.gz files
// are not untared after decompression.
func (c *Config) SetNoUntarAfterDecompression(b bool) {
Expand Down Expand Up @@ -231,6 +239,7 @@ const (
defaultMaxInputSize = 1 << (10 * 3) // 1 Gb
defaultNoUntarAfterDecompression = false // untar after decompression
defaultOverwrite = false // do not overwrite existing files
defaultPreserveFileAttributes = false // do not preserve file attributes
defaultTraverseSymlinks = false // do not traverse symlinks

)
Expand All @@ -252,6 +261,7 @@ func NewConfig(opts ...ConfigOption) *Config {
config := &Config{
cacheInMemory: defaultCacheInMemory,
continueOnError: defaultContinueOnError,
continueOnUnsupportedFiles: defaultContinueOnUnsupportedFiles,
createDestination: defaultCreateDestination,
customCreateDirMode: defaultCustomCreateDirMode,
customDecompressFileMode: defaultCustomDecompressFileMode,
Expand All @@ -265,7 +275,7 @@ func NewConfig(opts ...ConfigOption) *Config {
telemetryHook: defaultTelemetryHook,
traverseSymlinks: defaultTraverseSymlinks,
noUntarAfterDecompression: defaultNoUntarAfterDecompression,
continueOnUnsupportedFiles: defaultContinueOnUnsupportedFiles,
preserveFileAttributes: defaultPreserveFileAttributes,
}

// Loop through each option
Expand Down Expand Up @@ -405,6 +415,13 @@ func WithPatterns(pattern ...string) ConfigOption {
}
}

// WithPreserveFileAttributes is an option function that sets whether the
// file attributes of the extracted files should be preserved.
func WithPreserveFileAttributes(preserve bool) ConfigOption {
	apply := func(cfg *Config) {
		cfg.preserveFileAttributes = preserve
	}
	return apply
}

// WithTelemetryHook options pattern function to set a [telemetry.TelemetryHook], which is called after extraction.
func WithTelemetryHook(hook TelemetryHook) ConfigOption {
return func(c *Config) {
Expand Down
53 changes: 50 additions & 3 deletions extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,27 @@ func extract(ctx context.Context, t Target, dst string, src archiveWalker, cfg *
var fileCounter int64
var extractionSize int64

// check if attributes should be preserved, but as non-root user
if _, ok := t.(*TargetDisk); ok {
if cfg.PreserveFileAttributes() && os.Geteuid() != 0 {
cfg.Logger().Warn("cannot fully preserve file attributes as non-root user: cannot set file ownership", "uid", os.Geteuid())
NodyHub marked this conversation as resolved.
Show resolved Hide resolved
}
}

// set attributes after all modification are done to ensure that
// the timestamps are set correctly
var extractedEntries []archiveEntry
if cfg.PreserveFileAttributes() {
defer func() {
for _, ae := range extractedEntries {
path := filepath.Join(dst, ae.Name())
if err := setFileAttributes(t, path, ae); err != nil {
cfg.Logger().Error("failed to set file attributes", "path", path, "error", err)
}
}
}()
}

for {
// check if context is canceled
if ctx.Err() != nil {
Expand Down Expand Up @@ -332,10 +353,12 @@ func extract(ctx context.Context, t Target, dst string, src archiveWalker, cfg *
// do not end on error
continue
}
if cfg.PreserveFileAttributes() {
extractedEntries = append(extractedEntries, ae)
}
samsalisbury marked this conversation as resolved.
Show resolved Hide resolved

// store telemetry and continue
td.ExtractedDirs++
continue

// if it's a file create it
case ae.IsRegular():
Expand Down Expand Up @@ -373,8 +396,10 @@ func extract(ctx context.Context, t Target, dst string, src archiveWalker, cfg *
// store telemetry
if fileCreated {
td.ExtractedFiles++
if cfg.PreserveFileAttributes() {
extractedEntries = append(extractedEntries, ae)
}
}
continue

// its a symlink !!
case ae.IsSymlink():
Expand Down Expand Up @@ -402,10 +427,12 @@ func extract(ctx context.Context, t Target, dst string, src archiveWalker, cfg *
// do not end on error
continue
}
if cfg.PreserveFileAttributes() {
extractedEntries = append(extractedEntries, ae)
}

// store telemetry and continue
td.ExtractedSymlinks++
continue

default:

Expand All @@ -426,6 +453,26 @@ func extract(ctx context.Context, t Target, dst string, src archiveWalker, cfg *
}
}

// setFileAttributes applies the attributes of the archive entry ae to path on target t.
// Symlinks only get their access and modification time set (via Lchtimes). For all
// other entries ownership, permissions and timestamps are applied in this order.
// The first failing step aborts the function and its wrapped error is returned.
func setFileAttributes(t Target, path string, ae archiveEntry) error {
	if !ae.IsSymlink() {
		// owner/group first, then permission bits, timestamps last
		err := t.Chown(path, ae.Uid(), ae.Gid())
		if err != nil {
			return fmt.Errorf("failed to chown file: %w", err)
		}
		err = t.Chmod(path, ae.Mode().Perm())
		if err != nil {
			return fmt.Errorf("failed to chmod file: %w", err)
		}
		err = t.Chtimes(path, ae.AccessTime(), ae.ModTime())
		if err != nil {
			return fmt.Errorf("failed to chtimes file: %w", err)
		}
		return nil
	}

	// only time attributes are supported for symlinks
	err := t.Lchtimes(path, ae.AccessTime(), ae.ModTime())
	if err != nil {
		return fmt.Errorf("failed to lchtimes symlink: %w", err)
	}
	return nil
}

// readerToReaderAtSeeker converts an io.Reader to an io.ReaderAt and io.Seeker
func readerToReaderAtSeeker(c *Config, r io.Reader) (seekerReaderAt, error) {
if s, ok := r.(seekerReaderAt); ok {
Expand Down
Loading
Loading