diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 53fb8214..a859119d 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -30,6 +30,14 @@ jobs: run: | make test + - name: Test (as root) + if: matrix.os != 'windows-latest' + id: sudo-test + run: | + make sudo_test + + + fuzzing: strategy: diff --git a/7zip.go b/7zip.go index 2d295c6d..7c1455f1 100644 --- a/7zip.go +++ b/7zip.go @@ -9,6 +9,7 @@ import ( "io" "io/fs" "os" + "time" "github.com/bodgit/sevenzip" ) @@ -122,9 +123,20 @@ func (z *sevenZipEntry) Mode() os.FileMode { } // Linkname returns the linkname of the 7zip entry -// Remark: 7zip does not support symlinks func (z *sevenZipEntry) Linkname() string { - return "" + if !z.IsSymlink() { + return "" + } + f, err := z.f.Open() + if err != nil { + return "" + } + defer f.Close() + c, err := io.ReadAll(f) + if err != nil { + return "" + } + return string(c) } // IsRegular returns true if the 7zip entry is a regular file @@ -140,7 +152,7 @@ func (z *sevenZipEntry) IsDir() bool { // IsSymlink returns true if the 7zip entry is a symlink // Remark: 7zip does not support symlinks func (z *sevenZipEntry) IsSymlink() bool { - return false + return (z.f.FileInfo().Mode()&os.ModeSymlink != 0) } // Open returns a reader for the 7zip entry @@ -152,3 +164,30 @@ func (z *sevenZipEntry) Open() (io.ReadCloser, error) { func (z *sevenZipEntry) Type() fs.FileMode { return fs.FileMode(z.f.FileInfo().Mode()) } + +// AccessTime returns the access time of the 7zip entry +func (z *sevenZipEntry) AccessTime() time.Time { + return z.f.Accessed +} + +// ModTime returns the modification time of the 7zip entry +func (z *sevenZipEntry) ModTime() time.Time { + return z.f.Modified +} + +// Sys returns the system information of the 7zip entry +func (z *sevenZipEntry) Sys() interface{} { + return z.f.FileInfo().Sys() +} + +// Gid is not supported for 7zip files. The used library does not provide +// this information. 
The function returns the group ID of the current process. +func (z *sevenZipEntry) Gid() int { + return os.Getegid() +} + +// Uid is not supported for 7zip files. The used library does not provide +// this information. The function returns the effective user ID of the current process. +func (z *sevenZipEntry) Uid() int { + return os.Geteuid() +} diff --git a/Makefile b/Makefile index d506cc78..da562541 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,11 @@ clean: test: go test ./... +sudo_test: + sudo -E go test ./... + + + test_coverage: go test ./... -coverprofile=coverage.out diff --git a/README.md b/README.md index 253da28b..6aea05f8 100644 --- a/README.md +++ b/README.md @@ -34,6 +34,18 @@ go install github.com/hashicorp/go-extract/cmd/goextract@latest These examples demonstrate how to use [hashicorp/go-extract](https://github.com/hashicorp/go-extract) both as a library and as a command-line utility. +### Library + +The simplest way to use the library is to call the `extract.Unpack` function with the default configuration. This function extracts the contents from an `io.Reader` to the specified destination on the local filesystem. + +```go +// Unpack the archive +if err := extract.Unpack(ctx, dst, archive, extract.NewConfig()); err != nil { + // Handle error + log.Fatalf("Failed to unpack archive: %v", err) +} +``` + ### Command-line Utility The `goextract` command-line utility offers all available configuration options via dedicated flags. @@ -56,55 +68,46 @@ Flags: --custom-create-dir-mode=750 File mode for created directories, which are not listed in the archive. (respecting umask) --custom-decompress-file-mode=640 File mode for decompressed files. (respecting umask) -D, --deny-symlinks Deny symlink extraction. + -d, --drop-file-attributes Drop file attributes (mode, modtime, access time). --insecure-traverse-symlinks Traverse symlinks to directories during extraction. - --max-files=1000 Maximum files (including folder and symlinks) that are extracted before stop.
(disable check: -1) + --max-files=100000 Maximum files (including folder and symlinks) that are extracted before stop. (disable check: -1) --max-extraction-size=1073741824 Maximum extraction size that allowed is (in bytes). (disable check: -1) --max-extraction-time=60 Maximum time that an extraction should take (in seconds). (disable check: -1) --max-input-size=1073741824 Maximum input size that allowed is (in bytes). (disable check: -1) -N, --no-untar-after-decompression Disable combined extraction of tar.gz. -O, --overwrite Overwrite if exist. -P, --pattern=PATTERN,... Extracted objects need to match shell file name pattern. + -p, --preserve-owner Preserve owner and group of files from archive (only root/uid:0 on unix systems for tar files). -T, --telemetry Print telemetry data to log after extraction. -t, --type="" Type of archive. (7z, br, bz2, gz, lz4, rar, sz, tar, tgz, xz, zip, zst, zz) -v, --verbose Verbose logging. -V, --version Print release version information. ``` -### Library - -The simplest way to use the library is to call the `extract.Unpack` function with the default configuration. This function extracts the contents from an `io.Reader` to the specified destination on the local filesystem. - -```go -// Unpack the archive -if err := extract.Unpack(ctx, dst, archive, config.NewConfig()); err != nil { - // Handle error - log.Fatalf("Failed to unpack archive: %v", err) -} -``` - ## Configuration When calling the `extract.Unpack(..)` function, we need to provide `config` object that contains all available configuration. 
```golang - // process cli params - cfg := config.NewConfig( - config.WithContinueOnError(..), - config.WithContinueOnUnsupportedFiles(..), - config.WithCreateDestination(..), - config.WithCustomCreateDirMode(..), - config.WithCustomDecompressFileMode(..), - config.WithDenySymlinkExtraction(..), - config.WithExtractType(..), - config.WithTraverseSymlinks(..), - config.WithLogger(..), - config.WithMaxExtractionSize(..), - config.WithMaxFiles(..), - config.WithMaxInputSize(..), - config.WithNoUntarAfterDecompression(..), - config.WithOverwrite(..), - config.WithPatterns(..), - config.WithTelemetryHook(..), + cfg := extract.NewConfig( + extract.WithContinueOnError(..), + extract.WithContinueOnUnsupportedFiles(..), + extract.WithCreateDestination(..), + extract.WithCustomCreateDirMode(..), + extract.WithCustomDecompressFileMode(..), + extract.WithDenySymlinkExtraction(..), + extract.WithDropFileAttributes(..), + extract.WithExtractType(..), + extract.WithInsecureTraverseSymlinks(..), + extract.WithLogger(..), + extract.WithMaxExtractionSize(..), + extract.WithMaxFiles(..), + extract.WithMaxInputSize(..), + extract.WithNoUntarAfterDecompression(..), + extract.WithOverwrite(..), + extract.WithPatterns(..), + extract.WithPreserveOwner(..), + extract.WithTelemetryHook(..), ) [..] 
@@ -132,18 +135,18 @@ Here is an example collected telemetry data for the extraction of [`terraform-aw ```json { - "LastExtractionError": "", - "ExtractedDirs": 51, - "ExtractionDuration": 48598584, - "ExtractionErrors": 0, - "ExtractedFiles": 241, - "ExtractionSize": 539085, - "ExtractedSymlinks": 0, - "ExtractedType": "tar+gzip", - "InputSize": 81477, - "PatternMismatches": 0, - "UnsupportedFiles": 0, - "LastUnsupportedFile": "" + "last_extraction_error": "", + "extracted_dirs": 51, + "extraction_duration": 55025584, + "extraction_errors": 0, + "extracted_files": 241, + "extraction_size": 539085, + "extracted_symlinks": 0, + "extracted_type": "tar.gz", + "input_size": 81477, + "pattern_mismatches": 0, + "unsupported_files": 0, + "last_unsupported_file": "" } ``` diff --git a/archive_walker.go b/archive_walker.go index 8948e260..b65de83c 100644 --- a/archive_walker.go +++ b/archive_walker.go @@ -6,6 +6,7 @@ package extract import ( "io" "io/fs" + "time" ) // archiveWalker is an interface that represents a file walker in an archive @@ -16,13 +17,18 @@ type archiveWalker interface { // archiveEntry is an interface that represents a file in an archive type archiveEntry interface { - Mode() fs.FileMode - Type() fs.FileMode - Name() string - Linkname() string - Size() int64 - Open() (io.ReadCloser, error) + AccessTime() time.Time + Gid() int IsRegular() bool IsDir() bool IsSymlink() bool + Linkname() string + Mode() fs.FileMode + ModTime() time.Time + Name() string + Open() (io.ReadCloser, error) + Size() int64 + Sys() interface{} + Type() fs.FileMode + Uid() int } diff --git a/cmd/run.go b/cmd/run.go index cf93b199..88a3968d 100644 --- a/cmd/run.go +++ b/cmd/run.go @@ -30,6 +30,7 @@ type CLI struct { CustomDecompressFileMode int `optional:"" default:"640" help:"File mode for decompressed files. (respecting umask)"` DenySymlinks bool `short:"D" help:"Deny symlink extraction."` Destination string `arg:"" name:"destination" default:"." 
help:"Output directory/file."` + DropFileAttributes bool `short:"d" help:"Drop file attributes (mode, modtime, access time)."` InsecureTraverseSymlinks bool `help:"Traverse symlinks to directories during extraction."` MaxFiles int64 `optional:"" default:"${default_max_files}" help:"Maximum files (including folder and symlinks) that are extracted before stop. (disable check: -1)"` MaxExtractionSize int64 `optional:"" default:"${default_max_extraction_size}" help:"Maximum extraction size that allowed is (in bytes). (disable check: -1)"` @@ -37,7 +38,8 @@ type CLI struct { MaxInputSize int64 `optional:"" default:"${default_max_input_size}" help:"Maximum input size that allowed is (in bytes). (disable check: -1)"` NoUntarAfterDecompression bool `short:"N" optional:"" default:"false" help:"Disable combined extraction of tar.gz."` Overwrite bool `short:"O" help:"Overwrite if exist."` - Pattern []string `short:"P" optional:"" name:"pattern" help:"Extracted objects need to match shell file name pattern."` + Pattern []string `optional:"" short:"P" name:"pattern" help:"Extracted objects need to match shell file name pattern."` + PreserveOwner bool `short:"p" help:"Preserve owner and group of files from archive (only root/uid:0 on unix systems for tar files)."` Telemetry bool `short:"T" optional:"" default:"false" help:"Print telemetry data to log after extraction."` Type string `short:"t" optional:"" default:"${default_type}" name:"type" help:"Type of archive. 
(${valid_types})"` Verbose bool `short:"v" optional:"" help:"Verbose logging."` @@ -94,9 +96,11 @@ func Run(version, commit, date string) { extract.WithMaxExtractionSize(cli.MaxExtractionSize), extract.WithMaxFiles(cli.MaxFiles), extract.WithMaxInputSize(cli.MaxInputSize), + extract.WithDropFileAttributes(cli.DropFileAttributes), extract.WithNoUntarAfterDecompression(cli.NoUntarAfterDecompression), extract.WithOverwrite(cli.Overwrite), extract.WithPatterns(cli.Pattern...), + extract.WithPreserveOwner(cli.PreserveOwner), extract.WithTelemetryHook(telemetryDataToLog), ) diff --git a/config.go b/config.go index fd607bab..2ff0456b 100644 --- a/config.go +++ b/config.go @@ -43,6 +43,9 @@ type Config struct { // denySymlinkExtraction offers the option to enable/disable the extraction of symlinks denySymlinkExtraction bool + // dropFileAttributes is a flag to drop the file attributes of the extracted files + dropFileAttributes bool + // extractionType is the type of extraction algorithm extractionType string @@ -76,6 +79,9 @@ type Config struct { // patterns is a list of file patterns to match files to extract patterns []string + + // preserveOwner is a flag to preserve the owner of the extracted files + preserveOwner bool } // ContinueOnError returns true if the extraction should continue on error. @@ -155,6 +161,11 @@ func (c *Config) DenySymlinkExtraction() bool { return c.denySymlinkExtraction } +// DropFileAttributes returns true if the file attributes should be dropped. +func (c *Config) DropFileAttributes() bool { + return c.dropFileAttributes +} + // ExtractType returns the specified extraction type. func (c *Config) ExtractType() string { return c.extractionType @@ -201,6 +212,13 @@ func (c *Config) Patterns() []string { return c.patterns } +// PreserveOwner returns true if the owner of the extracted files should +// be preserved. This option is only available on Unix systems requiring +// root privileges and tar archives as input.
+func (c *Config) PreserveOwner() bool { + return c.preserveOwner +} + // SetNoUntarAfterDecompression sets the noUntarAfterDecompression flag. If true, tar.gz files // are not untared after decompression. func (c *Config) SetNoUntarAfterDecompression(b bool) { @@ -221,17 +239,19 @@ const ( defaultCacheInMemory = false // cache on disk defaultContinueOnError = false // stop on error and return error defaultContinueOnUnsupportedFiles = false // stop on unsupported files and return error - defaultCreateDestination = false // do not create destination directory + defaultCreateDestination = false // don't create destination directory defaultCustomCreateDirMode = 0750 // default directory permissions rwxr-x--- defaultCustomDecompressFileMode = 0640 // default decompression permissions rw-r----- defaultDenySymlinkExtraction = false // allow symlink extraction - defaultExtractionType = "" // do not limit extraction type + defaultDropFileAttributes = false // don't drop file attributes from archive + defaultExtractionType = "" // don't limit extraction type defaultMaxFiles = 100000 // 100k files defaultMaxExtractionSize = 1 << (10 * 3) // 1 Gb defaultMaxInputSize = 1 << (10 * 3) // 1 Gb defaultNoUntarAfterDecompression = false // untar after decompression - defaultOverwrite = false // do not overwrite existing files - defaultTraverseSymlinks = false // do not traverse symlinks + defaultOverwrite = false // don't overwrite existing files + defaultPreserveOwner = false // don't preserve owner + defaultTraverseSymlinks = false // don't traverse symlinks ) @@ -252,10 +272,12 @@ func NewConfig(opts ...ConfigOption) *Config { config := &Config{ cacheInMemory: defaultCacheInMemory, continueOnError: defaultContinueOnError, + continueOnUnsupportedFiles: defaultContinueOnUnsupportedFiles, createDestination: defaultCreateDestination, customCreateDirMode: defaultCustomCreateDirMode, customDecompressFileMode: defaultCustomDecompressFileMode, denySymlinkExtraction:
defaultDenySymlinkExtraction, + dropFileAttributes: defaultDropFileAttributes, extractionType: defaultExtractionType, logger: defaultLogger, maxFiles: defaultMaxFiles, @@ -265,7 +287,7 @@ func NewConfig(opts ...ConfigOption) *Config { telemetryHook: defaultTelemetryHook, traverseSymlinks: defaultTraverseSymlinks, noUntarAfterDecompression: defaultNoUntarAfterDecompression, - continueOnUnsupportedFiles: defaultContinueOnUnsupportedFiles, + preserveOwner: defaultPreserveOwner, } // Loop through each option @@ -336,6 +358,14 @@ func WithDenySymlinkExtraction(deny bool) ConfigOption { } } +// WithDropFileAttributes options pattern function to drop the +// file attributes of the extracted files. +func WithDropFileAttributes(drop bool) ConfigOption { + return func(c *Config) { + c.dropFileAttributes = drop + } +} + // WithExtractType options pattern function to set the extraction type in the [Config]. func WithExtractType(extractionType string) ConfigOption { return func(c *Config) { @@ -405,6 +435,15 @@ func WithPatterns(pattern ...string) ConfigOption { } } +// WithPreserveOwner options pattern function to preserve the owner of +// the extracted files. This option is only available on Unix systems +// requiring root privileges and tar archives as input. +func WithPreserveOwner(preserve bool) ConfigOption { + return func(c *Config) { + c.preserveOwner = preserve + } +} + // WithTelemetryHook options pattern function to set a [telemetry.TelemetryHook], which is called after extraction. 
func WithTelemetryHook(hook TelemetryHook) ConfigOption { return func(c *Config) { diff --git a/extractor.go b/extractor.go index c23bcae6..021a4925 100644 --- a/extractor.go +++ b/extractor.go @@ -270,160 +270,218 @@ func extract(ctx context.Context, t Target, dst string, src archiveWalker, cfg * var fileCounter int64 var extractionSize int64 - for { - // check if context is canceled - if ctx.Err() != nil { - return ctx.Err() - } - - // get next file - ae, err := src.Next() - - switch { + // collect extracted entries if file attributes should be preserved + collectEntries := (!cfg.DropFileAttributes()) || cfg.PreserveOwner() + var extractedEntries []archiveEntry - // if no more files are found exit loop - case err == io.EOF: - // extraction finished - return nil + if cfg.PreserveOwner() && src.Type() != fileExtensionTar { + cfg.Logger().Info("owner preservation is only supported for tar archives", "type", src.Type()) + } - // handle other errors and end extraction or continue - case err != nil: - if err := handleError(cfg, td, "error reading", err); err != nil { - return err + // iterate over all files in archive + err := func() error { + for { + // check if context is canceled + if ctx.Err() != nil { + return ctx.Err() } - continue - - // if the header is nil, just skip it (not sure how this happens) - case ae == nil: - continue - } - // check for to many files (including folder and symlinks) in archive - fileCounter++ + // get next file + ae, err := src.Next() - // check if maximum of files (including folder and symlinks) is exceeded - if err := cfg.CheckMaxFiles(fileCounter); err != nil { - return handleError(cfg, td, "max objects check failed", err) - } + switch { - // check if file needs to match patterns - match, err := checkPatterns(cfg.Patterns(), ae.Name()) - if err != nil { - return handleError(cfg, td, "cannot check pattern", err) - } - if !match { - cfg.Logger().Info("skipping file (pattern mismatch)", "name", ae.Name()) - td.PatternMismatches++ - 
continue - } + // if no more files are found exit loop + case err == io.EOF: + // extraction finished + return nil - cfg.Logger().Debug("extract", "name", ae.Name()) - switch { - - // if its a dir and it doesn't exist create it - case ae.IsDir(): - - // handle directory - if err := createDir(t, dst, ae.Name(), ae.Mode(), cfg); err != nil { - if err := handleError(cfg, td, "failed to create safe directory", err); err != nil { + // handle other errors and end extraction or continue + case err != nil: + if err := handleError(cfg, td, "error reading", err); err != nil { return err } + continue - // do not end on error + // if the header is nil, just skip it (not sure how this happens) + case ae == nil: continue } - // store telemetry and continue - td.ExtractedDirs++ - continue + // check for to many files (including folder and symlinks) in archive + fileCounter++ - // if it's a file create it - case ae.IsRegular(): + // check if maximum of files (including folder and symlinks) is exceeded + if err := cfg.CheckMaxFiles(fileCounter); err != nil { + return handleError(cfg, td, "max objects check failed", err) + } - // check extraction size forecast - if err := cfg.CheckExtractionSize(extractionSize + ae.Size()); err != nil { - return handleError(cfg, td, "max extraction size exceeded", err) + // check if file needs to match patterns + match, err := checkPatterns(cfg.Patterns(), ae.Name()) + if err != nil { + return handleError(cfg, td, "cannot check pattern", err) + } + if !match { + cfg.Logger().Info("skipping file (pattern mismatch)", "name", ae.Name()) + td.PatternMismatches++ + continue } - // open file inm archive - err, fileCreated := func() (error, bool) { - fin, err := ae.Open() - if err != nil { - return handleError(cfg, td, "failed to open file", err), false + cfg.Logger().Debug("extract", "name", ae.Name()) + switch { + + // if its a dir and it doesn't exist create it + case ae.IsDir(): + + // handle directory + if err := createDir(t, dst, ae.Name(), 
ae.Mode(), cfg); err != nil { + if err := handleError(cfg, td, "failed to create safe directory", err); err != nil { + return err + } + + // do not end on error + continue + } + if collectEntries { + extractedEntries = append(extractedEntries, ae) + } + + // store telemetry and continue + td.ExtractedDirs++ + + // if it's a file create it + case ae.IsRegular(): + + // check extraction size forecast + if err := cfg.CheckExtractionSize(extractionSize + ae.Size()); err != nil { + return handleError(cfg, td, "max extraction size exceeded", err) } - defer fin.Close() - // create file - n, err := createFile(t, dst, ae.Name(), fin, ae.Mode(), cfg.MaxExtractionSize()-extractionSize, cfg) - extractionSize = extractionSize + n - td.ExtractionSize = extractionSize + // open file in archive + err, fileCreated := func() (error, bool) { + fin, err := ae.Open() + if err != nil { + return handleError(cfg, td, "failed to open file", err), false + } + defer fin.Close() + + // create file + n, err := createFile(t, dst, ae.Name(), fin, ae.Mode(), cfg.MaxExtractionSize()-extractionSize, cfg) + extractionSize = extractionSize + n + td.ExtractionSize = extractionSize + if err != nil { + + // increase error counter, set error and end if necessary + return handleError(cfg, td, "failed to create safe file", err), false + } + + // do not end on error + return nil, true + }() if err != nil { + return err + } - // increase error counter, set error and end if necessary - return handleError(cfg, td, "failed to create safe file", err), false + // store telemetry + if fileCreated { + td.ExtractedFiles++ + if collectEntries { + extractedEntries = append(extractedEntries, ae) + } } - // do not end on error - return nil, true - }() - if err != nil { - return err - } + // its a symlink !! + case ae.IsSymlink(): - // store telemetry - if fileCreated { - td.ExtractedFiles++ - } - continue + // check if symlinks are allowed + if cfg.DenySymlinkExtraction() { - // its a symlink !! 
- case ae.IsSymlink(): + err := unsupportedFile(ae.Name()) + if err := handleError(cfg, td, "symlink extraction disabled", err); err != nil { + return err + } - // check if symlinks are allowed - if cfg.DenySymlinkExtraction() { + // do not end on error + continue + } - err := unsupportedFile(ae.Name()) - if err := handleError(cfg, td, "symlink extraction disabled", err); err != nil { - return err + // create link + if err := createSymlink(t, dst, ae.Name(), ae.Linkname(), cfg); err != nil { + + // increase error counter, set error and end if necessary + if err := handleError(cfg, td, "failed to create safe symlink", err); err != nil { + return err + } + + // do not end on error + continue + } + if collectEntries { + extractedEntries = append(extractedEntries, ae) } - // do not end on error - continue - } + // store telemetry and continue + td.ExtractedSymlinks++ + + default: - // create link - if err := createSymlink(t, dst, ae.Name(), ae.Linkname(), cfg); err != nil { + // tar specific: check for git comment file `pax_global_header` from type `67` and skip + if ae.Type()&tar.TypeXGlobalHeader == tar.TypeXGlobalHeader && ae.Name() == "pax_global_header" { + continue + } - // increase error counter, set error and end if necessary - if err := handleError(cfg, td, "failed to create safe symlink", err); err != nil { + err := unsupportedFile(ae.Name()) + msg := fmt.Sprintf("unsupported filetype in archive (%x)", ae.Mode()) + if err := handleError(cfg, td, msg, err); err != nil { return err } // do not end on error continue } + } + }() + if err != nil { + return err + } - // store telemetry and continue - td.ExtractedSymlinks++ - continue + // set attributes after all modification are done to ensure that + // the timestamps are set correctly + if collectEntries { + for _, ae := range extractedEntries { + path := filepath.Join(dst, ae.Name()) + if err := setFileAttributesAndOwner(t, path, ae, cfg.DropFileAttributes(), cfg.PreserveOwner()); err != nil { + return 
fmt.Errorf("failed to set file attributes: %w", err) + } + } + } - default: + // extraction finished + return nil +} - // tar specific: check for git comment file `pax_global_header` from type `67` and skip - if ae.Type()&tar.TypeXGlobalHeader == tar.TypeXGlobalHeader && ae.Name() == "pax_global_header" { - continue +// setFileAttributesAndOwner sets the file attributes for the given path and archive entry. +func setFileAttributesAndOwner(t Target, path string, ae archiveEntry, dropFileAttributes bool, owner bool) error { + if !dropFileAttributes { // preserve file attributes + if ae.IsSymlink() { // only time attributes are supported for symlinks + if err := t.Lchtimes(path, ae.AccessTime(), ae.ModTime()); err != nil { + return fmt.Errorf("failed to lchtimes symlink: %w", err) } - - err := unsupportedFile(ae.Name()) - msg := fmt.Sprintf("unsupported filetype in archive (%x)", ae.Mode()) - if err := handleError(cfg, td, msg, err); err != nil { - return err + } else { + if err := t.Chmod(path, ae.Mode().Perm()); err != nil { + return fmt.Errorf("failed to chmod file: %w", err) } - - // do not end on error - continue + if err := t.Chtimes(path, ae.AccessTime(), ae.ModTime()); err != nil { + return fmt.Errorf("failed to chtimes file: %w", err) + } + } + } + if owner { // preserve owner and group + if err := t.Chown(path, ae.Uid(), ae.Gid()); err != nil { + return fmt.Errorf("failed to chown file: %w", err) } } + return nil } // readerToReaderAtSeeker converts an io.Reader to an io.ReaderAt and io.Seeker diff --git a/go.mod b/go.mod index 2b44b8ab..0af27182 100644 --- a/go.mod +++ b/go.mod @@ -21,5 +21,6 @@ require ( github.com/hashicorp/go-multierror v1.1.1 // indirect github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect go4.org v0.0.0-20200411211856-f5505b9728dd // indirect + golang.org/x/sys v0.28.0 // indirect golang.org/x/text v0.20.0 // indirect ) diff --git a/go.sum b/go.sum index c77a3b9d..818b389d 100644 --- a/go.sum +++ b/go.sum @@ -192,6 +192,8 @@ 
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/rar.go b/rar.go index 3937482d..945771fd 100644 --- a/rar.go +++ b/rar.go @@ -8,6 +8,7 @@ import ( "io" "io/fs" "os" + "time" "github.com/nwaples/rardecode" ) @@ -82,9 +83,6 @@ func (rw *rarWalker) Next() (archiveEntry, error) { return nil, err } re := &rarEntry{fh, rw.r} - if re.IsSymlink() { // symlink not supported - return nil, unsupportedFile(re.Name()) - } return re, nil } @@ -95,46 +93,73 @@ type rarEntry struct { } // Name returns the name of the file. -func (re *rarEntry) Name() string { - return re.f.Name +func (r *rarEntry) Name() string { + return r.f.Name } // Size returns the size of the file. -func (re *rarEntry) Size() int64 { - return re.f.UnPackedSize +func (r *rarEntry) Size() int64 { + return r.f.UnPackedSize } // Mode returns the mode of the file. -func (z *rarEntry) Mode() os.FileMode { - return z.f.Mode() +func (r *rarEntry) Mode() os.FileMode { + return r.f.Mode() } // Linkname symlinks are not supported. -func (re *rarEntry) Linkname() string { +func (r *rarEntry) Linkname() string { return "" } // IsRegular returns true if the file is a regular file. 
-func (re *rarEntry) IsRegular() bool { - return re.f.Mode().IsRegular() +func (r *rarEntry) IsRegular() bool { + return r.f.Mode().IsRegular() } // IsDir returns true if the file is a directory. -func (z *rarEntry) IsDir() bool { - return z.f.IsDir +func (r *rarEntry) IsDir() bool { + return r.f.IsDir } // IsSymlink returns true if the file is a symlink. -func (z *rarEntry) IsSymlink() bool { - return z.f.Mode()&fs.ModeSymlink != 0 +func (r *rarEntry) IsSymlink() bool { + return false } // Type returns the type of the file. -func (z *rarEntry) Type() fs.FileMode { - return z.f.Mode().Type() +func (r *rarEntry) Type() fs.FileMode { + return r.f.Mode().Type() } // Open returns a reader for the file. -func (z *rarEntry) Open() (io.ReadCloser, error) { - return io.NopCloser(z.r), nil +func (r *rarEntry) Open() (io.ReadCloser, error) { + return io.NopCloser(r.r), nil +} + +// AccessTime returns the access time of the file. +func (r *rarEntry) AccessTime() time.Time { + return r.f.AccessTime +} + +// ModTime returns the modification time of the file. +func (r *rarEntry) ModTime() time.Time { + return r.f.ModificationTime +} + +// Sys returns the system information of the file. +func (r *rarEntry) Sys() interface{} { + return r.f +} + +// Gid is not supported for Rar files. The used library does not provide +// this information. The function returns the group ID of the current process. +func (r *rarEntry) Gid() int { + return os.Getegid() +} + +// Uid is not supported for Rar files. The used library does not provide +// this information. The function returns the user ID of the current process. 
+func (r *rarEntry) Uid() int { + return os.Geteuid() } diff --git a/tar.go b/tar.go index 9f80e5fe..e501ab6e 100644 --- a/tar.go +++ b/tar.go @@ -9,6 +9,7 @@ import ( "io" "io/fs" "os" + "time" ) // fileExtensionTar is the file extension for tar files @@ -118,3 +119,28 @@ func (t *tarEntry) Open() (io.ReadCloser, error) { func (t *tarEntry) Type() fs.FileMode { return fs.FileMode(t.hdr.Typeflag) } + +// AccessTime returns the access time of the entry +func (t *tarEntry) AccessTime() time.Time { + return t.hdr.AccessTime +} + +// ModTime returns the modification time of the entry +func (t *tarEntry) ModTime() time.Time { + return t.hdr.ModTime +} + +// Sys returns the system information of the entry +func (t *tarEntry) Sys() interface{} { + return t.hdr +} + +// Gid returns the group id of the entry +func (t *tarEntry) Gid() int { + return t.hdr.Gid +} + +// Uid returns the user id of the entry +func (t *tarEntry) Uid() int { + return t.hdr.Uid +} diff --git a/target.go b/target.go index e30f4f5a..d6d88890 100644 --- a/target.go +++ b/target.go @@ -11,6 +11,7 @@ import ( "os" "path/filepath" "strings" + "time" ) // Target specifies all function that are needed to be implemented to extract contents from an archive @@ -38,6 +39,18 @@ type Target interface { // Stat see docs for os.Stat. Main purpose is to check if a symlink is pointing to a file or directory. Stat(path string) (fs.FileInfo, error) + + // Chmod see docs for os.Chmod. Main purpose is to set the file mode of a file or directory. + Chmod(name string, mode fs.FileMode) error + + // Chtimes see docs for os.Chtimes. Main purpose is to set the file times of a file or directory. + Chtimes(name string, atime, mtime time.Time) error + + // Lchtimes is the symlink counterpart of Chtimes (the os package has no Lchtimes). Main purpose is to set the file times of a symlink itself without following it. + Lchtimes(name string, atime, mtime time.Time) error + + // Chown see docs for os.Chown. Main purpose is to set the file owner and group of a file or directory.
+ Chown(name string, uid, gid int) error } // createFile is a wrapper around the CreateFile function @@ -73,7 +86,12 @@ func createFile(t Target, dst string, name string, src io.Reader, mode fs.FileMo return 0, fmt.Errorf("cannot create directory: %w", err) } - return t.CreateFile(filepath.Join(dst, name), src, mode, cfg.Overwrite(), maxSize) + // ensure that if the file exist that it is not a symlink + if err := securityCheck(t, dst, name, cfg); err != nil { + return 0, fmt.Errorf("security check path failed: %w", err) + } + path := filepath.Join(dst, name) + return t.CreateFile(path, src, mode, cfg.Overwrite(), maxSize) } // createDir is a wrapper around the CreateDir function @@ -111,15 +129,14 @@ func createDir(t Target, dst string, name string, mode fs.FileMode, cfg *Config) return nil } + // perform security check to ensure that the path is safe to write to if err := securityCheck(t, dst, name, cfg); err != nil { return fmt.Errorf("security check path failed: %w", err) } // combine the path parts := strings.Split(name, "/") - name = filepath.Join(parts...) - path := filepath.Join(dst, name) - + path := filepath.Join(dst, filepath.Join(parts...)) return t.CreateDir(path, mode) } @@ -155,12 +172,6 @@ func createSymlink(t Target, dst string, name string, linkTarget string, cfg *Co // Check if link target is absolute path if filepath.IsAbs(linkTarget) { - // continue on error? 
- if cfg.ContinueOnError() { - cfg.Logger().Info("skip link target with absolute path", "link target", linkTarget) - return nil - } - // return error return fmt.Errorf("symlink with absolute path as target: %s", linkTarget) } @@ -191,7 +202,6 @@ func createSymlink(t Target, dst string, name string, linkTarget string, cfg *Co // create symlink return t.CreateSymlink(linkTarget, filepath.Join(dst, name), cfg.Overwrite()) - } // securityCheck checks if the targetDirectory contains path traversal diff --git a/target_disk.go b/target_disk.go index 6b9e0618..170f1980 100644 --- a/target_disk.go +++ b/target_disk.go @@ -8,6 +8,7 @@ import ( "io" "io/fs" "os" + "time" ) // TargetDisk is the struct type that holds all information for interacting with the filesystem @@ -108,3 +109,21 @@ func (d *TargetDisk) Lstat(name string) (fs.FileInfo, error) { func (d *TargetDisk) Stat(name string) (os.FileInfo, error) { return os.Stat(name) } + +// Chmod sets the permission bits of the named file to mode.Perm(); all other bits of mode are ignored. +func (d *TargetDisk) Chmod(name string, mode fs.FileMode) error { + return os.Chmod(name, mode.Perm()) +} + +// Chtimes changes the access and modification times of the named file. +func (d *TargetDisk) Chtimes(name string, atime, mtime time.Time) error { + return os.Chtimes(name, atime, mtime) +} + +// Lchtimes changes the access and modification times of the named file via the platform-specific lchtimes helper. It does nothing and returns nil when canMaintainSymlinkTimestamps is false. +func (d *TargetDisk) Lchtimes(name string, atime, mtime time.Time) error { + if canMaintainSymlinkTimestamps { + return lchtimes(name, atime, mtime) + } + return nil +} diff --git a/target_disk_others.go b/target_disk_others.go new file mode 100644 index 00000000..9a2ab63d --- /dev/null +++ b/target_disk_others.go @@ -0,0 +1,32 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build !unix + +package extract + +import ( + "fmt" + "runtime" + "time" +) + +// lchtimes modifies the access and modified timestamps on a target path. +// This capability is only available on unix as of now, so this stub always returns an error.
+func lchtimes(_ string, _, _ time.Time) error { + return fmt.Errorf("Lchtimes is not supported on this platform (%s)", runtime.GOOS) +} + +// canMaintainSymlinkTimestamps determines whether it is possible to change +// timestamps on symlinks for the current platform. For regular files +// and directories, attempts are made to restore permissions and timestamps +// after extraction. But for symbolic links, Go's cross-platform +// functions (Chmod and Chtimes) are not capable of changing symlink info +// because those methods follow the symlinks. However, a platform-dependent option +// is provided for unix (see Lchtimes). +const canMaintainSymlinkTimestamps = false + +// Chown is not supported on non-unix platforms; it always returns an error. +func (d *TargetDisk) Chown(name string, uid, gid int) error { + return fmt.Errorf("Chown is not supported on this platform (%s)", runtime.GOOS) +} diff --git a/target_disk_unix.go b/target_disk_unix.go new file mode 100644 index 00000000..0500e6da --- /dev/null +++ b/target_disk_unix.go @@ -0,0 +1,48 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build unix + +package extract + +import ( + "fmt" + "os" + "time" + + "golang.org/x/sys/unix" +) + +// Chown changes the numeric uid and gid of the named file using os.Lchown, so a symlink itself is changed rather than its target. +func (d *TargetDisk) Chown(name string, uid, gid int) error { + if err := os.Lchown(name, uid, gid); err != nil { + return fmt.Errorf("chown failed: %w", err) + } + return nil +} + +// lchtimes modifies the access and modified timestamps on a target path via unix.Lutimes. +// This capability is only available on unix as of now. +func lchtimes(path string, atime, mtime time.Time) error { + return unix.Lutimes(path, []unix.Timeval{ + unixTimeval(atime), + unixTimeval(mtime), + }) +} + +// unixTimeval converts a time.Time to a unix.Timeval. Note that it always rounds +// up to the next full microsecond, so even one nanosecond past the previous microsecond +// will be rounded up to the next microsecond.
+// See the implementation of unix.NsecToTimeval for details on how this happens. +func unixTimeval(t time.Time) unix.Timeval { + return unix.NsecToTimeval(t.UnixNano()) +} + +// canMaintainSymlinkTimestamps determines whether it is possible to change +// timestamps on symlinks for the current platform. For regular files +// and directories, attempts are made to restore permissions and timestamps +// after extraction. But for symbolic links, Go's cross-platform +// functions (Chmod and Chtimes) are not capable of changing symlink info +// because those methods follow the symlinks. However, a platform-dependent option +// is provided for unix (see Lchtimes). +const canMaintainSymlinkTimestamps = true diff --git a/target_disk_unix_test.go b/target_disk_unix_test.go new file mode 100644 index 00000000..4fba3668 --- /dev/null +++ b/target_disk_unix_test.go @@ -0,0 +1,62 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build unix + +package extract + +import ( + "testing" + "time" + + "golang.org/x/sys/unix" +) + +func TestUnixTimeval(t *testing.T) { + tests := []struct { + input time.Time + want unix.Timeval + }{ + { + time.Date(1970, 1, 1, 0, 0, 0, 0, time.UTC), + unix.Timeval{Sec: 0, Usec: 0}, + }, + { + // Note: the single nanosecond is rounded up to the next microsecond. + time.Date(1970, 1, 1, 0, 0, 0, 1, time.UTC), + unix.Timeval{Sec: 0, Usec: 1}, + }, + { + // Note: the 100 nanoseconds are rounded up to the next microsecond. + time.Date(1970, 1, 1, 0, 0, 0, 100, time.UTC), + unix.Timeval{Sec: 0, Usec: 1}, + }, + { + // Note: exactly 1 microsecond is not rounded up. + time.Date(1970, 1, 1, 0, 0, 0, 1000, time.UTC), + unix.Timeval{Sec: 0, Usec: 1}, + }, + { + // Note: exactly 1 nanosecond past the microsecond is rounded up.
+ time.Date(1970, 1, 1, 0, 0, 0, 1001, time.UTC), + unix.Timeval{Sec: 0, Usec: 2}, + }, + { + time.Date(1970, 1, 1, 0, 0, 1, 1000, time.UTC), + unix.Timeval{Sec: 1, Usec: 1}, + }, + { + time.Date(1970, 1, 1, 0, 0, 1, 2000, time.UTC), + unix.Timeval{Sec: 1, Usec: 2}, + }, + } + + for _, test := range tests { + t.Run(test.input.String(), func(t *testing.T) { + got := unixTimeval(test.input) + if got != test.want { + t.Errorf("unixTimeval(%v) = %v; want %v", test.input, got, test.want) + } + }) + } +} diff --git a/target_memory.go b/target_memory.go index ddc34c28..69061360 100644 --- a/target_memory.go +++ b/target_memory.go @@ -100,7 +100,7 @@ func (m *TargetMemory) createFile(path string, mode fs.FileMode, src io.Reader, // create entry m.files.Store(path, &memoryEntry{ - fileInfo: &memoryFileInfo{name: name, size: n, mode: mode.Perm(), modTime: time.Now()}, + fileInfo: &memoryFileInfo{name: name, size: n, mode: mode.Perm(), accessTime: time.Now(), modTime: time.Now()}, data: buf.Bytes(), lock: sync.RWMutex{}, }) @@ -240,6 +240,73 @@ func (m *TargetMemory) Open(path string) (fs.File, error) { return &fileEntry{memoryEntry: me, reader: bytes.NewReader(me.data)}, nil } +// Chmod changes the permission bits of the file at the given path and keeps its other mode bits. If the path is invalid or cannot be resolved, a *fs.PathError is returned. +func (m *TargetMemory) Chmod(path string, mode fs.FileMode) error { + if !fs.ValidPath(path) { + return &fs.PathError{Op: "Chmod", Path: path, Err: fs.ErrInvalid} + } + me, err := m.resolveEntry(path) + if err != nil { + return &fs.PathError{Op: "Chmod", Path: path, Err: err} + } + me.lock.Lock() + defer me.lock.Unlock() + // clear the old permission bits (&^ 0777) while keeping all other mode bits, then OR in the new permission bits + me.fileInfo.(*memoryFileInfo).mode = (me.fileInfo.(*memoryFileInfo).mode &^ 0777) | mode.Perm() + return nil +} + +// Chtimes changes the access and modification times of the file at the given path. +// If the file does not exist, an error is returned.
+func (m *TargetMemory) Chtimes(path string, atime time.Time, mtime time.Time) error { + if !fs.ValidPath(path) { + return &fs.PathError{Op: "Chtimes", Path: path, Err: fs.ErrInvalid} + } + me, err := m.resolveEntry(path) + if err != nil { + return &fs.PathError{Op: "Chtimes", Path: path, Err: err} + } + me.lock.Lock() + defer me.lock.Unlock() + me.fileInfo.(*memoryFileInfo).accessTime = atime + me.fileInfo.(*memoryFileInfo).modTime = mtime + return nil +} + +// Chown changes the owner and group of the file at the given path. +// If the file does not exist, an error is returned. +func (m *TargetMemory) Chown(path string, uid, gid int) error { + if !fs.ValidPath(path) { + return &fs.PathError{Op: "Chtimes", Path: path, Err: fs.ErrInvalid} + } + me, err := m.resolveEntry(path) + if err != nil { + return &fs.PathError{Op: "Chtimes", Path: path, Err: err} + } + me.lock.Lock() + defer me.lock.Unlock() + me.fileInfo.(*memoryFileInfo).uid = uid + me.fileInfo.(*memoryFileInfo).gid = gid + return nil +} + +// Lchtimes changes the access and modification times of the file at the given path. +// If the file does not exist, an error is returned. 
+func (m *TargetMemory) Lchtimes(path string, atime time.Time, mtime time.Time) error { + if !fs.ValidPath(path) { + return &fs.PathError{Op: "Lchtimes", Path: path, Err: fs.ErrInvalid} + } + me, err := m.resolveEntry(path) + if err != nil { + return &fs.PathError{Op: "Lchtimes", Path: path, Err: err} + } + me.lock.Lock() + defer me.lock.Unlock() + me.fileInfo.(*memoryFileInfo).accessTime = atime + me.fileInfo.(*memoryFileInfo).modTime = mtime + return nil +} + type dirEntry struct { *memoryEntry memory *TargetMemory @@ -334,7 +401,7 @@ func (m *TargetMemory) resolveEntry(path string) (*memoryEntry, error) { dir := p.Dir(path) existingEntry, err := m.resolvePath(dir) if errors.Is(err, fs.ErrNotExist) { - return nil, fs.ErrNotExist + return nil, err } if err != nil { return nil, &fs.PathError{Op: "resolveEntry", Path: path, Err: err} @@ -450,11 +517,15 @@ func (m *TargetMemory) Lstat(path string) (fs.FileInfo, error) { } // return file info copy + mfi := me.fileInfo.(*memoryFileInfo) return &memoryFileInfo{ - name: me.fileInfo.Name(), - size: me.fileInfo.Size(), - mode: me.fileInfo.Mode(), - modTime: me.fileInfo.ModTime(), + name: mfi.Name(), + size: mfi.Size(), + mode: mfi.Mode(), + accessTime: mfi.AccessTime(), + modTime: mfi.ModTime(), + gid: mfi.Gid(), + uid: mfi.Uid(), }, nil } @@ -717,20 +788,28 @@ func (me *memoryEntry) Info() (fs.FileInfo, error) { // memoryFileInfo is a FileInfo implementation for the in-memory filesystem type memoryFileInfo struct { - name string - size int64 - mode fs.FileMode - modTime time.Time + accessTime time.Time + gid int + name string + mode fs.FileMode + modTime time.Time + size int64 + uid int } -// Name implements [io/fs.FileInfo] interface -func (fi *memoryFileInfo) Name() string { - return fi.name +// AccessTime returns the access time of the file +func (fi *memoryFileInfo) AccessTime() time.Time { + return fi.accessTime } -// Size implements [io/fs.FileInfo] interface -func (fi *memoryFileInfo) Size() int64 { - return 
fi.size +// Gid returns the group id of the file +func (fi *memoryFileInfo) Gid() int { + return fi.gid +} + +// IsDir implements [io/fs.FileInfo] interface +func (fi *memoryFileInfo) IsDir() bool { + return fi.mode.IsDir() } // Mode implements [io/fs.FileInfo] interface @@ -743,12 +822,22 @@ func (fi *memoryFileInfo) ModTime() time.Time { return fi.modTime } -// IsDir implements [io/fs.FileInfo] interface -func (fi *memoryFileInfo) IsDir() bool { - return fi.mode.IsDir() +// Name implements [io/fs.FileInfo] interface +func (fi *memoryFileInfo) Name() string { + return fi.name +} + +// Size implements [io/fs.FileInfo] interface +func (fi *memoryFileInfo) Size() int64 { + return fi.size } // Sys implements [io/fs.FileInfo] interface, but returns always nil func (fi *memoryFileInfo) Sys() any { return nil } + +// Uid returns the user id of the file +func (fi *memoryFileInfo) Uid() int { + return fi.uid +} diff --git a/target_memory_test.go b/target_memory_test.go index c3a7170f..9761e812 100644 --- a/target_memory_test.go +++ b/target_memory_test.go @@ -5,11 +5,15 @@ package extract_test import ( "bytes" + "context" "io" "io/fs" p "path" + "path/filepath" + "strings" "testing" "testing/fstest" + "time" extract "github.com/hashicorp/go-extract" ) @@ -710,6 +714,16 @@ func TestCreateFile(t *testing.T) { t.Fatalf("CreateFile() failed: %s", err) } + // create the same file, but fail bc it already exists# + if _, err := tm.CreateFile(testPath, bytes.NewReader([]byte(testContent)), fs.FileMode(testPerm), false, -1); err == nil { + t.Fatalf("CreateFile() failed: expected error, got nil") + } + + // create the same file, but overwrite + if _, err := tm.CreateFile(testPath, bytes.NewReader([]byte(testContent)), fs.FileMode(testPerm), true, -1); err != nil { + t.Fatalf("CreateFile() failed: %s", err) + } + // open the file f, err := tm.Open(testPath) if err != nil { @@ -758,3 +772,125 @@ func TestCreateFile(t *testing.T) { t.Fatalf("CreateFile() failed: expected error, got 
nil") } } + +// TestCreateSymlink tests the CreateSymlink method +func TestCreateSymlink(t *testing.T) { + // instantiate a new memory + tm := extract.NewTargetMemory() + + // test data + testPath := "test" + testLink := "link" + testContent := "test" + testPerm := 0644 + + // create a file + if _, err := tm.CreateFile(testPath, bytes.NewReader([]byte(testContent)), fs.FileMode(testPerm), false, -1); err != nil { + t.Fatalf("CreateFile() failed: %s", err) + } + + // create a symlink + if err := tm.CreateSymlink(testPath, testLink, false); err != nil { + t.Fatalf("CreateSymlink() failed: %s", err) + } + + // open the symlink + f, err := tm.Open(testLink) + if err != nil { + t.Fatalf("Open() failed: %s", err) + } + + // stat the symlink (which is the link target) + stat, err := f.Stat() + if err != nil { + t.Fatalf("Stat() failed: %s", err) + } + + // check name + if stat.Name() != testPath { + t.Fatalf("Name() returned unexpected value: expected %s, got %s", testLink, stat.Name()) + } + + // check mode + if int(stat.Mode().Perm()&fs.ModePerm) != testPerm { + t.Fatalf("Mode() returned unexpected value: expected %d, got %d", testPerm, stat.Mode().Perm()) + } + + // read the symlink + data, err := io.ReadAll(f) + if err != nil { + t.Fatalf("ReadAll() failed: %s", err) + } + if !bytes.Equal(data, []byte(testContent)) { + t.Fatalf("unexpected file contents: expected %s, got %s", testContent, data) + } + + // close the symlink + if err := f.Close(); err != nil { + t.Fatalf("Close() failed: %s", err) + } + + // overwrite the symlink, but fail + if err := tm.CreateSymlink(testPath, testLink, false); err == nil { + t.Fatalf("CreateSymlink() failed: expected error, got nil") + } + + // overwrite the symlink + if err := tm.CreateSymlink(testPath, testLink, true); err != nil { + t.Fatalf("CreateSymlink() failed: %s", err) + } +} + +func TestUnpackToMemoryWithPreserveFileAttributesAndOwner(t *testing.T) { + type ownershipAccessor interface { + Uid() int + Gid() int + } + for _, 
tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + var ( + ctx = context.Background() + m = extract.NewTargetMemory() + src = asIoReader(t, tc.packer(t, tc.contents)) + cfg = extract.NewConfig( + extract.WithDropFileAttributes(false), + extract.WithPreserveOwner(true), + ) + ) + if err := extract.UnpackTo(ctx, m, "", src, cfg); err != nil { + t.Fatalf("error unpacking archive: %v", err) + } + + for _, c := range tc.contents { + parts := strings.Split(c.Name, "/") // create system specific path + path := filepath.Join(parts...) + stat, err := m.Lstat(path) + if err != nil { + t.Fatalf("error getting file stats: %v", err) + } + if !(c.Mode&fs.ModeSymlink != 0) { // skip symlink checks + if stat.Mode().Perm() != c.Mode.Perm() { + t.Fatalf("expected file mode %v, got %v, file %s", c.Mode.Perm(), stat.Mode().Perm(), path) + } + } + if !tc.doesNotSupportModTime { + // calculate the time difference + modTimeDiff := abs(stat.ModTime().UnixNano() - c.ModTime.UnixNano()) + if modTimeDiff >= int64(time.Microsecond) { + t.Fatalf("expected file modtime %v, got %v, file %s, diff %v", c.ModTime, stat.ModTime(), path, modTimeDiff) + } + } + if !tc.doesNotSupportOwner { + if oa, ok := stat.(ownershipAccessor); ok { + if oa.Uid() != c.Uid { + t.Fatalf("expected file uid %v, got %v, file %s", c.Uid, oa.Uid(), path) + } + if oa.Gid() != c.Gid { + t.Fatalf("expected file gid %v, got %v, file %s", c.Gid, oa.Gid(), path) + } + } + } + } + }) + } +} diff --git a/unpack_other_test.go b/unpack_other_test.go new file mode 100644 index 00000000..90dcb96e --- /dev/null +++ b/unpack_other_test.go @@ -0,0 +1,197 @@ +// Copyright (c) HashiCorp, Inc. 
+// SPDX-License-Identifier: MPL-2.0 + +//go:build !unix + +package extract_test + +import ( + "context" + "io/fs" + "os" + "path/filepath" + "runtime" + "testing" + + "github.com/hashicorp/go-extract" +) + +func TestToWindowsFileMode(t *testing.T) { + if runtime.GOOS != "windows" { + t.Skip("skipping test on non-windows systems") + } + otherMasks := []int{00, 01, 02, 03, 04, 05, 06, 07} + groupMasks := []int{00, 010, 020, 030, 040, 050, 060, 070} + userMasks := []int{00, 0100, 0200, 0300, 0400, 0500, 0600, 0700} + for _, dir := range []bool{true, false} { + for _, o := range otherMasks { + for _, g := range groupMasks { + for _, u := range userMasks { + var ( + path = filepath.Join(t.TempDir(), "test") + mode = fs.FileMode(u | g | o) + ) + if err := func() error { + if dir { + return os.Mkdir(path, mode) + } + return os.WriteFile(path, []byte("foobar content"), mode) + }(); err != nil { + t.Fatalf("error creating test resource: %s", err) + } + stat, err := os.Stat(path) + if err != nil { + t.Fatalf("error getting file stats: %s", err) + } + if dir { + continue // skip directory tests, as they are not supported on windows und create unpredictable results + } + calculated := toWindowsFileMode(dir, mode) + if stat.Mode().Perm() != calculated.Perm() { + t.Errorf("toWindowsFileMode(%t, %s) calculated mode mode %s, but actual windows mode: %s", dir, mode, calculated.Perm(), stat.Mode().Perm()) + } + } + } + } + } +} + +func TestWithCustomMode(t *testing.T) { + + if runtime.GOOS != "windows" { + t.Skip("test only runs on Windows") + } + + tests := []struct { + name string + data []byte + dst string + cfg *extract.Config + expected map[string]fs.FileMode + }{ + { + name: "dir with 0755 and file with 0644", + data: compressGzip(t, packTar(t, []archiveContent{ + { + Name: "sub/file", + Mode: fs.FileMode(0644), // 420 + }, + })), + cfg: extract.NewConfig( + extract.WithCustomCreateDirMode(fs.FileMode(0755)), // 493 + ), + expected: map[string]fs.FileMode{ + "sub": 
fs.FileMode(0755), // 493 + "sub/file": fs.FileMode(0644), // 420 + }, + }, + { + name: "decompress with custom mode", + data: compressGzip(t, []byte("foobar content")), + dst: "out", // specify decompressed file name + cfg: extract.NewConfig( + extract.WithCustomDecompressFileMode(fs.FileMode(0666)), // 438 + ), + expected: map[string]fs.FileMode{ + "out": fs.FileMode(0666), // 438 + }, + }, + { + name: "dir with 0755 and file with 0777", + data: compressGzip(t, []byte("foobar content")), + dst: "foo/out", + cfg: extract.NewConfig( + extract.WithCreateDestination(true), // create destination^ + extract.WithCustomCreateDirMode(fs.FileMode(0750)), // 488 + extract.WithCustomDecompressFileMode(fs.FileMode(0777)), // 511 + ), + expected: map[string]fs.FileMode{ + "foo": fs.FileMode(0750), // 488 + "foo/out": fs.FileMode(0777), // 511 + }, + }, + { + name: "dir with 0777 and file with 0777", + data: compressGzip(t, packTar(t, []archiveContent{ + { + Name: "sub/file", + Mode: fs.FileMode(0777), // 511 + }, + })), + cfg: extract.NewConfig( + extract.WithCustomCreateDirMode(fs.FileMode(0777)), // 511 + ), + expected: map[string]fs.FileMode{ + "sub": fs.FileMode(0777), // 511 + "sub/file": fs.FileMode(0777), // 511 + }, + }, + { + name: "file with 0000 permissions", + data: compressGzip(t, packTar(t, []archiveContent{ + { + Name: "file", + Mode: fs.FileMode(0000), // 0 + }, + { + Name: "dir/", + Mode: fs.ModeDir, // 000 permission + }, + })), + cfg: extract.NewConfig(), + expected: map[string]fs.FileMode{ + "file": fs.FileMode(0000), // 0 + "dir": fs.FileMode(0000), // 0 + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + if test.cfg == nil { + test.cfg = extract.NewConfig() + } + var ( + ctx = context.Background() + tmp = t.TempDir() + dst = filepath.Join(tmp, test.dst) + src = asIoReader(t, test.data) + cfg = test.cfg + ) + err := extract.Unpack(ctx, dst, src, cfg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + for 
name, expectedMode := range test.expected { + stat, err := os.Stat(filepath.Join(tmp, name)) + if err != nil { + t.Fatalf("error getting file stats: %s", err) + } + if stat.IsDir() { + continue // skip directory tests, as they are not supported on windows und create unpredictable results + } + expectedMode = toWindowsFileMode(stat.IsDir(), expectedMode) + if stat.Mode().Perm() != expectedMode.Perm() { + t.Fatalf("expected %s to have mode %s, but got: %s", name, expectedMode.Perm(), stat.Mode().Perm()) + } + } + }) + } +} + +// toWindowsFileMode converts a fs.FileMode to a windows file mode +func toWindowsFileMode(isDir bool, mode fs.FileMode) fs.FileMode { + + // handle special case + if isDir { + return fs.FileMode(0777) + } + + // check for write permission + if mode&0200 != 0 { + return fs.FileMode(0666) + } + + // return the mode + return fs.FileMode(0444) +} diff --git a/unpack_test.go b/unpack_test.go index 348eb895..42e9780f 100644 --- a/unpack_test.go +++ b/unpack_test.go @@ -15,11 +15,13 @@ import ( "fmt" "io" "io/fs" + "math" "os" "path/filepath" "runtime" "strings" "testing" + "time" "github.com/andybalholm/brotli" "github.com/dsnet/compress/bzip2" @@ -371,7 +373,10 @@ func TestUnpackArchive(t *testing.T) { ctx = context.Background() dst = t.TempDir() src = cacheFunction(t, tc.src) - cfg = extract.NewConfig(extract.WithCreateDestination(true), extract.WithContinueOnUnsupportedFiles(true)) + cfg = extract.NewConfig( + extract.WithCreateDestination(true), + extract.WithContinueOnUnsupportedFiles(true), + ) ) if err := extract.Unpack(ctx, dst, src, cfg); err != nil { @@ -604,8 +609,8 @@ func TestUnpackWithConfig(t *testing.T) { }, { Name: "dir/link", - Linktarget: "../test", Mode: fs.ModeSymlink | 0755, + Linktarget: "../test", }, } canceledCtx, cancel := context.WithCancel(context.Background()) @@ -699,7 +704,7 @@ func TestUnpackWithConfig(t *testing.T) { expectError: true, }, { - name: "unpack with overwrite enabled", + name: "unpack with overwrite 
enabled (files)", testArchive: []archiveContent{ {Name: "test", Content: []byte("hello world"), Mode: 0644}, {Name: "test", Content: []byte("hello world"), Mode: 0644}, @@ -707,6 +712,16 @@ func TestUnpackWithConfig(t *testing.T) { cfg: extract.NewConfig(extract.WithOverwrite(true)), expectError: false, }, + { + name: "unpack with overwrite enabled (symlink)", + testArchive: []archiveContent{ + {Name: "test", Content: []byte("hello world"), Mode: 0644}, + {Name: "link", Mode: fs.ModeSymlink | 0755, Linktarget: "test"}, + {Name: "link", Mode: fs.ModeSymlink | 0755, Linktarget: "test"}, + }, + cfg: extract.NewConfig(extract.WithOverwrite(true)), + expectError: false, + }, { name: "traverse symlink disabled", testArchive: []archiveContent{ @@ -1131,43 +1146,6 @@ func TestUnpackWithTypes(t *testing.T) { } } -func TestToWindowsFileMode(t *testing.T) { - if runtime.GOOS != "windows" { - t.Skip("skipping test on non-windows systems") - } - otherMasks := []int{00, 01, 02, 03, 04, 05, 06, 07} - groupMasks := []int{00, 010, 020, 030, 040, 050, 060, 070} - userMasks := []int{00, 0100, 0200, 0300, 0400, 0500, 0600, 0700} - for _, dir := range []bool{true, false} { - for _, o := range otherMasks { - for _, g := range groupMasks { - for _, u := range userMasks { - var ( - path = filepath.Join(t.TempDir(), "test") - mode = fs.FileMode(u | g | o) - ) - if err := func() error { - if dir { - return os.Mkdir(path, mode) - } - return os.WriteFile(path, []byte("foobar content"), mode) - }(); err != nil { - t.Fatalf("error creating test resource: %s", err) - } - stat, err := os.Stat(path) - if err != nil { - t.Fatalf("error getting file stats: %s", err) - } - calculated := toWindowsFileMode(dir, mode) - if stat.Mode().Perm() != calculated.Perm() { - t.Errorf("toWindowsFileMode(%t, %s) calculated mode mode %s, but actual windows mode: %s", dir, mode, calculated.Perm(), stat.Mode().Perm()) - } - } - } - } - } -} - func TestUnsupportedArchiveNames(t *testing.T) { // test testCases 
testCases := []struct { @@ -1325,6 +1303,10 @@ func TestHasKnownArchiveExtension(t *testing.T) { } } +func abs(v int64) int64 { + return int64(math.Abs(float64(v))) +} + func compressBrotli(t *testing.T, data []byte) []byte { t.Helper() b := new(bytes.Buffer) @@ -1450,6 +1432,10 @@ type archiveContent struct { Content []byte Linktarget string Mode fs.FileMode + AccessTime time.Time + ModTime time.Time + Uid int + Gid int } // packTar creates a tar file with the given content @@ -1484,9 +1470,17 @@ func packTar(t *testing.T, content []archiveContent) []byte { Linkname: c.Linktarget, Typeflag: tFlag, } + header.Uid = c.Uid + header.Gid = c.Gid + header.AccessTime = c.AccessTime + header.ModTime = c.ModTime if tFlag == tar.TypeXGlobalHeader { header.Mode = 0 header.Size = 0 + header.Uid = 0 + header.Gid = 0 + header.AccessTime = time.Time{} + header.ModTime = time.Time{} header.Format = tar.FormatPAX header.PAXRecords = map[string]string{} header.PAXRecords["path"] = c.Name @@ -1515,6 +1509,7 @@ func packZip(t *testing.T, content []archiveContent) []byte { Name: c.Name, } h.SetMode(c.Mode) + h.Modified = c.ModTime f, err := w.CreateHeader(h) if err != nil { t.Fatalf("error creating zip header: %v", err) @@ -1549,6 +1544,27 @@ func pack7z(t *testing.T, _ []archiveContent) []byte { return b } +// pack7z2 creates always the same a 7z archive with following files: +// -rw-r--r-- 1 503 20 27B 6 Dez 14:12 test +// drwxr-xr-x 3 503 20 96B 6 Dez 14:12 sub/ +// -rw-r--r-- 1 503 20 27B 6 Dez 14:12 sub/test +// lrwxr-xr-x 1 503 20 8B 6 Dez 14:12 link@ -> sub/test +var contents7z2 = []archiveContent{ + {Name: "test", Content: []byte("hello world"), Mode: 0644, AccessTime: time.Date(2024, 12, 6, 14, 12, 0, 0, time.Local), ModTime: time.Date(2024, 12, 6, 13, 12, 42, 315443500, time.UTC), Uid: 503, Gid: 20}, + {Name: "sub", Mode: fs.ModeDir | 0755, AccessTime: time.Date(2024, 12, 6, 14, 12, 0, 0, time.Local), ModTime: time.Date(2024, 12, 6, 13, 12, 49, 378600200, time.UTC), Uid: 
503, Gid: 20}, + {Name: "sub/test", Content: []byte("hello world"), Mode: 0644, AccessTime: time.Date(2024, 12, 6, 14, 12, 0, 0, time.Local), ModTime: time.Date(2024, 12, 6, 13, 12, 49, 378790200, time.UTC), Uid: 503, Gid: 20}, + {Name: "link", Linktarget: "sub/test", Mode: fs.ModeSymlink | 0755, AccessTime: time.Date(2024, 12, 6, 14, 12, 0, 0, time.Local), ModTime: time.Date(2024, 12, 6, 13, 12, 54, 532031200, time.UTC), Uid: 503, Gid: 20}, +} + +func pack7z2(t *testing.T, _ []archiveContent) []byte { + t.Helper() + b, err := hex.DecodeString("377abcaf271c00042d5fc057b50000000000000022000000000000004e8d3aa1e0003d00285d00399d486415d3bb7a709d8c05b9a4f8a601c485ca32a1ba56fbed0277df127ac8b5849a02ef89b000000000813307ae0fd100d43ca090a0775ec540189123d516c0a4234b6046777137a236d0c100afd4540a63bac5dbcdd5f4954e1321f89bc2fee32eda1ffebe24d8ec7f5495f31cb107f418f1a438bedfa190f8d5e9bd34f41831a3e85fb8590ee2d3eb6854856ce91c64623e7b1bec5c6bf403f9b195d06eb0810540f173e9abd2005e6a00001706300109808500070b01000123030101055d001000000c80ae0a01d53cb2d70000") + if err != nil { + t.Fatalf("error decoding 7z data: %v", err) + } + return b +} + // packRar creates always the same a rar archive with following files: // - dir <- directory // - test <- file with content 'hello world' @@ -1563,6 +1579,97 @@ func packRar(t *testing.T, _ []archiveContent) []byte { return b } +// packRar2 creates always the same a rar archive with following files: +// -rw-r--r-- 1 503 20 27B 6 Dez 14:07 test +// drwxr-xr-x 3 503 20 96B 6 Dez 14:08 sub/ +// -rw-r--r-- 1 503 20 27B 6 Dez 14:08 sub/test +var contentsRar2 = []archiveContent{ + {Name: "test", Content: []byte("hello world"), Mode: 0644, AccessTime: time.Date(2024, 12, 6, 14, 7, 0, 0, time.Local), ModTime: time.Date(2024, 12, 6, 14, 8, 0, 0, time.Local), Uid: 503, Gid: 20}, + {Name: "sub", Mode: fs.ModeDir | 0755, AccessTime: time.Date(2024, 12, 6, 14, 8, 0, 0, time.Local), ModTime: time.Date(2024, 12, 6, 14, 7, 8, 0, time.Local), Uid: 503, Gid: 20}, + {Name: 
"sub/test", Content: []byte("hello world"), Mode: 0644, AccessTime: time.Date(2024, 12, 6, 14, 8, 0, 0, time.Local), ModTime: time.Date(2024, 12, 6, 14, 8, 0, 0, time.Local), Uid: 503, Gid: 20}, +} + +func packRar2(t *testing.T, _ []archiveContent) []byte { + t.Helper() + b, err := hex.DecodeString("526172211a0701003392b5e50a010506000501018080003afe2e322202030b9b00049b00a48302032d6c9680000104746573740a03132ff752678a911e136861736869207361797320686920746f2074686520776f726c640a7db74f802602030b9b00049b00a48302032d6c96800001087375622f746573740a031334f752672333f02b6861736869207361797320686920746f2074686520776f726c640a5311ba9e1b02030b000100ed8301800001037375620a031334f752673549ed2b1d77565103050400") + if err != nil { + t.Fatalf("error decoding rar data: %v", err) + } + return b +} + +var ( + testDataUid, testDataGid = 1337, 42 + testDataRootUid, testDataWheelGid = 0, 0 + testDataInvalidUid, testDataInvalidGid = -1, -2 + baseTime = time.Date(2021, 1, 1, 0, 0, 0, 0, time.Local) +) + +var testCases = []struct { + name string + contents []archiveContent + packer func(*testing.T, []archiveContent) []byte + doesNotSupportModTime bool + doesNotSupportOwner bool + invalidUidGid bool +}{ + { + name: "tar", + contents: []archiveContent{ + {Name: "test", Content: []byte("hello world"), Mode: 0777, AccessTime: baseTime, ModTime: baseTime, Uid: testDataUid, Gid: testDataGid}, + {Name: "sub", Mode: fs.ModeDir | 0777, AccessTime: baseTime, ModTime: baseTime, Uid: testDataUid, Gid: testDataGid}, + {Name: "sub/test", Content: []byte("hello world"), Mode: 0777, AccessTime: baseTime, ModTime: baseTime, Uid: testDataUid, Gid: testDataGid}, + {Name: "link", Mode: fs.ModeSymlink | 0777, Linktarget: "sub/test", AccessTime: baseTime, ModTime: baseTime, Uid: testDataUid, Gid: testDataGid}, + }, + packer: packTar, + }, + { + name: "invalid-uid-tar", + contents: []archiveContent{ + {Name: "test", Content: []byte("hello world"), Mode: 0777, AccessTime: baseTime, ModTime: baseTime, Uid: 
testDataInvalidUid, Gid: testDataInvalidGid}, + {Name: "sub", Mode: fs.ModeDir | 0777, AccessTime: baseTime, ModTime: baseTime, Uid: testDataInvalidUid, Gid: testDataInvalidGid}, + {Name: "sub/test", Content: []byte("hello world"), Mode: 0777, AccessTime: baseTime, ModTime: baseTime, Uid: testDataInvalidUid, Gid: testDataInvalidGid}, + {Name: "link", Mode: fs.ModeSymlink | 0777, Linktarget: "sub/test", AccessTime: baseTime, ModTime: baseTime, Uid: testDataInvalidUid, Gid: testDataInvalidGid}, + }, + packer: packTar, + invalidUidGid: true, + }, + { + name: "root-tar", + contents: []archiveContent{ + {Name: "test", Content: []byte("hello world"), Mode: 0777, AccessTime: baseTime, ModTime: baseTime, Uid: testDataRootUid, Gid: testDataWheelGid}, + {Name: "sub", Mode: fs.ModeDir | 0777, AccessTime: baseTime, ModTime: baseTime, Uid: testDataRootUid, Gid: testDataWheelGid}, + {Name: "sub/test", Content: []byte("hello world"), Mode: 0777, AccessTime: baseTime, ModTime: baseTime, Uid: testDataRootUid, Gid: testDataWheelGid}, + {Name: "link", Mode: fs.ModeSymlink | 0777, Linktarget: "sub/test", AccessTime: baseTime, ModTime: baseTime, Uid: testDataRootUid, Gid: testDataWheelGid}, + }, + packer: packTar, + }, + { + name: "zip", + contents: []archiveContent{ + {Name: "test", Content: []byte("hello world"), Mode: 0777, AccessTime: baseTime, ModTime: baseTime, Uid: os.Getuid(), Gid: os.Getgid()}, + {Name: "sub", Mode: fs.ModeDir | 0777, AccessTime: baseTime, ModTime: baseTime, Uid: os.Getuid(), Gid: os.Getgid()}, + {Name: "sub/test", Content: []byte("hello world"), Mode: 0644, AccessTime: baseTime, ModTime: baseTime, Uid: os.Getuid(), Gid: os.Getgid()}, + {Name: "link", Mode: fs.ModeSymlink | 0777, Linktarget: "sub/test", AccessTime: baseTime, ModTime: baseTime, Uid: os.Getuid(), Gid: os.Getgid()}, + }, + doesNotSupportOwner: true, + packer: packZip, + }, + { + name: "rar", + contents: contentsRar2, + doesNotSupportOwner: true, + doesNotSupportModTime: true, + packer: packRar2, 
+ }, + { + name: "7z", + contents: contents7z2, + doesNotSupportOwner: true, + packer: pack7z2, + }, +} + // openFile is a helper function to "open" a file, // but it returns an in-memory reader for example purposes. func openFile(_ string) io.ReadCloser { @@ -1629,136 +1736,6 @@ func asIoReader(t *testing.T, b []byte) io.Reader { return r } -func TestWithCustomMode(t *testing.T) { - umask := sniffUmask(t) - - tests := []struct { - name string - data []byte - dst string - cfg *extract.Config - expected map[string]fs.FileMode - expectError bool - }{ - { - name: "dir with 0755 and file with 0644", - data: compressGzip(t, packTar(t, []archiveContent{ - { - Name: "sub/file", - Mode: fs.FileMode(0644), // 420 - }, - })), - cfg: extract.NewConfig( - extract.WithCustomCreateDirMode(fs.FileMode(0755)), // 493 - ), - expected: map[string]fs.FileMode{ - "sub": fs.FileMode(0755), // 493 - "sub/file": fs.FileMode(0644), // 420 - }, - }, - { - name: "decompress with custom mode", - data: compressGzip(t, []byte("foobar content")), - dst: "out", // specify decompressed file name - cfg: extract.NewConfig( - extract.WithCustomDecompressFileMode(fs.FileMode(0666)), // 438 - ), - expected: map[string]fs.FileMode{ - "out": fs.FileMode(0666), // 438 - }, - }, - { - name: "dir with 0755 and file with 0777", - data: compressGzip(t, []byte("foobar content")), - dst: "foo/out", - cfg: extract.NewConfig( - extract.WithCreateDestination(true), // create destination^ - extract.WithCustomCreateDirMode(fs.FileMode(0750)), // 488 - extract.WithCustomDecompressFileMode(fs.FileMode(0777)), // 511 - ), - expected: map[string]fs.FileMode{ - "foo": fs.FileMode(0750), // 488 - "foo/out": fs.FileMode(0777), // 511 - }, - }, - { - name: "dir with 0777 and file with 0777", - data: compressGzip(t, packTar(t, []archiveContent{ - { - Name: "sub/file", - Mode: fs.FileMode(0777), // 511 - }, - })), - cfg: extract.NewConfig( - extract.WithCustomCreateDirMode(fs.FileMode(0777)), // 511 - ), - expected: 
map[string]fs.FileMode{ - "sub": fs.FileMode(0777), // 511 - "sub/file": fs.FileMode(0777), // 511 - }, - }, - { - name: "file with 0000 permissions", - data: compressGzip(t, packTar(t, []archiveContent{ - { - Name: "file", - Mode: fs.FileMode(0000), // 0 - }, - { - Name: "dir/", - Mode: fs.ModeDir, // 000 permission - }, - })), - cfg: extract.NewConfig(), - expected: map[string]fs.FileMode{ - "file": fs.FileMode(0000), // 0 - "dir": fs.FileMode(0000), // 0 - }, - }, - } - - for _, test := range tests { - t.Run(test.name, func(t *testing.T) { - if test.cfg == nil { - test.cfg = extract.NewConfig() - } - var ( - ctx = context.Background() - tmp = t.TempDir() - dst = filepath.Join(tmp, test.dst) - src = asIoReader(t, test.data) - cfg = test.cfg - ) - err := extract.Unpack(ctx, dst, src, cfg) - if test.expectError && err == nil { - t.Fatalf("expected error, got nil") - } - if !test.expectError && err != nil { - t.Fatalf("unexpected error: %v", err) - } - for name, expectedMode := range test.expected { - stat, err := os.Stat(filepath.Join(tmp, name)) - if err != nil { - t.Fatalf("error getting file stats: %s", err) - } - - if runtime.GOOS == "windows" { - if stat.IsDir() { - continue // Skip directory checks on Windows - } - expectedMode = toWindowsFileMode(stat.IsDir(), expectedMode) - } else { - expectedMode &= ^umask // Adjust for umask on non-Windows systems - } - - if stat.Mode().Perm() != expectedMode.Perm() { - t.Fatalf("expected directory/file to have mode %s, but got: %s", expectedMode.Perm(), stat.Mode().Perm()) - } - } - }) - } -} - // sniffUmask is a helper function to get the umask func sniffUmask(t *testing.T) fs.FileMode { t.Helper() @@ -1783,20 +1760,3 @@ func sniffUmask(t *testing.T) fs.FileMode { // return the umask return umask } - -// toWindowsFileMode converts a fs.FileMode to a windows file mode -func toWindowsFileMode(isDir bool, mode fs.FileMode) fs.FileMode { - - // handle special case - if isDir { - return fs.FileMode(0777) - } - - // check 
for write permission - if mode&0200 != 0 { - return fs.FileMode(0666) - } - - // return the mode - return fs.FileMode(0444) -} diff --git a/unpack_unix_test.go b/unpack_unix_test.go new file mode 100644 index 00000000..5c45707b --- /dev/null +++ b/unpack_unix_test.go @@ -0,0 +1,264 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +//go:build unix + +package extract_test + +import ( + "context" + "io/fs" + "os" + "path/filepath" + "syscall" + "testing" + "time" + + "github.com/hashicorp/go-extract" +) + +func TestUnpackWithPreserveFileAttributes(t *testing.T) { + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + var ( + ctx = context.Background() + dst = t.TempDir() + src = asIoReader(t, tc.packer(t, tc.contents)) + cfg = extract.NewConfig() + ) + if err := extract.Unpack(ctx, dst, src, cfg); err != nil { + t.Fatalf("error unpacking archive: %v", err) + } + for _, c := range tc.contents { + path := filepath.Join(dst, c.Name) + stat, err := os.Lstat(path) + if err != nil { + t.Fatalf("error getting file stats: %v", err) + } + if !(c.Mode&fs.ModeSymlink != 0) { // skip symlink checks + if stat.Mode().Perm() != c.Mode.Perm() { + t.Fatalf("expected file mode %v, got %v, file %s", c.Mode.Perm(), stat.Mode().Perm(), c.Name) + } + } + if tc.doesNotSupportModTime { + continue + } + modTimeDiff := abs(stat.ModTime().UnixNano() - c.ModTime.UnixNano()) + if modTimeDiff >= int64(time.Microsecond) { + t.Fatalf("expected mod time %v, got %v, file %s, diff %v", c.ModTime, stat.ModTime(), c.Name, modTimeDiff) + } + } + }) + } +} + +func TestUnpackWithPreserveOwnershipAsNonRoot(t *testing.T) { + + if os.Getuid() == 0 { + t.Skip("test requires non-root privileges") + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + + // skip test if the archive does not store ownership information + if tc.doesNotSupportOwner { + t.Skipf("archive %s does not store ownership information", tc.name) + } + + var ( + ctx = 
context.Background() + dst = t.TempDir() + src = asIoReader(t, tc.packer(t, tc.contents)) + cfg = extract.NewConfig(extract.WithPreserveOwner(true)) + ) + + // Unpack should fail if the user is not root and the uid/gid + // in the archive is different from the current user (only + // if the archive supports owner information) + err := extract.Unpack(ctx, dst, src, cfg) + + // chown will only fail if the user is not root and the + // uid/gid in the archive is different from the current user + if err == nil { + t.Fatalf("error unpacking archive: %v", err) + } + }) + } +} + +func TestUnpackWithPreserveOwnershipAsRoot(t *testing.T) { + + if os.Getuid() != 0 { + t.Skip("test requires root privileges") + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + + // skip test if the archive does not store ownership information + if tc.doesNotSupportOwner { + t.Skipf("archive type %s does not store ownership information", tc.name) + } + + var ( + ctx = context.Background() + dst = t.TempDir() + src = asIoReader(t, tc.packer(t, tc.contents)) + cfg = extract.NewConfig(extract.WithPreserveOwner(true)) + ) + + if err := extract.Unpack(ctx, dst, src, cfg); err != nil { + t.Fatalf("error unpacking archive: %v", err) + } + + // check ownership of files + expectUidMatch := !tc.invalidUidGid + for _, c := range tc.contents { + path := filepath.Join(dst, c.Name) + stat, err := os.Lstat(path) + if err != nil { + t.Fatalf("error getting file stats: %v", err) + } + uidMatch := c.Uid == int(stat.Sys().(*syscall.Stat_t).Uid) + if expectUidMatch != uidMatch { + t.Fatalf("expected uid %d, got %d, file %s", c.Uid, stat.Sys().(*syscall.Stat_t).Uid, c.Name) + } + } + }) + } +} + +func TestWithCustomMode(t *testing.T) { + umask := sniffUmask(t) + + tests := []struct { + name string + data []byte + dst string + cfg *extract.Config + expected map[string]fs.FileMode + }{ + { + name: "dir with 0755 and file with 0644", + data: compressGzip(t, packTar(t, []archiveContent{ + { + 
Name: "sub/file", + Mode: fs.FileMode(0644), // 420 + }, + })), + cfg: extract.NewConfig( + extract.WithCustomCreateDirMode(fs.FileMode(0757 & ^umask)), // 493 & ^umask + ), + expected: map[string]fs.FileMode{ + "sub": fs.FileMode(0757 & ^umask), // 493 & ^umask <-- implicit created dir + "sub/file": fs.FileMode(0644), // 420 + }, + }, + { + name: "decompress with custom mode", + data: compressGzip(t, []byte("foobar content")), + dst: "out", // specify decompressed file name + cfg: extract.NewConfig( + extract.WithCustomDecompressFileMode(fs.FileMode(0666)), // 438 + umask is applied while file creation + ), + expected: map[string]fs.FileMode{ + "out": 0666 & ^umask, // 438 & ^umask + }, + }, + { + name: "dir with 0755 and file with 0777", + data: compressGzip(t, []byte("foobar content")), + dst: "foo/out", + cfg: extract.NewConfig( + extract.WithCreateDestination(true), // create destination^ + extract.WithCustomCreateDirMode(fs.FileMode(0750)), // 488 + umask is applied while dir creation + extract.WithCustomDecompressFileMode(fs.FileMode(0777)), // 511 + umask is applied while file creation + ), + expected: map[string]fs.FileMode{ + "foo": fs.FileMode(0750 & ^umask), // 488 & ^umask + "foo/out": fs.FileMode(0777 & ^umask), // 511 & ^umask + }, + }, + { + name: "dir with 0777 and file with 0777", + data: compressGzip(t, packTar(t, []archiveContent{ + { + Name: "sub/file", + Mode: fs.FileMode(0777), // 511 + }, + })), + cfg: extract.NewConfig( + extract.WithCustomCreateDirMode(fs.FileMode(0777)), // 511 + umask is applied while dir creation + ), + expected: map[string]fs.FileMode{ + "sub": fs.FileMode(0777 & ^umask), // 511 + "sub/file": fs.FileMode(0777), // 511 <-- is preserved from the archive and umask is not applied + }, + }, + { + name: "file with 0000 permissions", + data: compressGzip(t, packTar(t, []archiveContent{ + { + Name: "file", + Mode: fs.FileMode(0000), // 0 + }, + { + Name: "dir/", + Mode: fs.ModeDir, // 000 permission + }, + })), + cfg: 
extract.NewConfig(), + expected: map[string]fs.FileMode{ + "file": fs.FileMode(0000), // 0 + "dir": fs.FileMode(0000), // 0 + }, + }, + { + name: "dir with 777 and file with 777 but no file attribute mode preservation", + data: compressGzip(t, packTar(t, []archiveContent{ + { + Name: "file", + Mode: fs.FileMode(0777), // 511 + }, + { + Name: "dir", + Mode: fs.ModeDir | 0777, // 511 + }, + })), + cfg: extract.NewConfig(extract.WithDropFileAttributes(true)), + expected: map[string]fs.FileMode{ + "file": fs.FileMode(0777 & ^umask), // 438 + "dir": fs.FileMode(0777 & ^umask), // 438 + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + if test.cfg == nil { + test.cfg = extract.NewConfig() + } + var ( + ctx = context.Background() + tmp = t.TempDir() + dst = filepath.Join(tmp, test.dst) + src = asIoReader(t, test.data) + cfg = test.cfg + ) + err := extract.Unpack(ctx, dst, src, cfg) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + for name, expectedMode := range test.expected { + stat, err := os.Stat(filepath.Join(tmp, name)) + if err != nil { + t.Fatalf("error getting file stats: %s", err) + } + if stat.Mode().Perm() != expectedMode.Perm() { + t.Fatalf("expected %s to have mode %s, but got: %s", name, expectedMode.Perm(), stat.Mode().Perm()) + } + } + }) + } +} diff --git a/zip.go b/zip.go index e161be57..d0a77cde 100644 --- a/zip.go +++ b/zip.go @@ -10,6 +10,7 @@ import ( "io" "io/fs" "os" + "time" ) // fileExtensionZip is the file extension for zip files. 
@@ -154,3 +155,30 @@ func (z *zipEntry) Open() (io.ReadCloser, error) { func (z *zipEntry) Type() fs.FileMode { return z.zf.FileHeader.Mode().Type() } + +// AccessTime returns the access time of the entry +func (z *zipEntry) AccessTime() time.Time { + return z.zf.FileHeader.FileInfo().ModTime() +} + +// ModTime returns the modification time of the entry +func (z *zipEntry) ModTime() time.Time { + return z.zf.FileHeader.FileInfo().ModTime() +} + +// Sys returns the system information of the entry +func (z *zipEntry) Sys() interface{} { + return z.zf.FileHeader +} + +// Gid is not supported for zip files. The used library does not provide +// this information. The function returns the group ID of the current process. +func (z *zipEntry) Gid() int { + return os.Getegid() +} + +// Uid is not supported for zip files. The used library does not provide +// this information. The function returns the user ID of the current process. +func (z *zipEntry) Uid() int { + return os.Getuid() +}