-
-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy path7z.go
125 lines (102 loc) · 3.27 KB
/
7z.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
package archives
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"io/fs"
"log"
"strings"
"github.com/bodgit/sevenzip"
)
func init() {
RegisterFormat(SevenZip{})
// looks like the sevenzip package registers a lot of decompressors for us automatically:
// https://github.com/bodgit/sevenzip/blob/46c5197162c784318b98b9a3f80289a9aa1ca51a/register.go#L38-L61
}
type SevenZip struct {
// If true, errors encountered during reading or writing
// a file within an archive will be logged and the
// operation will continue on remaining files.
ContinueOnError bool
// The password, if dealing with an encrypted archive.
Password string
}
func (SevenZip) Extension() string { return ".7z" }
func (SevenZip) MediaType() string { return "application/x-7z-compressed" }
func (z SevenZip) Match(_ context.Context, filename string, stream io.Reader) (MatchResult, error) {
var mr MatchResult
// match filename
if strings.Contains(strings.ToLower(filename), z.Extension()) {
mr.ByName = true
}
// match file header
buf, err := readAtMost(stream, len(sevenZipHeader))
if err != nil {
return mr, err
}
mr.ByStream = bytes.Equal(buf, sevenZipHeader)
return mr, nil
}
// Archive is not implemented for 7z because I do not know of a pure-Go 7z writer.
// Extract extracts files from z, implementing the Extractor interface. Uniquely, however,
// sourceArchive must be an io.ReaderAt and io.Seeker, which are oddly disjoint interfaces
// from io.Reader which is what the method signature requires. We chose this signature for
// the interface because we figure you can Read() from anything you can ReadAt() or Seek()
// with. Due to the nature of the zip archive format, if sourceArchive is not an io.Seeker
// and io.ReaderAt, an error is returned.
func (z SevenZip) Extract(ctx context.Context, sourceArchive io.Reader, handleFile FileHandler) error {
sra, ok := sourceArchive.(seekReaderAt)
if !ok {
return fmt.Errorf("input type must be an io.ReaderAt and io.Seeker because of zip format constraints")
}
size, err := streamSizeBySeeking(sra)
if err != nil {
return fmt.Errorf("determining stream size: %w", err)
}
zr, err := sevenzip.NewReaderWithPassword(sra, size, z.Password)
if err != nil {
return err
}
// important to initialize to non-nil, empty value due to how fileIsIncluded works
skipDirs := skipList{}
for i, f := range zr.File {
if err := ctx.Err(); err != nil {
return err // honor context cancellation
}
if fileIsIncluded(skipDirs, f.Name) {
continue
}
fi := f.FileInfo()
file := FileInfo{
FileInfo: fi,
Header: f.FileHeader,
NameInArchive: f.Name,
Open: func() (fs.File, error) {
openedFile, err := f.Open()
if err != nil {
return nil, err
}
return fileInArchive{openedFile, fi}, nil
},
}
err := handleFile(ctx, file)
if errors.Is(err, fs.SkipAll) {
break
} else if errors.Is(err, fs.SkipDir) && file.IsDir() {
skipDirs.add(f.Name)
} else if err != nil {
if z.ContinueOnError {
log.Printf("[ERROR] %s: %v", f.Name, err)
continue
}
return fmt.Errorf("handling file %d: %s: %w", i, f.Name, err)
}
}
return nil
}
// https://py7zr.readthedocs.io/en/latest/archive_format.html#signature
var sevenZipHeader = []byte("7z\xBC\xAF\x27\x1C")
// Interface guard
var _ Extractor = SevenZip{}