diff --git a/README.md b/README.md index 98e87ec..cae3d2f 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ If you're already familiar with shell scripting and the Unix toolset, here is a | `jq` | [`JQ`](https://pkg.go.dev/github.com/bitfield/script#Pipe.JQ) | | `ls` | [`ListFiles`](https://pkg.go.dev/github.com/bitfield/script#ListFiles) | | `sed` | [`Replace`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Replace) / [`ReplaceRegexp`](https://pkg.go.dev/github.com/bitfield/script#Pipe.ReplaceRegexp) | -| `sha256sum` | [`SHA256Sum`](https://pkg.go.dev/github.com/bitfield/script#Pipe.SHA256Sum) / [`SHA256Sums`](https://pkg.go.dev/github.com/bitfield/script#Pipe.SHA256Sums) | +| `sha256sum` | [`Hash`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Hash) / [`HashSums`](https://pkg.go.dev/github.com/bitfield/script#Pipe.HashSums) | | `tail` | [`Last`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Last) | | `tee` | [`Tee`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Tee) | | `uniq -c` | [`Freq`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Freq) | @@ -317,6 +317,7 @@ Filters are methods on an existing pipe that also return a pipe, allowing you to | [`First`](https://pkg.go.dev/github.com/bitfield/script#Pipe.First) | first N lines of input | | [`Freq`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Freq) | frequency count of unique input lines, most frequent first | | [`Get`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Get) | response to HTTP GET on supplied URL | +| [`HashSums`](https://pkg.go.dev/github.com/bitfield/script#Pipe.HashSums) | hashes of each listed file | | [`Join`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Join) | replace all newlines with spaces | | [`JQ`](https://pkg.go.dev/github.com/bitfield/script#Pipe.JQ) | result of `jq` query | | [`Last`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Last) | last N lines of input| @@ -327,7 +328,6 @@ Filters are methods on an existing pipe that also 
return a pipe, allowing you to | [`RejectRegexp`](https://pkg.go.dev/github.com/bitfield/script#Pipe.RejectRegexp) | lines not matching given regexp | | [`Replace`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Replace) | matching text replaced with given string | | [`ReplaceRegexp`](https://pkg.go.dev/github.com/bitfield/script#Pipe.ReplaceRegexp) | matching text replaced with given string | -| [`SHA256Sums`](https://pkg.go.dev/github.com/bitfield/script#Pipe.SHA256Sums) | SHA-256 hashes of each listed file | | [`Tee`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Tee) | input copied to supplied writers | Note that filters run concurrently, rather than producing nothing until each stage has fully read its input. This is convenient for executing long-running commands, for example. If you do need to wait for the pipeline to complete, call [`Wait`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Wait). @@ -340,9 +340,9 @@ Sinks are methods that return some data from a pipe, ending the pipeline and ext | ---- | ----------- | ------- | | [`AppendFile`](https://pkg.go.dev/github.com/bitfield/script#Pipe.AppendFile) | appended to file, creating if it doesn't exist | bytes written, error | | [`Bytes`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Bytes) | | data as `[]byte`, error +| [`Hash`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Hash) | | hash, error | | [`CountLines`](https://pkg.go.dev/github.com/bitfield/script#Pipe.CountLines) | |number of lines, error | | [`Read`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Read) | given `[]byte` | bytes read, error | -| [`SHA256Sum`](https://pkg.go.dev/github.com/bitfield/script#Pipe.SHA256Sum) | | SHA-256 hash, error | | [`Slice`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Slice) | | data as `[]string`, error | | [`Stdout`](https://pkg.go.dev/github.com/bitfield/script#Pipe.Stdout) | standard output | bytes written, error | | 
[`String`](https://pkg.go.dev/github.com/bitfield/script#Pipe.String) | | data as `string`, error | diff --git a/script.go b/script.go index e25b1c4..1fb2978 100644 --- a/script.go +++ b/script.go @@ -8,6 +8,7 @@ import ( "encoding/hex" "encoding/json" "fmt" + "hash" "io" "math" "net/http" @@ -650,6 +651,42 @@ func (p *Pipe) Get(url string) *Pipe { return p.Do(req) } +// Hash returns the hex-encoded hash of the entire contents of the +// pipe based on the provided hasher, or an error. +// To perform hashing on files, see [Pipe.HashSums]. +func (p *Pipe) Hash(hasher hash.Hash) (string, error) { + if p.Error() != nil { + return "", p.Error() + } + _, err := io.Copy(hasher, p) + if err != nil { + p.SetError(err) + return "", err + } + return hex.EncodeToString(hasher.Sum(nil)), nil +} + +// HashSums reads paths from the pipe, one per line, and produces the +// hex-encoded hash of each corresponding file based on the provided hasher, +// one per line. Any files that cannot be opened or read will be ignored. +// The hasher is reset before each file, so each hash covers that file alone. +// To perform hashing on the contents of the pipe, see [Pipe.Hash]. +func (p *Pipe) HashSums(hasher hash.Hash) *Pipe { + return p.FilterScan(func(line string, w io.Writer) { + f, err := os.Open(line) + if err != nil { + return // skip unopenable files + } + defer f.Close() + hasher.Reset() // Sum does not clear state; drop whatever earlier files wrote + _, err = io.Copy(hasher, f) + if err != nil { + return // skip unreadable files + } + fmt.Fprintln(w, hex.EncodeToString(hasher.Sum(nil))) + }) +} + // Join joins all the lines in the pipe's contents into a single // space-separated string, which will always end with a newline. func (p *Pipe) Join() *Pipe { @@ -816,36 +853,20 @@ func (p *Pipe) SetError(err error) { // SHA256Sum returns the hex-encoded SHA-256 hash of the entire contents of the // pipe, or an error. +// +// Deprecated: SHA256Sum has been deprecated by [Pipe.Hash]. 
To get the SHA-256 +// hash for the contents of the pipe, call `Hash(sha256.New())` func (p *Pipe) SHA256Sum() (string, error) { - if p.Error() != nil { - return "", p.Error() - } - hasher := sha256.New() - _, err := io.Copy(hasher, p) - if err != nil { - p.SetError(err) - return "", err - } - return hex.EncodeToString(hasher.Sum(nil)), p.Error() + return p.Hash(sha256.New()) } // SHA256Sums reads paths from the pipe, one per line, and produces the // hex-encoded SHA-256 hash of each corresponding file, one per line. Any files // that cannot be opened or read will be ignored. +// +// Deprecated: SHA256Sums has been deprecated by [Pipe.HashSums]. To get the SHA-256 hash for each file path in the pipe, call `HashSums(sha256.New())` func (p *Pipe) SHA256Sums() *Pipe { - return p.FilterScan(func(line string, w io.Writer) { - f, err := os.Open(line) - if err != nil { - return // skip unopenable files - } - defer f.Close() - h := sha256.New() - _, err = io.Copy(h, f) - if err != nil { - return // skip unreadable files - } - fmt.Fprintln(w, hex.EncodeToString(h.Sum(nil))) - }) + return p.HashSums(sha256.New()) } // Slice returns the pipe's contents as a slice of strings, one element per diff --git a/script_test.go b/script_test.go index e674219..a58ff47 100644 --- a/script_test.go +++ b/script_test.go @@ -3,8 +3,11 @@ package script_test import ( "bufio" "bytes" + "crypto/sha256" + "crypto/sha512" "errors" "fmt" + "hash" "io" "log" "net/http" @@ -1127,7 +1130,7 @@ func TestSHA256Sums_OutputsCorrectHashForEachSpecifiedFile(t *testing.T) { want string }{ // To get the checksum run: openssl dgst -sha256 - {"testdata/sha256Sum.input.txt", "1870478d23b0b4db37735d917f4f0ff9393dd3e52d8b0efa852ab85536ddad8e\n"}, + {"testdata/hashSum.input.txt", "1870478d23b0b4db37735d917f4f0ff9393dd3e52d8b0efa852ab85536ddad8e\n"}, {"testdata/hello.txt", "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9\n"}, {"testdata/multiple_files", 
"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\ne3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\ne3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n"}, } @@ -2013,6 +2016,110 @@ func TestWithStdErr_IsConcurrencySafeAfterExec(t *testing.T) { } } +func TestHash_OutputsCorrectHash(t *testing.T) { + t.Parallel() + tcs := []struct { + name, input, want string + hasher hash.Hash + }{ + { + name: "for no data", + input: "", + hasher: sha256.New(), + want: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + }, + { + name: "for short string with SHA 256 hasher", + input: "hello, world", + hasher: sha256.New(), + want: "09ca7e4eaa6e8ae9c7d261167129184883644d07dfba7cbfbc4c8a2e08360d5b", + }, + { + name: "for short string with SHA 512 hasher", + input: "hello, world", + hasher: sha512.New(), + want: "8710339dcb6814d0d9d2290ef422285c9322b7163951f9a0ca8f883d3305286f44139aa374848e4174f5aada663027e4548637b6d19894aec4fb6c46a139fbf9", + }, + { + name: "for string containing newline with SHA 256 hasher", + input: "The tao that can be told\nis not the eternal Tao", + hasher: sha256.New(), + want: "788542cb92d37f67e187992bdb402fdfb68228a1802947f74c6576e04790a688", + }, + } + for _, tc := range tcs { + t.Run(tc.name, func(t *testing.T) { + got, err := script.Echo(tc.input).Hash(tc.hasher) + if err != nil { + t.Fatal(err) + } + if got != tc.want { + t.Errorf("want %q, got %q", tc.want, got) + } + }) + } +} + +func TestHashSums_OutputsCorrectHashForEachSpecifiedFile(t *testing.T) { + t.Parallel() + tcs := []struct { + testFileName string + hasher hash.Hash + want string + }{ + // To get the checksum run: openssl dgst -sha256 + { + testFileName: "testdata/hashSum.input.txt", + hasher: sha256.New(), + want: "1870478d23b0b4db37735d917f4f0ff9393dd3e52d8b0efa852ab85536ddad8e\n", + }, + { + testFileName: "testdata/hashSum.input.txt", + hasher: sha512.New(), + want: 
"3543bd0d68129e860598ccabcee1beb6bb90d91105cea74a8e555588634ec6f6d6d02033139972da2dc4929b1fb61bd24c91c8e82054e9ae865cf7f70909be8c\n", + }, + { + testFileName: "testdata/hello.txt", + hasher: sha256.New(), + want: "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9\n", + }, + { + testFileName: "testdata/multiple_files", + hasher: sha256.New(), + want: "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\ne3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\ne3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855\n", + }, + } + for _, tc := range tcs { + got, err := script.ListFiles(tc.testFileName).HashSums(tc.hasher).String() + if err != nil { + t.Fatal(err) + } + if got != tc.want { + t.Errorf("%q: want %q, got %q", tc.testFileName, tc.want, got) + } + } +} + +func TestHash_ReturnsErrorGivenReadErrorOnPipe(t *testing.T) { + t.Parallel() + brokenReader := iotest.ErrReader(errors.New("oh no")) + _, err := script.NewPipe().WithReader(brokenReader).Hash(sha256.New()) + if err == nil { + t.Fatal(nil) + } +} + +func TestHashSums_OutputsEmptyStringForFileThatCannotBeHashed(t *testing.T) { + got, err := script.Echo("file_does_not_exist.txt").HashSums(sha256.New()).String() + if err != nil { + t.Fatal(err) + } + want := "" + if got != want { + t.Errorf("want %q, got %q", want, got) + } +} + func ExampleArgs() { script.Args().Stdout() // prints command-line arguments @@ -2276,6 +2383,24 @@ func ExamplePipe_Get() { // You said: hello } +func ExamplePipe_Hash() { + sum, err := script.Echo("hello world").Hash(sha512.New()) + if err != nil { + panic(err) + } + fmt.Println(sum) + // Output: + // 309ecc489c12d6eb4cc40f50c902f2b4d0ed77ee511a7c7a9bcd3ca86d4cd86f989dd35bc5ff499670da34255b45b0cfd830e81f605dcf7dc5542e93ae9cd76f +} + +func ExamplePipe_HashSums() { + script.ListFiles("testdata/multiple_files").HashSums(sha256.New()).Stdout() + // Output: + // e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 + // 
e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 + // e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 +} + func ExamplePipe_Join() { script.Echo("hello\nworld\n").Join().Stdout() // Output: diff --git a/testdata/sha256Sum.input.txt b/testdata/hashSum.input.txt similarity index 100% rename from testdata/sha256Sum.input.txt rename to testdata/hashSum.input.txt