diff --git a/cmd/guacone/cmd/gcs_test.go b/cmd/guacone/cmd/gcs_test.go
index 687daa7fe1..eb106bd994 100644
--- a/cmd/guacone/cmd/gcs_test.go
+++ b/cmd/guacone/cmd/gcs_test.go
@@ -15,7 +15,11 @@
 
 package cmd
 
-import "testing"
+import (
+	"testing"
+
+	"github.com/spf13/cobra"
+)
 
 func TestValidateGCSFlags(t *testing.T) {
 	testCases := []struct {
@@ -78,3 +82,18 @@ func TestValidateGCSFlags(t *testing.T) {
 
 	}
 }
+
+func TestJsonBz2Ingestion(t *testing.T) {
+	rootCmd := &cobra.Command{
+		Use:   "guacone",
+		Short: "guacone",
+	}
+	rootCmd.AddCommand(collectCmd)
+	rootCmd.AddCommand(filesCmd)
+	bz2Path := "./../../../internal/testing/testdata/exampledata/busybox-cyclonedx.json.bz2"
+	rootCmd.SetArgs([]string{"collect", "files", bz2Path})
+	err := rootCmd.Execute()
+	if err != nil {
+		t.Fatal(err)
+	}
+}
diff --git a/internal/testing/testdata/exampledata/busybox-cyclonedx.json.bz2 b/internal/testing/testdata/exampledata/busybox-cyclonedx.json.bz2
new file mode 100644
index 0000000000..f39d16880a
Binary files /dev/null and b/internal/testing/testdata/exampledata/busybox-cyclonedx.json.bz2 differ
diff --git a/pkg/handler/processor/process/process.go b/pkg/handler/processor/process/process.go
index 8af4d6f433..1e3c0a314e 100644
--- a/pkg/handler/processor/process/process.go
+++ b/pkg/handler/processor/process/process.go
@@ -22,6 +22,8 @@ import (
 	"encoding/xml"
 	"fmt"
 	"io"
+	"path/filepath"
+	"strings"
 
 	uuid "github.com/gofrs/uuid"
 	"github.com/guacsec/guac/pkg/emitter"
@@ -216,6 +218,14 @@ func decodeDocument(ctx context.Context, i *processor.Document) error {
 	logger := logging.FromContext(ctx)
 	var reader io.Reader
 	var err error
+	// No encoding was supplied: infer one from the source's file extension.
+	if i.Encoding == "" {
+		ext := filepath.Ext(i.SourceInformation.Source)
+		encoding, ok := processor.EncodingExts[strings.ToLower(ext)]
+		if ok {
+			i.Encoding = encoding
+		}
+	}
 	logger.Infof("Decoding document with encoding: %v", i.Encoding)
 	switch i.Encoding {
 	case processor.EncodingBzip2:
@@ -225,17 +235,16 @@ func decodeDocument(ctx context.Context, i *processor.Document) error {
 		if err != nil {
 			return fmt.Errorf("unable to create zstd reader: %w", err)
 		}
-	case processor.EncodingUnknown:
 	}
 	if reader != nil {
-		if err := decompressDocument(ctx, i, reader); err != nil {
+		if err := decompressDocument(i, reader); err != nil {
 			return fmt.Errorf("unable to decode document: %w", err)
 		}
 	}
 	return nil
 }
 
-func decompressDocument(ctx context.Context, i *processor.Document, reader io.Reader) error {
+func decompressDocument(i *processor.Document, reader io.Reader) error {
 	uncompressed, err := io.ReadAll(reader)
 	if err != nil {
 		return fmt.Errorf("unable to decompress document: %w", err)
diff --git a/pkg/handler/processor/processor.go b/pkg/handler/processor/processor.go
index 6a4eb0aba4..adefc680f4 100644
--- a/pkg/handler/processor/processor.go
+++ b/pkg/handler/processor/processor.go
@@ -87,6 +87,13 @@ const (
 	EncodingUnknown EncodingType = "UNKNOWN"
 )
 
+// EncodingExts maps a lowercase file extension (including the leading dot)
+// to the EncodingType it denotes.
+var EncodingExts = map[string]EncodingType{
+	".bz2": EncodingBzip2,
+	".zst": EncodingZstd,
+}
+
 // SourceInformation provides additional information about where the document comes from
 type SourceInformation struct {
 	// Collector describes the name of the collector providing this information