Skip to content

Commit

Permalink
feat(npm): detect generic auth
Browse files Browse the repository at this point in the history
  • Loading branch information
rgmz committed Dec 31, 2024
1 parent b27bbb9 commit f4039b0
Show file tree
Hide file tree
Showing 8 changed files with 472 additions and 8 deletions.
3 changes: 2 additions & 1 deletion pkg/detectors/npm/token/detector.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ func (s BaseScanner) VerifyToken(
ctx context.Context,
data string,
token string,
includeDefaultRegistry bool,
) (bool, map[string]string, error) {
logger := ctx.Logger().WithName("npm")
if s.client == nil {
Expand All @@ -59,7 +60,7 @@ func (s BaseScanner) VerifyToken(
} else {
// A high confidence match was not found.
// Attempt to verify the token against any registries we can find.
for uri, info := range registry.FindAllURLs(ctx, data, true) {
for uri, info := range registry.FindAllURLs(ctx, data, includeDefaultRegistry) {
registries[uri] = info
}
logger.V(4).Info("Found low-confidence matches for token", "token", token, "registries", maps.Keys(registries))
Expand Down
107 changes: 107 additions & 0 deletions pkg/detectors/npm/token/generic/generic.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
package generic

import (
"context"
"errors"
"strings"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/npm"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/npm/token"
newToken "github.com/trufflesecurity/trufflehog/v3/pkg/detectors/npm/token/new"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors/npm/token/uuid"
)

// Scanner detects generic npm auth tokens. It embeds token.BaseScanner, so the
// methods defined on BaseScanner (such as VerifyToken) are promoted onto Scanner.
type Scanner struct {
token.BaseScanner
}

// Compile-time assertion that *Scanner implements both detectors.Detector and
// detectors.Versioner; the build fails if a required method is missing.
var _ interface {
detectors.Detector
detectors.Versioner
} = (*Scanner)(nil)

// Version reports this detector's version: the npm.TokenGeneric constant
// converted to an int.
func (s Scanner) Version() int {
	version := int(npm.TokenGeneric)
	return version
}

// Keywords returns the substrings associated with this detector.
// NOTE(review): presumably the engine uses these to pre-filter input before
// calling FromData — confirm against the detectors.Detector contract.
func (s Scanner) Keywords() []string {
	keywords := make([]string, 0, 2)
	keywords = append(keywords, "npm")        // generic
	keywords = append(keywords, "_authToken") // npmrc
	return keywords
}

var (
	// genericKeyPat should match all possible values for .npmrc auth tokens.
	//
	// Shape: a key (`_authToken`, matched case-sensitively, or a
	// case-insensitive `npm[config]token` variant with optional `_`, `.` or `-`
	// separators), an optional closing quote, a `:`/`=` separator or plain
	// whitespace, then the value. The value lands in exactly one of three
	// capture groups: single-quoted, double-quoted, or bare.
	genericKeyPat = regexp.MustCompile(`(?:_authToken|(?i:npm(?:[_.-]?config)?[_\-.]?token))['"]?(?:[ \t]*[:=][ \t]*|[ \t]+)(?:'([^']+)'|"([^"]+)"|([a-zA-Z0-9_+-][[:graph:]]{6,}[a-zA-Z0-9_+/=-]))`)

	// uuidPat is a case-insensitive form of common.UUIDPattern.
	uuidPat = regexp.MustCompile("(?i)" + common.UUIDPattern)

	// invalidKeyPat matches captured values that should be discarded: code
	// references (`data.token`, `process.env.*`), a fixed list of hyphenated
	// suffixes, `.tgz` tarball names and registry.npmjs.org package URLs.
	//
	// TODO: Skip package-lock.json and yarn.lock, which are common sources of false positives.
	invalidKeyPat = regexp.MustCompile(`(?i)(data\.token|process\.env\.[a-z_]+|-(assignments|defines|descope|inject-block|properties|providers|stream|string|substitute|whitespace-trim)|-(\d+\.\d+\.\d+|[a-z0-9-]+)\.tgz|registry\.npmjs\.org/[a-z_-]+/\d+\.\d+\.\d+)`)
)

// FromData scans data for generic npm auth tokens and returns one result per
// distinct token found.
//
// Candidates come from genericKeyPat. A candidate is dropped when it matches
// the uuid or "new" token patterns (those are left to the v1/v2 detectors),
// when its Shannon entropy is below 3, or when it matches invalidKeyPat.
//
// When verify is true each remaining token is passed to VerifyToken. A
// verification error is recorded on the result, except detectors.ErrNoLocalIP,
// which causes the result to be omitted. The returned error is always nil.
// Results are produced by ranging over a map, so their order is not stable.
func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (results []detectors.Result, err error) {
	content := string(data)
	verifyCtx := logContext.AddLogger(ctx)

	// Collect candidates in a set so each distinct token is handled once.
	candidates := make(map[string]struct{})
	for _, groups := range genericKeyPat.FindAllStringSubmatch(content, -1) {
		// Index 0 is the whole match; the token is in whichever capture group matched.
		_, raw := firstNonEmptyMatch(groups, 1)
		candidate := strings.TrimSpace(raw)

		switch {
		case uuid.TokenPat.MatchString(candidate) || newToken.TokenPat.MatchString(candidate):
			// Ignore results that can be handled by the v1 or v2 detectors.
			continue
		case detectors.StringShannonEntropy(candidate) < 3:
			continue
		case invalidKeyPat.MatchString(candidate):
			continue
		}
		candidates[candidate] = struct{}{}
	}

	// Turn every surviving candidate into a result.
	for candidate := range candidates {
		result := detectors.Result{
			DetectorType: s.Type(),
			Raw:          []byte(candidate),
		}

		if !verify {
			results = append(results, result)
			continue
		}

		// The final argument is VerifyToken's includeDefaultRegistry flag.
		isVerified, extra, verifyErr := s.VerifyToken(verifyCtx, content, candidate, false)
		result.Verified = isVerified
		result.ExtraData = extra
		if verifyErr != nil {
			if errors.Is(verifyErr, detectors.ErrNoLocalIP) {
				continue
			}
			result.SetVerificationError(verifyErr)
		}

		results = append(results, result)
	}

	return results, nil
}

// firstNonEmptyMatch scans matches starting at index skip and returns the
// index (relative to the full slice) and value of the first non-empty entry.
// It returns 0, "" when skip exceeds len(matches) or when every entry from
// skip onward is empty.
func firstNonEmptyMatch(matches []string, skip int) (int, string) {
	if skip > len(matches) {
		return 0, ""
	}
	// Callers pass skip=1 to step over index 0, the entire matched string.
	for idx := skip; idx < len(matches); idx++ {
		if candidate := matches[idx]; candidate != "" {
			return idx, candidate
		}
	}
	return 0, ""
}
120 changes: 120 additions & 0 deletions pkg/detectors/npm/token/generic/generic_integration_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
//go:build detectors
// +build detectors

package generic

import (
"context"
"fmt"
"testing"
"time"

"github.com/kylelemons/godebug/pretty"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

// TestNpmToken_Generic_FromChunk is an integration test for Scanner.FromData.
// It loads an active and an inactive secret via common.GetSecret, embeds each
// in a text chunk, and compares the results (with Raw cleared) against the
// expected detector type and verification state.
func TestNpmToken_Generic_FromChunk(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
	defer cancel()
	testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2")
	if err != nil {
		t.Fatalf("could not get test secrets from GCP: %s", err)
	}
	secret := testSecrets.MustGetField("NPMTOKEN_GENERIC")
	inactiveSecret := testSecrets.MustGetField("NPMTOKEN_GENERIC_INACTIVE")

	// NOTE(review): in the fixtures below the text before the secret
	// ("npmtoken_generic secret ") has no `:`/`=`/whitespace separator directly
	// after a key that genericKeyPat recognises, so a match appears to depend
	// on the stored secret itself containing a key such as `_authToken=` —
	// confirm against the fixture values.
	type args struct {
		ctx    context.Context
		data   []byte
		verify bool
	}
	tests := []struct {
		name    string
		s       Scanner
		args    args
		want    []detectors.Result
		wantErr bool
	}{
		{
			name: "found, verified",
			s:    Scanner{},
			args: args{
				ctx:    context.Background(),
				data:   []byte(fmt.Sprintf("You can find a npmtoken_generic secret %s within", secret)),
				verify: true,
			},
			want: []detectors.Result{
				{
					DetectorType: detectorspb.DetectorType_NpmToken,
					Verified:     true,
				},
			},
			wantErr: false,
		},
		{
			name: "found, unverified",
			s:    Scanner{},
			args: args{
				ctx:    context.Background(),
				data:   []byte(fmt.Sprintf("You can find a npmtoken_generic secret %s within but not valid", inactiveSecret)), // the secret would satisfy the regex but not pass validation
				verify: true,
			},
			want: []detectors.Result{
				{
					DetectorType: detectorspb.DetectorType_NpmToken,
					Verified:     false,
				},
			},
			wantErr: false,
		},
		{
			name: "not found",
			s:    Scanner{},
			args: args{
				ctx:    context.Background(),
				data:   []byte("You cannot find the secret within"),
				verify: true,
			},
			want:    nil,
			wantErr: false,
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Use the scanner declared in the table entry rather than a fresh one,
			// so that the `s` field actually controls the case under test.
			got, err := tt.s.FromData(tt.args.ctx, tt.args.verify, tt.args.data)
			if (err != nil) != tt.wantErr {
				t.Errorf("NpmToken_Generic.FromData() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			for i := range got {
				if len(got[i].Raw) == 0 {
					t.Fatalf("no raw secret present: \n %+v", got[i])
				}
				// Raw holds the live secret; clear it so the comparison below
				// only covers the fields listed in tt.want.
				got[i].Raw = nil
			}
			if diff := pretty.Compare(got, tt.want); diff != "" {
				t.Errorf("NpmToken_Generic.FromData() %s diff: (-got +want)\n%s", tt.name, diff)
			}
		})
	}
}

// BenchmarkFromData runs Scanner.FromData with verification disabled as one
// sub-benchmark per entry returned by detectors.MustGetBenchmarkData, failing
// the sub-benchmark if FromData returns an error.
func BenchmarkFromData(benchmark *testing.B) {
	scanner := Scanner{}
	bgCtx := context.Background()
	for label, payload := range detectors.MustGetBenchmarkData() {
		benchmark.Run(label, func(b *testing.B) {
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				if _, err := scanner.FromData(bgCtx, false, payload); err != nil {
					b.Fatal(err)
				}
			}
		})
	}
}
Loading

0 comments on commit f4039b0

Please sign in to comment.