Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Tag node_modules archive #971

Merged
merged 19 commits into from
Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions internal/hashio/files.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"io"
"os"
"strings"
)

// SHA256 hashes the given file with crypto.SHA256 and returns the checksum as a
Expand All @@ -20,3 +21,28 @@ func SHA256(filename string) (string, error) {
}
return fmt.Sprintf("%x", h.Sum(nil)), nil
}

// HashContent computes a SHA-256 hash of the file content combined with extra content,
// and returns the first 16 characters of the hex-encoded hash.
func HashContent(filePath string, extraContent ...string) (string, error) {
tianfeng92 marked this conversation as resolved.
Show resolved Hide resolved
file, err := os.Open(filePath)
if err != nil {
return "", fmt.Errorf("failed to open file: %w", err)
}
defer file.Close()

fileInfo, err := file.Stat()
if err != nil {
return "", fmt.Errorf("failed to get file info: %w", err)
}

buffer := make([]byte, fileInfo.Size())
if _, err := file.Read(buffer); err != nil {
return "", fmt.Errorf("failed to read file: %w", err)
}

combinedContent := string(buffer) + strings.Join(extraContent, "")

hash := sha256.Sum256([]byte(combinedContent))
return fmt.Sprintf("%x", hash)[:15], nil
tianfeng92 marked this conversation as resolved.
Show resolved Hide resolved
tianfeng92 marked this conversation as resolved.
Show resolved Hide resolved
}
149 changes: 117 additions & 32 deletions internal/saucecloud/cloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -409,8 +409,9 @@ func (r *CloudRunner) runJobs(jobOpts chan job.StartOptions, results chan<- resu
}

// remoteArchiveProject archives the contents of the folder and uploads to remote storage.
// It returns app uri as the uploaded project, otherApps as the collection of runner config and node_modules bundle.
func (r *CloudRunner) remoteArchiveProject(project interface{}, folder string, sauceignoreFile string, dryRun bool) (app string, otherApps []string, err error) {
// Returns the app URI for the uploaded project and additional URIs for the
// runner config, node_modules, and other resources.
func (r *CloudRunner) remoteArchiveProject(project interface{}, projectDir string, sauceignoreFile string, dryRun bool) (app string, otherApps []string, err error) {
tempDir, err := os.MkdirTemp(os.TempDir(), "saucectl-app-payload-")
if err != nil {
return
Expand All @@ -419,65 +420,148 @@ func (r *CloudRunner) remoteArchiveProject(project interface{}, folder string, s
defer os.RemoveAll(tempDir)
}

var files []string
files, err := collectFiles(projectDir)
if err != nil {
return "", nil, fmt.Errorf("failed to retrieve project files: %w", err)
}

matcher, err := sauceignore.NewMatcherFromFile(sauceignoreFile)
if err != nil {
return
}

contents, err := os.ReadDir(folder)
// Create archives for the project's main files and runner configuration.
archives, err := r.createArchives(tempDir, projectDir, project, files, matcher)
if err != nil {
return
}

uris, err := r.uploadFiles(archives, dryRun)
if err != nil {
return
}

if len(r.NPMDependencies) > 0 {
nodeModulesURI, err := r.handleNodeModules(tempDir, projectDir, matcher, dryRun)
if err != nil {
return "", nil, err
}
if nodeModulesURI != "" {
uris[nodeModulesUpload] = nodeModulesURI
}
}

return uris[projectUpload], r.refineURIs(uris), nil
}

// collectFiles retrieves all relevant files in the project directory, excluding "node_modules".
func collectFiles(dir string) ([]string, error) {
var files []string
contents, err := os.ReadDir(dir)
if err != nil {
return nil, fmt.Errorf("failed to read project directory: %w", err)
}

for _, file := range contents {
// we never want mode_modules as part of the app payload
if file.Name() == "node_modules" {
continue
if file.Name() != "node_modules" {
files = append(files, filepath.Join(dir, file.Name()))
}
files = append(files, filepath.Join(folder, file.Name()))
}
return files, nil
}

// createArchives builds the archives that accompany a job: the app payload
// (the project files, filtered by the sauceignore matcher) and the runner
// configuration. It returns the archive paths keyed by upload type.
func (r *CloudRunner) createArchives(tempDir, projectDir string, project interface{}, files []string, matcher sauceignore.Matcher) (map[uploadType]string, error) {
	archives := make(map[uploadType]string)

	projectArchive, err := zip.ArchiveFiles("app", tempDir, projectDir, files, matcher)
	if err != nil {
		return nil, fmt.Errorf("failed to archive project files: %w", err)
	}
	archives[projectUpload] = projectArchive

	configArchive, err := zip.ArchiveRunnerConfig(project, tempDir)
	if err != nil {
		return nil, fmt.Errorf("failed to archive runner configuration: %w", err)
	}
	archives[runnerConfigUpload] = configArchive

	return archives, nil
}

// handleNodeModules archives the node_modules directory and uploads it to remote storage.
// If tagging is enabled and a tagged version of node_modules already exists in storage,
tianfeng92 marked this conversation as resolved.
Show resolved Hide resolved
// it returns the URI of the existing archive.
// Otherwise, it creates a new archive, uploads it and returns the storage ID.
func (r *CloudRunner) handleNodeModules(tempDir, projectDir string, matcher sauceignore.Matcher, dryRun bool) (string, error) {
var tags []string

if taggableModules(projectDir, r.NPMDependencies) {
tag, err := hashio.HashContent(filepath.Join(projectDir, "package-lock.json"), r.NPMDependencies...)
if err != nil {
return "", err
}
tags = append(tags, tag)

log.Info().Msgf("Searching remote node_modules archive by tag %s", tag)
existingURI := r.findTaggedArchives(tag)
if existingURI != "" {
log.Info().Msgf("Skipping archive and upload node_modules, use %s", existingURI)
tianfeng92 marked this conversation as resolved.
Show resolved Hide resolved
return existingURI, nil
}
}

archive, err := zip.ArchiveNodeModules(tempDir, projectDir, matcher, r.NPMDependencies)
if err != nil {
return
return "", fmt.Errorf("failed to archive node_modules: %w", err)
}
if modZip != "" {
archives[nodeModulesUpload] = modZip
if archive == "" {
tianfeng92 marked this conversation as resolved.
Show resolved Hide resolved
return "", nil
}

configZip, err := zip.ArchiveRunnerConfig(project, tempDir)
if err != nil {
return
return r.uploadArchive(storage.FileInfo{Name: archive, Tags: tags}, nodeModulesUpload, dryRun)
}

// taggableModules checks if tagging should be applied based on the presence of package-lock.json and dependencies.
func taggableModules(dir string, npmDependencies []string) bool {
if len(npmDependencies) == 0 {
return false
}
archives[runnerConfigUpload] = configZip
_, err := os.Stat(filepath.Join(dir, "package-lock.json"))
return err == nil
}

var uris = map[uploadType]string{}
for k, v := range archives {
uri, err := r.uploadArchive(storage.FileInfo{Name: v}, k, dryRun)
// findTaggedArchives searches storage for a tagged archive with a matching tag.
func (r *CloudRunner) findTaggedArchives(tag string) string {
list, err := r.ProjectUploader.List(context.TODO(), storage.ListOptions{Tags: []string{tag}, MaxResults: 1})
if err != nil || len(list.Items) == 0 {
tianfeng92 marked this conversation as resolved.
Show resolved Hide resolved
return ""
}

return fmt.Sprintf("storage:%s", list.Items[0].ID)
}

// uploadFiles uploads each archive and returns the resulting storage URIs
// keyed by upload type.
func (r *CloudRunner) uploadFiles(archives map[uploadType]string, dryRun bool) (map[uploadType]string, error) {
	uris := make(map[uploadType]string, len(archives))
	// Note: the loop variable must not be named "uploadType", which would
	// shadow the uploadType type within the loop body.
	for uType, path := range archives {
		uri, err := r.uploadArchive(storage.FileInfo{Name: path}, uType, dryRun)
		if err != nil {
			return nil, fmt.Errorf("failed to upload %s archive: %w", uType, err)
		}
		uris[uType] = uri
	}
	return uris, nil
}

app = uris[projectUpload]
for _, item := range []uploadType{runnerConfigUpload, nodeModulesUpload, otherAppsUpload} {
if val, ok := uris[item]; ok {
otherApps = append(otherApps, val)
// refineURIs picks extra URIs and sorts them.
func (r *CloudRunner) refineURIs(uriMap map[uploadType]string) []string {
tianfeng92 marked this conversation as resolved.
Show resolved Hide resolved
var uris []string
for _, t := range []uploadType{runnerConfigUpload, nodeModulesUpload, otherAppsUpload} {
if uri, ok := uriMap[t]; ok {
uris = append(uris, uri)
}
}

return
return uris
}

// remoteArchiveFiles archives the files to a remote storage.
Expand Down Expand Up @@ -614,6 +698,7 @@ func (r *CloudRunner) uploadArchive(fileInfo storage.FileInfo, pType uploadType,
if err != nil {
return "", fmt.Errorf("unable to download app from %s: %w", filename, err)
}

defer os.RemoveAll(dest)

filename = dest
Expand Down
1 change: 0 additions & 1 deletion internal/storage/storage.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ type Item struct {
Name string `json:"name"`
Size int `json:"size"`
Uploaded time.Time `json:"uploaded"`
Tags []string
}

// ErrFileNotFound is returned when the requested file does not exist.
Expand Down
Loading