diff --git a/utils/pythonutils/utils.go b/utils/pythonutils/utils.go index 084d9f41..94f0bb85 100644 --- a/utils/pythonutils/utils.go +++ b/utils/pythonutils/utils.go @@ -16,6 +16,12 @@ const ( Pip PythonTool = "pip" Pipenv PythonTool = "pipenv" Poetry PythonTool = "poetry" + + startDownloadingPattern = `^\s*Downloading\s` + downloadingCaptureGroup = `[^\s]*` + startUsingCachedPattern = `^\s*Using\scached\s` + usingCacheCaptureGroup = `[\S]+` + endPattern = `\s\(` ) type PythonTool string @@ -152,6 +158,50 @@ func getFilePath(srcPath, fileName string) (string, error) { return filePath, nil } +// Create the CmdOutputPattern objects that can capture group content that may span multiple lines for logs that have line size limitations. +// Since the log parser parse line by line, we need to create a parser that can capture group content that may span multiple lines. +func getMultilineSplitCaptureOutputPattern(startCollectingPattern, captureGroup, endCollectingPattern string, handler func(pattern *gofrogcmd.CmdOutputPattern) (string, error)) (parsers []*gofrogcmd.CmdOutputPattern) { + // Prepare regex patterns. + oneLineRegex := regexp.MustCompile(startCollectingPattern + `(` + captureGroup + `)` + endCollectingPattern) + startCollectionRegexp := regexp.MustCompile(startCollectingPattern) + endCollectionRegexp := regexp.MustCompile(endCollectingPattern) + + // Create a parser for single line pattern matches. + parsers = append(parsers, &gofrogcmd.CmdOutputPattern{RegExp: oneLineRegex, ExecFunc: handler}) + + // Create a parser for multi line pattern matches. + lineBuffer := "" + collectingMultiLineValue := false + parsers = append(parsers, &gofrogcmd.CmdOutputPattern{RegExp: regexp.MustCompile(".*"), ExecFunc: func(pattern *gofrogcmd.CmdOutputPattern) (string, error) { + // Check if the line matches the startCollectingPattern. + if !collectingMultiLineValue && startCollectionRegexp.MatchString(pattern.Line) { + // Start collecting lines. + collectingMultiLineValue = true + lineBuffer = pattern.Line + // We assume that the content is multiline so no need to check end at this point. + // Single line will be handled and matched by the other parser. + return pattern.Line, nil + } + if !collectingMultiLineValue { + return pattern.Line, nil + } + // Add the line content to the buffer. + lineBuffer += pattern.Line + // Check if the line matches the endCollectingPattern. + if endCollectionRegexp.MatchString(pattern.Line) { + collectingMultiLineValue = false + // Simulate a one line content check to make sure we have regex match. + if oneLineRegex.MatchString(lineBuffer) { + return handler(&gofrogcmd.CmdOutputPattern{Line: pattern.Line, MatchedResults: oneLineRegex.FindStringSubmatch(lineBuffer)}) + } + } + + return pattern.Line, nil + }}) + + return +} + func InstallWithLogParsing(tool PythonTool, commandArgs []string, log utils.Log, srcPath string) (map[string]entities.Dependency, error) { if tool == Pipenv { // Add verbosity flag to pipenv commands to collect necessary data @@ -161,19 +211,14 @@ func InstallWithLogParsing(tool PythonTool, commandArgs []string, log utils.Log, installCmd.Dir = srcPath dependenciesMap := map[string]entities.Dependency{} - - // Create regular expressions for log parsing. - collectingRegexp := regexp.MustCompile(`^Collecting\s(\w[\w-.]+)`) - downloadingRegexp := regexp.MustCompile(`^\s*Downloading\s([^\s]*)\s\(`) - usingCachedRegexp := regexp.MustCompile(`^\s*Using\scached\s([\S]+)\s\(`) - alreadySatisfiedRegexp := regexp.MustCompile(`^Requirement\salready\ssatisfied:\s(\w[\w-.]+)`) + parsers := []*gofrogcmd.CmdOutputPattern{} var packageName string expectingPackageFilePath := false // Extract downloaded package name. - dependencyNameParser := gofrogcmd.CmdOutputPattern{ - RegExp: collectingRegexp, + parsers = append(parsers, &gofrogcmd.CmdOutputPattern{ + RegExp: regexp.MustCompile(`^Collecting\s(\w[\w-.]+)`), ExecFunc: func(pattern *gofrogcmd.CmdOutputPattern) (string, error) { // If this pattern matched a second time before downloaded-file-name was found, prompt a message. if expectingPackageFilePath { @@ -186,7 +231,7 @@ func InstallWithLogParsing(tool PythonTool, commandArgs []string, log utils.Log, } // Check for out of bound results. - if len(pattern.MatchedResults)-1 < 0 { + if len(pattern.MatchedResults)-1 <= 0 { log.Debug(fmt.Sprintf("Failed extracting package name from line: %s", pattern.Line)) return pattern.Line, nil } @@ -197,49 +242,34 @@ func InstallWithLogParsing(tool PythonTool, commandArgs []string, log utils.Log, return pattern.Line, nil }, - } - - // Extract downloaded file, stored in Artifactory. - downloadedFileParser := gofrogcmd.CmdOutputPattern{ - RegExp: downloadingRegexp, - ExecFunc: func(pattern *gofrogcmd.CmdOutputPattern) (string, error) { - // Check for out of bound results. - if len(pattern.MatchedResults)-1 < 0 { - log.Debug(fmt.Sprintf("Failed extracting download path from line: %s", pattern.Line)) - return pattern.Line, nil - } - - // If this pattern matched before package-name was found, do not collect this path. - if !expectingPackageFilePath { - log.Debug(fmt.Sprintf("Could not resolve package name for download path: %s , continuing...", packageName)) - return pattern.Line, nil - } + }) - // Save dependency information. - filePath := pattern.MatchedResults[1] - lastSlashIndex := strings.LastIndex(filePath, "/") - var fileName string - if lastSlashIndex == -1 { - fileName = filePath - } else { - fileName = filePath[lastSlashIndex+1:] - } - dependenciesMap[strings.ToLower(packageName)] = entities.Dependency{Id: fileName} - expectingPackageFilePath = false - - log.Debug(fmt.Sprintf("Found package: %s installed with: %s", packageName, fileName)) + saveCaptureGroupAsDependencyInfo := func(pattern *gofrogcmd.CmdOutputPattern) (string, error) { + fileName := extractFileNameFromRegexCaptureGroup(pattern) + if fileName == "" { + log.Debug(fmt.Sprintf("Failed extracting download path from line: %s", pattern.Line)) return pattern.Line, nil - }, + } + // If this pattern matched before package-name was found, do not collect this path. + if !expectingPackageFilePath { + log.Debug(fmt.Sprintf("Could not resolve package name for download path: %s , continuing...", packageName)) + return pattern.Line, nil + } + // Save dependency information. + dependenciesMap[strings.ToLower(packageName)] = entities.Dependency{Id: fileName} + expectingPackageFilePath = false + log.Debug(fmt.Sprintf("Found package: %s installed with: %s", packageName, fileName)) + return pattern.Line, nil } - cachedFileParser := gofrogcmd.CmdOutputPattern{ - RegExp: usingCachedRegexp, - ExecFunc: downloadedFileParser.ExecFunc, - } + // Extract downloaded file, stored in Artifactory. (value at log may be split into multiple lines) + parsers = append(parsers, getMultilineSplitCaptureOutputPattern(startDownloadingPattern, downloadingCaptureGroup, endPattern, saveCaptureGroupAsDependencyInfo)...) + // Extract cached file, stored in Artifactory. (value at log may be split into multiple lines) + parsers = append(parsers, getMultilineSplitCaptureOutputPattern(startUsingCachedPattern, usingCacheCaptureGroup, endPattern, saveCaptureGroupAsDependencyInfo)...) // Extract already installed packages names. - installedPackagesParser := gofrogcmd.CmdOutputPattern{ - RegExp: alreadySatisfiedRegexp, + parsers = append(parsers, &gofrogcmd.CmdOutputPattern{ + RegExp: regexp.MustCompile(`^Requirement\salready\ssatisfied:\s(\w[\w-.]+)`), ExecFunc: func(pattern *gofrogcmd.CmdOutputPattern) (string, error) { // Check for out of bound results. if len(pattern.MatchedResults)-1 < 0 { @@ -252,12 +282,26 @@ func InstallWithLogParsing(tool PythonTool, commandArgs []string, log utils.Log, log.Debug(fmt.Sprintf("Found package: %s already installed", pattern.MatchedResults[1])) return pattern.Line, nil }, - } + }) // Execute command. - _, errorOut, _, err := gofrogcmd.RunCmdWithOutputParser(installCmd, true, &dependencyNameParser, &downloadedFileParser, &cachedFileParser, &installedPackagesParser) + _, errorOut, _, err := gofrogcmd.RunCmdWithOutputParser(installCmd, true, parsers...) if err != nil { return nil, fmt.Errorf("failed running %s command with error: '%s - %s'", string(tool), err.Error(), errorOut) } return dependenciesMap, nil } + +func extractFileNameFromRegexCaptureGroup(pattern *gofrogcmd.CmdOutputPattern) (fileName string) { + // Check for out of bound results (no captures). + if len(pattern.MatchedResults) <= 1 { + return "" + } + // Extract file information from capture group. + filePath := pattern.MatchedResults[1] + lastSlashIndex := strings.LastIndex(filePath, "/") + if lastSlashIndex == -1 { + return filePath + } + return filePath[lastSlashIndex+1:] +} diff --git a/utils/pythonutils/utils_test.go b/utils/pythonutils/utils_test.go new file mode 100644 index 00000000..92c15d80 --- /dev/null +++ b/utils/pythonutils/utils_test.go @@ -0,0 +1,127 @@ +package pythonutils + +import ( + "fmt" + "strings" + "testing" + + gofrogcmd "github.com/jfrog/gofrog/io" + "github.com/stretchr/testify/assert" +) + +func TestGetMultilineCaptureOutputPattern(t *testing.T) { + tests := []struct { + name string + text string + startCapturePattern string + captureGroupPattern string + endCapturePattern string + expectedCapture string + }{ + { + name: "Using cached - single line captures", + startCapturePattern: startUsingCachedPattern, + captureGroupPattern: usingCacheCaptureGroup, + endCapturePattern: endPattern, + text: ` +Looking in indexes: +***localhost:8081/artifactory/api/pypi/cli-pipenv-pypi-virtual-1698829624/simple + +Collecting pexpect==4.8.0 (from -r /tmp/pipenv-qzun2hd3-requirements/pipenv-o_899oue-hashed-reqs.txt (line 1)) + + Using cached http://localhost:8081/artifactory/api/pypi/cli-pipenv-pypi-virtual-1698829624/packages/packages/39/7b/88dbb785881c28a102619d46423cb853b46dbccc70d3ac362d99773a78ce/pexpect-4.8.0-py2.py3-none-any.whl (59 kB)`, + expectedCapture: `pexpect-4.8.0-py2.py3-none-any.whl`, + }, + { + name: "Using cached - multi line captures", + startCapturePattern: startUsingCachedPattern, + captureGroupPattern: usingCacheCaptureGroup, + endCapturePattern: endPattern, + text: ` +Looking in indexes: +***localhost:8081/artifactory/api/pypi/cli-pipenv-pypi-virtual-16 +98829624/simple + +Collecting pexpect==4.8.0 (from -r +/tmp/pipenv-qzun2hd3-requirements/pipenv-o_899oue-hashed-reqs.txt (line 1)) + + Using cached +http://localhost:8081/artifactory/api/pypi/cli-pipenv-pypi-virtual-1698829624/pa +ckages/packages/39/7b/88dbb785881c28a102619d46423cb853b46dbccc70d3ac362d99773a78 +ce/pexpect-4.8.0-py2.py3-none-any.whl (59 kB)`, + expectedCapture: `pexpect-4.8.0-py2.py3-none-any.whl`, + }, + { + name: "Downloading - single line captures", + startCapturePattern: startDownloadingPattern, + captureGroupPattern: downloadingCaptureGroup, + endCapturePattern: endPattern, + text: ` Preparing metadata (pyproject.toml): finished with status 'done' +Collecting PyYAML==5.1.2 (from jfrog-python-example==1.0) + Downloading http://localhost:8081/artifactory/api/pypi/cli-pypi-virtual-1698829558/packages/packages/e3/e8/b3212641ee2718d556df0f23f78de8303f068fe29cdaa7a91018849582fe/PyYAML-5.1.2.tar.gz (265 kB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 265.0/265.0 kB 364.4 MB/s eta 0:00:00 +Installing build dependencies: started`, + expectedCapture: `PyYAML-5.1.2.tar.gz`, + }, + { + name: "Downloading - multi line captures", + startCapturePattern: startDownloadingPattern, + captureGroupPattern: downloadingCaptureGroup, + endCapturePattern: endPattern, + text: ` Preparing metadata (pyproject.toml): finished with status 'done' +Collecting PyYAML==5.1.2 (from jfrog-python-example==1.0) + Downloading http://localhost:8081/artifactory/api/pypi/cli-pypi-virtual-1698 +829558/packages/packages/e3/e8/b3212641ee2718d556df0f23f78de8303f068fe29cdaa7a91018849 +582fe/PyYAML-5.1.2.tar.gz (265 kB) + ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 265.0/265.0 kB 364.4 MB/s eta 0:00:00 + Installing build dependencies: started`, + expectedCapture: `PyYAML-5.1.2.tar.gz`, + }, + } + + for _, testCase := range tests { + t.Run(testCase.name, func(t *testing.T) { + aggFunc, captures := getCapturesFromTest(testCase.expectedCapture) + runDummyTextStream(t, testCase.text, getMultilineSplitCaptureOutputPattern( + testCase.startCapturePattern, + testCase.captureGroupPattern, + testCase.endCapturePattern, + aggFunc, + )) + assert.Len(t, (*captures), 1, fmt.Sprintf("Expected 1 captured group, got size: %d", len(*captures))) + assert.Equal(t, testCase.expectedCapture, (*captures)[0], fmt.Sprintf("Expected capture group: %s, got: %s", testCase.expectedCapture, (*captures)[0])) + }) + } +} + +func getCapturesFromTest(expectedCaptures ...string) (func(pattern *gofrogcmd.CmdOutputPattern) (string, error), *[]string) { + captures := []string{} + aggFunc := func(pattern *gofrogcmd.CmdOutputPattern) (string, error) { + captured := extractFileNameFromRegexCaptureGroup(pattern) + for _, expectedCapture := range expectedCaptures { + if expectedCapture == captured { + captures = append(captures, expectedCapture) + } + } + return pattern.Line, nil + } + return aggFunc, &captures +} + +func runDummyTextStream(t *testing.T, txt string, parsers []*gofrogcmd.CmdOutputPattern) { + // tokenize the text to be represented line by line to simulate expected cmd log output + lines := strings.Split(txt, "\n") + // iterate over the lines to simulate line text stream + for _, line := range lines { + for _, parser := range parsers { + // check if the line matches the regexp of the parser + if parser.RegExp.MatchString(line) { + parser.MatchedResults = parser.RegExp.FindStringSubmatch(line) + parser.Line = line + // execute the parser function + _, scannerError := parser.ExecFunc(parser) + assert.NoError(t, scannerError) + } + } + } +}