Skip to content

Commit

Permalink
feat(crit): add SearchPattern method on MemoryReader
Browse files Browse the repository at this point in the history
This commit adds a new method `SearchPattern` to `MemoryReader` to search
for patterns inside the process memory pages. This method accepts regular
expressions for flexible pattern matching and a context (number of bytes
before and after the pattern match).

Signed-off-by: Kouame Behouba Manasse <[email protected]>
  • Loading branch information
behouba committed Jul 21, 2024
1 parent b2a1cfe commit 0074974
Showing 1 changed file with 91 additions and 0 deletions.
91 changes: 91 additions & 0 deletions crit/mempages.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@ import (
"bytes"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"regexp"

"github.com/checkpoint-restore/go-criu/v7/crit/images/mm"
"github.com/checkpoint-restore/go-criu/v7/crit/images/pagemap"
Expand Down Expand Up @@ -193,3 +195,92 @@ func (mr *MemoryReader) GetShmemSize() (int64, error) {

return size, nil
}

// PatternMatch represents a match when searching for a pattern in memory.
// Values of this type are produced by MemoryReader.SearchPattern.
type PatternMatch struct {
	// Vaddr is the virtual address at which the matched bytes start
	// (the context bytes are not counted).
	Vaddr uint64
	// Length is the length of the match in bytes, excluding context.
	Length int
	// Context is the context size that was requested for the search, i.e.
	// the maximum number of bytes included before and after the match.
	Context int
	// Pattern holds the matched bytes together with the surrounding
	// context bytes, converted to a string.
	Pattern string
}

// SearchPattern searches for a pattern in the process memory pages.
//
// pattern is compiled as a regular expression; a compilation failure is
// returned unchanged. context is the number of bytes before and after each
// match that are included in PatternMatch.Pattern and must not be negative.
//
// Before matching, every byte outside the printable ASCII range is replaced
// with a space, so the expression is effectively evaluated against printable
// text and the returned PatternMatch.Pattern contains the sanitized bytes.
//
// The pages file is read in chunks of at most 10MB and every chunk is
// searched independently: a match that straddles a chunk boundary is not
// reported, and the context of a match is clipped at the edges of its chunk.
//
// It returns one PatternMatch per match, in the order the pagemap entries
// are stored, or the first error encountered while opening or reading the
// pages file.
func (mr *MemoryReader) SearchPattern(pattern string, context int) ([]PatternMatch, error) {
	if context < 0 {
		return nil, errors.New("context size cannot be negative")
	}

	regexPattern, err := regexp.Compile(pattern)
	if err != nil {
		return nil, err
	}

	// Maximum number of bytes read from the pages file at a time (10MB).
	const chunkSize = 10 * 1024 * 1024

	f, err := os.Open(filepath.Join(mr.checkpointDir, fmt.Sprintf("pages-%d.img", mr.pagesID)))
	if err != nil {
		return nil, err
	}
	defer f.Close()

	var (
		results []PatternMatch
		// scratch is reused across chunks; matches are copied out of it
		// (string conversion) before the next read overwrites it.
		scratch []byte
		// fileOffset is the position in the pages file of the next entry.
		// Entries are laid out back to back, so a running sum replaces
		// rescanning all preceding entries for every entry.
		fileOffset uint64
	)

	for _, entry := range mr.pagemapEntries {
		startAddr := entry.GetVaddr()
		entrySize := uint64(entry.GetNrPages()) * uint64(mr.pageSize)

		entryOffset := fileOffset
		fileOffset += entrySize

		for offset := uint64(0); offset < entrySize; offset += chunkSize {
			readSize := chunkSize
			if remaining := entrySize - offset; remaining < chunkSize {
				readSize = int(remaining)
			}

			if cap(scratch) < readSize {
				scratch = make([]byte, readSize)
			}
			buff := scratch[:readSize]

			n, readErr := f.ReadAt(buff, int64(entryOffset+offset))
			if readErr != nil && !errors.Is(readErr, io.EOF) {
				return nil, readErr
			}
			// ReadAt reports io.EOF whenever fewer bytes than requested
			// were available. Search what was read, then stop reading
			// this entry.
			reachedEOF := readErr != nil
			if n == 0 {
				break
			}
			buff = buff[:n]

			// Replace non-printable ASCII characters in the buffer with
			// spaces (0x20) to prevent unexpected behavior during regex
			// matching. Non-printable characters might cause incorrect
			// interpretation or premature termination of strings, leading
			// to inaccuracies in pattern matching.
			for i := range buff {
				if buff[i] < 32 || buff[i] >= 127 {
					buff[i] = 0x20
				}
			}

			for _, index := range regexPattern.FindAllIndex(buff, -1) {
				// Clamp the context window to the current chunk.
				startContext := index[0] - context
				if startContext < 0 {
					startContext = 0
				}

				endContext := index[1] + context
				if endContext > len(buff) {
					endContext = len(buff)
				}

				results = append(results, PatternMatch{
					Vaddr:   startAddr + offset + uint64(index[0]),
					Length:  index[1] - index[0],
					Context: context,
					Pattern: string(buff[startContext:endContext]),
				})
			}

			if reachedEOF {
				break
			}
		}
	}

	return results, nil
}

0 comments on commit 0074974

Please sign in to comment.