Skip to content

Commit

Permalink
Merge pull request #41 from glitchedgitz/dev2024
Browse files Browse the repository at this point in the history
Url parsing fixed
  • Loading branch information
glitchedgitz authored Apr 8, 2024
2 parents 3748721 + 9749596 commit acdcac4
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 2 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file added test/urls.txt
Binary file not shown.
20 changes: 18 additions & 2 deletions v2/pkg/methods/urls.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"net"
"net/url"
"path/filepath"
"regexp"
"strings"

"golang.org/x/net/publicsuffix"
Expand Down Expand Up @@ -113,16 +114,31 @@ func (m *Methods) UrlAllDir(u *url.URL, array *[]string) {
// AnalyzeURLs parses every entry of urls and calls fn with the parsed URL and
// array. Entries without an http:// or https:// scheme are treated as
// http:// URLs. Entries that fail to parse are logged and skipped.
func (m *Methods) AnalyzeURLs(urls []string, fn func(*url.URL, *[]string), array *[]string) {

	for _, s := range urls {
		// Clean the input BEFORE looking at the scheme. Otherwise leading junk
		// (a BOM, NUL bytes, stray whitespace) hides an existing scheme and the
		// entry ends up as "http://http://host" once the junk is stripped.
		rawURL := strings.TrimSpace(sanitizeURL(s))

		// URL schemes are case-insensitive, so compare against a lowered copy.
		lower := strings.ToLower(rawURL)
		if !strings.HasPrefix(lower, "http://") && !strings.HasPrefix(lower, "https://") {
			rawURL = "http://" + rawURL
		}

		u, err := url.Parse(rawURL)
		if err != nil {
			log.Println("Err: AnalyseURLs in url " + rawURL)
			log.Println("Err: AnalyseURLs in url ", err)
			continue
		}

		fn(u, array)
	}
}

// nonPrintableASCII matches every character outside the printable ASCII range
// (space through tilde). That covers control characters as well as all
// non-ASCII characters. It is compiled once here instead of on every call.
var nonPrintableASCII = regexp.MustCompile(`[^ -~]`)

// sanitizeURL returns s with every character outside the printable ASCII
// range (space through tilde) removed.
func sanitizeURL(s string) string {
	return nonPrintableASCII.ReplaceAllString(s, "")
}

// init configures the standard logger to emit messages without any prefix
// flags (no date, time or file information).
func (m *Methods) init() {
	const noFlags = 0
	log.SetFlags(noFlags)
}

0 comments on commit acdcac4

Please sign in to comment.