#!/usr/bin/env python
"""
Basic Pastebin scraper
by Daniel Roberson

This is currently very crude, but it works.

You must have a lifetime Pastebin Pro membership and have your IP
address whitelisted for the scraping API in order for this to function.
"""

import re
import time
import json
import urllib
import urllib2

def is_interesting(data):
    """Determine if data contains any interesting artifacts."""
    # TODO:
    # - phone numbers
    # - email addresses
    # - URLs
    # - IP addresses
    # - various hashes
    # - GPS coordinates
    # - user-specified keywords
    # - display a blurb on WHY the file is interesting in the output:
    #   ex: [+] Interesting data found in %s -- saved to %s (exploit)
    # Naive substring matching for now -- "pass" also matches "passed",
    # "key" matches "monkey", and so on.
    lowered = data.lower()
    return any(keyword in lowered
               for keyword in ("exploit", "pass", "key", "database"))
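
# Hedged sketch: the regex-based checks the TODO list above calls for might
# look something like this. The patterns and the find_artifacts() helper are
# illustrative assumptions only -- deliberately loose, and not wired into
# is_interesting() yet.
ARTIFACT_PATTERNS = {
    "email address": re.compile(r"[\w.+-]+@[\w-]+\.[\w.-]+"),
    "IPv4 address": re.compile(r"\b(?:\d{1,3}\.){3}\d{1,3}\b"),
    "MD5/SHA-1 hash": re.compile(r"\b(?:[0-9a-f]{40}|[0-9a-f]{32})\b", re.I),
}


def find_artifacts(data):
    """Return the names of artifact patterns found in data (sketch only)."""
    return [name for name, pattern in ARTIFACT_PATTERNS.items()
            if pattern.search(data)]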

def main():
    """Scrape all the things."""
    # Relevant API endpoints -- see http://pastebin.com/api_scraping.php
    #   api_scraping.php?limit=X (max 500)
    #   api_scrape_item.php?i=UNIQUE_PASTE_KEY
    #   api_scrape_item_meta.php?i=UNIQUE_PASTE_KEY
    # TODO: replace urllib/urllib2 with requests
    pastebin_keys = []
    limit = 250  # TODO: CLI setting

    url = "https://scrape.pastebin.com/api_scraping.php"
    values = {'limit': limit}
    full_url = url + '?' + urllib.urlencode(values)

    while True:
        try:
            pastebin = urllib2.urlopen(full_url)
        except urllib2.URLError, err:
            # URLError also covers HTTPError; report, wait, and retry.
            print err
            time.sleep(60)
            continue

        try:
            data = json.load(pastebin)
        except ValueError:
            # A non-JSON response most likely means this IP is not
            # whitelisted for the scraping API.
            print "[-] Response was not JSON -- is this IP whitelisted?"
            return

        # Keep only the most recent `limit` keys; anything older has
        # scrolled out of the scraping feed anyway.
        pastebin_keys = pastebin_keys[:limit]

        for paste in data:
            if paste['key'] in pastebin_keys:
                continue
            pastebin_keys.insert(0, paste['key'])
            #print paste['key'], paste['date'], paste['scrape_url'], paste['full_url']

            try:
                scrape_data = urllib2.urlopen(paste['scrape_url']).read()
            except urllib2.URLError, err:
                print err
                continue

            if is_interesting(scrape_data):
                filename = paste['key'] + ".txt"
                print "[+] Interesting data found in %s -- saving to %s" % \
                    (paste['full_url'], filename)
                # TODO: ability to specify write directory, folders by date.
                try:
                    with open(filename, 'w') as filep:
                        filep.write(scrape_data)
                except IOError, err:
                    print err

        time.sleep(60)  # TODO: CLI setting
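
# Hedged sketch: api_scrape_item_meta.php (listed in main()'s comments) is
# never actually called. Fetching a single paste's metadata might look like
# the helper below; that the endpoint returns JSON for the given key is an
# unverified assumption here.
def fetch_paste_meta(key):
    """Fetch metadata for a single paste (illustrative sketch only)."""
    meta_url = "https://scrape.pastebin.com/api_scrape_item_meta.php?i=" + key
    return json.load(urllib2.urlopen(meta_url))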

if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print "\n[-] Caught KeyboardInterrupt -- exiting."