-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathscrape_data.py
40 lines (35 loc) · 1.13 KB
/
scrape_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
"""
Uses all active scrapers to fetch knowledge graph data from the web.
Run with `python scrape_data.py [SCRAPER ...]`; if no scraper names are given, every registered scraper runs.
"""
import importlib
import os
from argparse import ArgumentParser
from typing import List, Optional

from mongoengine import connect

from local_database import start_db
from scraper import registry
def scrape_data(active_scrapers: Optional[List[str]] = None):
    """Import every scraper module, then run the selected scrapers.

    Each ``.py`` file found in the ``scrapers`` directory (looked up relative
    to the current working directory) is imported as ``scrapers.<name>``.
    Afterwards the chosen entries of ``registry`` are instantiated one by one
    and their ``scrape()`` method is called.

    Args:
        active_scrapers: ``__name__`` values of the ``registry`` entries to
            run. ``None`` or an empty list runs every entry in ``registry``.
            Names that match no entry are ignored.
    """
    # presumably importing a scraper module is what adds its class to
    # `registry` -- confirm against the `scraper` module.
    # NOTE(review): a `scrapers/__init__.py`, if present, also matches the
    # `.py` filter and is imported as `scrapers.__init__` -- confirm intended.
    module_names = [
        file.split(".")[0] for file in os.listdir("scrapers") if file.endswith(".py")
    ]
    for module_name in module_names:
        importlib.import_module(f"scrapers.{module_name}")

    if active_scrapers:
        # Build the lookup set once instead of scanning the list per entry.
        wanted = set(active_scrapers)
        selected = [scraper for scraper in registry if scraper.__name__ in wanted]
    else:
        selected = registry

    for scraper_cls in selected:
        scraper_cls().scrape()
if __name__ == "__main__":
    # Command-line entry point: collect the optional scraper names, bring up
    # the database via start_db(), then hand the names to scrape_data().
    cli = ArgumentParser()
    cli.add_argument(
        "scrapers",
        nargs="*",
        help="Space separated list of which scrapers to run. If omitted, all scrapers will run by default",
    )
    # Arguments are parsed before start_db() so that argparse can exit on
    # `--help` or bad usage without the database having been started.
    requested_scrapers = cli.parse_args().scrapers

    start_db()
    scrape_data(requested_scrapers)