From 4925261c2aebd16d14926e558da6a8b8939b9466 Mon Sep 17 00:00:00 2001 From: Pratyush Venkatakrishnan Date: Thu, 12 Dec 2024 17:39:18 -0500 Subject: [PATCH] Add pre-semester support to scrapers The pre-semester term (IAP or summer) is currently output to latestPreSemester.json, though it still needs to be renamed to i25, etc. The scrapers also don't look at scheduleFall, scheduleIAP, or scheduleSpring. --- scrapers/fireroad.py | 15 ++++++++++----- scrapers/package.py | 18 ++++++++++++++++-- scrapers/update.py | 6 ++++-- scrapers/utils.py | 26 ++++++++++++++++++-------- src/components/TermSwitcher.tsx | 2 +- 5 files changed, 49 insertions(+), 18 deletions(-) diff --git a/scrapers/fireroad.py b/scrapers/fireroad.py index 8620489a..fd3bdecb 100644 --- a/scrapers/fireroad.py +++ b/scrapers/fireroad.py @@ -314,16 +314,20 @@ def get_course_data(courses, course, term): return True -def run(): +def run(is_semester_term): """ The main entry point. All data is written to `fireroad.json`. - There are no arguments and there is no return value. + Args: + * is_semester_term (bool): whether to look at the semester term (fall/spring) or the pre-semester term (summer/IAP). 
+ + Returns: none """ text = requests.get(URL).text data = json.loads(text) courses = dict() - term = utils.get_term() + term = utils.get_term(is_semester_term) + fname = "fireroad.json" if is_semester_term else "fireroad-presem.json" missing = 0 for course in data: @@ -331,11 +335,12 @@ def run(): if not included: missing += 1 - with open("fireroad.json", "w") as f: + with open(fname, "w") as f: json.dump(courses, f) print(f"Got {len (courses)} courses") print(f"Skipped {missing} courses that are not offered in the {term.value} term") if __name__ == "__main__": - run() + run(False) + run(True) diff --git a/scrapers/package.py b/scrapers/package.py index e7f92cdf..2fc708d9 100644 --- a/scrapers/package.py +++ b/scrapers/package.py @@ -58,26 +58,40 @@ def run(): Takes data from fireroad.json and catalog.json; outputs latest.json. There are no arguments and no return value. """ + fireroad_presem = load_json_data("fireroad-presem.json") fireroad = load_json_data("fireroad.json") catalog = load_json_data("catalog.json") overrides = load_json_data("overrides.json") # The key needs to be in BOTH fireroad and catalog to make it: - # If it's not in Fireroad, it's not offered in this semester. + # If it's not in Fireroad, it's not offered in this semester (fall, etc.). # If it's not in catalog, it's not offered this year. 
+ courses_presem = merge_data( + datasets=[fireroad_presem, catalog, overrides], + keys_to_keep=set(fireroad_presem) & set(catalog), + ) courses = merge_data( datasets=[fireroad, catalog, overrides], keys_to_keep=set(fireroad) & set(catalog), ) - term_info = utils.get_term_info() + term_info_presem = utils.get_term_info(False) + term_info = utils.get_term_info(True) now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M") + + obj_presem = { + "termInfo": term_info_presem, + "lastUpdated": now, + "classes": courses_presem, + } obj = { "termInfo": term_info, "lastUpdated": now, "classes": courses, } + with open("../public/latestPreSemester.json", mode="w", encoding="utf-8") as f: + json.dump(obj_presem, f, separators=(",", ":")) with open("../public/latest.json", mode="w", encoding="utf-8") as f: json.dump(obj, f, separators=(",", ":")) print(f"Got {len(courses)} courses") diff --git a/scrapers/update.py b/scrapers/update.py index 6767ac7e..69e69366 100644 --- a/scrapers/update.py +++ b/scrapers/update.py @@ -16,8 +16,10 @@ def run(): """ This function is the entry point. There are no arguments. """ - print("=== Update fireroad data ===") - fireroad.run() + print("=== Update fireroad data (pre-semester) ===") + fireroad.run(False) + print("=== Update fireroad data (semester) ===") + fireroad.run(True) print("=== Update catalog data ===") catalog.run() print("=== Packaging ===") diff --git a/scrapers/utils.py b/scrapers/utils.py index dc935990..c2959dbc 100644 --- a/scrapers/utils.py +++ b/scrapers/utils.py @@ -151,25 +151,35 @@ def grouper(iterable, n): return zip_strict(*args) -def get_term_info(): +def get_term_info(is_semester_term): """ Gets the latest term info from "../public/latestTerm.json" as a dictionary. - There are no arguments. + Args: + * is_semester_term (bool): whether to look at the semester term (fall/spring) or the pre-semester term (summer/IAP). + + Returns: + * dict: the term info for the selected term from latestTerm.json. 
""" with open("../public/latestTerm.json") as f: term_info = json.load(f) - return term_info["semester"] + if is_semester_term: + return term_info["semester"] + else: + return term_info["preSemester"] -def get_term(): +def get_term(is_semester_term): """ - Gets the current term (fall, IAP, or spring) as a value of type Term, from - "../public/latestTerm.json". + Gets the current term (fall/spring/etc.) as a value of type Term, from "../public/latestTerm.json". - There are no arguments. + Args: + * is_semester_term (bool): whether to look at the semester term (fall/spring) or the pre-semester term (summer/IAP). + + Returns: + * Term: the name of the current term. """ - url_name = get_term_info()["urlName"] + url_name = get_term_info(is_semester_term)["urlName"] if url_name[0] == "f": return Term.FA elif url_name[0] == "i": diff --git a/src/components/TermSwitcher.tsx b/src/components/TermSwitcher.tsx index f8ce5757..1417c5b5 100644 --- a/src/components/TermSwitcher.tsx +++ b/src/components/TermSwitcher.tsx @@ -37,7 +37,7 @@ function getLastUrlName(urlName: string): string { } /** urlNames that don't have a State */ -const EXCLUDED_URLS = ["i23", "i24", "i25"]; +const EXCLUDED_URLS = ["i23", "i24"]; /** Earliest urlName we have a State for. */ const EARLIEST_URL = "f22";