Skip to content

Commit

Permalink
Add pre-semester support to scrapers
Browse files Browse the repository at this point in the history
The pre-semester term (IAP or summer) is currently written to latestPreSemester.json,
though that file still needs to be renamed to i25, etc. The scrapers also don't look at
scheduleFall, scheduleIAP, scheduleSpring.
  • Loading branch information
psvenk committed Dec 14, 2024
1 parent 6c62955 commit 4925261
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 18 deletions.
15 changes: 10 additions & 5 deletions scrapers/fireroad.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,28 +314,33 @@ def get_course_data(courses, course, term):
return True


def run():
def run(is_semester_term):
"""
The main entry point. Data is written to `fireroad.json` for the semester term, or to `fireroad-presem.json` for the pre-semester term.
There are no arguments and there is no return value.
Args:
* is_semester_term (bool): whether to look at the semester term (fall/spring) or the pre-semester term (summer/IAP).
Returns: none
"""
text = requests.get(URL).text
data = json.loads(text)
courses = dict()
term = utils.get_term()
term = utils.get_term(is_semester_term)
fname = "fireroad.json" if is_semester_term else "fireroad-presem.json"
missing = 0

for course in data:
included = get_course_data(courses, course, term)
if not included:
missing += 1

with open("fireroad.json", "w") as f:
with open(fname, "w") as f:
json.dump(courses, f)
print(f"Got {len (courses)} courses")
print(f"Skipped {missing} courses that are not offered in the {term.value} term")


if __name__ == "__main__":
run()
run(False)
run(True)
18 changes: 16 additions & 2 deletions scrapers/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,26 +58,40 @@ def run():
Takes data from fireroad-presem.json, fireroad.json, catalog.json, and overrides.json; outputs latestPreSemester.json and latest.json.
There are no arguments and no return value.
"""
fireroad_presem = load_json_data("fireroad-presem.json")
fireroad = load_json_data("fireroad.json")
catalog = load_json_data("catalog.json")
overrides = load_json_data("overrides.json")

# The key needs to be in BOTH fireroad and catalog to make it:
# If it's not in Fireroad, it's not offered in this semester.
# If it's not in Fireroad, it's not offered in this semester (fall, etc.).
# If it's not in catalog, it's not offered this year.
courses_presem = merge_data(
datasets=[fireroad_presem, catalog, overrides],
keys_to_keep=set(fireroad_presem) & set(catalog),
)
courses = merge_data(
datasets=[fireroad, catalog, overrides],
keys_to_keep=set(fireroad) & set(catalog),
)

term_info = utils.get_term_info()
term_info_presem = utils.get_term_info(False)
term_info = utils.get_term_info(True)
now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M")

obj_presem = {
"termInfo": term_info_presem,
"lastUpdated": now,
"classes": courses_presem,
}
obj = {
"termInfo": term_info,
"lastUpdated": now,
"classes": courses,
}

with open("../public/latestPreSemester.json", mode="w", encoding="utf-8") as f:
json.dump(obj_presem, f, separators=(",", ":"))
with open("../public/latest.json", mode="w", encoding="utf-8") as f:
json.dump(obj, f, separators=(",", ":"))
print(f"Got {len(courses)} courses")
Expand Down
6 changes: 4 additions & 2 deletions scrapers/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ def run():
"""
This function is the entry point. There are no arguments.
"""
print("=== Update fireroad data ===")
fireroad.run()
print("=== Update fireroad data (pre-semester) ===")
fireroad.run(False)
print("=== Update fireroad data (semester) ===")
fireroad.run(True)
print("=== Update catalog data ===")
catalog.run()
print("=== Packaging ===")
Expand Down
26 changes: 18 additions & 8 deletions scrapers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,25 +151,35 @@ def grouper(iterable, n):
return zip_strict(*args)


def get_term_info():
def get_term_info(is_semester_term):
"""
Gets the latest term info from "../public/latestTerm.json" as a dictionary.
There are no arguments.
Args:
* is_semester_term (bool): whether to look at the semester term (fall/spring) or the pre-semester term (summer/IAP).
Returns:
* dict: the term info for the selected term from latestTerm.json.
"""
with open("../public/latestTerm.json") as f:
term_info = json.load(f)
return term_info["semester"]
if is_semester_term:
return term_info["semester"]
else:
return term_info["preSemester"]


def get_term():
def get_term(is_semester_term):
"""
Gets the current term (fall, IAP, or spring) as a value of type Term, from
"../public/latestTerm.json".
Gets the current term (fall/spring/etc.) as a value of type Term, from "../public/latestTerm.json".
There are no arguments.
Args:
* is_semester_term (bool): whether to look at the semester term (fall/spring) or the pre-semester term (summer/IAP).
Returns:
* Term: the name of the current term.
"""
url_name = get_term_info()["urlName"]
url_name = get_term_info(is_semester_term)["urlName"]
if url_name[0] == "f":
return Term.FA
elif url_name[0] == "i":
Expand Down
2 changes: 1 addition & 1 deletion src/components/TermSwitcher.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ function getLastUrlName(urlName: string): string {
}

/** urlNames that don't have a State */
const EXCLUDED_URLS = ["i23", "i24", "i25"];
const EXCLUDED_URLS = ["i23", "i24"];

/** Earliest urlName we have a State for. */
const EARLIEST_URL = "f22";
Expand Down

0 comments on commit 4925261

Please sign in to comment.