diff --git a/scrapers/va/bills.py b/scrapers/va/bills.py index fccdd9ca92..2a4afd9cc8 100644 --- a/scrapers/va/bills.py +++ b/scrapers/va/bills.py @@ -70,8 +70,15 @@ def scrape(self, session=None): # the short title on the VA site is 'description', # LegislationTitle is on top of all the versions title = row["Description"] - subtitle = self.text_from_html(row["LegislationTitle"]) - description = self.text_from_html(row["LegislationSummary"]) + # these properties can occasionally be null + if row["LegislationTitle"] is not None: + subtitle = self.text_from_html(row["LegislationTitle"]) + else: + subtitle = None + if row["LegislationSummary"] is not None: + description = self.text_from_html(row["LegislationSummary"]) + else: + description = None bill = Bill( row["LegislationNumber"], @@ -86,8 +93,10 @@ def scrape(self, session=None): self.add_carryover_related_bill(bill) self.add_sponsors(bill, row["Patrons"]) yield from self.add_votes(bill, row["LegislationID"]) - bill.add_abstract(subtitle, note="title") - bill.add_abstract(description, row["SummaryVersion"]) + if subtitle is not None: + bill.add_abstract(subtitle, note="title") + if description is not None: + bill.add_abstract(description, row["SummaryVersion"]) bill.extras["VA_LEG_ID"] = row["LegislationID"] @@ -163,12 +172,17 @@ def add_versions(self, bill: Bill, legislation_id: str): "sessionCode": self.session_code, "legislationID": legislation_id, } - page = requests.get( + response = requests.get( f"{self.base_url}/LegislationText/api/getlegislationtextbyidasync", params=body, headers=self.headers, verify=False, - ).json() + ) + # occasionally this returns empty response, HTTP 204 + if len(response.text) == 0: + return + + page = response.json() for row in page["TextsList"]: if (row["PDFFile"] and len(row["PDFFile"]) > 1) or (