Skip to content

Commit

Permalink
ND: Fix duplicate votes being scraped
Browse files (browse the repository at this point in the history)
  • Loading branch information
jessemortenson committed Jan 15, 2025
1 parent f37af55 commit 4fd130d
Showing 1 changed file with 10 additions and 0 deletions.
10 changes: 10 additions & 0 deletions scrapers/nd/bills.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -179,12 +179,22 @@ def process_page(self):
votes_list = doc.xpath(
'//div[@aria-labelledby="vote-modal"]//div[@class="modal-content"]'
)
votes_seen_for_bill = []
for vote_modal in votes_list:
motion_text = (
vote_modal.xpath('.//h5[@class="modal-title"]')[0]
.text_content()
.strip()
)
modal_id = vote_modal.xpath('../..')[0].attrib["id"]
dedupe_key = f"{modal_id}{motion_text}"
if dedupe_key in votes_seen_for_bill:
# at least one ND bill has duplicate votes
# so skip if we have seen this vote already
self.logger.warning(f"Skipped duplicate vote {modal_id} on {bill_id}")
continue
else:
votes_seen_for_bill.append(dedupe_key)
date = parser.parse(
vote_modal.xpath(
'.//div[@class="modal-body"]/span[@class="float-right"]'
Expand Down

0 comments on commit 4fd130d

Please sign in to comment.