From 4fd130d3a07e1b322f9fa9c96339b84bedc5e078 Mon Sep 17 00:00:00 2001 From: Jesse Mortenson Date: Wed, 15 Jan 2025 11:24:04 -0600 Subject: [PATCH 1/2] ND: Fix duplicate votes being scraped --- scrapers/nd/bills.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/scrapers/nd/bills.py b/scrapers/nd/bills.py index 7dea217443..6338dc119a 100644 --- a/scrapers/nd/bills.py +++ b/scrapers/nd/bills.py @@ -179,12 +179,22 @@ def process_page(self): votes_list = doc.xpath( '//div[@aria-labelledby="vote-modal"]//div[@class="modal-content"]' ) + votes_seen_for_bill = [] for vote_modal in votes_list: motion_text = ( vote_modal.xpath('.//h5[@class="modal-title"]')[0] .text_content() .strip() ) + modal_id = vote_modal.xpath('../..')[0].attrib["id"] + dedupe_key = f"{modal_id}{motion_text}" + if dedupe_key in votes_seen_for_bill: + # at least one ND bill has duplicate votes + # so skip if we have seen this vote already + self.logger.warning(f"Skipped duplicate vote {modal_id} on {bill_id}") + continue + else: + votes_seen_for_bill.append(dedupe_key) date = parser.parse( vote_modal.xpath( './/div[@class="modal-body"]/span[@class="float-right"]' From 9ede0b584d31e08b2d17e938038ac275b1478ebc Mon Sep 17 00:00:00 2001 From: Jesse Mortenson Date: Wed, 15 Jan 2025 11:26:14 -0600 Subject: [PATCH 2/2] ND: fix linting --- scrapers/nd/bills.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scrapers/nd/bills.py b/scrapers/nd/bills.py index 6338dc119a..0d572776cf 100644 --- a/scrapers/nd/bills.py +++ b/scrapers/nd/bills.py @@ -186,12 +186,14 @@ def process_page(self): .text_content() .strip() ) - modal_id = vote_modal.xpath('../..')[0].attrib["id"] + modal_id = vote_modal.xpath("../..")[0].attrib["id"] dedupe_key = f"{modal_id}{motion_text}" if dedupe_key in votes_seen_for_bill: # at least one ND bill has duplicate votes # so skip if we have seen this vote already - self.logger.warning(f"Skipped duplicate vote {modal_id} on {bill_id}") + self.logger.warning( + f"Skipped duplicate vote {modal_id} on {bill_id}" + ) continue else: votes_seen_for_bill.append(dedupe_key)