From 62cd4bcd5f1957e20b3eccbdf794c9fc5724a9ad Mon Sep 17 00:00:00 2001 From: trevineju Date: Mon, 13 Jan 2025 04:35:11 -0300 Subject: [PATCH] =?UTF-8?q?Torna=20par=C3=A2metros=20do=20formul=C3=A1rio?= =?UTF-8?q?=20hardcoded?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../gazette/spiders/es/es_vitoria.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/data_collection/gazette/spiders/es/es_vitoria.py b/data_collection/gazette/spiders/es/es_vitoria.py index ee5e70620..e83111be8 100644 --- a/data_collection/gazette/spiders/es/es_vitoria.py +++ b/data_collection/gazette/spiders/es/es_vitoria.py @@ -9,7 +9,7 @@ class EsVitoriaSpider(BaseGazetteSpider): name = "es_vitoria" - TERRITORY_ID = "3205309" + TERRITORY_ID = "3205309" allowed_domains = ["diariooficial.vitoria.es.gov.br"] start_date = date(2014, 7, 21) @@ -25,8 +25,9 @@ class EsVitoriaSpider(BaseGazetteSpider): "RETRY_HTTP_CODES": [500, 502, 503, 504, 522, 524, 408, 429, 406], } - FORM_PARAM_YEAR = None - FORM_PARAM_MONTH = None + FORM_PARAM_YEAR = "ctl00$conteudo$ucPesquisarDiarioOficial$ddlAno" + FORM_PARAM_MONTH = "ctl00$conteudo$ucPesquisarDiarioOficial$ddlMes" + FORM_PARAM_PAGINATION = "ctl00$conteudo$ucPesquisarDiarioOficial$grdArquivos" def start_requests(self): yield Request( @@ -35,8 +36,6 @@ def start_requests(self): ) def make_year_request(self, response): - self.set_form_params(response) - monthly_dates = rruleset() monthly_dates.rrule( rrule(MONTHLY, dtstart=self.start_date, until=self.end_date, bymonthday=[1]) @@ -55,14 +54,6 @@ def make_year_request(self, response): meta={"cookiejar": (monthly_date.year, monthly_date.month)}, ) - def set_form_params(self, response): - year_select = response.xpath("//select[contains(@id, 'ddlAno')]") - self.FORM_PARAM_YEAR = year_select.attrib["name"] - - month_select = response.xpath("//select[contains(@id, 'ddlMes')]") - self.FORM_PARAM_MONTH = month_select.attrib["name"] - - def make_month_request(self, response): year, month = response.meta.get("cookiejar")