Skip to content

Commit

Permalink
Simplifica custom_settings e campos de formrequest sendo solicitados
Browse files: browse the repository at this point in the history
  • Loading branch information
trevineju committed Jan 13, 2025
1 parent a14da77 commit e75155f
Showing 1 changed file with 6 additions and 9 deletions.
15 changes: 6 additions & 9 deletions data_collection/gazette/spiders/es/es_vitoria.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,7 +16,6 @@ class EsVitoriaSpider(BaseGazetteSpider):
custom_settings = {
"DOWNLOAD_DELAY": 0.3,
"RANDOMIZE_DOWNLOAD_DELAY": True,
"RETRY_HTTP_CODES": [500, 502, 503, 504, 522, 524, 408, 429, 406],
}

FORM_PARAM_YEAR = "ctl00$conteudo$ucPesquisarDiarioOficial$ddlAno"
Expand Down Expand Up @@ -51,12 +50,9 @@ def make_month_request(self, response):

for monthly_date in self._dates_of_interest(MONTHLY):
if dt(year, 1, 1) <= monthly_date <= dt(year, 12, 31):

formdata = {
self.FORM_PARAM_YEAR: str(monthly_date.year),
self.FORM_PARAM_MONTH: str(monthly_date.month),
"__EVENTTARGET": self.FORM_PARAM_MONTH,
"__EVENTARGUMENT": "",
}

yield FormRequest.from_response(
Expand Down Expand Up @@ -85,11 +81,8 @@ def parse_editions_list(self, response, current_page=1):
if "pagination" in response.text:
if response.css(".pagination li")[-1].css("a::text").get():
next_page = current_page + 1
year, month = response.meta.get("cookiejar")

formdata = {
self.FORM_PARAM_YEAR: str(year),
self.FORM_PARAM_MONTH: str(month),
"__EVENTTARGET": self.FORM_PARAM_PAGINATION,
"__EVENTARGUMENT": f"Page${next_page}",
}
Expand All @@ -104,6 +97,10 @@ def parse_editions_list(self, response, current_page=1):

def _dates_of_interest(self, recurrence):
    """Build the set of dates used to drive the year/month form requests.

    Args:
        recurrence: Frequency constant handed straight to ``rrule`` (the
            visible caller passes ``MONTHLY``).

    Returns:
        An ``rruleset`` containing the occurrences of a rule that runs from
        ``self.start_date`` until ``self.end_date`` restricted to day 1 of
        the month, plus one explicit date: the first day of the month that
        contains ``self.start_date``.
    """
    dates = rruleset()
    # Register the recurrence exactly once; adding the same rule a second
    # time contributes no additional dates.
    dates.rrule(
        rrule(
            recurrence, dtstart=self.start_date, until=self.end_date, bymonthday=[1]
        )
    )
    # NOTE(review): the rule starts at start_date, so when start_date is past
    # day 1 its own month would presumably be missed; the explicit rdate adds
    # that month's first day back in. `dt` is the file-level date alias.
    dates.rdate(dt(self.start_date.year, self.start_date.month, 1))
    return dates

0 comments on commit e75155f

Please sign in to comment.