Skip to content

Commit

Permalink
fixed scraping from weather.gov.sg
Browse files Browse the repository at this point in the history
  • Loading branch information
liangleslie committed Jul 28, 2024
1 parent 3dbae63 commit 01c9cda
Show file tree
Hide file tree
Showing 4 changed files with 28 additions and 16 deletions.
1 change: 1 addition & 0 deletions custom_components/nea_sg_weather/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,7 @@ class NeaData:

def __init__(self) -> None:
self.weather = Weather()
_LOGGER.debug(f"Weather data initiated: {self.weather}")
self.forecast2hr = Forecast2hr(self.weather)
self.forecast24hr = Forecast24hr(self.weather)
self.forecast4day = Forecast4day(self.weather)
Expand Down
2 changes: 1 addition & 1 deletion custom_components/nea_sg_weather/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
}
RAIN_MAP_URL_PREFIX = "https://www.weather.gov.sg/files/rainarea/50km/v2/dpsri_70km_"
RAIN_MAP_URL_SUFFIX = "0000dBR.dpsri.png"
RAIN_MAP_GIF_URL = "http://www.weather.gov.sg/weather-rain-area-50km/"
RAIN_MAP_GIF_URL = "https://www.weather.gov.sg/weather-rain-area-50km/"

FORECAST_ICON_BASE_URL = "https://www.nea.gov.sg/assets/images/icons/weather-bg/"

Expand Down
1 change: 1 addition & 0 deletions custom_components/nea_sg_weather/nea.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class NeaData:
"""Class for NEA data objects"""

def __init__(self, weather: Weather, url: Str) -> None:
_LOGGER.debug(f"{self.__class__.__name__}: processing Weather data")
self.weather = weather
self.url = url
self.date_time = (
Expand Down
40 changes: 25 additions & 15 deletions custom_components/nea_sg_weather/weathersg.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
from bs4 import BeautifulSoup
from dateutil import parser
import math
import logging


CODE_CONDITION_MAP = {
"BR": "fog",
Expand Down Expand Up @@ -100,28 +102,34 @@
}

_latest_timestamp = str(int(str(round(datetime.now().timestamp()))[:-2])//3*3)+"00" # API endpoint only accepts timestamps on 5-minute boundaries, so floor the current epoch time to a multiple of 300 s
nea_headers = {
NEA_HEADERS = {
"referer": "https://www.nea.gov.sg/"
}

WEATHERSG_HEADERS = {
"authority": "www.weather.gov.sg",
"referer": "www.weather.gov.sg",
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36",
}

GET_ENDPOINTS = {
"4day": "https://www.nea.gov.sg/api/Weather4DayOutlook/GetData/"
+ _latest_timestamp,
"24hr": "https://www.nea.gov.sg/api/WeatherForecast/forecast24hrnowcast2hrs/"
+ _latest_timestamp,
"current": "https://www.nea.gov.sg/api/Weather24hrs/GetData/"
+ _latest_timestamp,
"temperature": "http://www.weather.gov.sg/weather-currentobservations-temperature/",
"humidity": "http://www.weather.gov.sg/weather-currentobservations-relative-humidity/",
"wind": "http://www.weather.gov.sg/weather-currentobservations-wind/",
"rainfall": "http://www.weather.gov.sg/weather-currentobservations-rainfall/",
"temperature": "https://www.weather.gov.sg/weather-currentobservations-temperature/",
"humidity": "https://www.weather.gov.sg/weather-currentobservations-relative-humidity/",
"wind": "https://www.weather.gov.sg/weather-currentobservations-wind/",
"rainfall": "https://www.weather.gov.sg/weather-currentobservations-rainfall/",
}

POST_ENDPOINTS = {
"temperature": "http://www.weather.gov.sg/wp-content/themes/wiptheme/page-functions/functions-ajax-temperature-chart.php",
"humidity": "http://www.weather.gov.sg/wp-content/themes/wiptheme/page-functions/functions-ajax-relative-humidity-chart.php",
"wind": "http://www.weather.gov.sg/wp-content/themes/wiptheme/page-functions/functions-ajax-wind-chart.php",
"rainfall": "http://www.weather.gov.sg/wp-content/themes/wiptheme/page-functions/functions-weather-current-observations-rainfall-ajax.php",
"temperature": "https://www.weather.gov.sg/wp-content/themes/wiptheme/page-functions/functions-ajax-temperature-chart.php",
"humidity": "https://www.weather.gov.sg/wp-content/themes/wiptheme/page-functions/functions-ajax-relative-humidity-chart.php",
"wind": "https://www.weather.gov.sg/wp-content/themes/wiptheme/page-functions/functions-ajax-wind-chart.php",
"rainfall": "https://www.weather.gov.sg/wp-content/themes/wiptheme/page-functions/functions-weather-current-observations-rainfall-ajax.php",
}

REALTIME_WEATHER_CONST = {
Expand All @@ -147,12 +155,12 @@
"": math.nan,
}

_LOGGER = logging.getLogger(__name__)

class Weather:
"""
Main class to easily access weather objects
"""

def __init__(self) -> None:
self.data = self.WeatherData()
self.stations = self.Stations(self.data)
Expand All @@ -166,18 +174,18 @@ class WeatherData:
"""
Class to hold raw responses from endpoints
"""

def __init__(self) -> None:
# get data from NEA API
self.raw_resp = {}
for k, v in GET_ENDPOINTS.items():
_LOGGER.debug(f"Getting {k}: {v}")
self.raw_resp[k] = {}
if GET_ENDPOINTS[k][:23] == "https://www.nea.gov.sg/" :
self.raw_resp[k]["raw"] = requests.get(GET_ENDPOINTS[k],headers=nea_headers)
self.raw_resp[k]["raw"] = requests.get(GET_ENDPOINTS[k],headers=NEA_HEADERS)
self.raw_resp[k]["processed"] = self.raw_resp[k]["raw"].json()
# print(k + ": json stored")
else: # scrape data from weather.gov.sg
self.raw_resp[k]["raw"] = requests.get(GET_ENDPOINTS[k])
self.raw_resp[k]["raw"] = requests.get(GET_ENDPOINTS[k],headers=WEATHERSG_HEADERS)
self.raw_resp[k]["processed"] = {}
soup = BeautifulSoup(self.raw_resp[k]["raw"].content, "html.parser")
self.raw_resp[k]["obs_datetime"] = soup.find(class_="date-obs").text
Expand All @@ -190,6 +198,7 @@ def __init__(self) -> None:
'{stationCode:"'
)[1:]


else:
raw_stations_metadata = self.raw_resp[k]["raw"].text.split(
'{station_code:"'
Expand Down Expand Up @@ -222,7 +231,7 @@ def __init__(self) -> None:
self.raw_resp[k]["processed"][station_reading["id"]][
"station_name"
] = BeautifulSoup(
station_reading["data-content"], "html.parser"
station_reading["data-bs-content"], "html.parser"
).strong.text
self.raw_resp[k]["processed"][station_reading["id"]][
"value"
Expand All @@ -238,7 +247,8 @@ def __init__(self) -> None:
self.raw_resp[k]["processed"][station_reading["id"]][
"direction"
] = station_reading.img["alt"]
# print(k + ": processed html with BeautifulSoup")
_LOGGER.debug(f"{k}: {self.raw_resp[k]}")
_LOGGER.debug(f"{k}: processed html with BeautifulSoup")

class Current:
"""
Expand Down

0 comments on commit 01c9cda

Please sign in to comment.