Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validator #6

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions info.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
2022-03-01 17:12:45,922: reading test123
2022-03-01 17:12:46,007: start dissolving for test123
2022-03-01 17:12:46,057: done dissolving for test123
2022-03-01 17:12:46,057: reading test11
2022-03-01 17:12:46,085: no haz col found, skipping dissolving for test11
2022-03-12 17:27:43,025: reading test123
2022-03-12 17:27:43,102: no haz col found, skipping dissolving for test123
2022-03-12 17:27:43,102: reading test11
2022-03-12 17:27:43,127: no haz col found, skipping dissolving for test11
2022-03-12 17:28:28,366: reading test123
2022-03-12 17:28:28,442: start dissolving for test123
2022-03-12 17:28:28,502: done dissolving for test123
2022-03-12 17:28:28,502: reading test11
2022-03-12 17:28:28,525: no haz col found, skipping dissolving for test11
2022-03-12 17:30:25,265: reading test123
2022-03-12 17:30:25,340: no haz col found, skipping dissolving for test123
2022-03-12 17:30:25,340: no haz col found, skipping dissolving for test123
2022-03-12 17:30:25,340: no haz col found, skipping dissolving for test123
2022-03-12 17:30:25,340: no haz col found, skipping dissolving for test123
2022-03-12 17:30:25,340: no haz col found, skipping dissolving for test123
2022-03-12 17:30:25,340: start dissolving for test123
2022-03-12 17:30:25,385: done dissolving for test123
2022-03-12 17:30:25,385: reading test11
2022-03-12 17:30:25,408: no haz col found, skipping dissolving for test11
2022-03-12 17:30:25,408: no haz col found, skipping dissolving for test11
2022-03-12 17:30:25,408: no haz col found, skipping dissolving for test11
2022-03-12 17:30:25,408: no haz col found, skipping dissolving for test11
2022-03-12 17:30:25,408: no haz col found, skipping dissolving for test11
2022-03-12 17:30:25,408: no haz col found, skipping dissolving for test11
2022-03-12 17:30:25,419: done dissolving for test11
2022-03-16 10:52:54,898: reading test123
2022-03-16 10:52:54,992: start dissolving for test123
2022-03-16 10:52:55,036: done dissolving for test123
2022-03-16 10:52:55,036: reading test11
2022-03-16 10:52:55,058: no haz col found, skipping dissolving for test11
2022-03-16 10:52:55,058: reading PH126300000_LH_LH3
2022-03-16 10:53:19,595: start dissolving for PH126300000_LH_LH3
2022-03-16 14:13:55,363: done dissolving for PH126300000_LH_LH3
82 changes: 82 additions & 0 deletions src/haz_validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import os
import geopandas as gpd
import pandas as pd
from datetime import datetime

def shapefile_validator():
    """
    Validate every shapefile listed in ``shp_files`` and write the results to a CSV.

    The function takes no arguments; it reads these module-level names, which
    the ``__main__`` section of this script sets before calling it:

    * ``shp_files``   - shapefile file names to validate
    * ``hazard_name`` - one label per entry of ``shp_files`` (same order)
    * ``input_path``  - directory that contains the shapefiles
    * ``datetimenow`` - timestamp string used as the CSV file name prefix

    For each shapefile four checks are recorded:

    * ``contains_geometry`` - the geometry column has at least one entry
    * ``correct_attribute`` - at least one of the known hazard columns exists
    * ``GCS_prj``           - the CRS compares equal to ``'epsg:4326'``
    * ``diss_check``        - the file has 3 rows or fewer

    ``final_check`` is True only when all four checks are True. The table is
    saved as ``<datetimenow>_results_validation.csv`` in the current working
    directory. Nothing is returned.
    """

    # Per-file results, one entry per shapefile (kept in shp_files order)
    geom_check = []
    attribute_check = []
    prj_check = []
    diss_check = []
    row_counts = []

    # Observed variants of hazard attributes
    haz_cols = ['Var', 'VAR', 'SS', 'GRIDCODE', 'LH']

    for shp in shp_files:
        # Read each shapefile exactly once, from the directory it was listed in
        # (shp_files holds bare file names, not paths).
        data = gpd.read_file(os.path.join(input_path, shp))

        # Number of rows (features) in this shapefile
        count_rows = len(data.index)
        row_counts.append(count_rows)

        # Dissolve check: True if the row count is less than or equal to 3
        diss_check.append(count_rows <= 3)

        # True if any of the known hazard column names is present
        attribute_check.append(any(haz in data for haz in haz_cols))

        # True when the projection is epsg:4326. This relies on the CRS object
        # returned by geopandas comparing equal to the string form.
        prj_check.append(data.crs == 'epsg:4326')

        # True when the geometry column holds at least one entry
        geom_check.append(data.geometry.shape[0] > 0)

    # 'rows' is filled per shapefile, so each line of the report shows the
    # row count of its own file rather than that of the last file read.
    validator = pd.DataFrame(
        data=zip(hazard_name, geom_check, attribute_check, prj_check, diss_check, row_counts),
        columns=['hazard name', 'contains_geometry', 'correct_attribute', 'GCS_prj', 'diss_check', 'rows'],
    )

    # Overall validation assessment for each shapefile. False means the shapefile
    # is invalid and needs to be returned to the hazard team for inspection.
    validator['final_check'] = validator.contains_geometry & validator.correct_attribute & validator.GCS_prj & validator.diss_check

    # Saves the dataframe into a csv, with current datetime indicator
    validator.to_csv(f'{datetimenow}_results_validation.csv', index=None, encoding="utf-8")

if __name__ == '__main__':
    # Locate the "input" folder and list its contents.
    # NOTE(review): '__file__' is a quoted string literal here, so abspath()
    # resolves it against the current working directory rather than this
    # script's location - confirm that is the intended behaviour.
    path_to_dir = os.path.dirname(os.path.abspath('__file__'))
    input_path = os.path.join(path_to_dir, "input")
    input_files = os.listdir(input_path)

    # Keep only the entries whose name ends in .shp
    shp_files = [entry for entry in input_files if entry.endswith(".shp")]

    # Hazard name = file name with ".shp" removed
    hazard_name = [entry.replace(".shp", "") for entry in shp_files]

    # Timestamp used as the prefix of the results CSV file name
    datetimenow = f"{datetime.now():%Y%m%d %H:%M:%S}"

    # Run the validation using the module-level names prepared above
    shapefile_validator()