diff --git a/info.log b/info.log
new file mode 100644
index 0000000..8f5619d
--- /dev/null
+++ b/info.log
@@ -0,0 +1,38 @@
+2022-03-01 17:12:45,922: reading test123
+2022-03-01 17:12:46,007: start dissolving for test123
+2022-03-01 17:12:46,057: done dissolving for test123
+2022-03-01 17:12:46,057: reading test11
+2022-03-01 17:12:46,085: no haz col found, skipping dissolving for test11
+2022-03-12 17:27:43,025: reading test123
+2022-03-12 17:27:43,102: no haz col found, skipping dissolving for test123
+2022-03-12 17:27:43,102: reading test11
+2022-03-12 17:27:43,127: no haz col found, skipping dissolving for test11
+2022-03-12 17:28:28,366: reading test123
+2022-03-12 17:28:28,442: start dissolving for test123
+2022-03-12 17:28:28,502: done dissolving for test123
+2022-03-12 17:28:28,502: reading test11
+2022-03-12 17:28:28,525: no haz col found, skipping dissolving for test11
+2022-03-12 17:30:25,265: reading test123
+2022-03-12 17:30:25,340: no haz col found, skipping dissolving for test123
+2022-03-12 17:30:25,340: no haz col found, skipping dissolving for test123
+2022-03-12 17:30:25,340: no haz col found, skipping dissolving for test123
+2022-03-12 17:30:25,340: no haz col found, skipping dissolving for test123
+2022-03-12 17:30:25,340: no haz col found, skipping dissolving for test123
+2022-03-12 17:30:25,340: start dissolving for test123
+2022-03-12 17:30:25,385: done dissolving for test123
+2022-03-12 17:30:25,385: reading test11
+2022-03-12 17:30:25,408: no haz col found, skipping dissolving for test11
+2022-03-12 17:30:25,408: no haz col found, skipping dissolving for test11
+2022-03-12 17:30:25,408: no haz col found, skipping dissolving for test11
+2022-03-12 17:30:25,408: no haz col found, skipping dissolving for test11
+2022-03-12 17:30:25,408: no haz col found, skipping dissolving for test11
+2022-03-12 17:30:25,408: no haz col found, skipping dissolving for test11
+2022-03-12 17:30:25,419: done dissolving for test11
+2022-03-16 10:52:54,898: reading test123
+2022-03-16 10:52:54,992: start dissolving for test123
+2022-03-16 10:52:55,036: done dissolving for test123
+2022-03-16 10:52:55,036: reading test11
+2022-03-16 10:52:55,058: no haz col found, skipping dissolving for test11
+2022-03-16 10:52:55,058: reading PH126300000_LH_LH3
+2022-03-16 10:53:19,595: start dissolving for PH126300000_LH_LH3
+2022-03-16 14:13:55,363: done dissolving for PH126300000_LH_LH3
diff --git a/src/haz_validator.py b/src/haz_validator.py
new file mode 100644
index 0000000..8ecfa05
--- /dev/null
+++ b/src/haz_validator.py
@@ -0,0 +1,91 @@
+import os
+import geopandas as gpd
+import pandas as pd
+from datetime import datetime
+
+def shapefile_validator():
+    """
+    Validates each shapefile's geometries, attributes, and projection,
+    and writes the per-shapefile results to a timestamped CSV.
+    """
+
+    # Per-shapefile check results
+    geom_check = []
+    attribute_check = []
+    prj_check = []
+    diss_check = []
+    row_counts = []
+
+    # Observed variants of the hazard attribute column
+    haz_cols = ['Var', 'VAR', 'SS', 'GRIDCODE', 'LH']
+
+    for shp in shp_files:
+        # Read the shapefile once; its geometry and CRS are taken from it
+        data = gpd.read_file(shp)
+
+        # Count the number of rows of the shapefile
+        count_rows = len(data.index)
+        row_counts.append(count_rows)
+
+        # Parameter for checking the validity of the dissolved hazard maps:
+        # this returns True when the dissolved map has at most 3 rows.
+        if count_rows <= 3:
+            diss_check.append(True)
+        else:
+            diss_check.append(False)
+
+        # Check whether the shapefile contains a hazard column, given the
+        # observed variants; exactly one result is appended per shapefile.
+        for haz in haz_cols:
+            if haz in data:
+                attribute_check.append(True)
+                break
+        else:
+            attribute_check.append(False)
+
+        # Check whether the shapefile has the correct GCS projection.
+        # Returns True when the projection is EPSG:4326.
+        if data.crs == 'epsg:4326':
+            prj_check.append(True)
+        else:
+            prj_check.append(False)
+
+        # Check whether the shapefile contains geometries.
+        if len(data.geometry) > 0:
+            geom_check.append(True)
+        else:
+            geom_check.append(False)
+
+    validator = pd.DataFrame(
+        data=zip(hazard_name, geom_check, attribute_check, prj_check, diss_check, row_counts),
+        columns=['hazard name', 'contains_geometry', 'correct_attribute', 'GCS_prj', 'diss_check', 'rows'],
+    )
+
+    # Overall validation result for each shapefile. False means the shapefile is
+    # invalid and needs to be returned to the hazard team for inspection.
+    validator['final_check'] = (
+        validator.contains_geometry
+        & validator.correct_attribute
+        & validator.GCS_prj
+        & validator.diss_check
+    )
+
+    # Save the dataframe to a CSV with a current-datetime indicator
+    validator.to_csv(f'{datetimenow}_results_validation.csv', index=False, encoding="utf-8")
+
+if __name__ == '__main__':
+    # Paths to the input directory
+    path_to_dir = os.path.dirname(os.path.abspath(__file__))
+    input_path = os.path.join(path_to_dir, "input")
+    input_files = os.listdir(input_path)
+
+    # Keep only the .shp files, with their full paths
+    shp_files = [os.path.join(input_path, file) for file in input_files if file.endswith(".shp")]
+
+    # Extract the hazard name from each filename
+    hazard_name = [os.path.basename(shp).replace(".shp", "") for shp in shp_files]
+
+    # Current date and time, in a filesystem-safe format
+    datetimenow = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+    shapefile_validator()
\ No newline at end of file
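
For context, and not as part of the patch: a minimal sketch of how the individual checks in shapefile_validator behave on a small in-memory GeoDataFrame. The sample layer, the 'Var' column, and the printed values below are illustrative assumptions, not output from the actual hazard shapefiles.

import geopandas as gpd
from shapely.geometry import Polygon

# Hypothetical dissolved hazard layer: 3 rows, a 'Var' hazard column, WGS84 CRS
sample = gpd.GeoDataFrame(
    {"Var": [1, 2, 3]},
    geometry=[Polygon([(0, 0), (1, 0), (1, 1)])] * 3,
    crs="EPSG:4326",
)

haz_cols = ['Var', 'VAR', 'SS', 'GRIDCODE', 'LH']

# Attribute check: one boolean per layer, True if any hazard-column variant exists
has_haz_col = any(haz in sample for haz in haz_cols)

# Projection check: True when the layer is in geographic WGS84 (EPSG:4326)
correct_prj = sample.crs == 'epsg:4326'

# Dissolve check: a properly dissolved hazard map collapses to at most 3 rows
dissolved_ok = len(sample.index) <= 3

# Geometry check: the layer actually contains geometries
has_geometry = len(sample.geometry) > 0

print(has_haz_col, correct_prj, dissolved_ok, has_geometry)  # True True True True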