diff --git a/mapswipe_workers/mapswipe_workers/project_types/street/project.py b/mapswipe_workers/mapswipe_workers/project_types/street/project.py
index 1853755d..fba87f39 100644
--- a/mapswipe_workers/mapswipe_workers/project_types/street/project.py
+++ b/mapswipe_workers/mapswipe_workers/project_types/street/project.py
@@ -21,7 +21,7 @@
     build_multipolygon_from_layer_geometries,
     check_if_layer_has_too_many_geometries,
     save_geojson_to_file,
-    multipolygon_to_wkt
+    multipolygon_to_wkt,
 )
 from mapswipe_workers.project_types.project import BaseProject, BaseTask, BaseGroup
 from mapswipe_workers.utils.process_mapillary import get_image_metadata
@@ -56,7 +56,6 @@ def __init__(self, project_draft):
             sampling_threshold=project_draft.get("samplingThreshold", None),
         )
 
-
         self.imageIds = ImageMetadata["ids"]
         self.imageGeometries = ImageMetadata["geometries"]
 
@@ -83,7 +82,9 @@ def validate_geometries(self):
         self.inputGeometriesFileName = save_geojson_to_file(
             self.projectId, self.geometry
         )
-        layer, datasource = load_geojson_to_ogr(self.projectId, self.inputGeometriesFileName)
+        layer, datasource = load_geojson_to_ogr(
+            self.projectId, self.inputGeometriesFileName
+        )
 
         # check if inputs fit constraints
         check_if_layer_is_empty(self.projectId, layer)
@@ -97,7 +98,9 @@ def validate_geometries(self):
         del datasource
         del layer
 
-        logger.info(f"{self.projectId}" f" - validate geometry - " f"input geometry is correct.")
+        logger.info(
+            f"{self.projectId} - validate geometry - input geometry is correct."
+        )
 
         wkt_geometry = multipolygon_to_wkt(multi_polygon)
         return wkt_geometry
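
Note on the project.py hunks: besides the Black-style reformatting, the constructor consumes the dict returned by get_image_metadata through its "ids" and "geometries" keys. A minimal runnable sketch of that contract (the two keys come from the hunk above; the id and point values below are made up for illustration):

    from shapely.geometry import Point

    # Hypothetical stand-in for the dict returned by get_image_metadata;
    # only the "ids" and "geometries" keys are taken from the diff above,
    # the values are purely illustrative.
    image_metadata = {
        "ids": ["1234567890"],
        "geometries": [Point(8.67, 49.41)],
    }

    image_ids = image_metadata["ids"]                # feeds self.imageIds
    image_geometries = image_metadata["geometries"]  # feeds self.imageGeometries
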
diff --git a/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py b/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py
index 3558e9b3..8181006e 100644
--- a/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py
+++ b/mapswipe_workers/mapswipe_workers/utils/process_mapillary.py
@@ -127,15 +127,23 @@ def coordinate_download(
         return pd.DataFrame(downloaded_metadata)
 
     target_columns = [
-        "id", "geometry", "captured_at", "is_pano", "compass_angle", "sequence", "organization_id"
+        "id",
+        "geometry",
+        "captured_at",
+        "is_pano",
+        "compass_angle",
+        "sequence",
+        "organization_id",
    ]
     for col in target_columns:
         if col not in downloaded_metadata.columns:
             downloaded_metadata[col] = None
 
-    if downloaded_metadata.isna().all().all() == False or downloaded_metadata.empty == True:
+    if not downloaded_metadata.isna().all().all() or downloaded_metadata.empty:
         downloaded_metadata = downloaded_metadata[
-            downloaded_metadata['geometry'].apply(lambda point: point.within(polygon))
+            downloaded_metadata["geometry"].apply(
+                lambda point: point.within(polygon)
+            )
         ]
 
     return downloaded_metadata
@@ -187,9 +198,7 @@ def filter_results(
     df = results_df.copy()
     if is_pano is not None:
         if df["is_pano"].isna().all():
-            logger.exception(
-                "No Mapillary Feature in the AoI has a 'is_pano' value."
-            )
+            logger.exception("No Mapillary Feature in the AoI has an 'is_pano' value.")
             return None
         df = df[df["is_pano"] == is_pano]
 
@@ -220,14 +229,12 @@ def get_image_metadata(
     organization_id: str = None,
     start_time: str = None,
     end_time: str = None,
-    sampling_threshold = None,
+    sampling_threshold=None,
 ):
     aoi_polygon = geojson_to_polygon(aoi_geojson)
-    downloaded_metadata = coordinate_download(
-        aoi_polygon, level, attempt_limit
-    )
+    downloaded_metadata = coordinate_download(aoi_polygon, level, attempt_limit)
     downloaded_metadata = downloaded_metadata[
-        downloaded_metadata['geometry'].apply(lambda geom: isinstance(geom, Point))
+        downloaded_metadata["geometry"].apply(lambda geom: isinstance(geom, Point))
     ]
 
     downloaded_metadata = filter_results(
@@ -235,10 +242,12 @@ def get_image_metadata(
     )
     if sampling_threshold is not None:
         downloaded_metadata = spatial_sampling(downloaded_metadata, sampling_threshold)
-    if downloaded_metadata.isna().all().all() == False or downloaded_metadata.empty == False:
+    if not downloaded_metadata.isna().all().all() or not downloaded_metadata.empty:
         if len(downloaded_metadata) > 100000:
-            err = (f"Too many Images with selected filter "
-                   f"options for the AoI: {len(downloaded_metadata)}")
+            err = (
+                f"Too many Images with selected filter "
+                f"options for the AoI: {len(downloaded_metadata)}"
+            )
             raise ValueError(err)
         else:
             return {
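
Note on the process_mapillary.py hunks: the "== False" / "== True" comparisons are replaced with plain truthiness checks per PEP 8; for boolean results the behaviour is identical, and the hunk headers above are adjusted for the shorter line counts. A small self-contained sketch of the within-polygon filter pattern used in coordinate_download, with toy data standing in for the real Mapillary schema:

    import pandas as pd
    from shapely.geometry import Point, Polygon

    # Toy stand-ins for the AoI polygon and the downloaded metadata.
    polygon = Polygon([(0, 0), (0, 2), (2, 2), (2, 0)])
    df = pd.DataFrame({"id": [1, 2], "geometry": [Point(1, 1), Point(5, 5)]})

    # Same pattern as the diff: keep only images whose point lies inside the AoI.
    if not df.isna().all().all() or df.empty:
        df = df[df["geometry"].apply(lambda point: point.within(polygon))]

    print(df["id"].tolist())  # [1]
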
diff --git a/mapswipe_workers/mapswipe_workers/utils/spatial_sampling.py b/mapswipe_workers/mapswipe_workers/utils/spatial_sampling.py
index 082346b9..1d9c53dc 100644
--- a/mapswipe_workers/mapswipe_workers/utils/spatial_sampling.py
+++ b/mapswipe_workers/mapswipe_workers/utils/spatial_sampling.py
@@ -3,6 +3,7 @@
 from shapely import wkt
 from shapely.geometry import Point
 
+
 def distance_on_sphere(p1, p2):
     """
     p1 and p2 are two lists that have two elements. They are numpy arrays of the long and lat
@@ -30,13 +31,19 @@ def distance_on_sphere(p1, p2):
     delta_lat = p2[1] - p1[1]
     delta_long = p2[0] - p1[0]
 
-    a = np.sin(delta_lat / 2) ** 2 + np.cos(p1[1]) * np.cos(p2[1]) * np.sin(delta_long / 2) ** 2
+    a = (
+        np.sin(delta_lat / 2) ** 2
+        + np.cos(p1[1]) * np.cos(p2[1]) * np.sin(delta_long / 2) ** 2
+    )
     c = 2 * np.arcsin(np.sqrt(a))
     distances = earth_radius * c
 
     return distances
 
+
 """-----------------------------------Filtering Points------------------------------------------------"""
+
+
 def filter_points(df, threshold_distance):
     """
     Filter points from a DataFrame based on a threshold distance.
@@ -61,12 +68,10 @@ def filter_points(df, threshold_distance):
     lat = df["lat"].to_numpy()
     long = df["long"].to_numpy()
 
-
-    distances = distance_on_sphere([long[1:],lat[1:]],
-                                   [long[:-1],lat[:-1]])
+    distances = distance_on_sphere([long[1:], lat[1:]], [long[:-1], lat[:-1]])
     road_length = np.sum(distances)
 
-    #save the last point if the road segment is relavitely small (< 2*road_length)
+    # save the last point if the road segment is relatively short (< 2 * threshold_distance)
     if threshold_distance <= road_length < 2 * threshold_distance:
         mask[-1] = True
 
@@ -74,18 +79,26 @@ def filter_points(df, threshold_distance):
     for i, distance in enumerate(distances):
         accumulated_distance += distance
         if accumulated_distance >= threshold_distance:
-            mask[i+1] = True
+            mask[i + 1] = True
             accumulated_distance = 0  # Reset accumulated distance
 
     to_be_returned_df = df[mask]
 
     # since the last point has to be omitted in the vectorized distance calculation, it is being checked manually
     p2 = to_be_returned_df.iloc[0]
-    distance = distance_on_sphere([float(p2["long"]),float(p2["lat"])],[long[-1],lat[-1]])
-
-    #last point will be added if it suffices the length condition
-    #last point will be added in case there is only one point returned
-    if distance >= threshold_distance or len(to_be_returned_df) ==1:
-        to_be_returned_df = pd.concat([to_be_returned_df,pd.DataFrame(df.iloc[-1],columns=to_be_returned_df.columns)],axis=0)
+    distance = distance_on_sphere(
+        [float(p2["long"]), float(p2["lat"])], [long[-1], lat[-1]]
+    )
+
+    # the last point is added if it satisfies the length condition
+    # it is also added in case only a single point would otherwise be returned
+    if distance >= threshold_distance or len(to_be_returned_df) == 1:
+        to_be_returned_df = pd.concat(
+            [
+                to_be_returned_df,
+                pd.DataFrame(df.iloc[-1], columns=to_be_returned_df.columns),
+            ],
+            axis=0,
+        )
 
     return to_be_returned_df
@@ -109,19 +122,23 @@ def spatial_sampling(df, interval_length):
     if len(df) == 1:
         return df
 
-    df['long'] = df['geometry'].apply(lambda geom: geom.x if geom.geom_type == 'Point' else None)
-    df['lat'] = df['geometry'].apply(lambda geom: geom.y if geom.geom_type == 'Point' else None)
-    sorted_df = df.sort_values(by=['captured_at'])
+    df["long"] = df["geometry"].apply(
+        lambda geom: geom.x if geom.geom_type == "Point" else None
+    )
+    df["lat"] = df["geometry"].apply(
+        lambda geom: geom.y if geom.geom_type == "Point" else None
+    )
+    sorted_df = df.sort_values(by=["captured_at"])
 
     sampled_sequence_df = pd.DataFrame()
     # loop through each sequence
-    for sequence in sorted_df['sequence_id'].unique():
-        sequence_df = sorted_df[sorted_df['sequence_id'] == sequence]
-
-        filtered_sorted_sub_df = filter_points(sequence_df,interval_length)
-        sampled_sequence_df = pd.concat([sampled_sequence_df,filtered_sorted_sub_df],axis=0)
-
+    for sequence in sorted_df["sequence_id"].unique():
+        sequence_df = sorted_df[sorted_df["sequence_id"] == sequence]
+        filtered_sorted_sub_df = filter_points(sequence_df, interval_length)
+        sampled_sequence_df = pd.concat(
+            [sampled_sequence_df, filtered_sorted_sub_df], axis=0
+        )
     return sampled_sequence_df
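
Note on the spatial_sampling.py hunks: filter_points walks one Mapillary sequence at a time in capture order, accumulates the great-circle distance between consecutive images, and keeps a point whenever the accumulated distance reaches the threshold, with extra handling for the last point. A toy self-contained illustration of that thinning idea, using 1-D distances in place of distance_on_sphere; keeping the first point is an assumption here, since the mask initialization sits outside the hunk:

    import numpy as np

    def thin(positions, threshold):
        # Keep the first point, then keep another point each time the
        # distance accumulated since the last kept point reaches the
        # threshold (mirrors the accumulated_distance loop in the diff).
        mask = np.zeros(len(positions), dtype=bool)
        mask[0] = True
        accumulated = 0.0
        for i in range(1, len(positions)):
            accumulated += abs(positions[i] - positions[i - 1])
            if accumulated >= threshold:
                mask[i] = True
                accumulated = 0.0
        return mask

    print(thin(np.array([0.0, 1.0, 2.0, 3.0, 4.0]), 2.0))
    # [ True False  True False  True]
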