Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added geocoding function #50

Merged
merged 46 commits into from
Nov 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
dc07743
added geocoding function
mtravis Oct 16, 2023
8094953
add more tests
felix-schott Oct 18, 2023
cbc7bf0
Merge pull request #52 from felix-schott/36_add-cli-tests
cholmes Oct 18, 2023
cd80183
added osmnx to requirements
mtravis Oct 19, 2023
e03dc60
made changes to downloads.py
mtravis Oct 20, 2023
e26df79
Some performance quick wins for the geopandas implementation
theroggy Nov 3, 2023
d08dc00
Update process.py
theroggy Nov 3, 2023
ce0bc83
Updated geocoded function and added test
mtravis Nov 4, 2023
9d488c6
Updated settings source
mtravis Nov 4, 2023
290558a
import geocode function to tests
mtravis Nov 4, 2023
9ffae1e
Fixed geocode test
mtravis Nov 4, 2023
d10fb90
added location arg to cli.py
mtravis Nov 5, 2023
02203c4
fixed import settings
mtravis Nov 6, 2023
04bd224
added exception for when location geojson isn't true geojson
mtravis Nov 6, 2023
f1703b2
Merge pull request #53 from theroggy/Some-performance-quick-wins-for-…
cholmes Nov 6, 2023
d2955aa
Update download_buildings.py
mtravis Nov 6, 2023
b5c97d2
Fixed wkt to geojson for geocode
mtravis Nov 6, 2023
f723da6
Update download_buildings.py
mtravis Nov 6, 2023
3e5a22c
added geocoding function
mtravis Oct 16, 2023
cd001aa
added osmnx to requirements
mtravis Oct 19, 2023
38a8a56
made changes to downloads.py
mtravis Oct 20, 2023
4781358
Updated geocoded function and added test
mtravis Nov 4, 2023
ebb811d
Updated settings source
mtravis Nov 4, 2023
4ffb84d
import geocode function to tests
mtravis Nov 4, 2023
db97396
Fixed geocode test
mtravis Nov 4, 2023
0811250
added location arg to cli.py
mtravis Nov 5, 2023
d94a6ec
fixed import settings
mtravis Nov 6, 2023
46025ea
added exception for when location geojson isn't true geojson
mtravis Nov 6, 2023
40b7030
Update download_buildings.py
mtravis Nov 6, 2023
751ad60
Fixed wkt to geojson for geocode
mtravis Nov 6, 2023
68d984b
Update download_buildings.py
mtravis Nov 6, 2023
db12b37
added geocoding function
mtravis Oct 16, 2023
50d8837
made changes to downloads.py
mtravis Oct 20, 2023
99b03f9
Updated geocoded function and added test
mtravis Nov 4, 2023
acb6e7c
Updated settings source
mtravis Nov 4, 2023
f05cb25
added location arg to cli.py
mtravis Nov 5, 2023
88bb7ba
fixed import settings
mtravis Nov 6, 2023
581fb43
added exception for when location geojson isn't true geojson
mtravis Nov 6, 2023
e44a5bf
Update download_buildings.py
mtravis Nov 6, 2023
64229ba
Update download_buildings.py
mtravis Nov 6, 2023
7dc815c
add cli test for --location, fix some minor things, change dst from p…
felix-schott Nov 6, 2023
5e7338c
merging
felix-schott Nov 6, 2023
f3c7174
remove cache dir
felix-schott Nov 6, 2023
fd87791
remove option where unused, tidy up a bit
felix-schott Nov 6, 2023
0daad7c
changed country_iso in cli geocode tests
mtravis Nov 7, 2023
e545565
update readme, simplify geojson_to_quadkey, change geocode() to retur…
felix-schott Nov 16, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ private/
*.py[cod]
*$py.class

cache
issues.txt

# C extensions
Expand Down
10 changes: 7 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@ Will print out a help message. You then will be able run the CLI (download [1.js


```bash
ob tools get_buildings 1.json my-buildings.geojson --country_iso RW
ob tools get_buildings 1.json --dst my-buildings.geojson --country_iso RW
```

You can also stream the JSON in directly in one line:

```
curl https://data.source.coop/cholmes/aois/1.json | ob get_buildings - my-buildings.geojson --country_iso RW
curl https://data.source.coop/cholmes/aois/1.json | ob get_buildings - --dst my-buildings.geojson --country_iso RW
```


Expand Down Expand Up @@ -96,13 +96,17 @@ Usage: ob get_buildings [OPTIONS] [GEOJSON_INPUT] [DST]
this tool we hope to eliminate the need to hint with the country_iso.
Options:
--dst TEXT The path to write the output to. Can be a
directory or file.
--location TEXT Use city or region name instead of providing an
AOI as file.
--source [google|overture] Dataset to query, defaults to Overture
--country_iso TEXT A 2 character country ISO code to filter the
data by.
-s, --silent Suppress all print outputs.
--overwrite Overwrite the destination file if it already
exists.
--verbose Print detailed logs with timestamps.
-v, --verbose Print detailed logs with timestamps.
--help Show this message and exit.
```

Expand Down
17 changes: 14 additions & 3 deletions open_buildings/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import click
import json
import pandas as pd
import osmnx
from shapely.geometry import shape, box, mapping
import matplotlib.pyplot as plt
from open_buildings.google.process import process_benchmark, process_geometries
from open_buildings.download_buildings import download as download_buildings
Expand Down Expand Up @@ -34,15 +36,22 @@ def overture():
def handle_comma_separated(ctx, param, value):
    """Click callback that splits a comma-separated option value into a list.

    Args:
        ctx: The Click context (unused).
        param: The Click parameter being processed (unused).
        value: The raw option string, or ``None`` when the option was not
            supplied on the command line.

    Returns:
        A list with the comma-separated parts of ``value``, or ``None`` if
        ``value`` is ``None``.
    """
    # Click invokes callbacks even when the option is absent; in that case the
    # value is None and calling .split() on it would raise AttributeError.
    if value is None:
        return None
    return value.split(',')

def geocode(data: str):
    """Geocode a place name and return its outline as a GeoJSON-style Feature dict.

    The query string is passed to ``osmnx.geocode_to_gdf``; the geometry at
    label ``0`` of the result is wrapped in a ``{"type": "Feature", ...}`` dict.
    """
    gdf = osmnx.geocode_to_gdf(data)
    feature = {"type": "Feature", "geometry": mapping(gdf.geometry[0])}
    # Round-trip through JSON so coordinate tuples come back as lists.
    serialised = json.dumps(feature)
    return json.loads(serialised)

@main.command(name="get_buildings")
@click.argument('geojson_input', type=click.File('r'), required=False)
@click.argument('dst', type=str, default="buildings.json")
@click.option('--dst', type=str, default="buildings.json", help='The path to write the output to. Can be a directory or file.')
@click.option('--location', type=str, default=None, help='Use city or region name instead of providing an AOI as file.')
@click.option('--source', default="overture", type=click.Choice(['google', 'overture']), help='Dataset to query, defaults to Overture')
@click.option('--country_iso', type=str, default=None, help='A 2 character country ISO code to filter the data by.')
@click.option('-s', '--silent', is_flag=True, default=False, help='Suppress all print outputs.')
@click.option('--overwrite', default=False, is_flag=True, help='Overwrite the destination file if it already exists.')
@click.option('--verbose', default=False, is_flag=True, help='Print detailed logs with timestamps.')
def get_buildings(geojson_input, dst, source, country_iso, silent, overwrite, verbose):
@click.option('-v', '--verbose', default=False, is_flag=True, help='Print detailed logs with timestamps.')
def get_buildings(geojson_input, dst, location, source, country_iso, silent, overwrite, verbose):
"""Tool to extract buildings in common geospatial formats from large archives of GeoParquet data online. GeoJSON
input can be provided as a file or piped in from stdin. If no GeoJSON input is provided, the tool will read from stdin.

Expand Down Expand Up @@ -71,6 +80,8 @@ def get_buildings(geojson_input, dst, source, country_iso, silent, overwrite, ve

if geojson_input:
geojson_data = json.load(geojson_input)
elif location:
geojson_data = geocode(location)
else:
mtravis marked this conversation as resolved.
Show resolved Hide resolved
geojson_data = json.load(click.get_text_stream('stdin'))
mtravis marked this conversation as resolved.
Show resolved Hide resolved

Expand Down
27 changes: 8 additions & 19 deletions open_buildings/download_buildings.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
import click
from math import tan, cos, log, pi
from shapely.geometry import shape
from shapely.geometry import shape, box, mapping
from typing import Dict, Any, Union
import mercantile
import duckdb
Expand All @@ -13,24 +13,15 @@
import pandas as pd
import geopandas as gpd
import subprocess
from shapely import wkb
import shapely
import geojson
import shutil

import osmnx
from open_buildings.settings import Source, Format, settings

def geojson_to_quadkey(data: dict) -> str:
if 'bbox' in data:
min_lon, min_lat, max_lon, max_lat = data['bbox']
else:
coords = data['geometry']['coordinates'][0]
min_lon = min_lat = float('inf')
max_lon = max_lat = float('-inf')

for lon, lat in coords:
min_lon = min(min_lon, lon)
min_lat = min(min_lat, lat)
max_lon = max(max_lon, lon)
max_lat = max(max_lat, lat)
geom = shape(data["geometry"])
min_lon, min_lat, max_lon, max_lat = geom.bounds

for zoom in range(12, -1, -1):
tiles = list(mercantile.tiles(min_lon, min_lat, max_lon, max_lat, zooms=zoom))
Expand Down Expand Up @@ -79,7 +70,6 @@ def quadkey(geojson_input):
geojson_data = json.load(geojson_input)
else:
geojson_data = json.load(click.get_text_stream('stdin'))

result = geojson_to_quadkey(geojson_data)
click.echo(result)

Expand Down Expand Up @@ -132,9 +122,8 @@ def quad2json(quadkey_input):
result = quadkey_to_geojson(quadkey_input)
click.echo(json.dumps(result, indent=2))


def download(
geojson_data: Dict[str, Any],
geojson_data: Dict[str, Any],
dst: Union[Path, str] = "buildings.json",
source: Union[Source, str] = Source.OVERTURE,
format: Optional[Union[Format, str]] = None,
Expand Down Expand Up @@ -332,4 +321,4 @@ def print_elapsed_time(start_time):
#cli.add_command(download)

if __name__ == '__main__':
cli()
cli()
17 changes: 9 additions & 8 deletions open_buildings/google/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,14 +210,13 @@ def process_with_pandas(
input_file_path, split_multipolygons, verbose, format, output_file_path
):
df = pd.read_csv(input_file_path)
df['geometry'] = df['geometry'].apply(wkt.loads)
gs = gpd.GeoSeries.from_wkt(df['geometry'])

# Drop the 'latitude' and 'longitude' columns
df = df.drop(['latitude', 'longitude'], axis=1)
# Drop the 'latitude', 'longitude' and 'geometry' columns
df = df.drop(['latitude', 'longitude', 'geometry'], axis=1)

# Convert the DataFrame to a GeoDataFrame
gdf = gpd.GeoDataFrame(df, geometry='geometry')
gdf.set_crs("EPSG:4326", inplace=True)
gdf = gpd.GeoDataFrame(df, geometry=gs, crs="EPSG:4326")

# Create an empty GeoDataFrame for the output
output_gdf = gpd.GeoDataFrame(columns=list(gdf.columns), crs=gdf.crs)
Expand Down Expand Up @@ -295,13 +294,15 @@ def process_with_pandas(
)
# Write the output GeoDataFrame to a file
if format == 'fgb':
output_gdf.to_file(output_file_path, driver="FlatGeobuf")
output_gdf.to_file(output_file_path, driver="FlatGeobuf", engine="pyogrio")
elif format == 'parquet':
output_gdf.to_parquet(output_file_path, compression=PARQUET_COMPRESSION)
elif format == 'gpkg':
output_gdf.to_file(output_file_path, driver='GPKG')
output_gdf.to_file(
output_file_path, driver='GPKG', engine="pyogrio", spatial_index=False
)
elif format == 'shp':
output_gdf.to_file(output_file_path, driver='ESRI Shapefile')
output_gdf.to_file(output_file_path, driver='ESRI Shapefile', engine="pyogrio")


def process_with_ogr2ogr(
Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ click
duckdb
pandas
geopandas
pyogrio
osmnx
shapely
openlocationcode
tabulate
Expand Down
115 changes: 113 additions & 2 deletions tests/test_open_buildings.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,12 @@
from pathlib import Path
import os
import json
from shapely.geometry import shape, box, mapping
import re
import subprocess

from open_buildings.download_buildings import download, geojson_to_wkt, geojson_to_quadkey, quadkey_to_geojson
from open_buildings.cli import geocode
from open_buildings.settings import Source, Format, settings

###########################################################################
Expand Down Expand Up @@ -58,11 +62,17 @@ def test_geojson_to_wkt(aoi: Dict[str, Any]):
def test_geojson_to_quadkey(aoi: Dict[str, Any]):
    """ Checks geojson_to_quadkey() for the `aoi` input against a previously established quadkey. """
    expected_quadkey = '301001330310'
    actual_quadkey = geojson_to_quadkey(aoi)
    assert actual_quadkey == expected_quadkey

def test_quadkey_to_geojson():
    """ Checks quadkey_to_geojson() for a fixed quadkey against a previously established Feature. """
    west, east = -0.17578125, -0.087890625
    south, north = 51.50874245880333, 51.56341232867588
    # Closed ring: the first corner is repeated as the last one.
    ring = [
        [west, south],
        [east, south],
        [east, north],
        [west, north],
        [west, south],
    ]
    expected_feature = {
        'type': 'Feature',
        'geometry': {'type': 'Polygon', 'coordinates': [ring]},
    }
    assert quadkey_to_geojson('031313131112') == expected_feature

def test_geocode():
    """ Tests geocode() using a pre-established true value. Verifies the bbox of the returned geometry.

    The bounds are compared with a tolerance rather than exact float equality:
    the geometry comes from a geocoding lookup, so its coordinates are not
    guaranteed to stay bit-identical to the recorded values.
    """
    geocoding_result = geocode('plymouth')
    assert geocoding_result["type"] == "Feature"
    expected_bounds = (-4.2055324, 50.3327426, -4.0196056, 50.4441737)
    actual_bounds = shape(geocoding_result["geometry"]).bounds
    # abs=1e-2 degrees allows small shifts of the outline while still
    # confirming that the right place was geocoded.
    assert actual_bounds == pytest.approx(expected_bounds, abs=1e-2)

@pytest.mark.integration
@pytest.mark.flaky(reruns=NUM_RERUNS)
@pytest.mark.parametrize("source", [s for s in Source])
Expand All @@ -89,6 +99,19 @@ def test_download_directory(aoi: Dict[str, Any], tmp_path: Path):
assert os.path.exists(tmp_path.joinpath("buildings.json"))
assert os.path.getsize(tmp_path.joinpath("buildings.json")) != 0

@pytest.mark.integration
@pytest.mark.flaky(reruns=NUM_RERUNS)
def test_download_overwrite(aoi: Dict[str, Any], tmp_path: Path):
    """ Checks that download() with overwrite=True replaces the content of a file that already exists. """
    placeholder = "Foo bar"
    target = tmp_path / "file_exists.json"
    target.write_text(placeholder)

    download(aoi, dst=target, country_iso="SC", overwrite=True)
    assert target.exists()
    # The placeholder text must be gone once the file has been rewritten.
    assert target.read_text() != placeholder

@pytest.mark.integration
@pytest.mark.flaky(reruns=NUM_RERUNS)
@pytest.mark.parametrize("format", [f for f in Format if f != Format.SHAPEFILE]) # fails for shapefile!
Expand Down Expand Up @@ -117,4 +140,92 @@ def test_download_format(format: Format, aoi: Dict[str, Any], tmp_path: Path):
def test_download_unknown_format(aoi: Dict[str, Any]):
""" Tests that an unknown format (.abc) raises an Exception. """
with pytest.raises(ValueError):
download(aoi, dst="buildings.abc")
download(aoi, dst="buildings.abc")

@pytest.mark.integration
@pytest.mark.flaky(reruns=NUM_RERUNS)
def test_cli_get_buildings_from_file_to_directory(aoi: Dict[str, Any], tmp_path: Path):
    """
    CLI check for get_buildings: a GeoJSON file is the input and a directory is given as --dst.
    The output is expected under the default file name inside that directory.
    """
    # Serialise the AOI dict into a GeoJSON file in the temporary directory.
    aoi_file = tmp_path / "input.json"
    aoi_file.write_text(json.dumps(aoi))
    command = ["ob", "get_buildings", str(aoi_file), "--dst", str(tmp_path), "--country_iso", "SC"]
    subprocess.run(command, check=True)
    result_file = tmp_path / "buildings.json"  # default file name
    assert result_file.exists()
    assert result_file.stat().st_size != 0


@pytest.mark.integration
@pytest.mark.flaky(reruns=NUM_RERUNS)
def test_cli_get_buildings_from_stdin_to_directory(aoi: Dict[str, Any], tmp_path: Path):
    """
    CLI check for get_buildings: the GeoJSON arrives on stdin and a directory is given as --dst.
    Also checks that stdout carries a timestamped log message.
    """
    # Shell pipes (e.g. f"echo {json.dumps(aoi)} | ...") are not available through subprocess.run,
    # so the JSON is handed over as stdin via the input/text arguments.
    command = ["ob", "get_buildings", "-", "--dst", str(tmp_path), "--country_iso", "SC"]
    process = subprocess.run(command, input=json.dumps(aoi), text=True, check=True, capture_output=True)
    # Timestamp format, e.g. "[2023-10-18 19:08:24]". The pattern is anchored with ^ and
    # compiled without re.MULTILINE, so it matches at the very start of stdout only.
    timestamp_pattern = re.compile(r"^\[[0-9]{4}(-[0-9]{2}){2} ([0-9]{2}:){2}[0-9]{2}\] ")
    assert timestamp_pattern.search(process.stdout)
    result_file = tmp_path / "buildings.json"  # default file name
    assert result_file.exists()
    assert result_file.stat().st_size != 0

@pytest.mark.integration
@pytest.mark.flaky(reruns=NUM_RERUNS)
def test_cli_get_buildings_from_stdin_to_file_silent(aoi: Dict[str, Any], tmp_path: Path):
    """
    CLI check for get_buildings: the GeoJSON arrives on stdin, --dst is an exact file path and --silent is set.
    Neither stdout nor stderr may receive any output.
    """
    result_file = tmp_path / "test123.json"
    # Shell pipes (e.g. f"echo {json.dumps(aoi)} | ...") are not available through subprocess.run,
    # so the JSON is handed over as stdin via the input/text arguments.
    command = ["ob", "get_buildings", "-", "--dst", str(result_file), "--silent", "--country_iso", "SC"]
    process = subprocess.run(command, input=json.dumps(aoi), text=True, check=True, capture_output=True)
    assert process.stdout == ""  # nothing may be printed to stdout
    assert process.stderr == ""  # nothing may be printed to stderr
    assert result_file.exists()
    assert result_file.stat().st_size != 0


@pytest.mark.integration
@pytest.mark.flaky(reruns=NUM_RERUNS)
def test_cli_get_buildings_from_stdin_to_file_overwrite_false(aoi: Dict[str, Any], tmp_path: Path):
    """
    CLI check for get_buildings: the GeoJSON arrives on stdin and --dst points at a file that already exists.
    Without --overwrite the file must keep its content and stdout must mention that it exists.
    """
    placeholder = "Foo bar"
    existing_file = tmp_path / "file_exists.json"
    existing_file.write_text(placeholder)
    # Shell pipes (e.g. f"echo {json.dumps(aoi)} | ...") are not available through subprocess.run,
    # so the JSON is handed over as stdin via the input/text arguments.
    command = ["ob", "get_buildings", "-", "--dst", str(existing_file), "--country_iso", "SC"]
    process = subprocess.run(command, input=json.dumps(aoi), text=True, check=True, capture_output=True)
    assert existing_file.exists()
    assert existing_file.read_text() == placeholder  # content is unchanged
    assert "exists" in process.stdout  # the user was told about the existing file

@pytest.mark.integration
@pytest.mark.flaky(reruns=NUM_RERUNS)
def test_cli_get_buildings_geocode(tmp_path: Path):
    """
    CLI check for the geocoding path of get_buildings, selected with the "--location" option
    in place of a GeoJSON input.
    """
    result_file = tmp_path / "geocode_test.json"
    command = ["ob", "get_buildings", "--dst", str(result_file), "--location", "oxford uk", "--country_iso", "GB"]
    subprocess.run(command, check=True)
    assert result_file.exists()
    assert result_file.stat().st_size != 0

@pytest.mark.integration
@pytest.mark.flaky(reruns=NUM_RERUNS)
def test_cli_get_buildings_geocode_multipolygon(tmp_path: Path):
    """
    CLI check for the geocoding path of get_buildings ("--location") with a place whose outline
    is a MultiPolygon (Dubrovnik), which is intended to be simplified to a polygon (convex hull).
    The test itself only asserts that the command succeeds and writes a non-empty output file.
    """
    result_file = tmp_path / "geocode_test.json"
    command = ["ob", "get_buildings", "--dst", str(result_file), "--location", "dubrovnik", "--country_iso", "HR"]
    subprocess.run(command, check=True)
    assert result_file.exists()
    assert result_file.stat().st_size != 0