# Script: 01_study_region_setup.py
# Purpose: Create ABS and non-ABS linkage tables using 2016 data sourced from ABS
#
# Parcel address points are already associated with a Meshblock in the sample_point_feature table.
# Further linkage with the abs_linkage table (actually, a reduced version of the existing mb_dwellings)
# facilitates aggregation to ABS area units such as SA1, SA2, SA3, SA4.
#
# The non-ABS linkage table associates points with the suburb and LGA in which they are located,
# according to ABS-sourced spatial features.
#
# Regarding linkage of addresses with non-ABS structures: while the ABS provides some
# correspondence tables between areas (e.g. SA2 2016 to LGA 2016), these will not be as accurate
# for our purposes as taking an address point location and evaluating the polygon it intersects.
# There are pitfalls in this approach (e.g. if a point lies exactly on a boundary); however,
# this is par for the course when generalising unique units into aggregate categories
# (i.e. points to averages, sums or variances within contiguous areas).
#
# Author: Carl Higgs
# Date: 20180710
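#
# As a minimal sketch of the point-in-polygon linkage approach described above
# (the suburb/LGA layer and column names below are hypothetical, for illustration;
# sample_point_feature is the project's address point table):
#
#   SELECT p.gnaf_pid,
#          s.suburb_name,
#          l.lga_name
#     FROM sample_point_feature p
#     LEFT JOIN suburbs s ON ST_Intersects(p.geom, s.geom)
#     LEFT JOIN lgas    l ON ST_Intersects(p.geom, l.geom);
#
# ST_Intersects evaluates true for boundary contact as well as containment, so a
# point lying exactly on a shared boundary can match more than one polygon
# (the pitfall noted above).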
# Import required modules
import os
import sys
import time
import subprocess as sp  # for executing external commands (e.g. pgsql2shp or ogr2ogr)
import numpy
import pandas
import psycopg2
from progressor import progressor
from sqlalchemy import create_engine
# import arcpy as a legacy dependency (a resource in the gdb is used for network analysis)
import arcpy
from script_running_log import script_running_log
# Import custom variables for the National Liveability indicator process
from _project_setup import *
# simple timer for log file
start = time.time()
script = os.path.basename(sys.argv[0])
task = '\nCreate area linkage tables using 2016 data sourced from ABS'
engine = create_engine("postgresql://{user}:{pwd}@{host}/{db}".format(user = db_user,
                                                                      pwd  = db_pwd,
                                                                      host = db_host,
                                                                      db   = db),
                       use_native_hstore=False)
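# (use_native_hstore=False opts out of psycopg2's native hstore extension handling)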
print("Commencing task: {} at {}".format(task,time.strftime("%Y%m%d-%H%M%S")))
# connect to the PostgreSQL server
conn = psycopg2.connect(dbname=db, user=db_user, password=db_pwd)
curs = conn.cursor()
print("\nImport region data... "),
print("Import pre-processed data for the Highlife study... "),
# first, drop these tables if they exist; otherwise the import and subsequent renames would fail
sql = '''
DROP TABLE IF EXISTS network.clean_intersections_12m ;
DROP TABLE IF EXISTS network.edges ;
DROP TABLE IF EXISTS boundaries.footprints ;
DROP TABLE IF EXISTS network.nodes ;
DROP TABLE IF EXISTS osm.osm_20190902_line ;
DROP TABLE IF EXISTS osm.osm_20190902_point ;
DROP TABLE IF EXISTS osm.osm_20190902_polygon ;
DROP TABLE IF EXISTS osm.osm_20190902_roads ;
DROP TABLE IF EXISTS boundaries.study_region_hex_3200m_diag;
DROP TABLE IF EXISTS boundaries.study_region_hex_3200m_diag_3200m_buffer;
'''
engine.execute(sql)
tables = [['clean_intersections_12m'],
          ['edges'],
          ['nodes'],
          ['osm_20190902_line'],
          ['osm_20190902_point'],
          ['osm_20190902_polygon'],
          ['osm_20190902_roads'],
          ['{}_au_2019'.format(locale), 'study_region'],
          ['{}_au_2019_10000m'.format(locale), 'study_region_10000m'],
          ['{}_au_2019_hex_3200m_diag'.format(locale), 'study_region_hex_3200m_diag'],
          ['{}_au_2019_hex_3200m_diag_3200m_buffer'.format(locale), 'study_region_hex_3200m_diag_3200m_buffer'],
          ['{}_AccessPts_Edited'.format(locale).lower()],
          ['{}_Footprints_v3'.format(locale).lower(), 'footprints']]
command = (
    ' ogr2ogr -overwrite -progress -f "PostgreSQL" '
    ' -t_srs "EPSG:{to_epsg}" '
    ' PG:"host={host} port=5432 dbname={db}'
    ' user={user} password={pwd}" '
    ' "{gpkg}" '
    ' -lco geometry_name="geom"'.format(host = db_host,
                                        db = db,
                                        user = db_user,
                                        pwd = db_pwd,
                                        to_epsg = srid,
                                        gpkg = preprocessed_data)
)
print(command)
sp.call(command, shell=True,cwd=os.path.dirname(preprocessed_data))
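# with no layer names supplied, ogr2ogr copies every layer in the GeoPackage;
# the renames and schema moves below map these onto the project table names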
for t in tables:
    if len(t) == 2:
        sql = '''ALTER TABLE IF EXISTS {} RENAME TO {}'''.format(t[0], t[1])
        engine.execute(sql)
        sql = '''ALTER INDEX IF EXISTS {}_fid_seq RENAME TO {}_fid_seq'''.format(t[0], t[1])
        engine.execute(sql)
# organise imported tables in appropriate schemas
sql = '''
ALTER TABLE IF EXISTS clean_intersections_12m SET SCHEMA network ;
ALTER TABLE IF EXISTS edges SET SCHEMA network ;
ALTER TABLE IF EXISTS footprints SET SCHEMA boundaries ;
ALTER TABLE IF EXISTS nodes SET SCHEMA network ;
ALTER TABLE IF EXISTS osm_20190902_line SET SCHEMA osm ;
ALTER TABLE IF EXISTS osm_20190902_point SET SCHEMA osm ;
ALTER TABLE IF EXISTS osm_20190902_polygon SET SCHEMA osm ;
ALTER TABLE IF EXISTS osm_20190902_roads SET SCHEMA osm ;
ALTER TABLE IF EXISTS study_region_hex_3200m_diag SET SCHEMA boundaries ;
ALTER TABLE IF EXISTS study_region_hex_3200m_diag_3200m_buffer SET SCHEMA boundaries ;
'''
engine.execute(sql)
# process boundaries to be used throughout project
for geo in geo_imports.index.values:
    data = geo_imports.loc[geo,'data']
    if os.path.splitext(data)[1] == '.gpkg':
        epsg = int(geo_imports.loc[geo,'epsg'])
        sql = '''SELECT ST_Extent(ST_Transform(geom,{epsg}))
                   FROM {buffered_study_region};
              '''.format(epsg = epsg,
                         buffered_study_region = buffered_study_region)
        # transform data if not in project spatial reference
        if epsg != srid:
            transform = ' -s_srs "EPSG:{epsg}" -t_srs "EPSG:{srid}" '.format(epsg=epsg, srid=srid)
        else:
            transform = ''
        # get bounding box of buffered study region for clipping external data using ogr2ogr on import;
        # ST_Extent returns a string of the form 'BOX(xmin ymin,xmax ymax)', parsed below
        urban_region = engine.execute(sql).fetchone()
        urban_region = [float(x) for x in urban_region[0][4:-1].replace(',',' ').split(' ')]
        bbox = '{} {} {} {}'.format(*urban_region)
        command = (
            ' ogr2ogr -overwrite -progress -f "PostgreSQL" '
            ' PG:"host={db_host} port=5432 dbname={db} active_schema={boundary_schema}'
            ' user={db_user} password={db_pwd}" '
            ' "{data}" -clipsrc {bbox}'
            ' -lco geometry_name="geom"'
            ' {transform}'
        ).format(db = db,
                 boundary_schema = boundary_schema,
                 db_host = db_host,
                 db_user = db_user,
                 db_pwd = db_pwd,
                 data = data,
                 bbox = bbox,
                 transform = transform)
        print(command)
        sp.call(command, shell=True, cwd=os.path.dirname(folderPath))
print("Done.")
print("Initiate area linkage table based on smallest region in region list (first entry: {})... )".format(geographies[0])),
print('''(note that a warning "Did not recognize type 'geometry' of column 'geom'" may appear; this is fine.)''')
sql = 'SELECT * FROM {}.{} WHERE geom IS NOT NULL'.format(boundary_schema,df_regions.loc[geographies[0],'table'])
area_linkage = pandas.read_sql(sql,con=engine,index_col=df_regions.loc[geographies[0],'id']).reset_index()
# drop the geom column
area_linkage = area_linkage.loc[:,[x for x in area_linkage.columns if x not in ['geom']]]
print("\nMerging csv source data with area linkage table:")
for csv in csv_linkage:
    print('\n{}'.format(csv))
    data_list = df_regions.loc[csv,'data'].split(',')
    print(' - {}'.format(data_list))
    linkage_id = df_regions.loc[csv,'linkage_id']
    retain = (','.join(df_regions.loc[csv][['linkage_id','linkage_retain']].dropna().values.tolist())).split(',')
    print(' - {}'.format(retain))
    if len(data_list) > 1:
        dfs = [pandas.read_csv(f,
                               compression='infer',
                               header=0,
                               sep=',',
                               quotechar='"')
               for f in data_list]
        df = pandas.concat(dfs).sort_index()
    else:
        df = pandas.read_csv(data_list[0],
                             compression='infer',
                             header=0,
                             sep=',',
                             quotechar='"')
    df.columns = [c.lower() for c in df.columns]
    df = df.loc[:,retain].reset_index()
    df[linkage_id] = df[linkage_id].astype(str)
    # 'on' and 'left_index' are mutually exclusive arguments to pandas.merge,
    # so the join is keyed on the linkage id alone
    area_linkage = pandas.merge(area_linkage, df, on=linkage_id, how='left')
# remove redundant merged indices
area_linkage = area_linkage.loc[:,[x for x in area_linkage.columns if x not in ['index_x','index_y','index']]]
# set geographies[0] code as index
area_linkage = area_linkage.set_index(df_regions.loc[geographies[0],'id'])
area_linkage.to_sql('area_linkage',con = engine, if_exists='replace')
print("Done.")
print("\nRecreate area linkage table geometries and GIST index... "),
# workaround, since geopandas isn't necessarily installed on our computers:
# pandas.to_sql writes a plain table without geometry, so the geometry column
# is re-attached from the source boundary table in the SQL below
sql = '''
---- commented out dwelling exclusion; this is arguably excessive for the linkage table
-- DELETE FROM area_linkage WHERE dwelling = 0;
ALTER TABLE area_linkage ADD COLUMN IF NOT EXISTS urban text;
ALTER TABLE area_linkage ADD COLUMN IF NOT EXISTS study_region boolean;
ALTER TABLE area_linkage ADD COLUMN IF NOT EXISTS area_ha double precision;
ALTER TABLE area_linkage ADD COLUMN IF NOT EXISTS geom geometry;
'''
curs.execute(sql)
conn.commit()
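# ST_Area on a projected, metre-based coordinate system returns square metres;
# dividing by 10,000 in the update below converts this to hectares
# (this assumes the project srid is a metre-based projection)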
sql = '''
UPDATE area_linkage a
SET urban = CASE
WHEN sos_name_2016 IN ('Major Urban','Other Urban')
THEN 'urban'
ELSE 'not urban'
END,
area_ha = ST_Area(g.geom)/10000.0,
geom = g.geom
FROM {boundary_schema}.{table} g
WHERE a.{id} = g.{id};
CREATE INDEX gix_area_linkage ON area_linkage USING GIST (geom);
'''.format(boundary_schema = boundary_schema,
           table = df_regions.loc[geographies[0],'table'],
           id = df_regions.loc[geographies[0],'id'])
curs.execute(sql)
conn.commit()
sql = '''
UPDATE area_linkage a
SET study_region = ST_CoveredBy(a.geom,s.geom)
FROM study_region s;
'''
curs.execute(sql)
conn.commit()
sql = '''
DROP TABLE IF EXISTS mb_dwellings;
CREATE TABLE mb_dwellings AS
SELECT *
FROM area_linkage
WHERE dwelling > 0;
CREATE UNIQUE INDEX ix_mb_dwellings ON mb_dwellings (mb_code_2016);
CREATE INDEX gix_mb_dwellings ON mb_dwellings USING GIST (geom);
'''
curs.execute(sql)
conn.commit()
print("Done.")
print("Granting privileges to python and arcgis users... "),
curs.execute(grant_query)
conn.commit()
print("Done.")
## ArcGIS code (currently necessary due to legacy usage in lead up to network analysis)
# ArcGIS environment settings
arcpy.env.workspace = locale_dir
# create project specific folder in temp dir for scratch.gdb, if not exists
if not os.path.exists(os.path.join(temp,db)):
    os.makedirs(os.path.join(temp,db))
arcpy.env.scratchWorkspace = os.path.join(temp,db)
arcpy.env.overwriteOutput = True
# define spatial reference
SpatialRef = arcpy.SpatialReference(SpatialRef)
# OUTPUT PROCESS
# Create output gdb if not already existing
if os.path.exists(gdb_path):
    print("Using extant file geodatabase: {}".format(gdb_path))
else:
    arcpy.CreateFileGDB_management(locale_dir, gdb)
    print("File geodatabase created: {}".format(gdb_path))
# copy study region, buffered study region and mb_dwellings to arcgis
arcpy.env.workspace = db_sde_path
arcpy.env.overwriteOutput = True
# features = ['{}'.format(study_region),
#             '{}'.format(buffered_study_region),
#             sample_point_feature,
#             'mb_dwellings']
# for feature in features:
#     print(feature)
#     try:
#         if arcpy.Exists('public.{}'.format(feature)):
#             arcpy.CopyFeatures_management('public.{}'.format(feature), os.path.join(gdb_path,feature))
#         else:
#             print("It seems that the feature doesn't exist...")
#     except:
#         print("... that didn't work ...")
## NOTE:
# The script's copying of resources from PostGIS, via the arcpy spatial database engine (SDE)
# connection, to a file geodatabase (GDB) for network analysis did not work; nor did manually
# attempting to copy from the SDE to the GDB.
# In lieu of this, features were first exported to a 'processing' geopackage,
# and then manually copied to each region's GDB using ArcCatalog.
print("Exporting to GPKG as intermediary step in case of automated copy failure; can then manually copy to the study region gdb")
# note assumptions which must be met that
# 1. ogr2ogr will locate tables in different schemas
# 2. tables in different schemas have unique names within overall database
features = [['public', 'area_linkage'],
            ['public', study_region],
            ['public', buffered_study_region],
            ['public', sample_point_feature],
            [boundary_schema, 'footprints'],
            ['public', 'mb_dwellings']]
processing_gpkg = os.path.join(folderPath,'study_region',locale,'{}_processing.gpkg'.format(locale))
for t in features:
    schema = t[0]
    table = t[1]
    command = (
        ' ogr2ogr -overwrite -f GPKG '
        ' {gpkg} '
        ' PG:"host={host} port=5432 dbname={db} user={user} password={pwd} active_schema={schema}" '
        ' {table} '.format(gpkg = processing_gpkg,
                           schema = schema,
                           host = db_host,
                           db = db,
                           user = db_user,
                           pwd = db_pwd,
                           table = table)
    )
    sp.call(command, shell=True)
print("Done.")
for f in features:
    feature = f[1]
    print(feature)
    try:
        arcpy.CopyFeatures_management('{}/{}'.format(processing_gpkg, feature), os.path.join(gdb_path, feature))
    except Exception:
        print(" - failed")
# output to completion log
script_running_log(script, task, start, locale)
engine.dispose()