-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp_funcs.R
462 lines (355 loc) · 18.1 KB
/
app_funcs.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
# Sampo Vesanen's Master's thesis
# Helsinki Region Travel Time comparison application functions
# "Parking of private cars and spatial accessibility in Helsinki Capital Region"
# by Sampo Vesanen
# 15.10.2020
# infix operator for sequential validation of input$ykrid
`%then%` <- shiny:::`%OR%`
# These custom infix operators work in the manner of += and ++ in C++/C# and
# Java. Save some space and increase readability.
`%+=%` = function(e1, e2) eval.parent(substitute(e1 <- e1 + e2))
`%-=%` = function(e1, e2) eval.parent(substitute(e1 <- e1 - e2))
ReadAndClean <- function(fp) {
# Use this function to read a html file into R and clean it of tabulator
# spaces and HTML comments.
result <-
paste(readLines(fp, encoding = "UTF-8"), collapse = "") %>%
gsub("[\t]", "", .) %>%
gsub(" <!--(.*?)-->", "", .)
return(result)
}
CreateEqualColumn <- function(inputDf, datacol, newcolname, classes_n = 11,
lockedclasses) {
# This function is a variant of CreateJenksColumn() in thesis_data_vis.R.
#
# Use this function to create a column in fortified dataframe that can be
# used to portray equal intervals colouring in a ggplot map. Dplyr note: to
# enable parameters as column names in dplyr, apply !! and := for the left
# side and for the right side !!rlang::sym().
#
# Provide possibility to select how classes breaks are created: create class
# breaks from a column in parameter dataframe "inputDf", or from an external
# vector of type double.
#
# Adapted from:
# https://medium.com/@traffordDataLab/lets-make-a-map-in-r-7bd1d9366098
if(missing(lockedclasses)) {
# Normal function behaviour
# Suppress n classes warnings, problem probably handled
classes <- suppressWarnings(
classInt::classIntervals(inputDf[, datacol], n = classes_n, style = "equal"))
} else {
# This condition is met when the parameter "lockedclasses" is detected. This
# condition works with the values outputted by locked_class_breaks_params()
# and locked_class_breaks_all().
classes <- suppressWarnings(
classInt::classIntervals(lockedclasses, n = classes_n, style = "equal"))
}
# Make note that breaks are rounded to two decimal places. In some cases the
# rounding would create non-unique breaks and this breaks the map view for the
# active symbology. If two decimals is not enough, use three.
result <-
inputDf %>%
dplyr::mutate(!!newcolname := cut(!!rlang::sym(datacol),
unique(round(classes$brks, 5)),
include.lowest = TRUE))
return(result)
}
AddLevelCounts <- function(thisDf, datacol, newcolname, classes_n,
labels_to_input, lockedclasses) {
# Use this function to calculate how many times a legend key color appears
# in the comparison app and add those values to the ggplot legend labels.
# This function is non-optimal as half of this functionality is carried out
# in CreateEqualColumn(), but for the sake of clarity keep them apart. For
# example, interval calculation with classInt could be moved to its own
# function, as it is identical with the one in CreateEqualColumn()
#
# We use an if statement to detect the state of locked classes breaks.
if(missing(lockedclasses)) {
# Normal function behaviour
intervals <- suppressWarnings(
classInt::classIntervals(thisDf[, datacol], n = classes_n, style = "equal"))
} else {
# This condition is met when the parameter "lockedclasses" is detected. This
# condition works with the values outputted by locked_class_breaks_params()
# and locked_class_breaks_all().
intervals <- suppressWarnings(
classInt::classIntervals(lockedclasses, n = classes_n, style = "equal"))
}
# Breaks are rounded to two decimal places
interv_codes <- cut(thisDf[, datacol],
round(unique(intervals$brks), 5),
include.lowest = TRUE)
input_levels <- levels(thisDf[, newcolname])
# Create level occurrence count in this clunky way.
# In left_join NAs can be preserved if the new dataframe has NA present
# like this: data.frame(brks = c(input_levels, NA))
levels_n <-
dplyr::select(thisDf, zipcode) %>%
dplyr::mutate(brks = interv_codes) %>%
dplyr::group_by(brks) %>%
dplyr::summarise(n = dplyr::n_distinct(zipcode)) %>%
dplyr::left_join(data.frame(brks = input_levels), .)
levels_n$n[is.na(levels_n$n)] <- 0
result <- paste0(labels_to_input, " [", levels_n$n, "]")
return(result)
}
GetLegendName <- function(val, originzip) {
# Appropriately name the plot legend. Use parts of the input string to
# figure out what to print. Use strwrap() to automatically add newlines
# to long strings. Use originzip to add information about origin ykr_id.
wherefrom <- paste0("origin ", originzip[, "zipcode"][1])
if (grepl("ttm18_", val)) {
datasource <- "TTM18 data"
} else if (grepl("msc_", val)) {
datasource <- "Thesis data"
} else if (grepl("compare_", val)) {
datasource <- "Comparison of the data sources (thesis data / TTM18 data)"
}
# Automatically add newlines to the long datasource string
datasource <-
strwrap(datasource, 22, prefix = "\n") %>%
paste(., collapse = "")
# thisUnit is minutes, except when datasource is "compare" or
# description "_pct"
if (grepl("compare_", val) || grepl("_pct", val)) {
thisUnit <- "(unit percent, where 1.00 = 100 %)"
} else {
thisUnit <- "(unit minutes)"
}
if (grepl("_avg", val)) {
description <-
"The mean total travel time to the destination postal code area"
} else if (grepl("_drivetime", val)) {
description <-
"The duration of the driving segment in the total travel chain, to the destination postal code area"
} else if (grepl("_pct", val)) {
description <-
"The percentage of SFP and WTD durations in the mean total travel time to the destination postal code area"
} else if (grepl("_sfp", val)) {
description <-
"The mean time consumed in searching for parking in the destination postal code area"
} else if (grepl("_wtd", val)) {
description <-
"The mean duration to walk from one's parked car to the final destination of the travel chain in the destination postal code area"
}
description <-
strwrap(description, 22, prefix = "\n") %>%
paste(., collapse = "")
if (grepl("_m_", val)) {
timeofday <- paste("during midday traffic", thisUnit)
} else if (grepl("_r_", val)) {
timeofday <- paste("during rush hour traffic", thisUnit)
} else if (grepl("_all_", val)) {
timeofday <- paste(
"using an average of all temporal choices",
thisUnit)
}
timeofday <-
strwrap(timeofday, 22, prefix = "\n") %>%
paste(., collapse = "")
result <- paste(datasource, ",\n",
wherefrom, ":\n",
description, ",\n",
timeofday, sep = "")
return(result)
}
GetSymbologyHelp <- function(val) {
# Get helpful text for the meaning of the cryptically named columns.
# Functionally close to GetLegendName().
if (grepl("ttm18_", val)) {
datasource <- "<i class='icon database'></i>TTM18 data"
} else if (grepl("msc_", val)) {
datasource <- "<i class='icon poll'></i>Thesis data"
} else if (grepl("compare_", val)) {
datasource <- "<i class='icon exchange-alt'></i>Comparison of the data sources (thesis data / TTM18 data)"
}
# thisUnit is minutes, except when datasource is "compare" or
# description "_pct"
if (grepl("compare_", val) || grepl("_pct", val)) {
thisUnit <- "(unit percent, where 1 = 100 %)"
} else {
thisUnit <- "(unit minutes)"
}
# Describe the data. The p tag which defines a vertical bar in the final
# product needs to be defined here. The tag opening is pasted together with
# the tag closure other outputs below.
if (grepl("_avg", val)) {
description <-
"The mean total travel time to the destination postal code area"
ptag <- "<p class='line lred'>"
} else if (grepl("_drivetime", val)) {
description <-
"The duration of the driving segment in the total travel chain to the destination postal code area"
ptag <- "<p class='line lcyan'>"
} else if (grepl("_pct", val)) {
description <-
"The percentage of <i>searching for parking</i> and <i>walking to the destination</i> durations in the mean total travel time"
ptag <- "<p class='line lcyan'>"
} else if (grepl("_sfp", val)) {
description <-
"The mean time consumed in <i>searching for parking</i> in the destination postal code area"
ptag <- "<p class='line lyellow'>"
} else if (grepl("_wtd", val)) {
description <-
"The mean duration to <i>walk from one's parked car to the final destination</i> of the travel chain in the destination postal code area"
ptag <- "<p class='line lyellow'>"
}
# Time of day
if (grepl("_m_", val)) {
timeofday <- paste("during midday traffic", thisUnit)
} else if (grepl("_r_", val)) {
timeofday <- paste("during rush hour traffic", thisUnit)
} else if (grepl("_all_", val)) {
timeofday <- paste(
"using an average of all temporal choices",
thisUnit)
}
result <- paste(ptag, datasource, ":<br>", description, ",<br>", timeofday,
"</p>", sep = "")
return(result)
}
GetCentroids <- function(fortified, unique_id, nominator) {
# Annotate desired feature in ggplot. Adapted from:
# https://stackoverflow.com/a/28963405/9455395
# Insert a fortified Spatial object and the column name you want to use as
# the label. With parameters "unique_id" and "nominator" a few functionalities
# can be attained:
# Unique_id tells what column to use as the unique identifier. This can be
# for example "kunta": four rows with coordinates and labels are created.
# If used "zipcode", 167 rows are created with coordinates and labels.
# "nominator" allocates the labels. "nominator" must contain the same amount
# of unique values, or more, than "unique_id", for example combination
# unique_id = "kunta" and nominator = "zipcode" will create broken results.
# unique_id will be stored as rowname for possible later use when row
# identification is needed.
# Examples:
# unique_id = "kunta" and nominator = "kunta":
# --- 4 rows, centroids in the middle of municipalities, labels by "kunta"
# unique_id = "zipcode" and nominator = "parktime_median":
# --- 167 rows, centroids in the middle of zipcodes, labels by "parktime_median"
# Change R options, otherwise as.numeric() loses some important digits
options(digits = 15)
result <-
do.call("rbind.data.frame",
by(fortified,
fortified[, unique_id],
function(x) {c(sp::Polygon(x[c("long", "lat")])@labpt,
x %>%
dplyr::group_by(!!rlang::sym(nominator)) %>%
dplyr::summarise() %>%
as.vector())
})) %>%
setNames(., c("long", "lat", "label"))
# Change long and lat to numeric vectors, if they already aren't
if(is.factor(result$long) == TRUE) {
result$long <- as.numeric(levels(result$long))[result$long]
}
if (is.factor(result$lat) == TRUE) {
result$lat <- as.numeric(levels(result$lat))[result$lat]
}
return(result)
}
str_start <- function (string, n) {
# Get an amount of n characters from the beginning of a string.
substr(string, 1, n)
}
str_end <- function (string, n) {
# Get an amount of n characters from the end of a string.
substr(string, nchar(string) - (n - 1), nchar(string))
}
columnFinder <- function(columnname, compare_vector) {
# Insert a column name that exists in an output of the reactive object of
# thisTTM() and receive all three columns of the same type. To be used with
# locked class breaks option "params". In this script, "compare_vector" is
# names(vis_cols).
bool <-
grepl(str_start(columnname, 3), compare_vector) &
grepl(str_end(columnname, 3), compare_vector)
result <- compare_vector[bool]
return(result)
}
Reactive_TTM_fetch <- function(current_fst, thesisdata, postal_f) {
# Use this function to calculate all necessary columns for the comparison
# application to work. "current_fst" represents one fst file on disk,
# "thesisdata" is the thesis survey data, and postal_f is a fortified version
# of postal code area polygon data (this dataframe sets the output function
# nrow of 14316)
#
# In the comparison app this function is used
# 1) to get all possible column values to calculate minimums and maximums for
# all columns (locked class breaks)
# 2) Any time user requests a new origin postal code area, this function is
# run
result <-
fst::read_fst(current_fst, as.data.table = TRUE) %>%
dplyr::left_join(., thesisdata, by = "zipcode") %>%
dplyr::left_join(postal_f, ., by = "zipcode") %>%
# Generate drivetime (min) and pct (%) columns for TTM18 data.
# NB! Use a mean of r, m, and sl for all the columns "sl".
dplyr::mutate(ttm_all_avg = rowMeans(select(., c(ttm_r_avg, ttm_m_avg, ttm_sl_avg))),
ttm_r_drivetime = ttm_r_avg - ttm_sfp - ttm_wtd,
ttm_m_drivetime = ttm_m_avg - ttm_sfp - ttm_wtd,
ttm_all_drivetime = ttm_all_avg - ttm_sfp - ttm_wtd,
ttm_r_pct = (ttm_sfp + ttm_wtd) / ttm_r_avg,
ttm_m_pct = (ttm_sfp + ttm_wtd) / ttm_m_avg,
ttm_all_pct = (ttm_sfp + ttm_wtd) / ttm_all_avg) %>%
dplyr::mutate_at(vars(ttm_all_avg, ttm_all_drivetime, ttm_r_pct, ttm_m_pct, ttm_all_pct),
~round(., 2)) %>%
# If zipcode is NA, then convert all calculated data to NA as well
dplyr::mutate(ttm_r_avg = case_when(is.na(zipcode) ~ NA_real_, TRUE ~ ttm_r_avg),
ttm_m_avg = case_when(is.na(zipcode) ~ NA_real_, TRUE ~ ttm_m_avg),
ttm_sl_avg = case_when(is.na(zipcode) ~ NA_real_, TRUE ~ ttm_sl_avg),
ttm_all_avg = case_when(is.na(zipcode) ~ NA_real_, TRUE ~ ttm_all_avg),
ttm_sfp = case_when(is.na(zipcode) ~ NA_real_, TRUE ~ ttm_sfp),
ttm_wtd = case_when(is.na(zipcode) ~ NA_real_, TRUE ~ ttm_wtd),
ttm_r_drivetime = case_when(is.na(zipcode) ~ NA_real_, TRUE ~ ttm_r_drivetime),
ttm_m_drivetime = case_when(is.na(zipcode) ~ NA_real_, TRUE ~ ttm_m_drivetime),
ttm_all_drivetime = case_when(is.na(zipcode) ~ NA_real_, TRUE ~ ttm_all_drivetime),
ttm_r_pct = case_when(is.na(zipcode) ~ NA_real_, TRUE ~ ttm_r_pct),
ttm_m_pct = case_when(is.na(zipcode) ~ NA_real_, TRUE ~ ttm_m_pct),
ttm_all_pct = case_when(is.na(zipcode) ~ NA_real_, TRUE ~ ttm_all_pct)) %>%
dplyr::mutate_at(vars(ttm_r_avg, ttm_m_avg, ttm_all_avg),
~dplyr::na_if(., -1)) %>%
# Add the rest of thesis_ columns. with if_else() change possible NA's to
# zeros so that calculations are not rendered NA
dplyr::mutate(thesis_r_drivetime = ttm_r_drivetime -
if_else(is.na(thesis_r_sfp), 0, thesis_r_sfp) -
if_else(is.na(thesis_r_wtd), 0, thesis_r_wtd),
thesis_m_drivetime = ttm_m_drivetime -
if_else(is.na(thesis_m_sfp), 0, thesis_m_sfp) -
if_else(is.na(thesis_m_wtd), 0, thesis_m_wtd),
thesis_all_drivetime = ttm_all_drivetime -
if_else(is.na(thesis_all_sfp), 0, thesis_all_sfp) -
if_else(is.na(thesis_all_wtd), 0, thesis_all_wtd),
# Forgo if_else() here so that if thesis results sfp or wtd
# are missing for a postal code, pct value also gets NA
thesis_r_pct = (thesis_r_sfp + thesis_r_wtd) / ttm_r_drivetime,
thesis_m_pct = (thesis_m_sfp + thesis_m_wtd) / ttm_m_drivetime,
thesis_all_pct = (
if_else(is.na(thesis_all_sfp), 0, thesis_all_sfp) +
if_else(is.na(thesis_all_wtd), 0, thesis_all_wtd)) / ttm_all_drivetime) %>%
dplyr::mutate_at(vars(thesis_r_drivetime, thesis_m_drivetime,
thesis_all_drivetime, thesis_r_pct, thesis_m_pct,
thesis_all_pct),
~round(., 2)) %>%
# Add TTM18/thesis comparison columns
dplyr::mutate(comp_r_sfp = thesis_r_sfp / ttm_sfp,
comp_m_sfp = thesis_m_sfp / ttm_sfp,
comp_all_sfp = thesis_all_sfp / ttm_sfp,
comp_r_wtd = thesis_r_wtd / ttm_wtd,
comp_m_wtd = thesis_m_wtd / ttm_wtd,
comp_all_wtd = thesis_all_wtd / ttm_wtd,
comp_r_drivetime = thesis_r_drivetime / ttm_r_drivetime,
comp_m_drivetime = thesis_m_drivetime / ttm_m_drivetime,
comp_all_drivetime = thesis_all_drivetime / ttm_all_drivetime,
comp_r_pct = thesis_r_pct / ttm_r_pct,
comp_m_pct = thesis_m_pct / ttm_m_pct,
comp_all_pct = thesis_all_pct / ttm_all_pct) %>%
dplyr::mutate_at(vars(comp_r_sfp, comp_m_sfp, comp_all_sfp, comp_r_wtd,
comp_m_wtd, comp_all_wtd, comp_r_drivetime,
comp_m_drivetime, comp_all_drivetime, comp_r_pct,
comp_m_pct, comp_all_pct),
~round(., 2))
return(result)
}