diff --git a/README.md b/README.md index 7919d68..355cca4 100644 --- a/README.md +++ b/README.md @@ -81,6 +81,11 @@ specified in the following sections. - `map_bubble`: User can view the bubble map in the Map page. - `map_trip_lines`: User can view the trip lines map in the Map page. +### Segment Trip Time Page +- `segment_trip_time`: User can view this page. (default `true`) +- `segment_trip_time_full_trips`: User can see the table containing non-aggregated data (default `true`) +- `segment_trip_time_min_users`: Minimal number of distinct users in data required to display anything (value is a number, default `0`). + ### Push Notification Page - `push_send`: User can send push notifications in the Push Notification page. diff --git a/app_sidebar_collapsible.py b/app_sidebar_collapsible.py index 64077db..a3abd9d 100644 --- a/app_sidebar_collapsible.py +++ b/app_sidebar_collapsible.py @@ -99,6 +99,15 @@ href=dash.get_relative_path("/map"), active="exact", ), + dbc.NavLink( + [ + html.I(className="fas fa-solid fa-hourglass me-2"), + html.Span("Segment trip time"), + ], + href=dash.get_relative_path("/segment_trip_time"), + active="exact", + style={'display': 'block' if has_permission('segment_trip_time') else 'none'}, + ), dbc.NavLink( [ html.I(className="fas fa-solid fa-envelope-open-text me-2"), @@ -186,6 +195,7 @@ def update_store_uuids(start_date, end_date): return store +# Note: this triggers twice on load, not great with a slow db @app.callback( Output("store-trips", "data"), Input('date-picker', 'start_date'), diff --git a/pages/home.py b/pages/home.py index 3aee59a..1b27f5d 100644 --- a/pages/home.py +++ b/pages/home.py @@ -64,7 +64,7 @@ def compute_sign_up_trend(uuid_df): def compute_trips_trend(trips_df, date_col): - trips_df[date_col] = pd.to_datetime(trips_df[date_col], utc=True) + trips_df[date_col] = pd.to_datetime(trips_df[date_col], utc=True, format='ISO8601') trips_df[date_col] = pd.DatetimeIndex(trips_df[date_col]).date res_df = ( trips_df diff --git 
a/pages/segment_trip_time.py b/pages/segment_trip_time.py new file mode 100644 index 0000000..e9518bc --- /dev/null +++ b/pages/segment_trip_time.py @@ -0,0 +1,284 @@ +from dash import dcc, html, Input, Output, State, callback, register_page, dash_table +import dash_bootstrap_components as dbc +import dash_leaflet as dl +import pandas as pd + +import logging +import json + +from utils.permissions import has_permission, permissions +from utils import db_utils + +register_page(__name__, path="/segment_trip_time") + +intro = """ +## Segment average trip time +This page displays some statistics on average trip duration between two selected zones. + +### Usage +Using the polygon or square tools on the maps' menu, draw the start (left map) and end (right map) zones to consider. + +Data will then be fetched for trips crossing the start zone and then the end zone. + +Here are some tips on how to draw zones: +* Zones shouldn't cover more than one parallel road; otherwise, it is unclear which path the user took. +* A bigger zone will give more results, at the cost of lower accuracy in trip durations (the start point could be anywhere in the zone). +* For completeness, zone length should somewhat match the distance a vehicle can cross at the maximum allowed speed in 30 seconds (sample rate). +* A smaller zone will give more accurate time results, but the number of trips might be too low to be significant. +* Zones can be moved and edited using the Edit layer menu, and they can be deleted with the Delete layer button. +* Please be advised that only the last added zone will be considered on each map. It is thus advised to delete existing zones before creating new ones. +""" + + +not_enough_data_message = f""" +Not enough segments could be found between endpoints. This means that the number of recorded trips going from start to end point is too low.
+* There could be data, but on an insufficient number of users, breaking anonymity (minimum number of users is currently set to {permissions.get('segment_trip_time_min_users', 0)}) +* You could try to increase the zone sizes, or choose different start and end points. + """ + +initial_maps_center = [32.7, -96.8] +initial_maps_zoom = 5 +layout = html.Div( + [ + dcc.Store(id='link-trip-time-start', data=json.dumps({"features": []})), + dcc.Store(id='link-trip-time-end', data=json.dumps({"features": []})), + dcc.Markdown(intro), + dbc.Row( + [ + dbc.Col( + [ + html.H4('Start zone selection'), + dl.Map( + [ + dl.TileLayer(), + dl.FeatureGroup([ + dl.EditControl( + id="stt-edit-control-start", + draw=dict(circle=False, marker=False, polyline=False, circlemarker=False) + ) + ]) + ], + #[dl.TileLayer(), dl.LayerGroup(id='stt-trip-layer-start')], + id='stt-trip-map-start', + style={'height': '500px'}, + center=initial_maps_center, + zoom=initial_maps_zoom + ), + ] + ), + dbc.Col( + [ + html.H4('End zone selection'), + dl.Map( + [ + dl.TileLayer(), + dl.FeatureGroup([ + dl.EditControl( + id="stt-edit-control-end", + draw=dict(circle=False, marker=False, polyline=False, circlemarker=False) + ) + ]) + ], + id='stt-trip-map-end', + style={'height': '500px'}, + center=initial_maps_center, + zoom=initial_maps_zoom + ), + ] + ), + ] + ), + dbc.Row( + html.Div(id='message'), + ), + ] +) + + + +@callback( + Output('link-trip-time-start', 'data'), + Input('stt-edit-control-start', 'geojson'), + prevent_initial_call=True, +) +def map_start_draw(geojson): + return json.dumps(geojson) + +@callback( + Output('link-trip-time-end', 'data'), + Input('stt-edit-control-end', 'geojson'), + prevent_initial_call=True, +) +def map_end_draw(geojson): + return json.dumps(geojson) + + + +def format_duration_df(df, time_column_name='Time sample'): + df['Median time (minutes)'] = df.duration / 60 # convert seconds in minutes + df = df.reset_index().rename( + columns={ + 'start_fmt_time':
time_column_name, + 'duration': 'Median time (seconds)', + 'section': 'Count', + 'mode': 'Mode', + } + ) + if time_column_name in df: + if 'Mode' in df: + df = df[ + [ + 'Mode', + time_column_name, + 'Median time (seconds)', + 'Median time (minutes)', + 'Count', + ] + ] # reorder cols + else: + df = df[ + [ + time_column_name, + 'Median time (seconds)', + 'Median time (minutes)', + 'Count', + ] + ] # reorder cols + else: + df = df[ + ['Mode', 'Median time (seconds)', 'Median time (minutes)', 'Count'] + ] # reorder cols + df = df.to_dict('records') # Format for display + return df + + +@callback( + Output('message', 'children'), + Input('link-trip-time-start', 'data'), + Input('link-trip-time-end', 'data'), + prevent_initial_call=True, +) +def generate_content_on_endpoints_change(link_trip_time_start_str, link_trip_time_end_str): + link_trip_time_start = json.loads(link_trip_time_start_str) + link_trip_time_end = json.loads(link_trip_time_end_str) + if len(link_trip_time_end["features"]) == 0 or len(link_trip_time_start["features"]) == 0: + return '' + # logging.debug("link_trip_time_start: " + str(link_trip_time_start)) + # logging.debug("link_trip_time_end: " + str(link_trip_time_end)) + + # Warning: This is a database call, look here if there is a performance bottleneck. + # From initial tests, this seems to be performing well, without the need to do geoqueries in memory + df = db_utils.query_segments_crossing_endpoints( + link_trip_time_start["features"][len(link_trip_time_start["features"])-1], + link_trip_time_end["features"][len(link_trip_time_end["features"])-1], + ) + total_nb_trips = df.shape[0] + if total_nb_trips > 0: + # Warning: Another db call here. + # In theory, we could load all inferred_section modes in memory at start time, instead of fetching it every time + # However, when testing it, the operation is quite heavy on the db and on RAM. + # I opted for querying only sections we're interested in, every time.
Page load is still decent, especially when the number of sections is low. + mode_by_section_id = db_utils.query_inferred_sections_modes( + df[['section', 'user_id']].to_dict('records') + ) + df['mode'] = df['section'].apply( + lambda section_id: mode_by_section_id[str(section_id)].name + ) + median_trip_time = df['duration'].median() + times = pd.to_datetime(df['start_fmt_time'], errors='coerce', utc=True) + duration_per_hour = format_duration_df( + df.groupby(times.dt.hour).agg({'duration': 'median', 'section': 'count'}), + time_column_name='Hour', + ) + duration_per_mode = format_duration_df( + df.groupby('mode').agg({'duration': 'median', 'section': 'count'}) + ) + duration_per_mode_per_hour = format_duration_df( + df.groupby(['mode', times.dt.hour]).agg( + {'duration': 'median', 'section': 'count'} + ), + time_column_name='Hour', + ) + duration_per_mode_per_month = format_duration_df( + df.groupby(['mode', times.dt.month]).agg( + {'duration': 'median', 'section': 'count'} + ), + time_column_name='Month', + ) + return dbc.Row( + [ + dbc.Col( + [ + html.Br(), + html.H3('Results'), + html.Div( + f'Computed median segment duration is {median_trip_time} seconds, {total_nb_trips} trips considered' + ), + html.Br(), + html.H4('Median segment duration by mode of transport'), + dash_table.DataTable( + id='duration_per_mode', + data=duration_per_mode, + sort_action='native', + sort_mode='multi', + export_format='csv', + ), + html.Br(), + html.H4( + 'Median segment duration by hour of the day (UTC)' + ), + dash_table.DataTable( + id='duration_per_hour', + data=duration_per_hour, + sort_action='native', + sort_mode='multi', + export_format='csv', + ), + html.Br(), + html.H4( + 'Median segment duration by mode and hour of the day (UTC)' + ), + dash_table.DataTable( + id='duration_per_mode_per_hour', + data=duration_per_mode_per_hour, + sort_action='native', + sort_mode='multi', + export_format='csv', + ), + html.Br(), + html.H4('Median segment duration by mode and month'), +
dash_table.DataTable( + id='duration_per_mode_per_month', + data=duration_per_mode_per_month, + sort_action='native', + sort_mode='multi', + export_format='csv', + ), + ], + xs=6, + ), + dbc.Col( + [ + html.Br(), + html.H3('Trips Data'), + dash_table.DataTable( + id='trips_data', + data=df[ + ['start_fmt_time', 'end_fmt_time', 'mode', 'duration'] + ].to_dict('records'), + page_size=15, + sort_action='native', + sort_mode='multi', + export_format='csv', + ), + ], + xs=6, + style={ + 'display': 'block' + if has_permission('segment_trip_time_full_trips') + else 'none' + }, + ), + ] + ) + return [html.H3('Results'), dcc.Markdown(not_enough_data_message)] diff --git a/requirements.txt b/requirements.txt index 6f1bce8..61cf274 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,3 +14,4 @@ python-jose==3.3.0 flask==2.2.5 flask-talisman==1.0.0 dash_auth==2.0.0 +dash-leaflet==1.0.7 diff --git a/utils/db_utils.py b/utils/db_utils.py index 1500633..96a359c 100644 --- a/utils/db_utils.py +++ b/utils/db_utils.py @@ -9,7 +9,11 @@ import emission.core.get_database as edb import emission.storage.timeseries.abstract_timeseries as esta +import emission.storage.timeseries.aggregate_timeseries as estag import emission.storage.timeseries.timequery as estt +import emission.storage.timeseries.geoquery as estg +import emission.storage.decorations.section_queries as esds +import emission.core.wrapper.modeprediction as ecwm from utils import constants from utils import permissions as perm_utils @@ -47,7 +51,7 @@ def query_uuids(start_date, end_date): return df def query_confirmed_trips(start_date, end_date): - start_ts, end_ts = None, datetime.max.timestamp() + start_ts, end_ts = None, datetime.max.replace(tzinfo=timezone.utc).timestamp() if start_date is not None: start_ts = datetime.combine(start_date, datetime.min.time()).timestamp() @@ -158,4 +162,40 @@ def add_user_stats(user_data): if last_call != -1: user['last_call'] = arrow.get(last_call).format(time_format) - return 
user_data \ No newline at end of file + return user_data + +def query_segments_crossing_endpoints(poly_region_start, poly_region_end): + agg_ts = estag.AggregateTimeSeries().get_aggregate_time_series() + + locs_matching_start = agg_ts.get_data_df("analysis/recreated_location", geo_query = estg.GeoQuery(['data.loc'], poly_region_start)) + locs_matching_start = locs_matching_start.drop_duplicates(subset=['section']) + if locs_matching_start.empty: + return locs_matching_start + + locs_matching_end = agg_ts.get_data_df("analysis/recreated_location", geo_query = estg.GeoQuery(['data.loc'], poly_region_end)) + locs_matching_end = locs_matching_end.drop_duplicates(subset=['section']) + if locs_matching_end.empty: + return locs_matching_end + + merged = locs_matching_start.merge(locs_matching_end, how='outer', on=['section']) + filtered = merged.loc[merged['idx_x']