Skip to content

Commit f862eb4

Browse files
authored
Merge pull request #1839 from cal-itp/detour-stop-viz
potential detours (scheduled stops + vp path) MVP data product
2 parents cd9be2f + b495f06 commit f862eb4

File tree

3 files changed

+439
-0
lines changed

3 files changed

+439
-0
lines changed
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# README
2+
3+
Use GTFS Real-Time vehicle positions to find where the positions we're capturing did not get near a scheduled stop.
4+
5+
To start, we calculate which and how many vehicle positions got within 10 meters, 25 meters, 50 meters, and 100 meters of a stop for that trip.
6+
7+
8+
Starting heuristics:
9+
To determine whether scheduled stops were actually served in real time, we need to know whether we captured vehicle positions near the scheduled stop for that trip.
10+
11+
For stops that are eventually detoured or out of service, vehicle positions would likely get near enough (serving surrounding stops) while skipping the stop in question. Also, real-time vehicle positions information must be sufficiently available that day for that stop: out of all the scheduled trips for that stop (from `stop_times`), how many of these trips had vehicle positions data? Let's assume at least 20% of the trips had real-time information.
12+
13+
All the following conditions must be met.
14+
* zero vehicle positions within 10 meters, 25 meters, and 50 meters of a stop
15+
* some threshold (50? 100?) of distinct vehicle positions was captured within 100 meters of the stop
16+
* at least 20% of the scheduled trips had vehicle positions

rt_predictions/detour_stops.ipynb

Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"id": "e53cbaa4-a146-498f-a240-f9800219e4a1",
7+
"metadata": {
8+
"tags": []
9+
},
10+
"outputs": [],
11+
"source": [
12+
"%%capture\n",
13+
"\n",
14+
"import warnings\n",
15+
"warnings.filterwarnings(\"ignore\")\n",
16+
"\n",
17+
"import pandas as pd\n",
18+
"import calitp_data_analysis.magics\n",
19+
"import prep_vp_detour_stops as prep_vp"
20+
]
21+
},
22+
{
23+
"cell_type": "code",
24+
"execution_count": null,
25+
"id": "0ca2eebf-74b9-4bc5-af57-c1e2ed2bc307",
26+
"metadata": {
27+
"tags": [
28+
"parameters"
29+
]
30+
},
31+
"outputs": [],
32+
"source": [
33+
"# Comment out, this is `parameters` tagged cell\n",
34+
"#name = \"Montebello Vehicle Positions\""
35+
]
36+
},
37+
{
38+
"cell_type": "markdown",
39+
"id": "679cd4a0-e0ed-450d-bb2d-04c4299100d1",
40+
"metadata": {},
41+
"source": [
42+
"# {name} \n",
43+
"\n",
44+
"## Potential detour stops \n",
45+
"1. Stop has at least 20% of its scheduled trips served by vehicle positions.\n",
46+
"2. There are zero vehicle positions within 10, 25, and 50 meters.\n",
47+
"3. There are at least 10 distinct vehicle positions within 100 meters."
48+
]
49+
},
50+
{
51+
"cell_type": "code",
52+
"execution_count": null,
53+
"id": "16964774-44ad-4fbd-a889-a9e44ff04e73",
54+
"metadata": {
55+
"tags": []
56+
},
57+
"outputs": [],
58+
"source": [
59+
"stop_gdf = prep_vp.prep_fct_vp_stop_metrics(\n",
60+
" filters = [[(\"vp_name\", \"==\", name)]]\n",
61+
")\n",
62+
"vp_path = prep_vp.prep_vp_path(\n",
63+
" filters = [[(\"gtfs_dataset_name\", \"==\", name)]]\n",
64+
") \n",
65+
"\n",
66+
"intermediate_df = prep_vp.prep_intermediate_vp_stops_trip_crosswalk(\n",
67+
" filters = [[(\"feed_key\", \"==\", stop_gdf.feed_key.iloc[0])]]\n",
68+
")"
69+
]
70+
},
71+
{
72+
"cell_type": "code",
73+
"execution_count": null,
74+
"id": "969eb111-e6ab-4257-8610-921bb38a8e43",
75+
"metadata": {
76+
"tags": []
77+
},
78+
"outputs": [],
79+
"source": [
80+
"# 10 vp near 100m, this is capturing a lot of rows now\n",
81+
"gdf = prep_vp.filter_to_potential_detour_stops(\n",
82+
" stop_gdf,\n",
83+
" intermediate_df,\n",
84+
" vp_path,\n",
85+
" [10, 0, 0, 0, 0.2]\n",
86+
")\n",
87+
"\n",
88+
"print(\"trip_instance_keys with potential detour stops\")\n",
89+
"gdf.trip_instance_key.value_counts()"
90+
]
91+
},
92+
{
93+
"cell_type": "code",
94+
"execution_count": null,
95+
"id": "10a590c6-347c-4eef-a338-7d099a7c4963",
96+
"metadata": {
97+
"tags": []
98+
},
99+
"outputs": [],
100+
"source": [
101+
"stop_summary = (\n",
102+
" gdf\n",
103+
" .groupby([\"stop_id\", \"n_vp_near_100m\", \"pct_vp_trips\"])\n",
104+
" .agg({\"trip_instance_key\": lambda x: list(x)})\n",
105+
" .reset_index()\n",
106+
")\n",
107+
"\n",
108+
"stop_summary"
109+
]
110+
},
111+
{
112+
"cell_type": "code",
113+
"execution_count": null,
114+
"id": "e06047e8-4f7b-4aa8-ac2e-4a4158b9217a",
115+
"metadata": {
116+
"tags": []
117+
},
118+
"outputs": [],
119+
"source": [
120+
"prep_vp.plot_stops_and_exploded_vp(\n",
121+
" gdf\n",
122+
")"
123+
]
124+
},
125+
{
126+
"cell_type": "markdown",
127+
"id": "aa98ace3-d7f0-4b77-9519-c4beacafcd7b",
128+
"metadata": {},
129+
"source": [
130+
"### Robustness and Sensitivity of Cutoffs\n",
131+
"\n",
132+
"More sensitive to what n_vp_cutoff is, rather than pct_vp_trips"
133+
]
134+
},
135+
{
136+
"cell_type": "code",
137+
"execution_count": null,
138+
"id": "896b8716-36d6-4486-87a1-7b5cec17eca9",
139+
"metadata": {},
140+
"outputs": [],
141+
"source": [
142+
"def adjust_filters(n_vp_cutoff, pct_vp):\n",
143+
" test = prep_vp.filter_to_potential_detour_stops(\n",
144+
" stop_gdf,\n",
145+
" intermediate_df,\n",
146+
" vp_path,\n",
147+
" [n_vp_cutoff, 0, 0, 0, pct_vp]\n",
148+
" )\n",
149+
"\n",
150+
" results = len(test)\n",
151+
" \n",
152+
" return results"
153+
]
154+
},
155+
{
156+
"cell_type": "code",
157+
"execution_count": null,
158+
"id": "5c7af90d-2b21-406a-a9c4-a963cd2c4ee3",
159+
"metadata": {},
160+
"outputs": [],
161+
"source": [
162+
"vp_series = []\n",
163+
"pct_series = []\n",
164+
"n_trips_series = []\n",
165+
"for vp in [50, 25, 10]:\n",
166+
" for pct in [0.1, 0.15, 0.2, 0.25, 0.3, 0.35]:\n",
167+
" \n",
168+
" results = adjust_filters(vp, pct)\n",
169+
" vp_series.append(vp)\n",
170+
" pct_series.append(pct)\n",
171+
" n_trips_series.append(results)\n",
172+
"\n",
173+
"\n",
174+
"results_df = pd.DataFrame()\n",
175+
"results_df = results_df.assign(\n",
176+
" at_least_vp = vp_series,\n",
177+
" pct_vp = pct_series,\n",
178+
" n_trips = n_trips_series\n",
179+
")"
180+
]
181+
},
182+
{
183+
"cell_type": "code",
184+
"execution_count": null,
185+
"id": "3b532360-091e-4ab9-a734-b56d99c5b6e8",
186+
"metadata": {},
187+
"outputs": [],
188+
"source": [
189+
"results_df"
190+
]
191+
},
192+
{
193+
"cell_type": "code",
194+
"execution_count": null,
195+
"id": "16ed6787-cc15-476e-932d-9a5c1ed12514",
196+
"metadata": {},
197+
"outputs": [],
198+
"source": []
199+
}
200+
],
201+
"metadata": {
202+
"kernelspec": {
203+
"display_name": "Python 3 (ipykernel)",
204+
"language": "python",
205+
"name": "python3"
206+
},
207+
"language_info": {
208+
"codemirror_mode": {
209+
"name": "ipython",
210+
"version": 3
211+
},
212+
"file_extension": ".py",
213+
"mimetype": "text/x-python",
214+
"name": "python",
215+
"nbconvert_exporter": "python",
216+
"pygments_lexer": "ipython3",
217+
"version": "3.11.10"
218+
}
219+
},
220+
"nbformat": 4,
221+
"nbformat_minor": 5
222+
}

0 commit comments

Comments
 (0)