Merge pull request #1839 from cal-itp/detour-stop-viz

tiffanychu90 · web-flow · commit f862eb426f82 · 2025-12-26T14:20:41.000-08:00
potential detours (scheduled stops + vp path) MVP data product
diff --git a/rt_predictions/README_detour_stops.md b/rt_predictions/README_detour_stops.md
@@ -0,0 +1,16 @@
+# README
+
+Use GTFS Real-Time vehicle positions to find where the positions we're capturing did not get near a scheduled stop.
+
+To start, we calculate which, and how many, vehicle positions came within 10 meters, 25 meters, 50 meters, and 100 meters of a stop for that trip.
+
+
+Starting heuristics:
+To get at whether real-time stops were serviced, we need to know whether we captured vehicle positions near the scheduled stop for that trip.
+
+For stops that are eventually detoured or out of service, vehicle positions would likely get near enough (serving surrounding stops) while skipping the stop in question. Also, real-time vehicle positions information must be sufficiently available that day for that stop: out of all the scheduled trips for that stop (from `stop_times`), how many of these trips had vehicle positions data? Let's assume at least 20% of the trips had real-time information.
+
+All the following conditions must be met.
+* zero vehicle positions within 10 meters, 25 meters, and 50 meters of a stop
+* some threshold (50? 100?) of distinct vehicle positions were captured within 100 meters of the stop
+* at least 20% of the scheduled trips had vehicle positions
diff --git a/rt_predictions/detour_stops.ipynb b/rt_predictions/detour_stops.ipynb
@@ -0,0 +1,222 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e53cbaa4-a146-498f-a240-f9800219e4a1",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "%%capture\n",
+    "\n",
+    "import warnings\n",
+    "warnings.filterwarnings(\"ignore\")\n",
+    "\n",
+    "import pandas as pd\n",
+    "import calitp_data_analysis.magics\n",
+    "import prep_vp_detour_stops as prep_vp"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0ca2eebf-74b9-4bc5-af57-c1e2ed2bc307",
+   "metadata": {
+    "tags": [
+     "parameters"
+    ]
+   },
+   "outputs": [],
+   "source": [
+    "# Comment out, this is `parameters` tagged cell\n",
+    "#name = \"Montebello Vehicle Positions\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "679cd4a0-e0ed-450d-bb2d-04c4299100d1",
+   "metadata": {},
+   "source": [
+    "# {name} \n",
+    "\n",
+    "## Potential detour stops \n",
+    "1. Stop has at least 20% of its scheduled trips served by vehicle positions.\n",
+    "2. There are zero vehicle positions within 10, 25, and 50 meters.\n",
+    "3. There are at least 10 distinct vehicle positions within 100 meters."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "16964774-44ad-4fbd-a889-a9e44ff04e73",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Limit stop metrics and vp paths to the operator held in `name`.\n",
+    "stop_filters = [[(\"vp_name\", \"==\", name)]]\n",
+    "path_filters = [[(\"gtfs_dataset_name\", \"==\", name)]]\n",
+    "\n",
+    "stop_gdf = prep_vp.prep_fct_vp_stop_metrics(filters=stop_filters)\n",
+    "vp_path = prep_vp.prep_vp_path(filters=path_filters)\n",
+    "\n",
+    "# The crosswalk is filtered on feed_key, taken from the first stop row.\n",
+    "first_feed_key = stop_gdf.feed_key.iloc[0]\n",
+    "intermediate_df = prep_vp.prep_intermediate_vp_stops_trip_crosswalk(\n",
+    "    filters=[[(\"feed_key\", \"==\", first_feed_key)]]\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "969eb111-e6ab-4257-8610-921bb38a8e43",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Cutoffs handed to the filter: 10 vp near 100m (this is capturing a lot\n",
+    "# of rows now) and at least 20% of scheduled trips with vp.\n",
+    "# NOTE(review): the three zeros presumably are the 10m/25m/50m counts -- confirm.\n",
+    "detour_cutoffs = [10, 0, 0, 0, 0.2]\n",
+    "\n",
+    "gdf = prep_vp.filter_to_potential_detour_stops(\n",
+    "    stop_gdf, intermediate_df, vp_path, detour_cutoffs\n",
+    ")\n",
+    "\n",
+    "print(\"trip_instance_keys with potential detour stops\")\n",
+    "gdf.trip_instance_key.value_counts()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "10a590c6-347c-4eef-a338-7d099a7c4963",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# One row per stop_id / n_vp_near_100m / pct_vp_trips combination, with the\n",
+    "# matching trip_instance_keys gathered into a list.\n",
+    "stop_group_cols = [\"stop_id\", \"n_vp_near_100m\", \"pct_vp_trips\"]\n",
+    "\n",
+    "stop_summary = (\n",
+    "    gdf.groupby(stop_group_cols)\n",
+    "    .agg({\"trip_instance_key\": list})\n",
+    "    .reset_index()\n",
+    ")\n",
+    "\n",
+    "stop_summary"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e06047e8-4f7b-4aa8-ac2e-4a4158b9217a",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "prep_vp.plot_stops_and_exploded_vp(\n",
+    "    gdf\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "aa98ace3-d7f0-4b77-9519-c4beacafcd7b",
+   "metadata": {},
+   "source": [
+    "### Robustness and Sensitivity of Cutoffs\n",
+    "\n",
+    "Results are more sensitive to `n_vp_cutoff` than to `pct_vp_trips`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "896b8716-36d6-4486-87a1-7b5cec17eca9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def adjust_filters(n_vp_cutoff, pct_vp):\n",
+    "    \"\"\"\n",
+    "    Count the rows flagged as potential detour stops for one pair of cutoffs.\n",
+    "\n",
+    "    n_vp_cutoff and pct_vp become the first and last entries of the cutoff\n",
+    "    list given to prep_vp.filter_to_potential_detour_stops; the three\n",
+    "    middle entries stay at 0. Reads stop_gdf, intermediate_df and vp_path\n",
+    "    from the notebook's global scope and returns len() of the filtered result.\n",
+    "    \"\"\"\n",
+    "    cutoffs = [n_vp_cutoff, 0, 0, 0, pct_vp]\n",
+    "    flagged = prep_vp.filter_to_potential_detour_stops(\n",
+    "        stop_gdf, intermediate_df, vp_path, cutoffs\n",
+    "    )\n",
+    "\n",
+    "    return len(flagged)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5c7af90d-2b21-406a-a9c4-a963cd2c4ee3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sweep both cutoffs and record how many rows get flagged for each pair.\n",
+    "sweep_records = []\n",
+    "for vp in [50, 25, 10]:\n",
+    "    for pct in [0.1, 0.15, 0.2, 0.25, 0.3, 0.35]:\n",
+    "        sweep_records.append(\n",
+    "            {\n",
+    "                \"at_least_vp\": vp,\n",
+    "                \"pct_vp\": pct,\n",
+    "                \"n_trips\": adjust_filters(vp, pct),\n",
+    "            }\n",
+    "        )\n",
+    "\n",
+    "# One row per (vp, pct) pair, in loop order.\n",
+    "results_df = pd.DataFrame(sweep_records)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3b532360-091e-4ab9-a734-b56d99c5b6e8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "results_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "16ed6787-cc15-476e-932d-9a5c1ed12514",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.10"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/rt_predictions/prep_vp_detour_stops.py b/rt_predictions/prep_vp_detour_stops.py