Skip to content

Commit f237833

Browse files
committed
refactor create_routes to use fct_monthly_routes
1 parent 3c63378 commit f237833

File tree

1 file changed

+91
-0
lines changed

1 file changed

+91
-0
lines changed

open_data/create_routes_data2.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
"""
2+
Create `ca_transit_routes` to publish to Geoportal.
3+
"""
4+
5+
import geopandas as gpd
6+
import google.auth
7+
import pandas as pd
8+
from calitp_data_analysis import utils
9+
from create_stops_data2 import prep_crosswalk
10+
from update_vars import OPEN_DATA_GCS, analysis_month
11+
12+
# Resolve Google credentials once at import time; `credentials.token` is
# passed as the `token` storage option when reading routes in publish_routes.
# NOTE(review): `credentials.token` may be unset until the credentials are
# refreshed -- confirm the parquet reader copes with that.
credentials, _ = google.auth.default()
13+
14+
15+
def prep_route_shapes(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """
    Collapse the input to one row per operator-shape-route per month.

    Rows sharing (name, shape_id, route_name, month_first_day) have their
    `n_trips` summed, which is intended to fold the day_type rows
    (weekday/Sat/Sun) together. The distinct geometry for each
    (name, shape_id, route_name) is then attached with an inner merge.
    """
    shape_keys = ["name", "shape_id", "route_name"]

    # Monthly trip totals per shape-route; only n_trips survives the
    # aggregation, so geometry has to be merged back in afterwards.
    monthly_trips = (
        gdf.groupby([*shape_keys, "month_first_day"])
        .agg({"n_trips": "sum"})
        .reset_index()
    )

    # One row per distinct (keys, geometry) combination in the input.
    unique_shapes = gdf[[*shape_keys, "geometry"]].drop_duplicates()

    # Open question: should route_name be split to show route_id and route_name?
    return pd.merge(unique_shapes, monthly_trips, on=shape_keys, how="inner")
38+
39+
40+
def rename_route_columns(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """
    Subset to the published columns and apply all column renaming here.

    Returns a new frame holding only `keep_cols`, in that order. The rename
    mapping is currently empty, so column names pass through unchanged.

    Raises KeyError if any column in `keep_cols` is missing from `gdf`.
    """
    keep_cols = [
        # from shapes
        "name",
        "route_name",
        "shape_id",
        "route_type",
        "n_trips",
        "geometry",
        # from trips
        "route_ids",
        # calculate
        # The comma must sit before the trailing comment; otherwise Python
        # concatenates this literal with the next one.
        "on_shn",  # dummy
        "shn_districts",
        "pct_route_on_hwy_all_districts",
        # from bridge
        "analysis_name",
        # schedule_source_record_id (not kept yet)
    ]

    # Placeholder mapping: fill in old_name -> published_name pairs as needed.
    rename_map = {}

    return gdf[keep_cols].rename(columns=rename_map)
66+
67+
68+
def publish_routes(analysis_month: str):
    """
    Assemble the routes table for `analysis_month`.

    Reads that month's routes parquet from OPEN_DATA_GCS, aggregates it with
    prep_route_shapes, and inner-merges the analysis-name crosswalk on `name`.
    Returns the merged result.
    """
    monthly_routes = gpd.read_parquet(
        f"{OPEN_DATA_GCS}routes_{analysis_month}.parquet",
        storage_options={"token": credentials.token},
    )
    routes = monthly_routes.pipe(prep_route_shapes)

    bridge = pd.read_parquet(f"{OPEN_DATA_GCS}bridge_gtfs_analysis_name_x_ntd.parquet")
    crosswalk = bridge.pipe(prep_crosswalk)

    # The crosswalk is renamed so its dataset name column matches `name`
    # on the routes side of the merge.
    crosswalk_by_name = crosswalk.rename(columns={"schedule_gtfs_dataset_name": "name"})
    routes2 = pd.merge(routes, crosswalk_by_name, on=["name"], how="inner")

    # TODO1: is route_id left unparsed inside route_name?
    # TODO2 (add SHN derived columns): use existing function and work it into this
    # TODO3 (standardize columns for Geoportal): pipe through rename_route_columns

    return routes2
85+
86+
87+
if __name__ == "__main__":
    # Build the routes table for the configured month and hand it to the
    # shared geoparquet export helper, targeting the export/ prefix.
    export_name = f"export/ca_transit_routes_{analysis_month}"

    routes = publish_routes(analysis_month)

    utils.geoparquet_gcs_export(routes, OPEN_DATA_GCS, export_name)

0 commit comments

Comments
 (0)