Skip to content

Commit cdc8fbc

Browse files
committed
trying to attach caltrans district
1 parent f862eb4 commit cdc8fbc

File tree

2 files changed

+2729
-15
lines changed

2 files changed

+2729
-15
lines changed

gtfs_digest/31_download_prep_data.ipynb

Lines changed: 95 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@
1010
},
1111
{
1212
"cell_type": "code",
13-
"execution_count": null,
14-
"id": "cdb02606-2d1c-4090-9af7-1aee7fed44a7",
13+
"execution_count": 1,
14+
"id": "a78470b0-9a68-4bc3-8100-d9a2679c9905",
1515
"metadata": {},
1616
"outputs": [],
1717
"source": [
@@ -24,8 +24,26 @@
2424
"from calitp_data_analysis.gcs_geopandas import GCSGeoPandas\n",
2525
"\n",
2626
"import pandas as pd\n",
27-
"from functools import cache\n",
28-
"\n",
27+
"from functools import cache"
28+
]
29+
},
30+
{
31+
"cell_type": "code",
32+
"execution_count": 2,
33+
"id": "4792e66b-950f-4df4-977e-5694d69e3f83",
34+
"metadata": {},
35+
"outputs": [],
36+
"source": [
37+
"from shared_utils import gtfs_utils_v2, portfolio_utils, publish_utils, geo_utils"
38+
]
39+
},
40+
{
41+
"cell_type": "code",
42+
"execution_count": 3,
43+
"id": "cdb02606-2d1c-4090-9af7-1aee7fed44a7",
44+
"metadata": {},
45+
"outputs": [],
46+
"source": [
2947
"import publish_public_data\n",
3048
"from calitp_data_analysis import geography_utils\n",
3149
"from shared_utils import gtfs_utils_v2, portfolio_utils, publish_utils, geo_utils\n",
@@ -34,7 +52,7 @@
3452
},
3553
{
3654
"cell_type": "code",
37-
"execution_count": null,
55+
"execution_count": 4,
3856
"id": "5705a199-5cc8-4c64-956c-ba0f62dcdf16",
3957
"metadata": {
4058
"tags": []
@@ -49,7 +67,7 @@
4967
},
5068
{
5169
"cell_type": "code",
52-
"execution_count": null,
70+
"execution_count": 5,
5371
"id": "4b87f16a-943a-49c9-b524-e8cabaa579d6",
5472
"metadata": {
5573
"tags": []
@@ -61,7 +79,7 @@
6179
},
6280
{
6381
"cell_type": "code",
64-
"execution_count": null,
82+
"execution_count": 6,
6583
"id": "691e4dcc-0067-48a3-ae7d-9a956da31074",
6684
"metadata": {
6785
"tags": []
@@ -75,7 +93,7 @@
7593
},
7694
{
7795
"cell_type": "code",
78-
"execution_count": null,
96+
"execution_count": 7,
7997
"id": "730d1e7d-1e07-4adc-8888-8f697641a422",
8098
"metadata": {
8199
"tags": []
@@ -326,7 +344,7 @@
326344
},
327345
{
328346
"cell_type": "code",
329-
"execution_count": null,
347+
"execution_count": 8,
330348
"id": "4c0b9783-97d1-4031-87d2-eecd546156db",
331349
"metadata": {
332350
"tags": []
@@ -360,7 +378,7 @@
360378
},
361379
{
362380
"cell_type": "code",
363-
"execution_count": null,
381+
"execution_count": 9,
364382
"id": "44e48d17-738a-40f5-86cd-943e4d7536c2",
365383
"metadata": {
366384
"tags": []
@@ -413,30 +431,92 @@
413431
" how=\"left\",\n",
414432
" ).drop(columns = [\"month_first_day\"])\n",
415433
"\n",
416-
" m1 = geo_utils.convert_to_gdf(m1, \"pt_array\", \"line\")\n",
417-
" \"\"\"\n",
434+
" \n",
418435
" # Convert the geometry to line\n",
436+
" m1 = geo_utils.convert_to_gdf(m1, \"pt_array\", \"line\")\n",
419437
" \n",
420438
" m1 = gcs_geopandas().geo_data_frame_to_parquet(\n",
421439
" m1,\n",
422440
" f\"{GTFS_DATA_DICT.gcs_paths.DIGEST_GCS}processed/fct_monthly_routes_{download_date}.parquet\"\n",
423441
" )\n",
424-
" \"\"\"\n",
425442
" return m1"
426443
]
427444
},
428445
{
429446
"cell_type": "code",
430-
"execution_count": null,
447+
"execution_count": 10,
431448
"id": "ea7d4e28-78d8-4c69-ad1c-a22cc69eec1c",
432449
"metadata": {
433450
"tags": []
434451
},
435-
"outputs": [],
452+
"outputs": [
453+
{
454+
"name": "stdout",
455+
"output_type": "stream",
456+
"text": [
457+
"\n",
458+
" SELECT \n",
459+
" *\n",
460+
" FROM `cal-itp-data-infra-staging`.`tiffany_mart_gtfs_rollup`.`fct_monthly_routes`\n",
461+
" WHERE month_first_day >= DATE('2025-01-01')\n",
462+
" \n"
463+
]
464+
},
465+
{
466+
"name": "stderr",
467+
"output_type": "stream",
468+
"text": [
469+
"/opt/conda/lib/python3.11/site-packages/pandas_gbq/gbq.py:38: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n",
470+
" import pkg_resources # noqa\n"
471+
]
472+
},
473+
{
474+
"name": "stdout",
475+
"output_type": "stream",
476+
"text": [
477+
"Downloading: 100%|\u001b[32m██████████\u001b[0m|\n",
478+
"download time: 0:10:16.570742\n"
479+
]
480+
}
481+
],
436482
"source": [
437483
"fct_operator_hourly_summary = clean_fct_monthly_routes(date_str)"
438484
]
439485
},
486+
{
487+
"cell_type": "code",
488+
"execution_count": 11,
489+
"id": "acd7c563-e794-4291-8ebb-b387ff6a1238",
490+
"metadata": {},
491+
"outputs": [
492+
{
493+
"data": {
494+
"text/plain": [
495+
"(76342, 11)"
496+
]
497+
},
498+
"execution_count": 11,
499+
"metadata": {},
500+
"output_type": "execute_result"
501+
}
502+
],
503+
"source": [
504+
"fct_operator_hourly_summary.shape"
505+
]
506+
},
507+
{
508+
"cell_type": "code",
509+
"execution_count": 13,
510+
"id": "4584dec1-88a0-44bb-8e66-0a4a6f32e4f1",
511+
"metadata": {},
512+
"outputs": [],
513+
"source": [
514+
"fct_operator_hourly_summary = gcs_geopandas().geo_data_frame_to_parquet(\n",
515+
" fct_operator_hourly_summary,\n",
516+
" f\"{GTFS_DATA_DICT.gcs_paths.DIGEST_GCS}processed/fct_monthly_routes_{date_str}.parquet\"\n",
517+
" )"
518+
]
519+
},
440520
{
441521
"cell_type": "markdown",
442522
"id": "f282ac48-8e51-4337-889e-ee9a381aba72",

0 commit comments

Comments
 (0)