Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions python/employment.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def _aggregate_lodes_to_mgra(

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should add a paragraph below the one-liner with a high level description of how we use EDD first then fail over to area-based

Args:
combined_data: LODES data with columns: year, block, naics_code, jobs
xref: Crosswalk with columns: block, mgra, allocation_pct
xref: Crosswalk with columns: block, mgra, pct_edd, pct_area, edd_flag
year: The year for which to aggregate data

Returns:
Expand Down Expand Up @@ -125,7 +125,10 @@ def _aggregate_lodes_to_mgra(
.assign(year=year)
.merge(
combined_data.merge(xref, on="block", how="inner")
.assign(value=lambda df: df["jobs"] * df["allocation_pct"])
.assign(
value=lambda df: df["jobs"]
* np.where(df["edd_flag"] == 1, df["pct_edd"], df["pct_area"])
)
.groupby(["year", "mgra", "naics_code"], as_index=False)["value"]
.sum(),
on=["year", "mgra", "naics_code"],
Expand All @@ -151,15 +154,19 @@ def _get_jobs_inputs(year: int) -> dict[str, pd.DataFrame]:

jobs_inputs["lodes_data"] = _get_lodes_data(year)

with utils.ESTIMATES_ENGINE.connect() as con:
with utils.GIS_ENGINE.connect() as con:
# get crosswalk from Census blocks to MGRAs
with open(utils.SQL_FOLDER / "employment/xref_block_to_mgra.sql") as file:
jobs_inputs["xref_block_to_mgra"] = pd.read_sql_query(
with open(utils.SQL_FOLDER / "employment/edd_land_use_split.sql") as file:
jobs_inputs["xref_block_to_mgra"] = utils.read_sql_query_fallback(
sql=sql.text(file.read()),
con=con,
params={"mgra_version": utils.MGRA_VERSION},
params={
"mgra_version": utils.MGRA_VERSION,
"year": year,
},
)

with utils.ESTIMATES_ENGINE.connect() as con:
# get regional employment control totals from QCEW
with open(utils.SQL_FOLDER / "employment/QCEW_control.sql") as file:
jobs_inputs["control_totals"] = utils.read_sql_query_fallback(
Expand Down
229 changes: 229 additions & 0 deletions sql/employment/edd_land_use_split.sql
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  • The way that GIS_ENGINE is specified through the secrets file and the comment in the README.md on that file actually suggest that [GeoDepot] or [SPACECORE] should be the database supplied necessitating the use of [EMPCORE] in the queries. Recommend adding [EMPCORE] back in.

Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
/*
This query provides a many-to-many cross reference mapping 2020 Census Blocks to Series 15 MGRAs
There are two cross references for separate use cases
1) Cross reference based on EDD point-level jobs data
2) Cross reference based on simple land area intersection

Notes:
1) The land area intersection cross reference is used as a default cross
reference as there may be instances where the EDD point layer indicates no
MGRAs to allocate data to but the Census LEHD LODES contains jobs that
need to be allocated to MGRAs.
2) Data prior to year 2017 is not present in the EDD view and must be
queried directly from the source database table.
3) This must be run on the GIS server
*/

SET NOCOUNT ON;
-- Initialize parameters and return table ------------------------------------
DECLARE @year INTEGER = :year;
DECLARE @mgra_version NVARCHAR(10) = :mgra_version;
DECLARE @msg nvarchar(45) = 'EDD point-level data does not exist';


-- Check for MGRA version and stop execution if not Series 15
IF @mgra_version != 'mgra15'
BEGIN
RAISERROR('EDD xref only valid for Series 15 MGRAs',16,1)
RETURN
END

-- Create temporary table for EDD data to support spatial index
DROP TABLE IF EXISTS [#edd];
CREATE TABLE [#edd] (
[id] INTEGER IDENTITY(1,1) NOT NULL,
[jobs] FLOAT NOT NULL,
[Shape] GEOMETRY NOT NULL,
CONSTRAINT [pk_tt_edd] PRIMARY KEY ([id])
)

-- Create spatial index for later spatial join
-- Bounding box coordinates from SANDAG GIS team
-- Identical to spatial index on LUDU point layers in GIS database
CREATE SPATIAL INDEX [sidx_tt_edd] ON [#edd]
([Shape]) USING GEOMETRY_AUTO_GRID
WITH (BOUNDING_BOX = (
6151635.98006938,
1775442.36347014,
6613401.66775663,
2129306.52024172),
CELLS_PER_OBJECT = 8
)


-- Get SANDAG GIS team EDD dataset -------------------------------------------
IF @year >= 2017
BEGIN
INSERT INTO [#edd]
SELECT
1.0 * [emp_total]/[emp_valid] AS [jobs],
[SHAPE]
FROM (
SELECT
CASE WHEN [emp_m1] IS NOT NULL THEN 1 ELSE 0 END
+ CASE WHEN [emp_m2] IS NOT NULL THEN 1 ELSE 0 END
+ CASE WHEN [emp_m3] IS NOT NULL THEN 1 ELSE 0 END
+ CASE WHEN [emp_m4] IS NOT NULL THEN 1 ELSE 0 END
+ CASE WHEN [emp_m5] IS NOT NULL THEN 1 ELSE 0 END
+ CASE WHEN [emp_m6] IS NOT NULL THEN 1 ELSE 0 END
+ CASE WHEN [emp_m7] IS NOT NULL THEN 1 ELSE 0 END
+ CASE WHEN [emp_m8] IS NOT NULL THEN 1 ELSE 0 END
+ CASE WHEN [emp_m9] IS NOT NULL THEN 1 ELSE 0 END
+ CASE WHEN [emp_m10] IS NOT NULL THEN 1 ELSE 0 END
+ CASE WHEN [emp_m11] IS NOT NULL THEN 1 ELSE 0 END
+ CASE WHEN [emp_m12] IS NOT NULL THEN 1 ELSE 0 END
AS [emp_valid],
ISNULL([emp_m1], 0)
+ ISNULL([emp_m2], 0)
+ ISNULL([emp_m3], 0)
+ ISNULL([emp_m4], 0)
+ ISNULL([emp_m5], 0)
+ ISNULL([emp_m6], 0)
+ ISNULL([emp_m7], 0)
+ ISNULL([emp_m8], 0)
+ ISNULL([emp_m9], 0)
+ ISNULL([emp_m10], 0)
+ ISNULL([emp_m11], 0)
+ ISNULL([emp_m12], 0)
AS [emp_total],
[SHAPE]
FROM [ca_edd].[vi_ca_edd_employment]
WHERE [year] = @year
) AS [tt]
WHERE
[emp_valid] > 0
AND [emp_total] > 0
END
ELSE IF @year = 2016 OR @year BETWEEN 2010 AND 2014
BEGIN
INSERT INTO [#edd]
SELECT
[employment] * ISNULL([headquarters].[share], 1) AS [jobs],
ISNULL([headquarters].[shape], [businesses].[shape]) AS [SHAPE]
FROM [ca_edd].[businesses]
LEFT JOIN [ca_edd].[headquarters]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree with Eric regarding more explicit JOIN labels. LEFT OUTER JOIN

ON [businesses].[year] = [headquarters].[year]
AND [businesses].[emp_id] = [headquarters].[emp_id]
INNER JOIN (
SELECT [year], [emp_id], [employment]
FROM [ca_edd].[employment]
WHERE
[month_id] = 14 -- adjusted employment
AND [employment] > 0
AND [year] = @year
) AS [employment]
ON [businesses].[year] = [employment].[year]
AND [businesses].[emp_id] = [employment].[emp_id]
END
ELSE IF @year = 2015
BEGIN
INSERT INTO [#edd]
SELECT
[employment] * ISNULL([headquarters].[share], 1) AS [jobs],
ISNULL([headquarters].[shape], [businesses].[shape]) AS [SHAPE]
FROM [ca_edd].[businesses]
LEFT JOIN [ca_edd].[headquarters]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree with Eric regarding more explicit JOIN labels. LEFT OUTER JOIN

ON [businesses].[year] = [headquarters].[year]
AND [businesses].[emp_id] = [headquarters].[emp_id]
INNER JOIN (
SELECT
[year],
[emp_id],
1.0 * ((ISNULL([15], 0) + ISNULL([16], 0) + ISNULL([17], 0))
/
(CASE WHEN [15] IS NOT NULL THEN 1 ELSE 0 END
+ CASE WHEN [16] IS NOT NULL THEN 1 ELSE 0 END
+ CASE WHEN [17] IS NOT NULL THEN 1 ELSE 0 END
))
AS [employment]
FROM [ca_edd].[employment]
PIVOT(SUM([employment]) FOR [month_id] IN ([15], [16], [17])) AS [pivot]
WHERE
[year] = @year AND
([15] IS NOT NULL OR [16] IS NOT NULL OR [17] IS NOT NULL)
) AS [employment]
ON [businesses].[year] = [employment].[year]
AND [businesses].[emp_id] = [employment].[emp_id]
END


-- Send error message if no data exists --------------------------------------
IF NOT EXISTS (
SELECT TOP (1) *
FROM [#edd]
)
SELECT @msg AS [msg]
ELSE
-- Build cross reference of Census 2020 Blocks to MGRAs ----------------------
BEGIN
-- Calculate sum of jobs in Census 2020 Blocks by MGRAs using EDD points
-- Used later to calculate % allocation of Census 2020 Block jobs to MGRAs
WITH [xref_edd] AS (
SELECT
[CENSUSBLOCKS].[GEOID20] AS [block],
[MGRA15].[MGRA] AS [mgra],
SUM([jobs]) AS [mgra_jobs],
SUM(SUM([jobs])) OVER (PARTITION BY [CENSUSBLOCKS].[GEOID20])
AS [block_jobs]
FROM [#edd]
INNER JOIN [GeoDepot].[sde].[CENSUSBLOCKS]
ON [#edd].[Shape].STIntersects([CENSUSBLOCKS].[Shape]) = 1
INNER JOIN [GeoDepot].[sde].[MGRA15]
ON [#edd].[Shape].STIntersects([MGRA15].[Shape]) = 1
GROUP BY
[CENSUSBLOCKS].[GEOID20], [MGRA15].[MGRA]
),
-- Get % area overlap of Census 2020 Block area and MGRAs
[xref_area] AS (
SELECT
[CENSUSBLOCKS].[GEOID20] AS [block],
[MGRA15].[MGRA] AS [mgra],
([CENSUSBLOCKS].[Shape].STIntersection([MGRA15].[Shape]).STArea()
/ [CENSUSBLOCKS].[Shape].STArea())
AS [pct_area]
FROM [GeoDepot].[sde].[CENSUSBLOCKS]
LEFT JOIN [GeoDepot].[sde].[MGRA15]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree with Eric regarding more explicit JOIN labels. LEFT OUTER JOIN

ON [CENSUSBLOCKS].[Shape].STIntersects([MGRA15].[Shape]) = 1
WHERE ([CENSUSBLOCKS].[Shape].STIntersection([MGRA15].[Shape]).STArea()
/ [CENSUSBLOCKS].[Shape].STArea()) > 0.01
),
-- Combine results and calculate % allocations of Census 2020 blocks to MGRAs
[results] AS (
SELECT
ISNULL([xref_edd].[block], [xref_area].[block]) AS [block],
ISNULL([xref_edd].[mgra], [xref_area].[mgra]) AS [mgra],
SUM(CASE WHEN ISNULL([xref_edd].[block_jobs], 0) = 0 THEN 0
ELSE 1.0 * ISNULL([xref_edd].[mgra_jobs], 0)
/ ISNULL([xref_edd].[block_jobs], 0)
END) AS [pct_edd],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CASE statement formatting looks off

SUM(
    CASE
        WHEN ISNULL([xref_edd].[block_jobs], 0) = 0 THEN 0
        ELSE 1.0 * ISNULL([xref_edd].[mgra_jobs], 0) / ISNULL([xref_edd].[block_jobs], 0)
    END
) AS [pct_edd],

SUM(ISNULL([xref_area].[pct_area], 0)) AS [pct_area]
FROM [xref_edd]
FULL JOIN [xref_area]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree with Eric regarding more explicit JOIN labels. FULL OUTER JOIN

ON [xref_edd].[block] = [xref_area].[block]
AND [xref_edd].[mgra] = [xref_area].[mgra]
GROUP BY
ISNULL([xref_edd].[block], [xref_area].[block]),
ISNULL([xref_edd].[mgra], [xref_area].[mgra])
)
-- Return results ensuring % allocations add to 1 within each Census 2020 Block
SELECT
[block],
[mgra],
CASE WHEN SUM([pct_edd]) OVER (PARTITION BY [block]) > 0
THEN [pct_edd] * 1/SUM([pct_edd]) OVER (PARTITION BY [block])
ELSE 0 END AS [pct_edd],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

CASE statement formatting looks off

CASE 
    WHEN SUM([pct_edd]) OVER (PARTITION BY [block]) > 0
        THEN [pct_edd] * 1/SUM([pct_edd]) OVER (PARTITION BY [block])
    ELSE 0
END AS [pct_edd],

[pct_area] * 1/SUM([pct_area]) OVER (PARTITION BY [block]) AS [pct_area],
CASE WHEN SUM([pct_edd]) OVER (PARTITION BY [block])> 0
THEN 1
ELSE 0 END AS edd_flag
FROM
[results]
WHERE
[mgra] IS NOT NULL
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

formatting is off

        CASE
            WHEN SUM([pct_edd]) OVER (PARTITION BY [block])> 0 THEN 1 
            ELSE 0
        END AS [edd_flag]
    FROM [results]
    WHERE [mgra] IS NOT NULL

ORDER BY
[block],
[mgra]
END

--Drop Temp table
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

probably don't need this comment at all but missing space

DROP TABLE IF EXISTS [#edd];