Skip to content

Commit 16b91ad

Browse files
authored
Coffea processor implementation with metrics and demos (#30)
1 parent 4634073 commit 16b91ad

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

86 files changed

+23104
-3838
lines changed

cms/analysis.py

Lines changed: 0 additions & 88 deletions
This file was deleted.

cms/example_cms/configs/cuts.py

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
# Select good run data
88
# ===================
99
def lumi_mask(
10-
run: ak.Array, lumiBlock: ak.Array, lumifile: str = "") -> ak.Array:
10+
run: ak.Array, lumiBlock: ak.Array, goodruns: ak.Array = None) -> ak.Array:
1111
"""
1212
Create a boolean mask selecting events that pass the good run/lumi criteria.
1313
https://github.com/cms-opendata-workshop/workshop2024-lesson-event-selection/blob/main/instructors/dpoa_workshop_utilities.py
@@ -33,7 +33,7 @@ def lumi_mask(
3333
# -----------------------------
3434
# Load good lumi sections JSON
3535
# -----------------------------
36-
good_lumi_sections = ak.from_json(open(lumifile, "rb"))
36+
good_lumi_sections = goodruns
3737

3838
# Extract good run numbers (as integers)
3939
good_runs = np.array(good_lumi_sections.fields).astype(int)
@@ -49,22 +49,26 @@ def lumi_mask(
4949
# -----------------------------
5050
# Match run numbers to good runs
5151
# -----------------------------
52-
def find_indices(arr1: np.ndarray, arr2: ak.Array) -> ak.Array:
53-
arr1_np = np.asarray(ak.to_numpy(arr1))
54-
arr2_np = np.asarray(ak.to_numpy(arr2))
52+
def find_indices(good_runs: np.ndarray, event_runs: ak.Array) -> ak.Array:
53+
good_runs_np = np.asarray(ak.to_numpy(good_runs))
54+
event_runs_np = np.asarray(ak.to_numpy(event_runs))
5555

56-
# Sort arr1 and track indices
57-
sorter = np.argsort(arr1_np)
58-
sorted_arr1 = arr1_np[sorter]
59-
60-
# Find insertion positions of arr2 elements into arr1
61-
pos = np.searchsorted(sorted_arr1, arr2_np)
56+
# Sort good_runs and track indices
57+
sorter = np.argsort(good_runs_np)
58+
sorted_good_runs = good_runs_np[sorter]
6259

60+
# Find insertion positions of event_runs elements into good_runs
61+
pos = np.searchsorted(sorted_good_runs, event_runs_np)
62+
6363
# Validate matches
64-
valid = (pos < len(arr1_np)) & (sorted_arr1[pos] == arr2_np)
65-
64+
if pos[-1] < len(sorted_good_runs):
65+
valid = (pos < len(good_runs_np)) & (sorted_good_runs[pos] == event_runs_np)
66+
else:
67+
# HACK: fallback because some lumi files are missing
68+
valid = np.ones_like(sorted_good_runs)
69+
6670
# Build result array
67-
out = np.full(len(arr2_np), -1, dtype=int)
71+
out = np.full(len(event_runs_np), -1, dtype=int)
6872
out[valid] = sorter[pos[valid]]
6973
return ak.Array(out)
7074

@@ -73,7 +77,6 @@ def find_indices(arr1: np.ndarray, arr2: ak.Array) -> ak.Array:
7377
# -----------------------------
7478
# Compute per-event lumi block diffs
7579
# -----------------------------
76-
7780
# Calculate (event lumi - good lumi) for matched run
7881
diff = lumiBlock - all_good_blocks[good_run_indices]
7982

@@ -88,6 +91,7 @@ def find_indices(arr1: np.ndarray, arr2: ak.Array) -> ak.Array:
8891
return mask
8992

9093

94+
9195
# ===================
9296
# Selection which is applied to all regions
9397
# ===================

cms/example_cms/configs/skim.py

Lines changed: 46 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,30 @@
1212
from typing import List, Tuple
1313

1414
from coffea.analysis_tools import PackedSelection
15-
from utils.schema import WorkerEval
16-
from .cuts import lumi_mask
15+
from coffea.lumi_tools import LumiMask
16+
from intccms.schema import WorkerEval
17+
#from .cuts import lumi_mask
1718

19+
import awkward as ak
20+
21+
lumifile16 = Path("./example_cms/corrections/Cert_271036-284044_13TeV_Legacy2016_Collisions16_JSON.txt").resolve()
22+
lumifile17 = Path("./example_cms/corrections/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt").resolve()
23+
lumifile18 = Path("./example_cms/corrections/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt").resolve()
24+
lmobj16 = LumiMask(lumifile16)
25+
lmobj17 = LumiMask(lumifile17)
26+
lmobj18 = LumiMask(lumifile18)
27+
28+
lm16 = lambda run, lb, obj=lmobj16: obj(run, lb)
29+
lm17 = lambda run, lb, obj=lmobj17: obj(run, lb)
30+
lm18 = lambda run, lb, obj=lmobj18: obj(run, lb)
31+
32+
33+
# Build configuration
34+
year_run_config = [
35+
("2016", ["B", "C", "D", "E", "F"], lm16),
36+
("2017", ["B", "C", "D", "E", "F"], lm17),
37+
("2018", ["A", "B", "C", "D"], lm18),
38+
]
1839

1940
def get_cross_sections_for_datasets(
2041
years: List[str],
@@ -57,6 +78,15 @@ def get_cross_sections_for_datasets(
5778

5879
return tuple(cross_sections)
5980

81+
# Pre-build the lumi_mask configs
82+
lumi_mask_configs = []
83+
for year, runs, lm_func in year_run_config:
84+
for _ in runs:
85+
lumi_mask_configs.append({
86+
"function": lm_func,
87+
"use": [("event", "run"), ("event", "luminosityBlock")],
88+
})
89+
6090

6191
# ==============================================================================
6292
# Dataset Configuration
@@ -207,25 +237,19 @@ def get_cross_sections_for_datasets(
207237
},
208238
# Data: Single muon (different run periods per year)
209239
{
210-
"name": "data",
211-
"directories": tuple(
212-
f"example_cms/datasets/{year}/SingleMuonRun{run}/"
213-
for year, runs in [("2016", ["B", "C", "D", "E", "F"]),
214-
("2017", ["B", "C", "D", "E", "F"]),
215-
("2018", ["A", "B", "C", "D"])]
216-
for run in runs
217-
),
218-
"cross_sections": 1.0,
219-
"file_pattern": "*.txt",
220-
"tree_name": "Events",
221-
"weight_branch": None,
222-
"redirector": REDIRECTOR,
223-
"is_data": True,
224-
"lumi_mask": {
225-
"function": lumi_mask,
226-
"use": [("event", "run"), ("event", "luminosityBlock")],
227-
"static_kwargs": {"lumifile": "./example_cms/corrections/Cert_271036-284044_13TeV_Legacy2016_Collisions16_JSON.txt"},
228-
},
240+
"name": "data",
241+
"directories": tuple(
242+
f"example_cms/datasets/{year}/SingleMuonRun{run}/"
243+
for year, runs, _ in year_run_config
244+
for run in runs
245+
),
246+
"cross_sections": 1.0,
247+
"file_pattern": "*.txt",
248+
"tree_name": "Events",
249+
"weight_branch": None,
250+
"redirector": REDIRECTOR,
251+
"is_data": True,
252+
"lumi_mask": tuple(lumi_mask_configs),
229253
}
230254
]
231255

@@ -266,7 +290,7 @@ def default_skim_selection(puppimet, hlt):
266290
skimming_config = {
267291
"function": default_skim_selection,
268292
"use": [("PuppiMET", None), ("HLT", None)],
269-
"chunk_size": 100_000,
293+
"chunk_size": 200_000,
270294
"tree_name": "Events",
271295
# "output": {
272296
# "format": "parquet",

0 commit comments

Comments
 (0)