iris-hep
diff --git a/‎cms/analysis.py‎
Lines changed: 0 additions & 88 deletions b/‎cms/analysis.py‎
Lines changed: 0 additions & 88 deletions
diff --git a/‎cms/example_cms/configs/cuts.py‎
Lines changed: 19 additions & 15 deletions b/‎cms/example_cms/configs/cuts.py‎
Lines changed: 19 additions & 15 deletions
diff --git a/‎cms/example_cms/configs/skim.py‎
Lines changed: 46 additions & 22 deletions b/‎cms/example_cms/configs/skim.py‎
Lines changed: 46 additions & 22 deletions
@@ -7,7 +7,7 @@
 # Select good run data
 # ===================
 def lumi_mask(
-     run: ak.Array, lumiBlock: ak.Array, lumifile: str = "") -> ak.Array:
+    run: ak.Array, lumiBlock: ak.Array, goodruns: ak.Array = None) -> ak.Array:
     """
     Create a boolean mask selecting events that pass the good run/lumi criteria.
     https://github.com/cms-opendata-workshop/workshop2024-lesson-event-selection/blob/main/instructors/dpoa_workshop_utilities.py
@@ -33,7 +33,7 @@ def lumi_mask(
     # -----------------------------
     # Load good lumi sections JSON
     # -----------------------------
-    good_lumi_sections = ak.from_json(open(lumifile, "rb"))
+    good_lumi_sections = goodruns
 
     # Extract good run numbers (as integers)
     good_runs = np.array(good_lumi_sections.fields).astype(int)
@@ -49,22 +49,36 @@ def lumi_mask(
     # -----------------------------
     # Match run numbers to good runs
     # -----------------------------
-    def find_indices(arr1: np.ndarray, arr2: ak.Array) -> ak.Array:
-        arr1_np = np.asarray(ak.to_numpy(arr1))
-        arr2_np = np.asarray(ak.to_numpy(arr2))
+    def find_indices(good_runs: np.ndarray, event_runs: ak.Array) -> ak.Array:
+        """
+        Return, for each event run, its index in ``good_runs`` (-1 if absent).
+
+        NOTE(review): callers must handle -1 explicitly; used directly as an
+        index it silently selects the last entry -- confirm downstream.
+        """
+        good_runs_np = np.asarray(ak.to_numpy(good_runs))
+        event_runs_np = np.asarray(ak.to_numpy(event_runs))
 
-        # Sort arr1 and track indices
-        sorter = np.argsort(arr1_np)
-        sorted_arr1 = arr1_np[sorter]
-
-        # Find insertion positions of arr2 elements into arr1
-        pos = np.searchsorted(sorted_arr1, arr2_np)
+        # Sort good_runs and track indices
+        sorter = np.argsort(good_runs_np)
+        sorted_good_runs = good_runs_np[sorter]
 
+        # Find insertion positions of event_runs elements into good_runs
+        pos = np.searchsorted(sorted_good_runs, event_runs_np)
+
         # Validate matches
-        valid = (pos < len(arr1_np)) & (sorted_arr1[pos] == arr2_np)
-
+        # searchsorted yields len(sorted_good_runs) for runs above the largest
+        # good run (e.g. a missing lumi file). Clamp before indexing so those
+        # events come out unmatched (-1) instead of raising IndexError.
+        n_good = len(sorted_good_runs)
+        if n_good:
+            safe_pos = np.minimum(pos, n_good - 1)
+            valid = (pos < n_good) & (sorted_good_runs[safe_pos] == event_runs_np)
+        else:
+            valid = np.zeros(len(event_runs_np), dtype=bool)
+
         # Build result array
-        out = np.full(len(arr2_np), -1, dtype=int)
+        out = np.full(len(event_runs_np), -1, dtype=int)
         out[valid] = sorter[pos[valid]]
         return ak.Array(out)
 
@@ -73,7 +77,6 @@ def find_indices(arr1: np.ndarray, arr2: ak.Array) -> ak.Array:
     # -----------------------------
     # Compute per-event lumi block diffs
     # -----------------------------
-
     # Calculate (event lumi - good lumi) for matched run
     diff = lumiBlock - all_good_blocks[good_run_indices]
 
@@ -88,6 +91,7 @@ def find_indices(arr1: np.ndarray, arr2: ak.Array) -> ak.Array:
     return mask
 
 
+
 # ===================
 # Selection which is applied to all regions
 # ===================
 
@@ -12,9 +12,30 @@
 from typing import List, Tuple
 
 from coffea.analysis_tools import PackedSelection
-from utils.schema import WorkerEval
-from .cuts import lumi_mask
+from coffea.lumi_tools import LumiMask
+from intccms.schema import WorkerEval
+#from .cuts import lumi_mask
 
+import awkward as ak
+
+lumifile16 = Path("./example_cms/corrections/Cert_271036-284044_13TeV_Legacy2016_Collisions16_JSON.txt").resolve()
+lumifile17 = Path("./example_cms/corrections/Cert_294927-306462_13TeV_UL2017_Collisions17_GoldenJSON.txt").resolve()
+lumifile18 = Path("./example_cms/corrections/Cert_314472-325175_13TeV_Legacy2018_Collisions18_JSON.txt").resolve()
+lmobj16 = LumiMask(lumifile16)
+lmobj17 = LumiMask(lumifile17)
+lmobj18 = LumiMask(lumifile18)
+# Wrap each LumiMask so the mask object is bound as a default argument (obj)
+lm16 = lambda run, lb, obj=lmobj16: obj(run, lb)
+lm17 = lambda run, lb, obj=lmobj17: obj(run, lb)
+lm18 = lambda run, lb, obj=lmobj18: obj(run, lb)
+
+
+# Build configuration: one (year, run periods, lumi-mask callable) tuple per year
+year_run_config = [
+  ("2016", ["B", "C", "D", "E", "F"], lm16),
+  ("2017", ["B", "C", "D", "E", "F"], lm17),
+  ("2018", ["A", "B", "C", "D"], lm18),
+]
 
 def get_cross_sections_for_datasets(
     years: List[str],
@@ -57,6 +78,15 @@ def get_cross_sections_for_datasets(
 
     return tuple(cross_sections)
 
+# Pre-build the lumi_mask configs: one per (year, run period), in year_run_config order
+lumi_mask_configs = [
+    {
+        "function": lm_func,
+        "use": [("event", "run"), ("event", "luminosityBlock")],
+    }
+    for _year, runs, lm_func in year_run_config for _run in runs
+]
+
 
 # ==============================================================================
 #  Dataset Configuration
@@ -207,25 +237,19 @@ def get_cross_sections_for_datasets(
     },
     # Data: Single muon (different run periods per year)
     {
-        "name": "data",
-        "directories": tuple(
-            f"example_cms/datasets/{year}/SingleMuonRun{run}/"
-            for year, runs in [("2016", ["B", "C", "D", "E", "F"]),
-                              ("2017", ["B", "C", "D", "E", "F"]),
-                              ("2018", ["A", "B", "C", "D"])]
-            for run in runs
-        ),
-        "cross_sections": 1.0,
-        "file_pattern": "*.txt",
-        "tree_name": "Events",
-        "weight_branch": None,
-        "redirector": REDIRECTOR,
-        "is_data": True,
-        "lumi_mask": {
-            "function": lumi_mask,
-            "use": [("event", "run"), ("event", "luminosityBlock")],
-            "static_kwargs": {"lumifile": "./example_cms/corrections/Cert_271036-284044_13TeV_Legacy2016_Collisions16_JSON.txt"},
-        },
+          "name": "data",
+          "directories": tuple(
+              f"example_cms/datasets/{year}/SingleMuonRun{run}/"
+              for year, runs, _ in year_run_config
+              for run in runs
+          ),
+          "cross_sections": 1.0,
+          "file_pattern": "*.txt",
+          "tree_name": "Events",
+          "weight_branch": None,
+          "redirector": REDIRECTOR,
+          "is_data": True,
+          "lumi_mask": tuple(lumi_mask_configs),
     }
 ]
 
@@ -266,7 +290,7 @@ def default_skim_selection(puppimet, hlt):
 skimming_config = {
     "function": default_skim_selection,
     "use": [("PuppiMET", None), ("HLT", None)],
-    "chunk_size": 100_000,
+    "chunk_size": 200_000,
     "tree_name": "Events",
     # "output": {
     #     "format": "parquet",