cms-flaf · kandrosov · May 10, 2026 · May 7, 2026 · May 7, 2026 · May 8, 2026
diff --git a/AnaProd/AnaTupleFileList.py b/AnaProd/AnaTupleFileList.py
diff --git a/AnaProd/MergeAnaTuples.py b/AnaProd/MergeAnaTuples.py
@@ -141,6 +141,7 @@ def mergeAnaTuples(
     input_reports,
     input_roots,
     root_outputs,
+    runs,
     compression_algo="LZMA",
     compression_level=9,
 ):
@@ -216,6 +217,9 @@ def mergeAnaTuples(
         columns, _ = getColumns(df)
         if unc_source == central:
             if is_data:
+                if len(runs) > 0:
+                    filter_str = " || ".join([f"run == {r}" for r in runs])
+                    df = df.Filter(filter_str, "RunFilter")
                 event_filter = ROOT.EventDuplicateFilter()
                 df = event_filter.apply(
                     ROOT.RDF.AsRNode(df),
@@ -288,6 +292,7 @@ def mergeAnaTuples(
     parser.add_argument("--compression-algo", type=str, default="LZMA")
     parser.add_argument("--is-data", action="store_true")
     parser.add_argument("--LAWrunVersion", required=True, type=str)
+    parser.add_argument("--runs", required=False, nargs="+", type=int, default=[])
     args = parser.parse_args()
 
     setup = Setup.getGlobal(
@@ -313,6 +318,7 @@ def mergeAnaTuples(
         input_reports=reports,
         input_roots=args.input_roots,
         root_outputs=args.root_outputs,
+        runs=args.runs,
         compression_algo=args.compression_algo,
         compression_level=args.compression_level,
     )
diff --git a/AnaProd/tasks.py b/AnaProd/tasks.py
@@ -49,24 +49,31 @@ def run(self):
         pattern = pattern_dict.get(nano_version, r".*\.root$")
         input_files = []
         inactive_files = []
+        empty_files = []
         for file in fs_nanoAOD.listdir(folder_name):
             if not re.match(pattern, file):
                 continue
             file_path = os.path.join(folder_name, file) if include_folder_name else file
-            if hasattr(fs_nanoAOD, "file_interface") and hasattr(
-                fs_nanoAOD.file_interface, "is_available"
-            ):
-                if not fs_nanoAOD.file_interface.is_available(
-                    folder_name, file, verbose=1
-                ):
-                    if ignore_missing:
-                        print(
-                            f"{file_path}: will be ignored because no sites are found."
-                        )
-                        inactive_files.append(file_path)
+            if hasattr(fs_nanoAOD, "file_interface"):
+
+                if hasattr(fs_nanoAOD.file_interface, "is_available"):
+                    if not fs_nanoAOD.file_interface.is_available(
+                        folder_name, file, verbose=1
+                    ):
+                        if ignore_missing:
+                            print(
+                                f"{file_path}: will be ignored because no sites are found."
+                            )
+                            inactive_files.append(file_path)
+                            continue
+                        else:
+                            raise RuntimeError(f"No sites found for {file_path}")
+                if hasattr(fs_nanoAOD.file_interface, "n_events"):
+                    n_events = fs_nanoAOD.file_interface.n_events(folder_name, file)
+                    if n_events == 0:
+                        print(f"{file_path}: will be ignored because it has 0 events.")
+                        empty_files.append(file_path)
                         continue
-                    else:
-                        raise RuntimeError(f"No sites found for {file_path}")
             input_files.append(file_path)
 
         if len(input_files) == 0:
@@ -76,6 +83,7 @@ def run(self):
         output = {
             "input_files": input_files,
             "inactive_files": inactive_files,
+            "empty_files": empty_files,
         }
         with self.output().localize("w") as out_local_file:
             with open(out_local_file.abspath, "w") as f:
@@ -427,7 +435,12 @@ def run(self):
                 nEventsPerFile = nEventsPerFile.get(process_group, 100_000)
             is_data = process_group == "data"
 
-            result = CreateMergePlan(self.setup, local_inputs, nEventsPerFile, is_data)
+            result = CreateMergePlan(
+                setup=self.setup,
+                local_inputs=local_inputs,
+                n_events_per_file=nEventsPerFile,
+                is_data=is_data,
+            )
 
             for output_name, output_remote in self.output().items():
                 output_path_tmp = os.path.join(job_home, f"{output_name}_tmp.json")
@@ -449,7 +462,7 @@ def requires(self):
 
     def output(self):
         dataset_name, process_group = self.branch_data
-        return self.local_target(self.get_output_path(dataset_name, "plan"))
+        return self.local_target(f"{dataset_name}.json")
 
     def run(self):
         with self.input()["plan"].localize("r") as input_local:
@@ -485,6 +498,7 @@ def workflow_requires(self):
             _,
             _,
             _,
+            _,
         ) in self.branch_map.items():
             branch_set.add(ds_branch)
             branch_set.update(dataset_dependencies.values())
@@ -508,6 +522,7 @@ def requires(self):
             input_file_list,
             _,
             skip_future_tasks,
+            runs,
         ) = self.branch_data
         anaTuple_branch_map = AnaTupleFileTask.req(
             self, branch=-1, branches=()
@@ -597,6 +612,7 @@ def create_branch_map(self):
                 input_file_list = this_dict["inputs"]
                 output_file_list = this_dict["outputs"]
                 skip_future_tasks = this_dict["n_events"] == 0
+                runs = this_dict.get("runs", [])
                 branches[nBranch] = (
                     dataset_name,
                     process_group,
@@ -605,6 +621,7 @@ def create_branch_map(self):
                     input_file_list,
                     output_file_list,
                     skip_future_tasks,
+                    runs,
                 )
                 nBranch += 1
         return branches
@@ -637,6 +654,7 @@ def output(self):
             input_file_list,
             output_file_list,
             skip_future_tasks,
+            runs,
         ) = self.branch_data
         output_dir = os.path.join(self.version, "AnaTuples", self.period, dataset_name)
         outputs = [os.path.join(output_dir, out_file) for out_file in output_file_list]
@@ -653,6 +671,7 @@ def run(self):
             input_file_list,
             output_file_list,
             skip_future_tasks,
+            runs,
         ) = self.branch_data
         is_data = process_group == "data"
         job_home, remove_job_home = self.law_job_home()
@@ -692,6 +711,7 @@ def run(self):
                 input_reports=reports,
                 input_roots=local_root_inputs,
                 root_outputs=tmpFiles,
+                runs=runs,
             )
 
         for outFile, tmpFile in zip(self.output(), tmpFiles):

diff --git a/Analysis/tasks.py b/Analysis/tasks.py
@@ -245,6 +245,7 @@ def create_branch_map(self):
             input_file_list,
             output_file_list,
             skip_future_tasks,
+            runs,
         ) in anaProd_branch_map.items():
             if skip_future_tasks:
                 continue

diff --git a/RunKit b/RunKit
diff --git a/bootstrap.sh b/bootstrap.sh
@@ -1,6 +1,28 @@
 #!/usr/bin/env bash
 
 action() {
-    source "{{analysis_path}}/env.sh"
+    local run_token_server_host="{{run_token_server_host}}"
+    local run_token_server_port="{{run_token_server_port}}"
+    local analysis_path="{{analysis_path}}"
+
+    if [ -n "${run_token_server_host}" ] && [ -n "${run_token_server_port}" ]; then
+        local get_run_token_script
+        get_run_token_script=$(ls "${LAW_JOB_INIT_DIR}"/get_run_token*.py 2>/dev/null | head -1)
+        if [ -z "${get_run_token_script}" ]; then
+            echo "ERROR: get_run_token.py not found in ${LAW_JOB_INIT_DIR}"
+            return 1
+        fi
+        python3 "${get_run_token_script}" \
+            --server "${run_token_server_host}" \
+            --port "${run_token_server_port}" \
+            --path "${analysis_path}"
+        local rc=$?
+        if [ ${rc} -ne 0 ]; then
+            echo "ERROR: failed to obtain run token (exit code ${rc}), aborting."
+            return ${rc}
+        fi
+    fi
+
+    source "${analysis_path}/env.sh"
 }
 action
diff --git a/config/Run3_2022/datasets.yaml b/config/Run3_2022/datasets.yaml
@@ -736,6 +736,13 @@ DYto2Mu_M_50_PTLL_600_2J_amcatnloFXFX:
 #   fileNamePattern:
   # HLepRare: ^nanoTauTau_\d+\.root$
 
+#### DY LO ####
+DYto2L_MLL_10to50_madgraphMLM:
+  crossSection: DYto2L_M_10to50_amcatnloFXFX_allFlavors
+  generator: madgraphMLM
+  nanoAOD:
+    v12: /DYto2L-4Jets_MLL-10to50_TuneCP5_13p6TeV_madgraphMLM-pythia8/Run3Summer22NanoAODv12-130X_mcRun3_2022_realistic_v5-v2/NANOAODSIM
+
 ############## H ##############
 GluGluHto2Tau_M125:
   crossSection: GluGluHToTauTau_M125

diff --git a/config/Run3_2022EE/datasets.yaml b/config/Run3_2022EE/datasets.yaml
@@ -754,6 +754,14 @@ DYto2Tau_MLL_6000_powheg:
   nanoAOD:
     v12: /DYto2Tau_MLL-6000_TuneCP5_13p6TeV_powheg-pythia8/Run3Summer22EENanoAODv12-130X_mcRun3_2022_realistic_postEE_v6-v2/NANOAODSIM
 
+#### DY LO ####
+DYto2L_MLL_10to50_madgraphMLM:
+  crossSection: DYto2L_M_10to50_amcatnloFXFX_allFlavors
+  generator: madgraphMLM
+  nanoAOD:
+    v12: /DYto2L-4Jets_MLL-10to50_TuneCP5_13p6TeV_madgraphMLM-pythia8/Run3Summer22EENanoAODv12-130X_mcRun3_2022_realistic_postEE_v6-v4/NANOAODSIM
+
+
 ############## H ##############
 GluGluHto2B_M125:
   crossSection: GluGluHToBB_M125

diff --git a/config/Run3_2023/datasets.yaml b/config/Run3_2023/datasets.yaml
@@ -959,6 +959,15 @@ DYto2Tau_MLL_6000_powheg:
   miniAOD: /DYto2Tau_MLL-6000_TuneCP5_13p6TeV_powheg-pythia8/Run3Summer23MiniAODv4-130X_mcRun3_2023_realistic_v14-v2/MINIAODSIM
   nanoAOD:
     v12: /DYto2Tau_MLL-6000_TuneCP5_13p6TeV_powheg-pythia8/Run3Summer23NanoAODv12-130X_mcRun3_2023_realistic_v14-v2/NANOAODSIM
+
+#### DY LO ####
+DYto2L_MLL_10to50_madgraphMLM:
+  crossSection: DYto2L_M_10to50_amcatnloFXFX_allFlavors
+  generator: madgraphMLM
+  nanoAOD:
+    v12: /DYto2L-4Jets_MLL-10to50_TuneCP5_13p6TeV_madgraphMLM-pythia8/Run3Summer23NanoAODv12-130X_mcRun3_2023_realistic_v14-v4/NANOAODSIM
+
+
 ############## ggZH ##############
 ggZH_Hto2B_Zto2L:
   crossSection: ZH_Hbb_Zll

diff --git a/config/Run3_2023BPix/datasets.yaml b/config/Run3_2023BPix/datasets.yaml
@@ -873,6 +873,15 @@ DYto2Tau_MLL_6000_powheg:
   miniAOD: /DYto2Tau_MLL-6000_TuneCP5_13p6TeV_powheg-pythia8/Run3Summer23BPixMiniAODv4-130X_mcRun3_2023_realistic_postBPix_v2-v3/MINIAODSIM
   nanoAOD:
     v12: /DYto2Tau_MLL-6000_TuneCP5_13p6TeV_powheg-pythia8/Run3Summer23BPixNanoAODv12-130X_mcRun3_2023_realistic_postBPix_v2-v3/NANOAODSIM
+
+#### DY LO ####
+DYto2L_MLL_10to50_madgraphMLM:
+  crossSection: DYto2L_M_10to50_amcatnloFXFX_allFlavors
+  generator: madgraphMLM
+  nanoAOD:
+    v12: /DYto2L-4Jets_MLL-10to50_TuneCP5_13p6TeV_madgraphMLM-pythia8/Run3Summer23BPixNanoAODv12-130X_mcRun3_2023_realistic_postBPix_v2-v4/NANOAODSIM
+
+
 ############## GGH ##############
 ############## Htobb ##############
 ggZH_Hto2B_Zto2L:

diff --git a/config/Run3_2024/datasets.yaml b/config/Run3_2024/datasets.yaml
@@ -1008,6 +1008,26 @@ DYto2Tau_MLL_800to1500_powheg:
   nanoAOD:
     v15: /DYto2Tau_Bin-MLL-800to1500_TuneCP5_13p6TeV_powheg-pythia8/RunIII2024Summer24NanoAODv15-150X_mcRun3_2024_realistic_v2-v2/NANOAODSIM
 
+#### DY LO ####
+DYto2E_MLL_10to50_madgraphMLM:
+  crossSection: DYto2L_M_10to50_amcatnloFXFX_singleFlavor
+  generator: madgraphMLM
+  nanoAOD:
+    v15: /DYto2E-4Jets_Bin-MLL-10to50_TuneCP5_13p6TeV_madgraphMLM-pythia8/RunIII2024Summer24NanoAODv15-150X_mcRun3_2024_realistic_v2-v2/NANOAODSIM
+
+DYto2Mu_MLL_10to50_madgraphMLM:
+  crossSection: DYto2L_M_10to50_amcatnloFXFX_singleFlavor
+  generator: madgraphMLM
+  nanoAOD:
+    v15: /DYto2Mu-4Jets_Bin-MLL-10to50_TuneCP5_13p6TeV_madgraphMLM-pythia8/RunIII2024Summer24NanoAODv15-150X_mcRun3_2024_realistic_v2-v2/NANOAODSIM
+
+DYto2Tau_MLL_10to50_madgraphMLM:
+  crossSection: DYto2L_M_10to50_amcatnloFXFX_singleFlavor
+  generator: madgraphMLM
+  nanoAOD:
+    v15: /DYto2Tau-4Jets_Bin-MLL-10to50_TuneCP5_13p6TeV_madgraphMLM-pythia8/RunIII2024Summer24NanoAODv15-150X_mcRun3_2024_realistic_v2-v2/NANOAODSIM
+
+
 ############## GGH ##############
 ############## HtoMuMu ##############
 ggZH_Hto2B_Zto2Q:

diff --git a/config/dataset_exceptions.yaml b/config/dataset_exceptions.yaml
@@ -26,6 +26,16 @@
 ^DYto2Tau_M_50_(0|1|2)J_Filtered_amcatnloFXFX:
   - Run3_2024
   - Run3_2025
+
+^DYto2L_MLL_10to50_madgraphMLM:
+  - Run3_2024
+  - Run3_2025
+^DYto2(E|Mu|Tau)_MLL_10to50_madgraphMLM:
+  - Run3_2022
+  - Run3_2022EE
+  - Run3_2023
+  - Run3_2023BPix
+
 # H
 ^GluGluH*.:
   - Run3_2025

diff --git a/config/law.cfg b/config/law.cfg
@@ -4,6 +4,7 @@ Analysis.tasks
 RunKit.grid_helper_tasks
 RunKit.crabLaw
 inference.dhi.tasks
+test.hello_world_task
 
 
 [job]