Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
677 changes: 500 additions & 177 deletions AnaProd/AnaTupleFileList.py

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions AnaProd/MergeAnaTuples.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ def mergeAnaTuples(
input_reports,
input_roots,
root_outputs,
runs,
compression_algo="LZMA",
compression_level=9,
):
Expand Down Expand Up @@ -216,6 +217,9 @@ def mergeAnaTuples(
columns, _ = getColumns(df)
if unc_source == central:
if is_data:
if len(runs) > 0:
filter_str = " || ".join([f"run == {r}" for r in runs])
df = df.Filter(filter_str, "RunFilter")
event_filter = ROOT.EventDuplicateFilter()
df = event_filter.apply(
ROOT.RDF.AsRNode(df),
Expand Down Expand Up @@ -288,6 +292,7 @@ def mergeAnaTuples(
parser.add_argument("--compression-algo", type=str, default="LZMA")
parser.add_argument("--is-data", action="store_true")
parser.add_argument("--LAWrunVersion", required=True, type=str)
parser.add_argument("--runs", required=False, nargs="+", type=int, default=[])
args = parser.parse_args()

setup = Setup.getGlobal(
Expand All @@ -313,6 +318,7 @@ def mergeAnaTuples(
input_reports=reports,
input_roots=args.input_roots,
root_outputs=args.root_outputs,
runs=args.runs,
compression_algo=args.compression_algo,
compression_level=args.compression_level,
)
50 changes: 35 additions & 15 deletions AnaProd/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,24 +49,31 @@ def run(self):
pattern = pattern_dict.get(nano_version, r".*\.root$")
input_files = []
inactive_files = []
empty_files = []
for file in fs_nanoAOD.listdir(folder_name):
if not re.match(pattern, file):
continue
file_path = os.path.join(folder_name, file) if include_folder_name else file
if hasattr(fs_nanoAOD, "file_interface") and hasattr(
fs_nanoAOD.file_interface, "is_available"
):
if not fs_nanoAOD.file_interface.is_available(
folder_name, file, verbose=1
):
if ignore_missing:
print(
f"{file_path}: will be ignored because no sites are found."
)
inactive_files.append(file_path)
if hasattr(fs_nanoAOD, "file_interface"):

if hasattr(fs_nanoAOD.file_interface, "is_available"):
if not fs_nanoAOD.file_interface.is_available(
folder_name, file, verbose=1
):
if ignore_missing:
print(
f"{file_path}: will be ignored because no sites are found."
)
inactive_files.append(file_path)
continue
else:
raise RuntimeError(f"No sites found for {file_path}")
if hasattr(fs_nanoAOD.file_interface, "n_events"):
n_events = fs_nanoAOD.file_interface.n_events(folder_name, file)
if n_events == 0:
print(f"{file_path}: will be ignored because it has 0 events.")
empty_files.append(file_path)
continue
else:
raise RuntimeError(f"No sites found for {file_path}")
input_files.append(file_path)

if len(input_files) == 0:
Expand All @@ -76,6 +83,7 @@ def run(self):
output = {
"input_files": input_files,
"inactive_files": inactive_files,
"empty_files": empty_files,
}
with self.output().localize("w") as out_local_file:
with open(out_local_file.abspath, "w") as f:
Expand Down Expand Up @@ -427,7 +435,12 @@ def run(self):
nEventsPerFile = nEventsPerFile.get(process_group, 100_000)
is_data = process_group == "data"

result = CreateMergePlan(self.setup, local_inputs, nEventsPerFile, is_data)
result = CreateMergePlan(
setup=self.setup,
local_inputs=local_inputs,
n_events_per_file=nEventsPerFile,
is_data=is_data,
)

for output_name, output_remote in self.output().items():
output_path_tmp = os.path.join(job_home, f"{output_name}_tmp.json")
Expand All @@ -449,7 +462,7 @@ def requires(self):

def output(self):
dataset_name, process_group = self.branch_data
return self.local_target(self.get_output_path(dataset_name, "plan"))
return self.local_target(f"{dataset_name}.json")

def run(self):
with self.input()["plan"].localize("r") as input_local:
Expand Down Expand Up @@ -485,6 +498,7 @@ def workflow_requires(self):
_,
_,
_,
_,
) in self.branch_map.items():
branch_set.add(ds_branch)
branch_set.update(dataset_dependencies.values())
Expand All @@ -508,6 +522,7 @@ def requires(self):
input_file_list,
_,
skip_future_tasks,
runs,
) = self.branch_data
anaTuple_branch_map = AnaTupleFileTask.req(
self, branch=-1, branches=()
Expand Down Expand Up @@ -597,6 +612,7 @@ def create_branch_map(self):
input_file_list = this_dict["inputs"]
output_file_list = this_dict["outputs"]
skip_future_tasks = this_dict["n_events"] == 0
runs = this_dict.get("runs", [])
branches[nBranch] = (
dataset_name,
process_group,
Expand All @@ -605,6 +621,7 @@ def create_branch_map(self):
input_file_list,
output_file_list,
skip_future_tasks,
runs,
)
nBranch += 1
return branches
Expand Down Expand Up @@ -637,6 +654,7 @@ def output(self):
input_file_list,
output_file_list,
skip_future_tasks,
runs,
) = self.branch_data
output_dir = os.path.join(self.version, "AnaTuples", self.period, dataset_name)
outputs = [os.path.join(output_dir, out_file) for out_file in output_file_list]
Expand All @@ -653,6 +671,7 @@ def run(self):
input_file_list,
output_file_list,
skip_future_tasks,
runs,
) = self.branch_data
is_data = process_group == "data"
job_home, remove_job_home = self.law_job_home()
Expand Down Expand Up @@ -692,6 +711,7 @@ def run(self):
input_reports=reports,
input_roots=local_root_inputs,
root_outputs=tmpFiles,
runs=runs,
)

for outFile, tmpFile in zip(self.output(), tmpFiles):
Expand Down
1 change: 1 addition & 0 deletions Analysis/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ def create_branch_map(self):
input_file_list,
output_file_list,
skip_future_tasks,
runs,
) in anaProd_branch_map.items():
if skip_future_tasks:
continue
Expand Down
2 changes: 1 addition & 1 deletion RunKit
Submodule RunKit updated 1 files
+22 −7 law_das.py
24 changes: 23 additions & 1 deletion bootstrap.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,28 @@
#!/usr/bin/env bash

# Job bootstrap entry point: optionally obtains a run token from a token
# server and then sources the analysis environment (env.sh).
# The "{{...}}" values are template placeholders -- presumably substituted by
# the job submission tooling before this script runs; TODO confirm.
# Returns non-zero if the token helper script is missing or fails.
action() {
# NOTE(review): env.sh is also sourced at the end of this function via
# ${analysis_path}; this earlier source may be the pre-change line of the
# diff rather than part of the final script -- confirm.
source "{{analysis_path}}/env.sh"
local run_token_server_host="{{run_token_server_host}}"
local run_token_server_port="{{run_token_server_port}}"
local analysis_path="{{analysis_path}}"

# Only request a run token when both the server host and port are non-empty.
if [ -n "${run_token_server_host}" ] && [ -n "${run_token_server_port}" ]; then
local get_run_token_script
# Take the first file matching get_run_token*.py in LAW_JOB_INIT_DIR; ls
# errors are discarded, so no match leaves the variable empty.
get_run_token_script=$(ls "${LAW_JOB_INIT_DIR}"/get_run_token*.py 2>/dev/null | head -1)
if [ -z "${get_run_token_script}" ]; then
echo "ERROR: get_run_token.py not found in ${LAW_JOB_INIT_DIR}"
return 1
fi
# Run the helper with the server address and the analysis path.
python3 "${get_run_token_script}" \
--server "${run_token_server_host}" \
--port "${run_token_server_port}" \
--path "${analysis_path}"
# Capture the helper's exit status and abort the bootstrap with the same code
# on failure, before the environment is sourced below.
local rc=$?
if [ ${rc} -ne 0 ]; then
echo "ERROR: failed to obtain run token (exit code ${rc}), aborting."
return ${rc}
fi
fi

# Set up the analysis environment for the job.
source "${analysis_path}/env.sh"
}
action
7 changes: 7 additions & 0 deletions config/Run3_2022/datasets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -736,6 +736,13 @@ DYto2Mu_M_50_PTLL_600_2J_amcatnloFXFX:
# fileNamePattern:
# HLepRare: ^nanoTauTau_\d+\.root$

#### DY LO ####
DYto2L_MLL_10to50_madgraphMLM:
crossSection: DYto2L_M_10to50_amcatnloFXFX_allFlavors
generator: madgraphMLM
nanoAOD:
v12: /DYto2L-4Jets_MLL-10to50_TuneCP5_13p6TeV_madgraphMLM-pythia8/Run3Summer22NanoAODv12-130X_mcRun3_2022_realistic_v5-v2/NANOAODSIM

############## H ##############
GluGluHto2Tau_M125:
crossSection: GluGluHToTauTau_M125
Expand Down
8 changes: 8 additions & 0 deletions config/Run3_2022EE/datasets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -754,6 +754,14 @@ DYto2Tau_MLL_6000_powheg:
nanoAOD:
v12: /DYto2Tau_MLL-6000_TuneCP5_13p6TeV_powheg-pythia8/Run3Summer22EENanoAODv12-130X_mcRun3_2022_realistic_postEE_v6-v2/NANOAODSIM

#### DY LO ####
DYto2L_MLL_10to50_madgraphMLM:
crossSection: DYto2L_M_10to50_amcatnloFXFX_allFlavors
generator: madgraphMLM
nanoAOD:
v12: /DYto2L-4Jets_MLL-10to50_TuneCP5_13p6TeV_madgraphMLM-pythia8/Run3Summer22EENanoAODv12-130X_mcRun3_2022_realistic_postEE_v6-v4/NANOAODSIM


############## H ##############
GluGluHto2B_M125:
crossSection: GluGluHToBB_M125
Expand Down
9 changes: 9 additions & 0 deletions config/Run3_2023/datasets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -959,6 +959,15 @@ DYto2Tau_MLL_6000_powheg:
miniAOD: /DYto2Tau_MLL-6000_TuneCP5_13p6TeV_powheg-pythia8/Run3Summer23MiniAODv4-130X_mcRun3_2023_realistic_v14-v2/MINIAODSIM
nanoAOD:
v12: /DYto2Tau_MLL-6000_TuneCP5_13p6TeV_powheg-pythia8/Run3Summer23NanoAODv12-130X_mcRun3_2023_realistic_v14-v2/NANOAODSIM

#### DY LO ####
DYto2L_MLL_10to50_madgraphMLM:
crossSection: DYto2L_M_10to50_amcatnloFXFX_allFlavors
generator: madgraphMLM
nanoAOD:
v12: /DYto2L-4Jets_MLL-10to50_TuneCP5_13p6TeV_madgraphMLM-pythia8/Run3Summer23NanoAODv12-130X_mcRun3_2023_realistic_v14-v4/NANOAODSIM


############## ggZH ##############
ggZH_Hto2B_Zto2L:
crossSection: ZH_Hbb_Zll
Expand Down
9 changes: 9 additions & 0 deletions config/Run3_2023BPix/datasets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -873,6 +873,15 @@ DYto2Tau_MLL_6000_powheg:
miniAOD: /DYto2Tau_MLL-6000_TuneCP5_13p6TeV_powheg-pythia8/Run3Summer23BPixMiniAODv4-130X_mcRun3_2023_realistic_postBPix_v2-v3/MINIAODSIM
nanoAOD:
v12: /DYto2Tau_MLL-6000_TuneCP5_13p6TeV_powheg-pythia8/Run3Summer23BPixNanoAODv12-130X_mcRun3_2023_realistic_postBPix_v2-v3/NANOAODSIM

#### DY LO ####
DYto2L_MLL_10to50_madgraphMLM:
crossSection: DYto2L_M_10to50_amcatnloFXFX_allFlavors
generator: madgraphMLM
nanoAOD:
v12: /DYto2L-4Jets_MLL-10to50_TuneCP5_13p6TeV_madgraphMLM-pythia8/Run3Summer23BPixNanoAODv12-130X_mcRun3_2023_realistic_postBPix_v2-v4/NANOAODSIM


############## GGH ##############
############## Htobb ##############
ggZH_Hto2B_Zto2L:
Expand Down
20 changes: 20 additions & 0 deletions config/Run3_2024/datasets.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1008,6 +1008,26 @@ DYto2Tau_MLL_800to1500_powheg:
nanoAOD:
v15: /DYto2Tau_Bin-MLL-800to1500_TuneCP5_13p6TeV_powheg-pythia8/RunIII2024Summer24NanoAODv15-150X_mcRun3_2024_realistic_v2-v2/NANOAODSIM

#### DY LO ####
DYto2E_MLL_10to50_madgraphMLM:
crossSection: DYto2L_M_10to50_amcatnloFXFX_singleFlavor
generator: madgraphMLM
nanoAOD:
v15: /DYto2E-4Jets_Bin-MLL-10to50_TuneCP5_13p6TeV_madgraphMLM-pythia8/RunIII2024Summer24NanoAODv15-150X_mcRun3_2024_realistic_v2-v2/NANOAODSIM

DYto2Mu_MLL_10to50_madgraphMLM:
crossSection: DYto2L_M_10to50_amcatnloFXFX_singleFlavor
generator: madgraphMLM
nanoAOD:
v15: /DYto2Mu-4Jets_Bin-MLL-10to50_TuneCP5_13p6TeV_madgraphMLM-pythia8/RunIII2024Summer24NanoAODv15-150X_mcRun3_2024_realistic_v2-v2/NANOAODSIM

DYto2Tau_MLL_10to50_madgraphMLM:
crossSection: DYto2L_M_10to50_amcatnloFXFX_singleFlavor
generator: madgraphMLM
nanoAOD:
v15: /DYto2Tau-4Jets_Bin-MLL-10to50_TuneCP5_13p6TeV_madgraphMLM-pythia8/RunIII2024Summer24NanoAODv15-150X_mcRun3_2024_realistic_v2-v2/NANOAODSIM
Comment thread
kandrosov marked this conversation as resolved.


############## GGH ##############
############## HtoMuMu ##############
ggZH_Hto2B_Zto2Q:
Expand Down
10 changes: 10 additions & 0 deletions config/dataset_exceptions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,16 @@
^DYto2Tau_M_50_(0|1|2)J_Filtered_amcatnloFXFX:
- Run3_2024
- Run3_2025

^DYto2L_MLL_10to50_madgraphMLM:
- Run3_2024
- Run3_2025
^DYto2(E|Mu|Tau)_MLL_10to50_madgraphMLM:
- Run3_2022
- Run3_2022EE
- Run3_2023
- Run3_2023BPix
Comment thread
kandrosov marked this conversation as resolved.

# H
^GluGluH*.:
- Run3_2025
Expand Down
1 change: 1 addition & 0 deletions config/law.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Analysis.tasks
RunKit.grid_helper_tasks
RunKit.crabLaw
inference.dhi.tasks
test.hello_world_task


[job]
Expand Down
Loading
Loading