From 0e32711ddf8fed287de7d88a012e2476df53cacc Mon Sep 17 00:00:00 2001 From: jmcarcell Date: Thu, 19 Sep 2024 15:32:12 +0200 Subject: [PATCH 01/14] Add a tool to merge several podio files into a single one --- tools/CMakeLists.txt | 1 + tools/podio-merge-files | 50 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100755 tools/podio-merge-files diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index c5fa5d4d8..cbacb59a2 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -3,6 +3,7 @@ install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/podio-vis DESTINATION ${CMAKE_INSTALL if(ENABLE_RNTUPLE) install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/podio-ttree-to-rntuple DESTINATION ${CMAKE_INSTALL_BINDIR}) endif() +install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/podio-merge-files DESTINATION ${CMAKE_INSTALL_BINDIR}) # Add a very basic test of podio-vis if(BUILD_TESTING) diff --git a/tools/podio-merge-files b/tools/podio-merge-files new file mode 100755 index 000000000..8d15a281d --- /dev/null +++ b/tools/podio-merge-files @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 +"""podio-merge-files tool to merge any number of podio files into one""" + +import argparse +import podio.root_io +from podio import reading + +parser = argparse.ArgumentParser( + description="Merge any number of podio files into one, can merge TTree and RNTuple files" +) + +parser.add_argument("--output-file", help="name of the output file", required=True) +parser.add_argument("files", nargs="+", help="which files to merge") +parser.add_argument( + "--metadata", + choices=["none", "all", "first"], + default="first", + help="metadata to include in the output file, default: only the one from the first event, other options: all events, none", +) +parser.add_argument("--metadata-category-name", + default="metadata", + help="name of the metadata category in the output file, default: metadata") +args = parser.parse_args() + +all_files = set() +for f in args.files: + if f in all_files: + raise ValueError(f"File {f} is present more than once in the input list") + all_files.add(f) + +root_format = reading._determine_root_format(args.files[0]) +if root_format == reading.RootFileFormat.TTREE: + reader = podio.root_io.Reader(args.files) + writer = podio.root_io.Writer(args.output_file) +elif root_format == reading.RootFileFormat.RNTUPLE: + reader = podio.root_io.RNTupleReader(args.files) + writer = podio.root_io.RNTupleWriter(args.output_file) +else: + raise ValueError(f"Input file {args.files[0]} is not a TTree or RNTuple file") + + +for category in reader.categories: + if category == args.metadata_category_name and args.metadata == "none": + continue + if category == args.metadata_category_name and args.metadata == "first": + all_frames = [reader.get(category)[0]] + else: + all_frames = reader.get(category) + for frame in all_frames: + writer.write_frame(frame, category) From 10373515bc3789d4b34d9eb1f266bada06cc0517 Mon Sep 17 00:00:00 2001 From: jmcarcell Date: Thu, 19 Sep 2024 15:49:06 +0200 Subject: [PATCH 02/14] Fix format --- tools/podio-merge-files | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/podio-merge-files b/tools/podio-merge-files index 8d15a281d..0dc9e45a6 100755 --- a/tools/podio-merge-files +++ b/tools/podio-merge-files @@ -17,9 +17,11 @@ parser.add_argument( default="first", help="metadata to include in the output file, default: only the one from the first event, other options: all events, none", ) -parser.add_argument("--metadata-category-name", - default="metadata", - help="name of the metadata category in the output file, default: metadata") +parser.add_argument( + "--metadata-category-name", + default="metadata", + help="name of the metadata category in the output file, default: metadata", +) args = parser.parse_args() all_files = set() From 6af93d14e3251bb64aa6804821c2d54dfe8f92d6 Mon Sep 17 00:00:00 2001 From: jmcarcell Date: Fri, 20 Sep 2024 21:54:41 +0200 Subject: [PATCH 03/14] Fix format --- tools/podio-merge-files | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/podio-merge-files b/tools/podio-merge-files index 0dc9e45a6..886f4af1c 100755 --- a/tools/podio-merge-files +++ b/tools/podio-merge-files @@ -15,7 +15,8 @@ parser.add_argument( "--metadata", choices=["none", "all", "first"], default="first", - help="metadata to include in the output file, default: only the one from the first event, other options: all events, none", + help="metadata to include in the output file, default: " + "only the one from the first event, other options: all events, none", ) parser.add_argument( "--metadata-category-name", From 0e0fe5c555ad1cf9c1d96f6ed8a764f876b6fe75 Mon Sep 17 00:00:00 2001 From: jmcarcell Date: Fri, 20 Sep 2024 21:55:21 +0200 Subject: [PATCH 04/14] Improve message --- tools/podio-merge-files | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/podio-merge-files b/tools/podio-merge-files index 886f4af1c..645708f2e 100755 --- a/tools/podio-merge-files +++ b/tools/podio-merge-files @@ -16,7 +16,7 @@ parser.add_argument( choices=["none", "all", "first"], default="first", help="metadata to include in the output file, default: " - "only the one from the first event, other options: all events, none", + "only the one from the first file, other options: all files, none", ) parser.add_argument( "--metadata-category-name", From 61fca90e259d75a92fed5004e60b4a59641bc386 Mon Sep 17 00:00:00 2001 From: jmcarcell Date: Mon, 18 Nov 2024 15:33:57 +0100 Subject: [PATCH 05/14] Generate a metadata frame if it doesn't exist --- tools/podio-merge-files | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/tools/podio-merge-files b/tools/podio-merge-files index 645708f2e..c199b8e8c 100755 --- a/tools/podio-merge-files +++ b/tools/podio-merge-files @@ -2,6 +2,8 @@ """podio-merge-files tool to merge any number of podio files into one""" import argparse +import sys +import podio import podio.root_io from podio import reading @@ -41,13 +43,29 @@ elif root_format == reading.RootFileFormat.RNTUPLE: else: raise ValueError(f"Input file {args.files[0]} is not a TTree or RNTuple file") +categories = list(reader.categories) +metadata_available = True +try: + categories.remove(args.metadata_category_name) +except ValueError: + metadata_available = False for category in reader.categories: - if category == args.metadata_category_name and args.metadata == "none": - continue - if category == args.metadata_category_name and args.metadata == "first": - all_frames = [reader.get(category)[0]] - else: - all_frames = reader.get(category) + all_frames = reader.get(category) for frame in all_frames: writer.write_frame(frame, category) + +if args.metadata == "none": + sys.exit(0) + +if not metadata_available: + print(f"Warning: metadata category '{args.metadata_category_name}' not found in input files, it will be created") + all_frames = [podio.Frame()] +else: + if args.metadata == "first": + all_frames = [reader.get(args.metadata_category_name)[0]] + else: + all_frames = reader.get(args.metadata_category_name) +for frame in all_frames: + frame.put_parameter("MergeInputFiles", args.files) + writer.write_frame(frame, args.metadata_category_name) From 7efcdeef10a5e8f34cfb8caa4ddbbee45b4b3dfb Mon Sep 17 00:00:00 2001 From: jmcarcell Date: Mon, 18 Nov 2024 15:34:37 +0100 Subject: [PATCH 06/14] Format with black --- tools/podio-merge-files | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/podio-merge-files b/tools/podio-merge-files index c199b8e8c..5791f2fa6 100755 --- a/tools/podio-merge-files +++ b/tools/podio-merge-files @@ -18,7 +18,7 @@ parser.add_argument( choices=["none", "all", "first"], default="first", help="metadata to include in the output file, default: " - "only the one from the first file, other options: all files, none", + "only the one from the first file, other options: all files, none", ) parser.add_argument( "--metadata-category-name", @@ -59,7 +59,9 @@ if args.metadata == "none": sys.exit(0) if not metadata_available: - print(f"Warning: metadata category '{args.metadata_category_name}' not found in input files, it will be created") + print( + f"Warning: metadata category '{args.metadata_category_name}' not found in input files, it will be created" + ) all_frames = [podio.Frame()] else: if args.metadata == "first": From 02c0ebb16e9b7f06e86bd85fa336692a307bb156 Mon Sep 17 00:00:00 2001 From: jmcarcell Date: Mon, 18 Nov 2024 15:35:48 +0100 Subject: [PATCH 07/14] Add configuration for the metadata parameter name --- tools/podio-merge-files | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tools/podio-merge-files b/tools/podio-merge-files index 5791f2fa6..d7a297042 100755 --- a/tools/podio-merge-files +++ b/tools/podio-merge-files @@ -25,6 +25,11 @@ parser.add_argument( default="metadata", help="name of the metadata category in the output file, default: metadata", ) +parser.add_argument( + "--metadata-parameter-name", + default="MergeInputFiles", + help="name of the metadata category in the output file, default: metadata", +) args = parser.parse_args() all_files = set() @@ -69,5 +74,5 @@ else: else: all_frames = reader.get(args.metadata_category_name) for frame in all_frames: - frame.put_parameter("MergeInputFiles", args.files) + frame.put_parameter(args.metadata_parameter_name, args.files) writer.write_frame(frame, args.metadata_category_name) From aec7f7ff508f160f87ff7d1270c53d3cfac78019 Mon Sep 17 00:00:00 2001 From: jmcarcell Date: Mon, 18 Nov 2024 15:45:25 +0100 Subject: [PATCH 08/14] Fix pre-commit --- tools/podio-merge-files | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/podio-merge-files b/tools/podio-merge-files index d7a297042..2f22902f1 100755 --- a/tools/podio-merge-files +++ b/tools/podio-merge-files @@ -65,7 +65,8 @@ if args.metadata == "none": if not metadata_available: print( - f"Warning: metadata category '{args.metadata_category_name}' not found in input files, it will be created" + f"Warning: metadata category '{args.metadata_category_name}'" + " not found in input files, it will be created" ) all_frames = [podio.Frame()] else: From 926812f28139f9e8a5b44bfe5e6e478fe8171a18 Mon Sep 17 00:00:00 2001 From: jmcarcell Date: Mon, 18 Nov 2024 18:20:23 +0100 Subject: [PATCH 09/14] Fix pre-commit --- tools/podio-merge-files | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/podio-merge-files b/tools/podio-merge-files index 2f22902f1..483c9ba36 100755 --- a/tools/podio-merge-files +++ b/tools/podio-merge-files @@ -38,32 +38,33 @@ for f in args.files: raise ValueError(f"File {f} is present more than once in the input list") all_files.add(f) -root_format = reading._determine_root_format(args.files[0]) -if root_format == reading.RootFileFormat.TTREE: +ROOT_FORMAT = reading._determine_root_format(args.files[0]) # pylint: disable=protected-access +if ROOT_FORMAT == reading.RootFileFormat.TTREE: reader = podio.root_io.Reader(args.files) writer = podio.root_io.Writer(args.output_file) -elif root_format == reading.RootFileFormat.RNTUPLE: +elif ROOT_FORMAT == reading.RootFileFormat.RNTUPLE: reader = podio.root_io.RNTupleReader(args.files) writer = podio.root_io.RNTupleWriter(args.output_file) else: raise ValueError(f"Input file {args.files[0]} is not a TTree or RNTuple file") categories = list(reader.categories) -metadata_available = True +is_metadata_available = True # pylint: disable=invalid-name try: categories.remove(args.metadata_category_name) except ValueError: - metadata_available = False + is_metadata_available = False for category in reader.categories: all_frames = reader.get(category) + print(all_frames, len(all_frames)) for frame in all_frames: writer.write_frame(frame, category) if args.metadata == "none": sys.exit(0) -if not metadata_available: +if not is_metadata_available: print( f"Warning: metadata category '{args.metadata_category_name}'" " not found in input files, it will be created" From 74e55d6dafd61aed174aad353bc115325a9cac12 Mon Sep 17 00:00:00 2001 From: jmcarcell Date: Mon, 18 Nov 2024 18:35:05 +0100 Subject: [PATCH 10/14] Disable pylint check --- tools/podio-merge-files | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/podio-merge-files b/tools/podio-merge-files index 483c9ba36..112133884 100755 --- a/tools/podio-merge-files +++ b/tools/podio-merge-files @@ -53,7 +53,7 @@ is_metadata_available = True # pylint: disable=invalid-name try: categories.remove(args.metadata_category_name) except ValueError: - is_metadata_available = False + is_metadata_available = False # pylint: disable=invalid-name for category in reader.categories: all_frames = reader.get(category) From 5bd511b56dc6a7f8f27706123f1f8b68ce499395 Mon Sep 17 00:00:00 2001 From: jmcarcell Date: Tue, 19 Nov 2024 09:44:58 +0100 Subject: [PATCH 11/14] Hardcode the metadata parameters --- tools/podio-merge-files | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/tools/podio-merge-files b/tools/podio-merge-files index 112133884..9c85ac42f 100755 --- a/tools/podio-merge-files +++ b/tools/podio-merge-files @@ -20,16 +20,6 @@ parser.add_argument( help="metadata to include in the output file, default: " "only the one from the first file, other options: all files, none", ) -parser.add_argument( - "--metadata-category-name", - default="metadata", - help="name of the metadata category in the output file, default: metadata", -) -parser.add_argument( - "--metadata-parameter-name", - default="MergeInputFiles", - help="name of the metadata category in the output file, default: metadata", -) args = parser.parse_args() all_files = set() @@ -51,13 +41,12 @@ else: categories = list(reader.categories) is_metadata_available = True # pylint: disable=invalid-name try: - categories.remove(args.metadata_category_name) + categories.remove("metadata") except ValueError: is_metadata_available = False # pylint: disable=invalid-name -for category in reader.categories: +for category in categories: all_frames = reader.get(category) - print(all_frames, len(all_frames)) for frame in all_frames: writer.write_frame(frame, category) @@ -66,15 +55,15 @@ if args.metadata == "none": if not is_metadata_available: print( - f"Warning: metadata category '{args.metadata_category_name}'" + f"Warning: metadata category 'metadata'" " not found in input files, it will be created" ) all_frames = [podio.Frame()] else: if args.metadata == "first": - all_frames = [reader.get(args.metadata_category_name)[0]] + all_frames = [reader.get("metadata")[0]] else: - all_frames = reader.get(args.metadata_category_name) + all_frames = reader.get("metadata") for frame in all_frames: - frame.put_parameter(args.metadata_parameter_name, args.files) - writer.write_frame(frame, args.metadata_category_name) + frame.put_parameter("MergedInputFiles", args.files) + writer.write_frame(frame, "metadata") From 5fddc2ad399e0b1ead8aefa47f0c2ca34d8ff9f0 Mon Sep 17 00:00:00 2001 From: jmcarcell Date: Tue, 19 Nov 2024 09:50:10 +0100 Subject: [PATCH 12/14] Add a comment --- tools/podio-merge-files | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/podio-merge-files b/tools/podio-merge-files index 9c85ac42f..66334250c 100755 --- a/tools/podio-merge-files +++ b/tools/podio-merge-files @@ -41,6 +41,7 @@ else: categories = list(reader.categories) is_metadata_available = True # pylint: disable=invalid-name try: + # All frames will be copied as they are except the metadata ones categories.remove("metadata") except ValueError: is_metadata_available = False # pylint: disable=invalid-name From 2ffa8c8af1d826b6b1403c0aad79ef9c3f4a3cb0 Mon Sep 17 00:00:00 2001 From: jmcarcell Date: Tue, 19 Nov 2024 10:09:10 +0100 Subject: [PATCH 13/14] Fix f-string --- tools/podio-merge-files | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/podio-merge-files b/tools/podio-merge-files index 66334250c..de69a69c0 100755 --- a/tools/podio-merge-files +++ b/tools/podio-merge-files @@ -56,7 +56,7 @@ if args.metadata == "none": if not is_metadata_available: print( - f"Warning: metadata category 'metadata'" + "Warning: metadata category 'metadata'" " not found in input files, it will be created" ) all_frames = [podio.Frame()] From 977c75b407d5134dcd21f9d444873a4a4d05bd77 Mon Sep 17 00:00:00 2001 From: jmcarcell Date: Tue, 19 Nov 2024 20:55:57 +0100 Subject: [PATCH 14/14] Fix pre-commit --- tools/podio-merge-files | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tools/podio-merge-files b/tools/podio-merge-files index de69a69c0..4b313b78b 100755 --- a/tools/podio-merge-files +++ b/tools/podio-merge-files @@ -55,10 +55,7 @@ if args.metadata == "none": sys.exit(0) if not is_metadata_available: - print( - "Warning: metadata category 'metadata'" - " not found in input files, it will be created" - ) + print("Warning: metadata category 'metadata' not found in the input files, it will be created") all_frames = [podio.Frame()] else: if args.metadata == "first":