From 33db35328782b1a8a399e7ba988e69755b84f365 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Thu, 6 Jun 2024 21:21:17 +0200 Subject: [PATCH 01/18] Move sortAlphabetically to public header --- include/podio/utilities/MiscHelpers.h | 32 +++++++++++++++++++++++++++ src/RNTupleWriter.cc | 2 +- src/ROOTWriter.cc | 2 +- src/rootUtils.h | 20 +---------------- 4 files changed, 35 insertions(+), 21 deletions(-) create mode 100644 include/podio/utilities/MiscHelpers.h diff --git a/include/podio/utilities/MiscHelpers.h b/include/podio/utilities/MiscHelpers.h new file mode 100644 index 000000000..522f49366 --- /dev/null +++ b/include/podio/utilities/MiscHelpers.h @@ -0,0 +1,32 @@ +#ifndef PODIO_UTILITIES_MISCHELPERS_H +#define PODIO_UTILITIES_MISCHELPERS_H + +#include +#include +#include + +namespace podio::utils { + +/// Sort the input vector of strings alphabetically, case insensitive. +/// +/// @param strings The strings that should be sorted alphabetically +/// +/// @returns A vector of strings sorted alphabetically, case insensitive +inline std::vector sortAlphabeticaly(std::vector strings) { + // Obviously there is no tolower(std::string) in c++, so this is slightly more + // involved and we make use of the fact that lexicographical_compare works on + // ranges and the fact that we can feed it a dedicated comparison function, + // where we convert the strings to lower case char-by-char. The alternative is + // to make string copies inside the first lambda, transform them to lowercase + // and then use operator< of std::string, which would be effectively + // hand-writing what is happening below. 
+ std::ranges::sort(strings, [](const auto& lhs, const auto& rhs) { + return std::lexicographical_compare( + lhs.begin(), lhs.end(), rhs.begin(), rhs.end(), + [](const auto& cl, const auto& cr) { return std::tolower(cl) < std::tolower(cr); }); + }); + return strings; +} +} // namespace podio::utils + +#endif // PODIO_UTILITIES_MISCHELPERS_H diff --git a/src/RNTupleWriter.cc b/src/RNTupleWriter.cc index 4027079c2..b34e21446 100644 --- a/src/RNTupleWriter.cc +++ b/src/RNTupleWriter.cc @@ -58,7 +58,7 @@ void RNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& cat const bool new_category = (catInfo.writer == nullptr); if (new_category) { // This is the minimal information that we need for now - catInfo.names = root_utils::sortAlphabeticaly(collsToWrite); + catInfo.names = podio::utils::sortAlphabeticaly(collsToWrite); } std::vector collections; diff --git a/src/ROOTWriter.cc b/src/ROOTWriter.cc index 21e2998db..c4a3bc30f 100644 --- a/src/ROOTWriter.cc +++ b/src/ROOTWriter.cc @@ -33,7 +33,7 @@ void ROOTWriter::writeFrame(const podio::Frame& frame, const std::string& catego // been initialized if (catInfo.tree == nullptr) { catInfo.idTable = frame.getCollectionIDTableForWrite(); - catInfo.collsToWrite = root_utils::sortAlphabeticaly(collsToWrite); + catInfo.collsToWrite = podio::utils::sortAlphabeticaly(collsToWrite); catInfo.tree = new TTree(category.c_str(), (category + " data tree").c_str()); catInfo.tree->SetDirectory(m_file.get()); } diff --git a/src/rootUtils.h b/src/rootUtils.h index b022500b6..b58fc6aea 100644 --- a/src/rootUtils.h +++ b/src/rootUtils.h @@ -2,6 +2,7 @@ #define PODIO_ROOT_UTILS_H // NOLINT(llvm-header-guard): internal headers confuse clang-tidy #include "podio/CollectionIDTable.h" +#include "podio/utilities/MiscHelpers.h" #include "podio/utilities/RootHelpers.h" #include "podio/utilities/TypeHelpers.h" @@ -280,25 +281,6 @@ inline auto reconstructCollectionInfo(TTree* eventTree, podio::CollectionIDTable return collInfo; } -/** - 
* Sort the input vector of strings alphabetically, case insensitive. - */ -inline std::vector sortAlphabeticaly(std::vector strings) { - // Obviously there is no tolower(std::string) in c++, so this is slightly more - // involved and we make use of the fact that lexicographical_compare works on - // ranges and the fact that we can feed it a dedicated comparison function, - // where we convert the strings to lower case char-by-char. The alternative is - // to make string copies inside the first lambda, transform them to lowercase - // and then use operator< of std::string, which would be effectively - // hand-writing what is happening below. - std::ranges::sort(strings, [](const auto& lhs, const auto& rhs) { - return std::lexicographical_compare( - lhs.begin(), lhs.end(), rhs.begin(), rhs.end(), - [](const auto& cl, const auto& cr) { return std::tolower(cl) < std::tolower(cr); }); - }); - return strings; -} - /** * Check whether existingColls and candidateColls both contain the same * collection names. Returns false if the two vectors differ in content. 
Inputs From 88f3503941e3a9d3c2047ca0049f839a76b928f9 Mon Sep 17 00:00:00 2001 From: tmadlener Date: Thu, 6 Jun 2024 21:56:14 +0200 Subject: [PATCH 02/18] Implement podio-dump-tool in c++ and wrap it in thin script Drop-in replacement of podio-dump python implementation which gets moved to podio-dump.py because it supports dumping the pre-release legacy files --- CMakeLists.txt | 3 + tools/CMakeLists.txt | 16 ++- tools/json-to-yaml | 15 ++ tools/podio-dump | 239 +++++-------------------------- tools/podio-dump.py | 213 ++++++++++++++++++++++++++++ tools/src/argparseUtils.h | 54 +++++++ tools/src/podio-dump-tool.cpp | 258 ++++++++++++++++++++++++++++++++++ tools/src/tabulate.h | 58 ++++++++ 8 files changed, 651 insertions(+), 205 deletions(-) create mode 100755 tools/json-to-yaml create mode 100755 tools/podio-dump.py create mode 100644 tools/src/argparseUtils.h create mode 100644 tools/src/podio-dump-tool.cpp create mode 100644 tools/src/tabulate.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 0ff9ea0d0..8830336ba 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -190,6 +190,9 @@ if(BUILD_TESTING) include(cmake/podioTest.cmake) add_subdirectory(tests) endif() + +find_package(fmt 9 REQUIRED) + add_subdirectory(tools) add_subdirectory(python) diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index cbacb59a2..7d4cf2232 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -1,4 +1,10 @@ +add_executable(podio-dump-tool src/podio-dump-tool.cpp) +target_link_libraries(podio-dump-tool PRIVATE podio::podio podio::podioIO fmt::fmt) + +install(TARGETS podio-dump-tool EXPORT podioTargets DESTINATION ${CMAKE_INSTALL_BINDIR}) + install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/podio-dump DESTINATION ${CMAKE_INSTALL_BINDIR}) +install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/json-to-yaml DESTINATION ${CMAKE_INSTALL_BINDIR}) install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/podio-vis DESTINATION ${CMAKE_INSTALL_BINDIR}) if(ENABLE_RNTUPLE) install(PROGRAMS 
${CMAKE_CURRENT_LIST_DIR}/podio-ttree-to-rntuple DESTINATION ${CMAKE_INSTALL_BINDIR}) @@ -37,6 +43,10 @@ endif() # Add a very basic tests here to make sure that podio-dump at least runs if(BUILD_TESTING) + # Copy these two files into the build tree to be able to test things + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/json-to-yaml ${CMAKE_CURRENT_BINARY_DIR}/json-to-yaml COPYONLY) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/podio-dump ${CMAKE_CURRENT_BINARY_DIR}/podio-dump COPYONLY) + # Helper function for easily creating "tests" that simply execute podio-dump # with different arguments. Not crashing is considered success. # @@ -44,11 +54,11 @@ if(BUILD_TESTING) # name the name of the test # depends_on the target name of the test that produces the required input file function(CREATE_DUMP_TEST name depends_on) - add_test(NAME ${name} COMMAND ./podio-dump ${ARGN}) + add_test(NAME ${name} COMMAND ${CMAKE_CURRENT_BINARY_DIR}/podio-dump ${ARGN}) PODIO_SET_TEST_ENV(${name}) set_tests_properties(${name} PROPERTIES - WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) if (depends_on) set_tests_properties(${name} PROPERTIES @@ -69,7 +79,7 @@ if(BUILD_TESTING) set(_name podio-dump-legacy_${name}_${version}) ExternalData_Add_Test(legacy_test_cases NAME ${_name} - COMMAND ./podio-dump ${ARGN} DATA{${PROJECT_SOURCE_DIR}/tests/input_files/${input_file}} + COMMAND ./podio-dump.py ${ARGN} DATA{${PROJECT_SOURCE_DIR}/tests/input_files/${input_file}} ) PODIO_SET_TEST_ENV(${_name}) diff --git a/tools/json-to-yaml b/tools/json-to-yaml new file mode 100755 index 000000000..a9fcbe77b --- /dev/null +++ b/tools/json-to-yaml @@ -0,0 +1,15 @@ +#!/usr/bin/env python3 + +import sys +import json +import yaml + + +def main(): + input_data = sys.stdin.read() + model_def = json.loads(input_data) + print(yaml.dump(model_def, sort_keys=False, default_flow_style=False)) + + +if __name__ == "__main__": + main() diff --git a/tools/podio-dump b/tools/podio-dump 
index ea8143762..04db2ec6b 100755 --- a/tools/podio-dump +++ b/tools/podio-dump @@ -1,202 +1,37 @@ -#!/usr/bin/env python3 -"""podio-dump tool to dump contents of podio files""" - -import sys -import json -import yaml - -from tabulate import tabulate - -from podio_version import __version__ - - -def print_general_info(reader, filename): - """Print an overview of the file contents at the very beginning. - - This prints things like the available categories (and how many entries they - have) as well as the filename, etc. - - Args: - reader (root_io.Reader, sio_io.Reader): An initialized reader - filename (str): The name of the input file - """ - legacy_text = " (this is a legacy file!)" if reader.is_legacy else "" - print( - f"input file: {filename}{legacy_text}\n" - " (written with podio version: " - f"{reader.current_file_version()})\n" - ) - - print("datamodel model definitions stored in this file: ") - for edm_name in reader.datamodel_definitions: - try: - edm_version = reader.current_file_version(edm_name) - print(f" - {edm_name} ({edm_version})") - except KeyError: - print(f" - {edm_name}") - - print() - print("Frame categories in this file:") - cats = [(c, len(reader.get(c))) for c in reader.categories] - print(tabulate(cats, headers=["Name", "Entries"])) - - -def print_frame_detailed(frame): - """Print the Frame in all its glory, dumping every collection via print - - Args: - frame (podio.Frame): The frame to print - """ - print("Collections:") - for name in sorted(frame.getAvailableCollections(), key=str.casefold): - coll = frame.get(name) - print(name, flush=True) - coll.print() - print(flush=True) - - print("\nParameters:", flush=True) - frame.get_parameters().print() - print(flush=True) - - -def print_frame_overview(frame): - """Print a Frame overview, dumping just collection names, types and sizes - - Args: - frame (podio.Frame): The frame to print - """ - rows = [] - for name in sorted(frame.getAvailableCollections(), key=str.casefold): - coll = 
frame.get(name) - rows.append((name, coll.getValueTypeName().data(), len(coll), f"{coll.getID():0>8x}")) - print("Collections:") - print(tabulate(rows, headers=["Name", "ValueType", "Size", "ID"])) - - rows = [] - for name in sorted(frame.parameters, key=str.casefold): - for par_type, n_pars in frame.get_param_info(name).items(): - rows.append([name, par_type, n_pars]) - print("\nParameters:") - print(tabulate(rows, headers=["Name", "Type", "Elements"])) - - -def print_frame(frame, cat_name, ientry, detailed): - """Print a Frame. - - Args: - frame (podio.Frame): The frame to print - cat_name (str): The category name - ientry (int): The entry number of this Frame - detailed (bool): Print just an overview or dump the whole contents - """ - print("{:#^82}".format(f" {cat_name}: {ientry} ")) # pylint: disable=consider-using-f-string - - if detailed: - print_frame_detailed(frame) - else: - print_frame_overview(frame) - - # Additional new line before the next entry - print("\n", flush=True) - - -def dump_model(reader, model_name): - """Dump the model in yaml format""" - if model_name not in reader.datamodel_definitions: - print(f"ERROR: Cannot dump model '{model_name}' (not present in file)") - return False - - model_def = json.loads(reader.get_datamodel_definition(model_name)) - print(yaml.dump(model_def, sort_keys=False, default_flow_style=False)) - - return True - - -def main(args): - """Main""" - from podio.reading import get_reader # pylint: disable=import-outside-toplevel - - try: - reader = get_reader(args.inputfile) - except ValueError as err: - print(f"ERROR: Cannot open file '{args.inputfile}': {err}") - sys.exit(1) - - if args.dump_edm is not None: - if dump_model(reader, args.dump_edm): - sys.exit(0) - else: - sys.exit(1) - - print_general_info(reader, args.inputfile) - if args.category not in reader.categories: - print(f"ERROR: Cannot print category '{args.category}' (not present in file)") - sys.exit(1) - - frames = reader.get(args.category) - for ient in 
args.entries: - try: - print_frame(frames[ient], args.category, ient, args.detailed) - except IndexError: - print(f'WARNING: Entry no. {ient} in "{args.category}" not present in the file!') - - -def parse_entry_range(ent_string): - """Parse which entries to print""" - try: - return [int(ent_string)] - except ValueError: - pass - - try: - return [int(i) for i in ent_string.split(",")] - except ValueError: - pass - - try: - first, last = [int(i) for i in ent_string.split(":")] - return list(range(first, last + 1)) - except ValueError: - pass - - raise argparse.ArgumentTypeError(f"'{ent_string}' cannot be parsed into a list of entries") - - -if __name__ == "__main__": - import argparse - - # pylint: disable=invalid-name # before 2.5.0 pylint is too strict with the naming here - parser = argparse.ArgumentParser(description="Dump contents of a podio file to stdout") - parser.add_argument("inputfile", help="Name of the file to dump content from") - parser.add_argument( - "-c", - "--category", - help="Which Frame category to dump", - default="events", - type=str, - ) - parser.add_argument( - "-e", - "--entries", - help="Which entries to print. A single number, comma separated list of numbers" - ' or "first:last" for an inclusive range of entries. 
Defaults to the first entry.', - type=parse_entry_range, - default=[0], - ) - parser.add_argument( - "-d", - "--detailed", - help="Dump the full contents not just the collection info", - action="store_true", - default=False, - ) - parser.add_argument( - "--dump-edm", - help="Dump the specified EDM definition from the file in yaml format", - type=str, - default=None, - ) - parser.add_argument("--version", action="version", version=f"podio {__version__}") - - clargs = parser.parse_args() - main(clargs) +#!/usr/bin/env bash + +# Small wrapper script around the c++ executable that dumps the data and some +# potential post-processing that is easier with python + +set -euo pipefail + +THIS_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" +if ! ls "${THIS_DIR}"/podio-dump-tool > /dev/null 2>&1; then + echo "Could not find podio-dump-tool executable (has it been installed?)" >&2 + exit 1 +fi + +ALL_ARGS=("$@") +DUMP_MODEL=0 + +while [[ "$#" -gt 0 ]]; do + case $1 in + --dump-edm*) + DUMP_MODEL=1 + break + ;; + *) + shift + ;; + esac +done + +if [ ${DUMP_MODEL} = 1 ]; then + if ! ls "${THIS_DIR}"/json-to-yaml > /dev/null 2>&1; then + echo "Could not find the json-to-yaml executable (has it been installed?)" >&2 + exit 1 + fi + "${THIS_DIR}"/podio-dump-tool "${ALL_ARGS[@]}" | "${THIS_DIR}"/json-to-yaml +else + "${THIS_DIR}"/podio-dump-tool "${ALL_ARGS[@]}" +fi diff --git a/tools/podio-dump.py b/tools/podio-dump.py new file mode 100755 index 000000000..6f654c5e2 --- /dev/null +++ b/tools/podio-dump.py @@ -0,0 +1,213 @@ +#!/usr/bin/env python3 +"""podio-dump tool to dump contents of podio files + +NOTE: This is a legacy implementation in python. A newer version implemented in +c++ is available that is much faster. +""" + +import sys +import json +import yaml + +from tabulate import tabulate + +from podio_version import __version__ + + +def print_general_info(reader, filename): + """Print an overview of the file contents at the very beginning. 
+ + This prints things like the available categories (and how many entries they + have) as well as the filename, etc. + + Args: + reader (root_io.Reader, sio_io.Reader): An initialized reader + filename (str): The name of the input file + """ + legacy_text = " (this is a legacy file!)" if reader.is_legacy else "" + print( + f"input file: {filename}{legacy_text}\n" + " (written with podio version: " + f"{reader.current_file_version()})\n" + ) + + print("datamodel model definitions stored in this file: ") + for edm_name in reader.datamodel_definitions: + try: + edm_version = reader.current_file_version(edm_name) + print(f" - {edm_name} ({edm_version})") + except KeyError: + print(f" - {edm_name}") + + print() + print("Frame categories in this file:") + cats = [(c, len(reader.get(c))) for c in reader.categories] + print(tabulate(cats, headers=["Name", "Entries"])) + + +def print_frame_detailed(frame): + """Print the Frame in all its glory, dumping every collection via print + + Args: + frame (podio.Frame): The frame to print + """ + print("Collections:") + for name in sorted(frame.getAvailableCollections(), key=str.casefold): + coll = frame.get(name) + print(name, flush=True) + coll.print() + print(flush=True) + + print("\nParameters:", flush=True) + frame.get_parameters().print() + print(flush=True) + + +def print_frame_overview(frame): + """Print a Frame overview, dumping just collection names, types and sizes + + Args: + frame (podio.Frame): The frame to print + """ + rows = [] + for name in sorted(frame.getAvailableCollections(), key=str.casefold): + coll = frame.get(name) + rows.append((name, coll.getValueTypeName().data(), len(coll), f"{coll.getID():0>8x}")) + print("Collections:") + print(tabulate(rows, headers=["Name", "ValueType", "Size", "ID"])) + + rows = [] + for name in sorted(frame.parameters, key=str.casefold): + for par_type, n_pars in frame.get_param_info(name).items(): + rows.append([name, par_type, n_pars]) + print("\nParameters:") + 
print(tabulate(rows, headers=["Name", "Type", "Elements"])) + + +def print_frame(frame, cat_name, ientry, detailed): + """Print a Frame. + + Args: + frame (podio.Frame): The frame to print + cat_name (str): The category name + ientry (int): The entry number of this Frame + detailed (bool): Print just an overview or dump the whole contents + """ + print("{:#^82}".format(f" {cat_name}: {ientry} ")) # pylint: disable=consider-using-f-string + + if detailed: + print_frame_detailed(frame) + else: + print_frame_overview(frame) + + # Additional new line before the next entry + print("\n", flush=True) + + +def dump_model(reader, model_name): + """Dump the model in yaml format""" + if model_name not in reader.datamodel_definitions: + print(f"ERROR: Cannot dump model '{model_name}' (not present in file)") + return False + + model_def = json.loads(reader.get_datamodel_definition(model_name)) + print(yaml.dump(model_def, sort_keys=False, default_flow_style=False)) + + return True + + +def main(args): + """Main""" + from podio.reading import get_reader # pylint: disable=import-outside-toplevel + + try: + reader = get_reader(args.inputfile) + except ValueError as err: + print(f"ERROR: Cannot open file '{args.inputfile}': {err}") + sys.exit(1) + + if args.dump_edm is not None: + if dump_model(reader, args.dump_edm): + sys.exit(0) + else: + sys.exit(1) + + print_general_info(reader, args.inputfile) + if args.category not in reader.categories: + print(f"ERROR: Cannot print category '{args.category}' (not present in file)") + sys.exit(1) + + frames = reader.get(args.category) + for ient in args.entries: + try: + print_frame(frames[ient], args.category, ient, args.detailed) + except IndexError: + print(f'WARNING: Entry no. 
{ient} in "{args.category}" not present in the file!') + + +def parse_entry_range(ent_string): + """Parse which entries to print""" + try: + return [int(ent_string)] + except ValueError: + pass + + try: + return [int(i) for i in ent_string.split(",")] + except ValueError: + pass + + try: + first, last = [int(i) for i in ent_string.split(":")] + return list(range(first, last + 1)) + except ValueError: + pass + + raise argparse.ArgumentTypeError(f"'{ent_string}' cannot be parsed into a list of entries") + + +if __name__ == "__main__": + import argparse + + _epilog = ( + "NOTE: This is a legacy implementation in python. A newer version based on a c++ implementation is available. " + "That version is much faster, but does not handle pre-release version files." + ) + + # pylint: disable=invalid-name # before 2.5.0 pylint is too strict with the naming here + parser = argparse.ArgumentParser( + description="Dump contents of a podio file to stdout.", epilog=_epilog + ) + parser.add_argument("inputfile", help="Name of the file to dump content from") + parser.add_argument( + "-c", + "--category", + help="Which Frame category to dump", + default="events", + type=str, + ) + parser.add_argument( + "-e", + "--entries", + help="Which entries to print. A single number, comma separated list of numbers" + ' or "first:last" for an inclusive range of entries. 
Defaults to the first entry.', + type=parse_entry_range, + default=[0], + ) + parser.add_argument( + "-d", + "--detailed", + help="Dump the full contents not just the collection info", + action="store_true", + default=False, + ) + parser.add_argument( + "--dump-edm", + help="Dump the specified EDM definition from the file in yaml format", + type=str, + default=None, + ) + parser.add_argument("--version", action="version", version=f"podio {__version__}") + + clargs = parser.parse_args() + main(clargs) diff --git a/tools/src/argparseUtils.h b/tools/src/argparseUtils.h new file mode 100644 index 000000000..7e133012c --- /dev/null +++ b/tools/src/argparseUtils.h @@ -0,0 +1,54 @@ +#ifndef PODIO_TOOLS_ARGPARSEUTILS_H +#define PODIO_TOOLS_ARGPARSEUTILS_H + +#include +#include +#include +#include +#include + +template +auto findFlags(const std::vector& argv, Flags... flags) { + return std::find_if(argv.begin(), argv.end(), [&](const auto& elem) { return ((elem == flags) || ...); }); +} + +inline std::vector splitString(const std::string& str, const char delimiter) { + std::vector tokens; + std::string token; + for (char ch : str) { + if (ch == delimiter) { + if (!token.empty()) { + tokens.push_back(token); + token.clear(); + } + } else { + token += ch; + } + } + if (!token.empty()) { + tokens.push_back(token); + } + return tokens; +} + +inline size_t parseSizeOrExit(const std::string& str) { + const auto parseError = [&str]() { + std::cerr << "'" << str << "' cannot be parsed into an integer number" << std::endl; + std::exit(1); + }; + + try { + std::size_t pos{}; + auto number = std::stoll(str, &pos); + if (pos != str.size()) { + throw std::invalid_argument(""); + } + return number; + } catch (const std::logic_error&) { + parseError(); + } + + return -1; +} + +#endif // PODIO_TOOLS_ARGPARSEUTILS_H diff --git a/tools/src/podio-dump-tool.cpp b/tools/src/podio-dump-tool.cpp new file mode 100644 index 000000000..1a105a41d --- /dev/null +++ 
b/tools/src/podio-dump-tool.cpp @@ -0,0 +1,258 @@ +#include "argparseUtils.h" +#include "tabulate.h" + +#include "podio/Frame.h" +#include "podio/Reader.h" +#include "podio/podioVersion.h" +#include "podio/utilities/MiscHelpers.h" + +#include +#include +#include + +#include +#include +#include +#include +#include + +template <> +struct fmt::formatter : ostream_formatter {}; + +struct ParsedArgs { + std::string inputFile{}; + std::string category{"events"}; + std::vector events = std::vector(1, 0); + std::string dumpEDM{}; + bool detailed{false}; +}; + +constexpr auto usageMsg = R"(usage: podio-dump [-h] [-c CATEGORY] [-e ENTRIES] [-d] [--version] inputfile)"; + +constexpr auto helpMsg = R"( +Dump contents of a podio file to stdout + +positional arguments: + inputfile Name of the file to dump content from + +options: + -h, --help show this help message and exit + -c CATEGORY, --category CATEGORY + Which Frame category to dump + -e ENTRIES, --entries ENTRIES + Which entries to print. A single number, comma separated list of numbers or "first:last" for an inclusive range of entries. Defaults to the first entry. 
+ -d, --detailed Dump the full contents not just the collection info + --dump-edm DUMP_EDM Dump the specified EDM definition from the file in yaml format + --version show program's version number and exit +)"; + +void printUsageAndExit() { + fmt::print(stderr, "{}\n", usageMsg); + std::exit(1); +} + +auto getArgumentValueOrExit(const std::vector& argv, std::vector::const_iterator it) { + const int argc = argv.size(); + const auto index = std::distance(argv.begin(), it); + if (index > argc - 2) { + printUsageAndExit(); + } + return argv[index + 1]; +} + +std::vector parseEventRange(const std::string& evtRange) { + const auto splitRange = splitString(evtRange, ','); + const auto parseError = [&evtRange]() { + fmt::print(stderr, "'{}' cannot be parsed into a list of entries\n", evtRange); + std::exit(1); + }; + + if (splitRange.size() == 1) { + const auto colonSplitRange = splitString(evtRange, ':'); + if (colonSplitRange.size() == 1) { + return {parseSizeOrExit(splitRange[0])}; + } else if (colonSplitRange.size() == 2) { + // we have two numbers signifying an inclusive range + const auto start = parseSizeOrExit(colonSplitRange[0]); + const auto end = parseSizeOrExit(colonSplitRange[1]); + std::vector events(end < start ? 0 : end - start + 1); + std::iota(events.begin(), events.end(), start); + return events; + } else { + parseError(); + } + } else { + std::vector events; + events.reserve(splitRange.size()); + std::transform(splitRange.begin(), splitRange.end(), std::back_inserter(events), + [](const auto& elem) { return parseSizeOrExit(elem); }); + + return events; + } + + parseError(); + return {}; +} + +ParsedArgs parseArgs(std::vector argv) { + // find help or version + if (const auto it = findFlags(argv, "-h", "--help", "--version"); it != argv.end()) { + if (*it == "--version") { + fmt::print("podio {}\n", podio::version::build_version); + } else { + fmt::print("{}\n{}", usageMsg, helpMsg); + } + std::exit(0); + } + + ParsedArgs args; + // detailed flag + if (const auto it = 
findFlags(argv, "-d", "--detailed"); it != argv.end()) { + args.detailed = true; + argv.erase(it); + } + // category + if (const auto it = findFlags(argv, "-c", "--category"); it != argv.end()) { + args.category = getArgumentValueOrExit(argv, it); + argv.erase(it, it + 2); + } + // event range + if (const auto it = findFlags(argv, "-e", "--entries"); it != argv.end()) { + args.events = parseEventRange(getArgumentValueOrExit(argv, it)); + argv.erase(it, it + 2); + } + // dump-edm + if (const auto it = findFlags(argv, "--dump-edm"); it != argv.end()) { + args.dumpEDM = getArgumentValueOrExit(argv, it); + argv.erase(it, it + 2); + } + + if (argv.size() != 1) { + printUsageAndExit(); + } + args.inputFile = argv[0]; + + return args; +} + +template +std::string getTypeString() { + if constexpr (std::is_same_v) { + return "int"; + } else if constexpr (std::is_same_v) { + return "float"; + } else if constexpr (std::is_same_v) { + return "double"; + } else if constexpr (std::is_same_v) { + return "std::string"; + } + + return "unknown"; +} + +template +void getParameterOverview(const podio::Frame& frame, std::vector>& rows) { + const auto typeString = getTypeString(); + for (const auto& parKey : podio::utils::sortAlphabeticaly(frame.getParameterKeys())) { + rows.emplace_back(parKey, typeString, frame.getParameter>(parKey)->size()); + } +} + +void printFrameOverview(const podio::Frame& frame) { + fmt::print("Collections:\n"); + const auto collNames = frame.getAvailableCollections(); + + std::vector> rows; + rows.reserve(collNames.size()); + + for (const auto& name : podio::utils::sortAlphabeticaly(collNames)) { + const auto coll = frame.get(name); + rows.emplace_back(name, coll->getValueTypeName(), coll->size(), fmt::format("{:0>8x}", coll->getID())); + } + printTable(rows, {"Name", "ValueType", "Size", "ID"}); + + fmt::print("\nParameters:\n"); + std::vector> paramRows{}; + getParameterOverview(frame, paramRows); + getParameterOverview(frame, paramRows); + getParameterOverview(frame, paramRows); 
+ getParameterOverview(frame, paramRows); + + printTable(paramRows, {"Name", "Type", "Elements"}); +} + +template +void print_flush(fmt::format_string fmtstr, Args&&... args) { + fmt::print(fmtstr, std::forward(args)...); + std::fflush(stdout); +} + +void printFrameDetailed(const podio::Frame& frame) { + fmt::print("Collections:\n"); + const auto collNames = frame.getAvailableCollections(); + for (const auto& name : podio::utils::sortAlphabeticaly(collNames)) { + const auto coll = frame.get(name); + print_flush("{}\n", name); + coll->print(); + print_flush("\n"); + } + + print_flush("\nParameters:\n"); + frame.getParameters().print(); + print_flush("\n"); +} + +void printGeneralInfo(const podio::Reader& reader, const std::string& filename) { + fmt::print("input file: {}\n", filename); + fmt::print("datamodel model definitions stored in this file: {}\n\n", reader.getAvailableDatamodels()); + + std::vector> rows{}; + for (const auto& cat : reader.getAvailableCategories()) { + rows.emplace_back(cat, reader.getEntries(std::string(cat))); + } + fmt::print("Frame categories in this file:\nName\tEntries\n"); + printTable(rows, {"Name", "Entries"}); +} + +int dumpEDMDefinition(const podio::Reader& reader, const std::string& modelName) { + const auto availModels = reader.getAvailableDatamodels(); + if (const auto it = std::ranges::find(availModels, modelName); it == availModels.end()) { + fmt::print(stderr, "ERROR: cannot dump model '{}' (not present in file)\n", modelName); + return 1; + } + fmt::print("{}\n", reader.getDatamodelDefinition(modelName)); + return 0; +} + +void printFrame(const podio::Frame& frame, const std::string& category, size_t iEntry, bool detailed) { + fmt::print("{:#^82}\n", fmt::format(" {}: {} ", category, iEntry)); + if (detailed) { + printFrameDetailed(frame); + } else { + printFrameOverview(frame); + } +} + +int main(int argc, char* argv[]) { + // We strip the executable name off directly for parsing + const auto args = parseArgs({argv + 1, argv 
+ argc}); + + auto reader = podio::makeReader(args.inputFile); + if (!args.dumpEDM.empty()) { + return dumpEDMDefinition(reader, args.dumpEDM); + } + + printGeneralInfo(reader, args.inputFile); + + for (const auto event : args.events) { + try { + const auto& frame = reader.readFrame(args.category, event); + printFrame(frame, args.category, event, args.detailed); + } catch (std::runtime_error& err) { + fmt::print(stderr, "{}\n", err.what()); + return 1; + } + } + + return 0; +} diff --git a/tools/src/tabulate.h b/tools/src/tabulate.h new file mode 100644 index 000000000..8de76e7dd --- /dev/null +++ b/tools/src/tabulate.h @@ -0,0 +1,58 @@ +#include + +#include +#include +#include +#include +#include +#include + +template +void printTable(const std::vector>& rows, const std::vector& headers) { + // Simply assume that all rows have the same widths + const auto nCols = headers.size(); + constexpr auto nColsFromRows = std::tuple_size_v>; + if (nCols != nColsFromRows) { + throw std::invalid_argument("headers and rows have to have the same number of columns"); + } + + // Transform all elements into strings first to determine column widths + std::vector> stringRows; + stringRows.reserve(rows.size()); + std::transform(rows.begin(), rows.end(), std::back_inserter(stringRows), [&nCols](const auto& elem) { + std::vector strs; + strs.reserve(nCols); + std::apply([&strs](auto&&... 
args) { (strs.emplace_back(fmt::format("{}", args)), ...); }, elem); + return strs; + }); + + // First figure out how large each column has to be to fit all the content + std::vector colWidths(nCols, 0); + for (size_t i = 0; i < nCols; ++i) { + colWidths[i] = headers[i].size(); + } + for (const auto& row : stringRows) { + for (size_t iCol = 0; iCol < nCols; ++iCol) { + colWidths[iCol] = std::max(row[iCol].size(), colWidths[iCol]); + } + } + + // print the table header + for (size_t iCol = 0; iCol < nCols; ++iCol) { + fmt::print("{:<{}} ", headers[iCol], colWidths[iCol]); + } + fmt::print("\n"); + std::cout << '\n'; + for (size_t iCol = 0; iCol < nCols; ++iCol) { + fmt::print("{:->{}} ", "", colWidths[iCol]); + } + fmt::print("\n"); + + // and the contents + for (const auto& row : stringRows) { + for (size_t iCol = 0; iCol < nCols; ++iCol) { + fmt::print("{:<{}} ", row[iCol], colWidths[iCol]); + } + fmt::print("\n"); + } +} From 937842c4ed3292dbde3fbc6afd95c3a8727d8ef3 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 1 Apr 2025 14:00:41 +0200 Subject: [PATCH 03/18] Fix test env and paths for roundtrip tests --- cmake/podioTest.cmake | 1 + tests/scripts/dumpModelRoundTrip.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/podioTest.cmake b/cmake/podioTest.cmake index 2b86cb1e1..bb74adb7e 100644 --- a/cmake/podioTest.cmake +++ b/cmake/podioTest.cmake @@ -12,6 +12,7 @@ function(PODIO_SET_TEST_ENV test) IO_HANDLERS=${IO_HANDLERS} PODIO_USE_CLANG_FORMAT=${PODIO_USE_CLANG_FORMAT} PODIO_BASE=${PROJECT_SOURCE_DIR} + PODIO_BUILD_BASE=${PROJECT_BINARY_DIR} ENABLE_SIO=${ENABLE_SIO} PODIO_BUILD_BASE=${PROJECT_BINARY_DIR} LSAN_OPTIONS=suppressions=${PROJECT_SOURCE_DIR}/tests/root_io/leak_sanitizer_suppressions.txt diff --git a/tests/scripts/dumpModelRoundTrip.sh b/tests/scripts/dumpModelRoundTrip.sh index c0c201a57..dc4970698 100755 --- a/tests/scripts/dumpModelRoundTrip.sh +++ b/tests/scripts/dumpModelRoundTrip.sh @@ -21,7 +21,7 @@ 
OUTPUT_FOLDER=${INPUT_FILE}.dumped_${EDM_NAME} mkdir -p ${OUTPUT_FOLDER} # Dump the model to a yaml file -${PODIO_BASE}/tools/podio-dump --dump-edm ${EDM_NAME} ${INPUT_FILE} > ${DUMPED_MODEL} +${PODIO_BUILD_BASE}/tools/podio-dump --dump-edm ${EDM_NAME} ${INPUT_FILE} > ${DUMPED_MODEL} # Regenerate the code via the class generator and the freshly dumped model ${PODIO_BASE}/python/podio_class_generator.py \ From 60f350bbba7d3ad8f0c2baf456c76451bad0776d Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 1 Apr 2025 14:29:43 +0200 Subject: [PATCH 04/18] Keep podio-dump.py in set of installed programs for now --- tools/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 7d4cf2232..09151944e 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -4,6 +4,7 @@ target_link_libraries(podio-dump-tool PRIVATE podio::podio podio::podioIO fmt::f install(TARGETS podio-dump-tool EXPORT podioTargets DESTINATION ${CMAKE_INSTALL_BINDIR}) install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/podio-dump DESTINATION ${CMAKE_INSTALL_BINDIR}) +install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/podio-dump.py DESTINATION ${CMAKE_INSTALL_BINDIR}) install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/json-to-yaml DESTINATION ${CMAKE_INSTALL_BINDIR}) install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/podio-vis DESTINATION ${CMAKE_INSTALL_BINDIR}) if(ENABLE_RNTUPLE) From 6285b8266b209bbd886502fc2152ec2b910ab858 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 1 Apr 2025 14:44:36 +0200 Subject: [PATCH 05/18] Fix minor differences in output --- tools/src/podio-dump-tool.cpp | 13 +++++++++++-- tools/src/tabulate.h | 1 - 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/tools/src/podio-dump-tool.cpp b/tools/src/podio-dump-tool.cpp index 1a105a41d..da0ef3e4d 100644 --- a/tools/src/podio-dump-tool.cpp +++ b/tools/src/podio-dump-tool.cpp @@ -204,13 +204,22 @@ void printFrameDetailed(const podio::Frame& frame) { void printGeneralInfo(const 
podio::Reader& reader, const std::string& filename) { fmt::print("input file: {}\n", filename); - fmt::print("datamodel model definitions stored in this file: {}\n\n", reader.getAvailableDatamodels()); + fmt::print(" (written with podio version: {})\n\n", reader.currentFileVersion()); + fmt::print("datamodel model definitions stored in this file:\n"); + for (const auto& modelName : reader.getAvailableDatamodels()) { + const auto modelVersion = reader.currentFileVersion(modelName); + if (modelVersion) { + fmt::print(" - {} ({})\n", modelName, modelVersion.value()); + } else { + fmt::print(" - {}\n", modelName); + } + } std::vector> rows{}; for (const auto& cat : reader.getAvailableCategories()) { rows.emplace_back(cat, reader.getEntries(std::string(cat))); } - fmt::print("Frame categories in this file:\nName\tEntries\n"); + fmt::print("\nFrame categories in this file:\n"); printTable(rows, {"Name", "Entries"}); } diff --git a/tools/src/tabulate.h b/tools/src/tabulate.h index 8de76e7dd..f16de2962 100644 --- a/tools/src/tabulate.h +++ b/tools/src/tabulate.h @@ -42,7 +42,6 @@ void printTable(const std::vector>& rows, const std::vector fmt::print("{:<{}} ", headers[iCol], colWidths[iCol]); } fmt::print("\n"); - std::cout << '\n'; for (size_t iCol = 0; iCol < nCols; ++iCol) { fmt::print("{:->{}} ", "", colWidths[iCol]); } From 7419b9c1876e085e0ce9dd63df27e563867aac1f Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 1 Apr 2025 14:46:05 +0200 Subject: [PATCH 06/18] Use ranges where possible --- tools/src/podio-dump-tool.cpp | 4 ++-- tools/src/tabulate.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/src/podio-dump-tool.cpp b/tools/src/podio-dump-tool.cpp index da0ef3e4d..64bce123e 100644 --- a/tools/src/podio-dump-tool.cpp +++ b/tools/src/podio-dump-tool.cpp @@ -84,8 +84,8 @@ std::vector parseEventRange(const std::string& evtRange) { } else { std::vector events; events.reserve(splitRange.size()); - std::transform(splitRange.begin(), 
splitRange.end(), std::back_inserter(events), - [](const auto& elem) { return parseSizeOrExit(elem); }); + std::ranges::transform(splitRange, std::back_inserter(events), + [](const auto& elem) { return parseSizeOrExit(elem); }); return events; } diff --git a/tools/src/tabulate.h b/tools/src/tabulate.h index f16de2962..2d3db3010 100644 --- a/tools/src/tabulate.h +++ b/tools/src/tabulate.h @@ -19,7 +19,7 @@ void printTable(const std::vector>& rows, const std::vector // Transform all elements into strings first to determine column widths std::vector> stringRows; stringRows.reserve(rows.size()); - std::transform(rows.begin(), rows.end(), std::back_inserter(stringRows), [&nCols](const auto& elem) { + std::ranges::transform(rows, std::back_inserter(stringRows), [&nCols](const auto& elem) { std::vector strs; strs.reserve(nCols); std::apply([&strs](auto&&... args) { (strs.emplace_back(fmt::format("{}", args)), ...); }, elem); From 102872fe778af1b3a9428045ea0b2e92327f41e7 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 1 Apr 2025 20:19:52 +0200 Subject: [PATCH 07/18] Remove unnecessary flushing of stdout --- tools/src/podio-dump-tool.cpp | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/tools/src/podio-dump-tool.cpp b/tools/src/podio-dump-tool.cpp index 64bce123e..27f840293 100644 --- a/tools/src/podio-dump-tool.cpp +++ b/tools/src/podio-dump-tool.cpp @@ -181,25 +181,19 @@ void printFrameOverview(const podio::Frame& frame) { printTable(paramRows, {"Name", "Type", "Elements"}); } -template -void print_flush(fmt::format_string fmtstr, Args&&... 
args) { - fmt::print(fmtstr, std::forward(args)...); - std::fflush(stdout); -} - void printFrameDetailed(const podio::Frame& frame) { fmt::print("Collections:\n"); const auto collNames = frame.getAvailableCollections(); for (const auto& name : podio::utils::sortAlphabeticaly(collNames)) { const auto coll = frame.get(name); - print_flush("{}\n", name); + fmt::println("{}", name); coll->print(); - print_flush("\n"); + fmt::println(""); } - print_flush("\nParameters\n:"); + fmt::println("\nParameters:"); frame.getParameters().print(); - print_flush("\n"); + fmt::println(""); } void printGeneralInfo(const podio::Reader& reader, const std::string& filename) { From f17a264b92c1da75e49c1a0eab2c8f09a0d488c4 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 1 Apr 2025 20:21:16 +0200 Subject: [PATCH 08/18] Switch to println where possible --- tools/src/podio-dump-tool.cpp | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tools/src/podio-dump-tool.cpp b/tools/src/podio-dump-tool.cpp index 27f840293..c1f735124 100644 --- a/tools/src/podio-dump-tool.cpp +++ b/tools/src/podio-dump-tool.cpp @@ -47,7 +47,7 @@ positional arguments: )"; void printUsageAndExit() { - fmt::print(stderr, "{}\n", usageMsg); + fmt::println(stderr, "{}", usageMsg); std::exit(1); } @@ -63,7 +63,7 @@ auto getArgumentValueOrExit(const std::vector& argv, std::vector parseEventRange(const std::string& evtRange) { const auto splitRange = splitString(evtRange, ','); const auto parseError = [&evtRange]() { - fmt::print(stderr, "'{}' canot be parsed into a list of entries\n", evtRange); + fmt::println(stderr, "'{}' canot be parsed into a list of entries", evtRange); std::exit(1); }; @@ -98,7 +98,7 @@ ParsedArgs parseArgs(std::vector argv) { // find help or version if (const auto it = findFlags(argv, "-h", "--help", "--version"); it != argv.end()) { if (*it == "--version") { - fmt::print("podio {}\n", podio::version::build_version); + fmt::println("podio 
{}", podio::version::build_version); } else { fmt::print("{}\n{}", usageMsg, helpMsg); } @@ -159,7 +159,7 @@ void getParameterOverview(const podio::Frame& frame, std::vector> rows; @@ -171,7 +171,7 @@ void printFrameOverview(const podio::Frame& frame) { } printTable(rows, {"Name", "ValueType", "Size", "ID"}); - fmt::print("\nParameters:\n"); + fmt::println("\nParameters:"); std::vector> paramRows{}; getParameterOverview(frame, paramRows); getParameterOverview(frame, paramRows); @@ -182,7 +182,7 @@ void printFrameOverview(const podio::Frame& frame) { } void printFrameDetailed(const podio::Frame& frame) { - fmt::print("Collections:\n"); + fmt::println("Collections:"); const auto collNames = frame.getAvailableCollections(); for (const auto& name : podio::utils::sortAlphabeticaly(collNames)) { const auto coll = frame.get(name); @@ -197,15 +197,15 @@ void printFrameDetailed(const podio::Frame& frame) { } void printGeneralInfo(const podio::Reader& reader, const std::string& filename) { - fmt::print("input file: {}\n", filename); - fmt::print(" (written with podio version: {})\n\n", reader.currentFileVersion()); - fmt::print("datamodel model definitions stored in this file:\n"); + fmt::println("input file: {}", filename); + fmt::println(" (written with podio version: {})", reader.currentFileVersion()); + fmt::println("\ndatamodel model definitions stored in this file:"); for (const auto& modelName : reader.getAvailableDatamodels()) { const auto modelVersion = reader.currentFileVersion(modelName); if (modelVersion) { - fmt::print(" - {} ({})\n", modelName, modelVersion.value()); + fmt::println(" - {} ({})", modelName, modelVersion.value()); } else { - fmt::print(" - {}\n", modelName); + fmt::println(" - {}", modelName); } } @@ -213,22 +213,22 @@ void printGeneralInfo(const podio::Reader& reader, const std::string& filename) for (const auto& cat : reader.getAvailableCategories()) { rows.emplace_back(cat, reader.getEntries(std::string(cat))); } - fmt::print("\nFrame 
categories in this file:\n"); + fmt::println("\nFrame categories in this file:"); printTable(rows, {"Name", "Entries"}); } int dumpEDMDefinition(const podio::Reader& reader, const std::string& modelName) { const auto availModels = reader.getAvailableDatamodels(); if (const auto it = std::ranges::find(availModels, modelName); it == availModels.end()) { - fmt::print(stderr, "ERROR: cannot dump model '{}' (not present in file)\n", modelName); + fmt::println(stderr, "ERROR: cannot dump model '{}' (not present in file)", modelName); return 1; } - fmt::print("{}\n", reader.getDatamodelDefinition(modelName)); + fmt::println("{}", reader.getDatamodelDefinition(modelName)); return 0; } void printFrame(const podio::Frame& frame, const std::string& category, size_t iEntry, bool detailed) { - fmt::print("{:#^82}\n", fmt::format(" {}: {} ", category, iEntry)); + fmt::println("{:#^82}", fmt::format(" {}: {} ", category, iEntry)); if (detailed) { printFrameDetailed(frame); } else { @@ -252,7 +252,7 @@ int main(int argc, char* argv[]) { const auto& frame = reader.readFrame(args.category, event); printFrame(frame, args.category, event, args.detailed); } catch (std::runtime_error& err) { - fmt::print(stderr, "{}\n", err.what()); + fmt::println(stderr, "{}", err.what()); return 1; } } From 1ce978046b63ad47f3c7cc4cb9ba00cab17d19e4 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 1 Apr 2025 20:23:19 +0200 Subject: [PATCH 09/18] Rename legacy tool to podio-dump-legacy --- tools/CMakeLists.txt | 4 ++-- tools/{podio-dump.py => podio-dump-legacy} | 0 2 files changed, 2 insertions(+), 2 deletions(-) rename tools/{podio-dump.py => podio-dump-legacy} (100%) diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 09151944e..c956621aa 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -4,7 +4,7 @@ target_link_libraries(podio-dump-tool PRIVATE podio::podio podio::podioIO fmt::f install(TARGETS podio-dump-tool EXPORT podioTargets DESTINATION ${CMAKE_INSTALL_BINDIR}) 
install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/podio-dump DESTINATION ${CMAKE_INSTALL_BINDIR}) -install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/podio-dump.py DESTINATION ${CMAKE_INSTALL_BINDIR}) +install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/podio-dump-legacy DESTINATION ${CMAKE_INSTALL_BINDIR}) install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/json-to-yaml DESTINATION ${CMAKE_INSTALL_BINDIR}) install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/podio-vis DESTINATION ${CMAKE_INSTALL_BINDIR}) if(ENABLE_RNTUPLE) @@ -80,7 +80,7 @@ if(BUILD_TESTING) set(_name podio-dump-legacy_${name}_${version}) ExternalData_Add_Test(legacy_test_cases NAME ${_name} - COMMAND ./podio-dump.py ${ARGN} DATA{${PROJECT_SOURCE_DIR}/tests/input_files/${input_file}} + COMMAND ./podio-dump-legacy ${ARGN} DATA{${PROJECT_SOURCE_DIR}/tests/input_files/${input_file}} ) PODIO_SET_TEST_ENV(${_name}) diff --git a/tools/podio-dump.py b/tools/podio-dump-legacy similarity index 100% rename from tools/podio-dump.py rename to tools/podio-dump-legacy From 1d84a827bdda4450033cc87ef6679713103fd644 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 1 Apr 2025 20:39:09 +0200 Subject: [PATCH 10/18] Make more things done at compile time --- tools/src/podio-dump-tool.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/tools/src/podio-dump-tool.cpp b/tools/src/podio-dump-tool.cpp index c1f735124..ecf0b77e5 100644 --- a/tools/src/podio-dump-tool.cpp +++ b/tools/src/podio-dump-tool.cpp @@ -136,23 +136,25 @@ ParsedArgs parseArgs(std::vector argv) { } template -std::string getTypeString() { +consteval const std::string_view getTypeString() { + using namespace std::string_view_literals; if constexpr (std::is_same_v) { - return "int"; + return "int"sv; } else if constexpr (std::is_same_v) { - return "float"; + return "float"sv; } else if constexpr (std::is_same_v) { - return "double"; + return "double"sv; } else if constexpr (std::is_same_v) { - return "std::string"; + return "std::string"sv; } - return 
"unknown"; + return "unknown"sv; } template -void getParameterOverview(const podio::Frame& frame, std::vector>& rows) { - const auto typeString = getTypeString(); +void getParameterOverview(const podio::Frame& frame, + std::vector>& rows) { + constexpr auto typeString = getTypeString(); for (const auto& parKey : podio::utils::sortAlphabeticaly(frame.getParameterKeys())) { rows.emplace_back(parKey, typeString, frame.getParameter>(parKey)->size()); } @@ -172,7 +174,7 @@ void printFrameOverview(const podio::Frame& frame) { printTable(rows, {"Name", "ValueType", "Size", "ID"}); fmt::println("\nParameters:"); - std::vector> paramRows{}; + std::vector> paramRows{}; getParameterOverview(frame, paramRows); getParameterOverview(frame, paramRows); getParameterOverview(frame, paramRows); From 8ee6923cc0dc5bf8478a4206f2e781aab9d782cd Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Tue, 1 Apr 2025 20:39:18 +0200 Subject: [PATCH 11/18] Enable more tests for sanitizers --- tests/CTestCustom.cmake | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tests/CTestCustom.cmake b/tests/CTestCustom.cmake index 42d66b3b2..2861e32c0 100644 --- a/tests/CTestCustom.cmake +++ b/tests/CTestCustom.cmake @@ -40,27 +40,16 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ pyunittest - podio-dump-help podio-dump-root podio-dump-detailed-root podio-dump-legacy_root_v00-16-06 podio-dump-legacy_root-detailed_v00-16-06 - podio-dump-sio - podio-dump-detailed-sio podio-dump-legacy_sio_v00-16-06 podio-dump-legacy_sio-detailed_v00-16-06 - podio-dump-rntuple - podio-dump-detailed-rntuple - datamodel_def_store_roundtrip_root datamodel_def_store_roundtrip_root_extension - datamodel_def_store_roundtrip_sio - datamodel_def_store_roundtrip_sio_extension - datamodel_def_store_roundtrip_rntuple - datamodel_def_store_roundtrip_rntuple_extension - write_old_data_root read_new_data_root From a49720f98fc8c635c3217632110bbfc7c16a7ce4 Mon Sep 17 00:00:00 2001 From: Thomas 
Madlener Date: Tue, 1 Apr 2025 20:46:26 +0200 Subject: [PATCH 12/18] Fix pre-commit issues --- tools/json-to-yaml | 2 ++ tools/podio-dump-legacy | 9 +++++---- tools/src/argparseUtils.h | 4 ++-- tools/src/tabulate.h | 5 +++++ 4 files changed, 14 insertions(+), 6 deletions(-) diff --git a/tools/json-to-yaml b/tools/json-to-yaml index a9fcbe77b..6a59e7d26 100755 --- a/tools/json-to-yaml +++ b/tools/json-to-yaml @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +"""Tiny script to ingest a json string and dump it as a yaml string""" import sys import json @@ -6,6 +7,7 @@ import yaml def main(): + """Main, read json from stdin and dump yaml to stdout""" input_data = sys.stdin.read() model_def = json.loads(input_data) print(yaml.dump(model_def, sort_keys=False, default_flow_style=False)) diff --git a/tools/podio-dump-legacy b/tools/podio-dump-legacy index 6f654c5e2..0b2b892fa 100755 --- a/tools/podio-dump-legacy +++ b/tools/podio-dump-legacy @@ -169,14 +169,15 @@ def parse_entry_range(ent_string): if __name__ == "__main__": import argparse - _epilog = ( - "NOTE: This is a legacy implementation in python. A newer version based on a c++ implementation is available. " - "That version is much faster, but does not handle pre-release version files." + _EPILOG = ( + "NOTE: This is a legacy implementation in python. A newer version based on a c++ " + "implementation is available. That version is much faster, but does not handle pre-release" + " version files." 
) # pylint: disable=invalid-name # before 2.5.0 pylint is too strict with the naming here parser = argparse.ArgumentParser( - description="Dump contents of a podio file to stdout.", epilog=_epilog + description="Dump contents of a podio file to stdout.", epilog=_EPILOG ) parser.add_argument("inputfile", help="Name of the file to dump content from") parser.add_argument( diff --git a/tools/src/argparseUtils.h b/tools/src/argparseUtils.h index 7e133012c..255e7737e 100644 --- a/tools/src/argparseUtils.h +++ b/tools/src/argparseUtils.h @@ -1,5 +1,5 @@ -#ifndef PODIO_TOOLS_ARGPARSEUTILS_H -#define PODIO_TOOLS_ARGPARSEUTILS_H +#ifndef PODIO_TOOLS_ARGPARSEUTILS_H // NOLINT(llvm-header-guard): folder structure not suitable +#define PODIO_TOOLS_ARGPARSEUTILS_H // NOLINT(llvm-header-guard): folder structure not suitable #include #include diff --git a/tools/src/tabulate.h b/tools/src/tabulate.h index 2d3db3010..cc2fd9609 100644 --- a/tools/src/tabulate.h +++ b/tools/src/tabulate.h @@ -1,3 +1,6 @@ +#ifndef PODIO_TOOLS_TABULATE_H // NOLINT(llvm-header-guard): folder structure not suitable +#define PODIO_TOOLS_TABULATE_H // NOLINT(llvm-header-guard): folder structure not suitable + #include #include @@ -55,3 +58,5 @@ void printTable(const std::vector>& rows, const std::vector fmt::print("\n"); } } + +#endif // PODIO_TOOLS_TABULATE_H From 36ff461c396020abf67eb8fd97cc48f1aa8d186d Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 7 Apr 2025 10:09:48 +0200 Subject: [PATCH 13/18] Simplify parse function Co-authored-by: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> --- tools/src/argparseUtils.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/tools/src/argparseUtils.h b/tools/src/argparseUtils.h index 255e7737e..d215fdd8d 100644 --- a/tools/src/argparseUtils.h +++ b/tools/src/argparseUtils.h @@ -32,23 +32,17 @@ inline std::vector splitString(const std::string& str, const char d } inline size_t parseSizeOrExit(const 
std::string& str) { - const auto parseError = [&str]() { - std::cerr << "'" << str << "' cannot be parsed into an integer number" << std::endl; - std::exit(1); - }; - try { std::size_t pos{}; - auto number = std::stoll(str, &pos); + const auto number = std::stoull(str, &pos); if (pos != str.size()) { throw std::invalid_argument(""); } return number; - } catch (const std::invalid_argument& err) { - parseError(); + } catch (const std::invalid_argument&) { + std::cerr << "'" << str << "' cannot be parsed into an integer number" << std::endl; + std::exit(1); } - - return -1; } #endif // PODIO_TOOLS_ARGPARSEUTILS_H From 2d6a26bacb411ff3a01e3b46c251a7a134e0b6e5 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 7 Apr 2025 10:25:38 +0200 Subject: [PATCH 14/18] Use ranges and views in some places --- tools/src/argparseUtils.h | 32 +++++++++++++------------------- tools/src/podio-dump-tool.cpp | 2 +- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/tools/src/argparseUtils.h b/tools/src/argparseUtils.h index d215fdd8d..a4c7d1597 100644 --- a/tools/src/argparseUtils.h +++ b/tools/src/argparseUtils.h @@ -2,9 +2,12 @@ #define PODIO_TOOLS_ARGPARSEUTILS_H // NOLINT(llvm-header-guard): folder structure not suitable #include +#include #include +#include #include #include +#include #include template @@ -12,30 +15,21 @@ auto findFlags(const std::vector& argv, Flags... 
flags) { return std::find_if(argv.begin(), argv.end(), [&](const auto& elem) { return ((elem == flags) || ...); }); } -inline std::vector splitString(const std::string& str, const char delimiter) { - std::vector tokens; - std::string token; - for (char ch : str) { - if (ch == delimiter) { - if (!token.empty()) { - tokens.push_back(token); - token.clear(); - } - } else { - token += ch; - } - } - if (!token.empty()) { - tokens.push_back(token); +inline std::vector splitString(const std::string& str, const char delimiter) { + std::vector tokens; + + for (const auto& token : std::ranges::views::split(str, delimiter)) { + tokens.emplace_back(token.begin(), token.end()); } + return tokens; } -inline size_t parseSizeOrExit(const std::string& str) { +inline size_t parseSizeOrExit(std::string_view str) { try { - std::size_t pos{}; - const auto number = std::stoull(str, &pos); - if (pos != str.size()) { + std::size_t number{}; + const auto [ptr, err] = std::from_chars(str.data(), str.data() + str.size(), number); + if (err != std::errc{} || ptr != str.data() + str.size()) { throw std::invalid_argument(""); } return number; diff --git a/tools/src/podio-dump-tool.cpp b/tools/src/podio-dump-tool.cpp index ecf0b77e5..91a334c04 100644 --- a/tools/src/podio-dump-tool.cpp +++ b/tools/src/podio-dump-tool.cpp @@ -22,7 +22,7 @@ struct fmt::formatter : ostream_formatter {}; struct ParsedArgs { std::string inputFile{}; std::string category{"events"}; - std::vector events = std::vector(1, 0); + std::vector events{0}; std::string dumpEDM{}; bool detailed{false}; }; From 8b0419b3f163d1b5776fef4df2fda84bd5cdcb30 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 7 Apr 2025 10:32:03 +0200 Subject: [PATCH 15/18] Disable some tests still for TSan and UBSan --- tests/CTestCustom.cmake | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/CTestCustom.cmake b/tests/CTestCustom.cmake index 2861e32c0..870e20b7b 100644 --- a/tests/CTestCustom.cmake +++ 
b/tests/CTestCustom.cmake @@ -89,6 +89,12 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ read_rntuple read_interface_rntuple selected_colls_roundtrip_rntuple + + podio-dump-rntuple + podio-dump-detailed-rntuple + + datamodel_def_store_roundtrip_rntuple + datamodel_def_store_roundtrip_rntuple_extension ) endif() @@ -101,6 +107,12 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ write_interface_rntuple read_interface_rntuple selected_colls_roundtrip_rntuple + + podio-dump-rntuple + podio-dump-detailed-rntuple + + datamodel_def_store_roundtrip_rntuple + datamodel_def_store_roundtrip_rntuple_extension ) endif() From 03e3622d2d2f227bd3284b3d8518d8bd5fc03598 Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 7 Apr 2025 14:21:39 +0200 Subject: [PATCH 16/18] Range-ify more of the implementation Co-authored-by: Juan Miguel Carceller <22276694+jmcarcell@users.noreply.github.com> --- tools/src/argparseUtils.h | 24 +++++---------------- tools/src/podio-dump-tool.cpp | 39 ++++++++++++++++++++--------------- 2 files changed, 27 insertions(+), 36 deletions(-) diff --git a/tools/src/argparseUtils.h b/tools/src/argparseUtils.h index a4c7d1597..3bbb63167 100644 --- a/tools/src/argparseUtils.h +++ b/tools/src/argparseUtils.h @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -15,28 +14,15 @@ auto findFlags(const std::vector& argv, Flags... 
flags) { return std::find_if(argv.begin(), argv.end(), [&](const auto& elem) { return ((elem == flags) || ...); }); } -inline std::vector splitString(const std::string& str, const char delimiter) { - std::vector tokens; - - for (const auto& token : std::ranges::views::split(str, delimiter)) { - tokens.emplace_back(token.begin(), token.end()); - } - - return tokens; -} - inline size_t parseSizeOrExit(std::string_view str) { - try { - std::size_t number{}; - const auto [ptr, err] = std::from_chars(str.data(), str.data() + str.size(), number); - if (err != std::errc{} || ptr != str.data() + str.size()) { - throw std::invalid_argument(""); - } - return number; - } catch (const std::invalid_argument&) { + std::cout << "Parsing '" << str << "' into a size_t" << std::endl; + std::size_t number{}; + const auto [ptr, err] = std::from_chars(str.data(), str.end(), number); + if (err != std::errc{} || ptr != str.end()) { std::cerr << "'" << str << "' cannot be parsed into an integer number" << std::endl; std::exit(1); } + return number; } #endif // PODIO_TOOLS_ARGPARSEUTILS_H diff --git a/tools/src/podio-dump-tool.cpp b/tools/src/podio-dump-tool.cpp index 91a334c04..4fa52e18d 100644 --- a/tools/src/podio-dump-tool.cpp +++ b/tools/src/podio-dump-tool.cpp @@ -60,22 +60,29 @@ auto getArgumentValueOrExit(const std::vector& argv, std::vector parseEventRange(const std::string& evtRange) { - const auto splitRange = splitString(evtRange, ','); - const auto parseError = [&evtRange]() { - fmt::println(stderr, "'{}' canot be parsed into a list of entries", evtRange); +std::vector parseEventRange(const std::string_view evtRange) { + auto parseError = [&evtRange]() { + fmt::println(stderr, "'{}' cannot be parsed into a list of entries", evtRange); std::exit(1); }; - if (splitRange.size() == 1) { - const auto colonSplitRange = splitString(evtRange, ':'); - if (colonSplitRange.size() == 1) { - return {parseSizeOrExit(splitRange[0])}; - } else if (colonSplitRange.size() == 2) { - // we have 
two numbers signifying an inclusive range - const auto start = parseSizeOrExit(colonSplitRange[0]); - const auto end = parseSizeOrExit(colonSplitRange[1]); - std::vector events(end - start + 1); + // Split by ',' and transform into a range of string views + auto splitRange = evtRange | std::views::split(',') | + std::views::transform([](auto&& subrange) { return std::string_view(subrange.begin(), subrange.end()); }); + + if (std::ranges::distance(splitRange) == 1) { + // Only one entry, check if it's a range (start:end) + auto colonSplitRange = evtRange | std::views::split(':') | + std::views::transform([](auto&& subrange) { return std::string_view(subrange.begin(), subrange.end()); }); + + const auto it = std::ranges::begin(colonSplitRange); + + if (std::ranges::distance(colonSplitRange) == 1) { + return {parseSizeOrExit(*it)}; + } else if (std::ranges::distance(colonSplitRange) == 2) { + size_t start = parseSizeOrExit(*it); + size_t stop = parseSizeOrExit(*std::ranges::next(it)); + std::vector events(stop - start + 1); std::iota(events.begin(), events.end(), start); return events; } else { @@ -83,10 +90,8 @@ std::vector parseEventRange(const std::string& evtRange) { } } else { std::vector events; - events.reserve(splitRange.size()); - std::ranges::transform(splitRange, std::back_inserter(events), - [](const auto& elem) { return parseSizeOrExit(elem); }); - + events.reserve(std::ranges::distance(splitRange)); + std::ranges::transform(splitRange, std::back_inserter(events), parseSizeOrExit); return events; } From c832bf0124deb0da0bd59a768898825ea625775c Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 7 Apr 2025 14:27:58 +0200 Subject: [PATCH 17/18] Introduce alias namespace for better readability --- tools/src/argparseUtils.h | 2 -- tools/src/podio-dump-tool.cpp | 11 +++++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/src/argparseUtils.h b/tools/src/argparseUtils.h index 3bbb63167..8c83aedf4 100644 --- 
a/tools/src/argparseUtils.h +++ b/tools/src/argparseUtils.h @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -15,7 +14,6 @@ auto findFlags(const std::vector& argv, Flags... flags) { } inline size_t parseSizeOrExit(std::string_view str) { - std::cout << "Parsing '" << str << "' into a size_t" << std::endl; std::size_t number{}; const auto [ptr, err] = std::from_chars(str.data(), str.end(), number); if (err != std::errc{} || ptr != str.end()) { diff --git a/tools/src/podio-dump-tool.cpp b/tools/src/podio-dump-tool.cpp index 4fa52e18d..60afa060e 100644 --- a/tools/src/podio-dump-tool.cpp +++ b/tools/src/podio-dump-tool.cpp @@ -13,9 +13,12 @@ #include #include #include +#include #include #include +namespace rv = std::ranges::views; + template <> struct fmt::formatter : ostream_formatter {}; @@ -67,13 +70,13 @@ std::vector parseEventRange(const std::string_view evtRange) { }; // Split by ',' and transform into a range of string views - auto splitRange = evtRange | std::views::split(',') | - std::views::transform([](auto&& subrange) { return std::string_view(subrange.begin(), subrange.end()); }); + auto splitRange = evtRange | rv::split(',') | + rv::transform([](auto&& subrange) { return std::string_view(subrange.begin(), subrange.end()); }); if (std::ranges::distance(splitRange) == 1) { // Only one entry, check if it's a range (start:end) - auto colonSplitRange = evtRange | std::views::split(':') | - std::views::transform([](auto&& subrange) { return std::string_view(subrange.begin(), subrange.end()); }); + auto colonSplitRange = evtRange | rv::split(':') | + rv::transform([](auto&& subrange) { return std::string_view(subrange.begin(), subrange.end()); }); const auto it = std::ranges::begin(colonSplitRange); From f0217c0d5ebd69b3040814143421a5b5e4a28e7a Mon Sep 17 00:00:00 2001 From: Thomas Madlener Date: Mon, 7 Apr 2025 14:38:15 +0200 Subject: [PATCH 18/18] Improve error message --- tools/src/podio-dump-tool.cpp | 9 +++++---- 1 file 
changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/src/podio-dump-tool.cpp b/tools/src/podio-dump-tool.cpp index 60afa060e..6e1b60ff8 100644 --- a/tools/src/podio-dump-tool.cpp +++ b/tools/src/podio-dump-tool.cpp @@ -64,8 +64,8 @@ auto getArgumentValueOrExit(const std::vector& argv, std::vector parseEventRange(const std::string_view evtRange) { - auto parseError = [&evtRange]() { - fmt::println(stderr, "'{}' cannot be parsed into a list of entries", evtRange); + auto parseError = [evtRange]() { + fmt::println(stderr, "error: argument -e/--entries: '{}' cannot be parsed into a list of entries", evtRange); std::exit(1); }; @@ -79,12 +79,13 @@ std::vector parseEventRange(const std::string_view evtRange) { rv::transform([](auto&& subrange) { return std::string_view(subrange.begin(), subrange.end()); }); const auto it = std::ranges::begin(colonSplitRange); + const auto nextIt = std::ranges::next(it); if (std::ranges::distance(colonSplitRange) == 1) { return {parseSizeOrExit(*it)}; - } else if (std::ranges::distance(colonSplitRange) == 2) { + } else if (std::ranges::distance(colonSplitRange) == 2 && !(*nextIt).empty()) { size_t start = parseSizeOrExit(*it); - size_t stop = parseSizeOrExit(*std::ranges::next(it)); + size_t stop = parseSizeOrExit(*nextIt); std::vector events(stop - start + 1); std::iota(events.begin(), events.end(), start); return events;