diff --git a/CMakeLists.txt b/CMakeLists.txt index 0ff9ea0d0..8830336ba 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -190,6 +190,9 @@ if(BUILD_TESTING) include(cmake/podioTest.cmake) add_subdirectory(tests) endif() + +find_package(fmt 9 REQUIRED) + add_subdirectory(tools) add_subdirectory(python) diff --git a/cmake/podioTest.cmake b/cmake/podioTest.cmake index 2b86cb1e1..bb74adb7e 100644 --- a/cmake/podioTest.cmake +++ b/cmake/podioTest.cmake @@ -12,6 +12,7 @@ function(PODIO_SET_TEST_ENV test) IO_HANDLERS=${IO_HANDLERS} PODIO_USE_CLANG_FORMAT=${PODIO_USE_CLANG_FORMAT} PODIO_BASE=${PROJECT_SOURCE_DIR} + PODIO_BUILD_BASE=${PROJECT_BINARY_DIR} ENABLE_SIO=${ENABLE_SIO} PODIO_BUILD_BASE=${PROJECT_BINARY_DIR} LSAN_OPTIONS=suppressions=${PROJECT_SOURCE_DIR}/tests/root_io/leak_sanitizer_suppressions.txt diff --git a/include/podio/utilities/MiscHelpers.h b/include/podio/utilities/MiscHelpers.h new file mode 100644 index 000000000..522f49366 --- /dev/null +++ b/include/podio/utilities/MiscHelpers.h @@ -0,0 +1,32 @@ +#ifndef PODIO_UTILITIES_MISCHELPERS_H +#define PODIO_UTILITIES_MISCHELPERS_H + +#include +#include +#include + +namespace podio::utils { + +/// Sort the input vector of strings alphabetically, case insensitive. +/// +/// @param strings The strings that should be sorted alphabetically +/// +/// @returns A vector of strings sorted alphabetically, case insensitive +inline std::vector sortAlphabeticaly(std::vector strings) { + // Obviously there is no tolower(std::string) in c++, so this is slightly more + // involved and we make use of the fact that lexicographical_compare works on + // ranges and the fact that we can feed it a dedicated comparison function, + // where we convert the strings to lower case char-by-char. The alternative is + // to make string copies inside the first lambda, transform them to lowercase + // and then use operator< of std::string, which would be effectively + // hand-writing what is happening below. 
+  const auto lower = [](unsigned char c) { return std::tolower(c); }; // tolower(negative char) is undefined
+  std::ranges::sort(strings, [lower](const auto& lhs, const auto& rhs) {
+    return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end(),
+                                        [lower](char cl, char cr) { return lower(cl) < lower(cr); });
+  });
+  return strings;
+}
+} // namespace podio::utils
+
+#endif // PODIO_UTILITIES_MISCHELPERS_H
diff --git a/src/RNTupleWriter.cc b/src/RNTupleWriter.cc
index 4027079c2..b34e21446 100644
--- a/src/RNTupleWriter.cc
+++ b/src/RNTupleWriter.cc
@@ -58,7 +58,7 @@ void RNTupleWriter::writeFrame(const podio::Frame& frame, const std::string& cat
   const bool new_category = (catInfo.writer == nullptr);
   if (new_category) {
     // This is the minimal information that we need for now
-    catInfo.names = root_utils::sortAlphabeticaly(collsToWrite);
+    catInfo.names = podio::utils::sortAlphabeticaly(collsToWrite);
   }
 
   std::vector collections;
diff --git a/src/ROOTWriter.cc b/src/ROOTWriter.cc
index 21e2998db..c4a3bc30f 100644
--- a/src/ROOTWriter.cc
+++ b/src/ROOTWriter.cc
@@ -33,7 +33,7 @@ void ROOTWriter::writeFrame(const podio::Frame& frame, const std::string& catego
   // been initialized
   if (catInfo.tree == nullptr) {
     catInfo.idTable = frame.getCollectionIDTableForWrite();
-    catInfo.collsToWrite = root_utils::sortAlphabeticaly(collsToWrite);
+    catInfo.collsToWrite = podio::utils::sortAlphabeticaly(collsToWrite);
     catInfo.tree = new TTree(category.c_str(), (category + " data tree").c_str());
     catInfo.tree->SetDirectory(m_file.get());
   }
diff --git a/src/rootUtils.h b/src/rootUtils.h
index b022500b6..b58fc6aea 100644
--- a/src/rootUtils.h
+++ b/src/rootUtils.h
@@ -2,6 +2,7 @@
 #define PODIO_ROOT_UTILS_H // NOLINT(llvm-header-guard): internal headers confuse clang-tidy
 
 #include "podio/CollectionIDTable.h"
+#include "podio/utilities/MiscHelpers.h"
 #include "podio/utilities/RootHelpers.h"
 #include "podio/utilities/TypeHelpers.h"
 
@@ -280,25 +281,6 @@ inline auto reconstructCollectionInfo(TTree* eventTree, podio::CollectionIDTable
   return collInfo;
 }
 
-/** - 
* Sort the input vector of strings alphabetically, case insensitive. - */ -inline std::vector sortAlphabeticaly(std::vector strings) { - // Obviously there is no tolower(std::string) in c++, so this is slightly more - // involved and we make use of the fact that lexicographical_compare works on - // ranges and the fact that we can feed it a dedicated comparison function, - // where we convert the strings to lower case char-by-char. The alternative is - // to make string copies inside the first lambda, transform them to lowercase - // and then use operator< of std::string, which would be effectively - // hand-writing what is happening below. - std::ranges::sort(strings, [](const auto& lhs, const auto& rhs) { - return std::lexicographical_compare( - lhs.begin(), lhs.end(), rhs.begin(), rhs.end(), - [](const auto& cl, const auto& cr) { return std::tolower(cl) < std::tolower(cr); }); - }); - return strings; -} - /** * Check whether existingColls and candidateColls both contain the same * collection names. Returns false if the two vectors differ in content. 
Inputs diff --git a/tests/CTestCustom.cmake b/tests/CTestCustom.cmake index 42d66b3b2..870e20b7b 100644 --- a/tests/CTestCustom.cmake +++ b/tests/CTestCustom.cmake @@ -40,27 +40,16 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ pyunittest - podio-dump-help podio-dump-root podio-dump-detailed-root podio-dump-legacy_root_v00-16-06 podio-dump-legacy_root-detailed_v00-16-06 - podio-dump-sio - podio-dump-detailed-sio podio-dump-legacy_sio_v00-16-06 podio-dump-legacy_sio-detailed_v00-16-06 - podio-dump-rntuple - podio-dump-detailed-rntuple - datamodel_def_store_roundtrip_root datamodel_def_store_roundtrip_root_extension - datamodel_def_store_roundtrip_sio - datamodel_def_store_roundtrip_sio_extension - datamodel_def_store_roundtrip_rntuple - datamodel_def_store_roundtrip_rntuple_extension - write_old_data_root read_new_data_root @@ -100,6 +89,12 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ read_rntuple read_interface_rntuple selected_colls_roundtrip_rntuple + + podio-dump-rntuple + podio-dump-detailed-rntuple + + datamodel_def_store_roundtrip_rntuple + datamodel_def_store_roundtrip_rntuple_extension ) endif() @@ -112,6 +107,12 @@ if ((NOT "@FORCE_RUN_ALL_TESTS@" STREQUAL "ON") AND (NOT "@USE_SANITIZER@" STREQ write_interface_rntuple read_interface_rntuple selected_colls_roundtrip_rntuple + + podio-dump-rntuple + podio-dump-detailed-rntuple + + datamodel_def_store_roundtrip_rntuple + datamodel_def_store_roundtrip_rntuple_extension ) endif() diff --git a/tests/scripts/dumpModelRoundTrip.sh b/tests/scripts/dumpModelRoundTrip.sh index c0c201a57..dc4970698 100755 --- a/tests/scripts/dumpModelRoundTrip.sh +++ b/tests/scripts/dumpModelRoundTrip.sh @@ -21,7 +21,7 @@ OUTPUT_FOLDER=${INPUT_FILE}.dumped_${EDM_NAME} mkdir -p ${OUTPUT_FOLDER} # Dump the model to a yaml file -${PODIO_BASE}/tools/podio-dump --dump-edm ${EDM_NAME} ${INPUT_FILE} > ${DUMPED_MODEL} +${PODIO_BUILD_BASE}/tools/podio-dump --dump-edm 
${EDM_NAME} ${INPUT_FILE} > ${DUMPED_MODEL} # Regenerate the code via the class generator and the freshly dumped model ${PODIO_BASE}/python/podio_class_generator.py \ diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index cbacb59a2..c956621aa 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -1,4 +1,11 @@ +add_executable(podio-dump-tool src/podio-dump-tool.cpp) +target_link_libraries(podio-dump-tool PRIVATE podio::podio podio::podioIO fmt::fmt) + +install(TARGETS podio-dump-tool EXPORT podioTargets DESTINATION ${CMAKE_INSTALL_BINDIR}) + install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/podio-dump DESTINATION ${CMAKE_INSTALL_BINDIR}) +install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/podio-dump-legacy DESTINATION ${CMAKE_INSTALL_BINDIR}) +install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/json-to-yaml DESTINATION ${CMAKE_INSTALL_BINDIR}) install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/podio-vis DESTINATION ${CMAKE_INSTALL_BINDIR}) if(ENABLE_RNTUPLE) install(PROGRAMS ${CMAKE_CURRENT_LIST_DIR}/podio-ttree-to-rntuple DESTINATION ${CMAKE_INSTALL_BINDIR}) @@ -37,6 +44,10 @@ endif() # Add a very basic tests here to make sure that podio-dump at least runs if(BUILD_TESTING) + # Copy these two files into the build tree to be able to test things + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/json-to-yaml ${CMAKE_CURRENT_BINARY_DIR}/json-to-yaml COPYONLY) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/podio-dump ${CMAKE_CURRENT_BINARY_DIR}/podio-dump COPYONLY) + # Helper function for easily creating "tests" that simply execute podio-dump # with different arguments. Not crashing is considered success. 
# @@ -44,11 +55,11 @@ if(BUILD_TESTING) # name the name of the test # depends_on the target name of the test that produces the required input file function(CREATE_DUMP_TEST name depends_on) - add_test(NAME ${name} COMMAND ./podio-dump ${ARGN}) + add_test(NAME ${name} COMMAND ${CMAKE_CURRENT_BINARY_DIR}/podio-dump ${ARGN}) PODIO_SET_TEST_ENV(${name}) set_tests_properties(${name} PROPERTIES - WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR} + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} ) if (depends_on) set_tests_properties(${name} PROPERTIES @@ -69,7 +80,7 @@ if(BUILD_TESTING) set(_name podio-dump-legacy_${name}_${version}) ExternalData_Add_Test(legacy_test_cases NAME ${_name} - COMMAND ./podio-dump ${ARGN} DATA{${PROJECT_SOURCE_DIR}/tests/input_files/${input_file}} + COMMAND ./podio-dump-legacy ${ARGN} DATA{${PROJECT_SOURCE_DIR}/tests/input_files/${input_file}} ) PODIO_SET_TEST_ENV(${_name}) diff --git a/tools/json-to-yaml b/tools/json-to-yaml new file mode 100755 index 000000000..6a59e7d26 --- /dev/null +++ b/tools/json-to-yaml @@ -0,0 +1,17 @@ +#!/usr/bin/env python3 +"""Tiny script to ingest a json string and dump it as a yaml string""" + +import sys +import json +import yaml + + +def main(): + """Main, read json from stdin and dump yaml to stdout""" + input_data = sys.stdin.read() + model_def = json.loads(input_data) + print(yaml.dump(model_def, sort_keys=False, default_flow_style=False)) + + +if __name__ == "__main__": + main() diff --git a/tools/podio-dump b/tools/podio-dump index ea8143762..04db2ec6b 100755 --- a/tools/podio-dump +++ b/tools/podio-dump @@ -1,202 +1,37 @@ -#!/usr/bin/env python3 -"""podio-dump tool to dump contents of podio files""" - -import sys -import json -import yaml - -from tabulate import tabulate - -from podio_version import __version__ - - -def print_general_info(reader, filename): - """Print an overview of the file contents at the very beginning. 
- - This prints things like the available categories (and how many entries they - have) as well as the filename, etc. - - Args: - reader (root_io.Reader, sio_io.Reader): An initialized reader - filename (str): The name of the input file - """ - legacy_text = " (this is a legacy file!)" if reader.is_legacy else "" - print( - f"input file: {filename}{legacy_text}\n" - " (written with podio version: " - f"{reader.current_file_version()})\n" - ) - - print("datamodel model definitions stored in this file: ") - for edm_name in reader.datamodel_definitions: - try: - edm_version = reader.current_file_version(edm_name) - print(f" - {edm_name} ({edm_version})") - except KeyError: - print(f" - {edm_name}") - - print() - print("Frame categories in this file:") - cats = [(c, len(reader.get(c))) for c in reader.categories] - print(tabulate(cats, headers=["Name", "Entries"])) - - -def print_frame_detailed(frame): - """Print the Frame in all its glory, dumping every collection via print - - Args: - frame (podio.Frame): The frame to print - """ - print("Collections:") - for name in sorted(frame.getAvailableCollections(), key=str.casefold): - coll = frame.get(name) - print(name, flush=True) - coll.print() - print(flush=True) - - print("\nParameters:", flush=True) - frame.get_parameters().print() - print(flush=True) - - -def print_frame_overview(frame): - """Print a Frame overview, dumping just collection names, types and sizes - - Args: - frame (podio.Frame): The frame to print - """ - rows = [] - for name in sorted(frame.getAvailableCollections(), key=str.casefold): - coll = frame.get(name) - rows.append((name, coll.getValueTypeName().data(), len(coll), f"{coll.getID():0>8x}")) - print("Collections:") - print(tabulate(rows, headers=["Name", "ValueType", "Size", "ID"])) - - rows = [] - for name in sorted(frame.parameters, key=str.casefold): - for par_type, n_pars in frame.get_param_info(name).items(): - rows.append([name, par_type, n_pars]) - print("\nParameters:") - 
print(tabulate(rows, headers=["Name", "Type", "Elements"])) - - -def print_frame(frame, cat_name, ientry, detailed): - """Print a Frame. - - Args: - frame (podio.Frame): The frame to print - cat_name (str): The category name - ientry (int): The entry number of this Frame - detailed (bool): Print just an overview or dump the whole contents - """ - print("{:#^82}".format(f" {cat_name}: {ientry} ")) # pylint: disable=consider-using-f-string - - if detailed: - print_frame_detailed(frame) - else: - print_frame_overview(frame) - - # Additional new line before the next entry - print("\n", flush=True) - - -def dump_model(reader, model_name): - """Dump the model in yaml format""" - if model_name not in reader.datamodel_definitions: - print(f"ERROR: Cannot dump model '{model_name}' (not present in file)") - return False - - model_def = json.loads(reader.get_datamodel_definition(model_name)) - print(yaml.dump(model_def, sort_keys=False, default_flow_style=False)) - - return True - - -def main(args): - """Main""" - from podio.reading import get_reader # pylint: disable=import-outside-toplevel - - try: - reader = get_reader(args.inputfile) - except ValueError as err: - print(f"ERROR: Cannot open file '{args.inputfile}': {err}") - sys.exit(1) - - if args.dump_edm is not None: - if dump_model(reader, args.dump_edm): - sys.exit(0) - else: - sys.exit(1) - - print_general_info(reader, args.inputfile) - if args.category not in reader.categories: - print(f"ERROR: Cannot print category '{args.category}' (not present in file)") - sys.exit(1) - - frames = reader.get(args.category) - for ient in args.entries: - try: - print_frame(frames[ient], args.category, ient, args.detailed) - except IndexError: - print(f'WARNING: Entry no. 
{ient} in "{args.category}" not present in the file!') - - -def parse_entry_range(ent_string): - """Parse which entries to print""" - try: - return [int(ent_string)] - except ValueError: - pass - - try: - return [int(i) for i in ent_string.split(",")] - except ValueError: - pass - - try: - first, last = [int(i) for i in ent_string.split(":")] - return list(range(first, last + 1)) - except ValueError: - pass - - raise argparse.ArgumentTypeError(f"'{ent_string}' cannot be parsed into a list of entries") - - -if __name__ == "__main__": - import argparse - - # pylint: disable=invalid-name # before 2.5.0 pylint is too strict with the naming here - parser = argparse.ArgumentParser(description="Dump contents of a podio file to stdout") - parser.add_argument("inputfile", help="Name of the file to dump content from") - parser.add_argument( - "-c", - "--category", - help="Which Frame category to dump", - default="events", - type=str, - ) - parser.add_argument( - "-e", - "--entries", - help="Which entries to print. A single number, comma separated list of numbers" - ' or "first:last" for an inclusive range of entries. Defaults to the first entry.', - type=parse_entry_range, - default=[0], - ) - parser.add_argument( - "-d", - "--detailed", - help="Dump the full contents not just the collection info", - action="store_true", - default=False, - ) - parser.add_argument( - "--dump-edm", - help="Dump the specified EDM definition from the file in yaml format", - type=str, - default=None, - ) - parser.add_argument("--version", action="version", version=f"podio {__version__}") - - clargs = parser.parse_args() - main(clargs) +#!/usr/bin/env bash + +# Small wrapper script around the c++ executable that dumps the data and some +# potential post-processing that is easier with python + +set -euo pipefail + +THIS_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" +if ! 
ls "${THIS_DIR}"/podio-dump-tool > /dev/null 2>&1; then
+  echo "Could not find podio-dump-tool executable (has it been installed?)" >&2
+  exit 1
+fi
+
+# Check whether the EDM definition is requested. The arguments are only
+# inspected here, not consumed, so that "$@" can be forwarded unchanged below.
+# (Expanding a copy such as "${ALL_ARGS[@]}" aborts under 'set -u' with
+# bash < 4.4 if no arguments are passed.)
+DUMP_MODEL=0
+for arg in "$@"; do
+  if [[ "${arg}" == --dump-edm* ]]; then
+    DUMP_MODEL=1
+    break
+  fi
+done
+
+if [ ${DUMP_MODEL} = 1 ]; then
+  if ! ls "${THIS_DIR}"/json-to-yaml > /dev/null 2>&1; then
+    echo "Could not find the json-to-yaml executable (has it been installed?)" >&2
+    exit 1
+  fi
+  # Run the dump before the conversion: if it fails, 'set -e' stops the script
+  # here with its exit code instead of piping empty input into json-to-yaml
+  MODEL_JSON="$("${THIS_DIR}"/podio-dump-tool "$@")"
+  printf '%s\n' "${MODEL_JSON}" | "${THIS_DIR}"/json-to-yaml
+else
+  "${THIS_DIR}"/podio-dump-tool "$@"
+fi
diff --git a/tools/podio-dump-legacy b/tools/podio-dump-legacy
new file mode 100755
index 000000000..0b2b892fa
--- /dev/null
+++ b/tools/podio-dump-legacy
@@ -0,0 +1,214 @@
+#!/usr/bin/env python3
+"""podio-dump tool to dump contents of podio files
+
+NOTE: This is a legacy implementation in python. A newer version implemented in
+c++ is available that is much faster.
+"""
+
+import sys
+import json
+import yaml
+
+from tabulate import tabulate
+
+from podio_version import __version__
+
+
+def print_general_info(reader, filename):
+    """Print an overview of the file contents at the very beginning.
+
+    This prints things like the available categories (and how many entries they
+    have) as well as the filename, etc.
+ + Args: + reader (root_io.Reader, sio_io.Reader): An initialized reader + filename (str): The name of the input file + """ + legacy_text = " (this is a legacy file!)" if reader.is_legacy else "" + print( + f"input file: {filename}{legacy_text}\n" + " (written with podio version: " + f"{reader.current_file_version()})\n" + ) + + print("datamodel model definitions stored in this file: ") + for edm_name in reader.datamodel_definitions: + try: + edm_version = reader.current_file_version(edm_name) + print(f" - {edm_name} ({edm_version})") + except KeyError: + print(f" - {edm_name}") + + print() + print("Frame categories in this file:") + cats = [(c, len(reader.get(c))) for c in reader.categories] + print(tabulate(cats, headers=["Name", "Entries"])) + + +def print_frame_detailed(frame): + """Print the Frame in all its glory, dumping every collection via print + + Args: + frame (podio.Frame): The frame to print + """ + print("Collections:") + for name in sorted(frame.getAvailableCollections(), key=str.casefold): + coll = frame.get(name) + print(name, flush=True) + coll.print() + print(flush=True) + + print("\nParameters:", flush=True) + frame.get_parameters().print() + print(flush=True) + + +def print_frame_overview(frame): + """Print a Frame overview, dumping just collection names, types and sizes + + Args: + frame (podio.Frame): The frame to print + """ + rows = [] + for name in sorted(frame.getAvailableCollections(), key=str.casefold): + coll = frame.get(name) + rows.append((name, coll.getValueTypeName().data(), len(coll), f"{coll.getID():0>8x}")) + print("Collections:") + print(tabulate(rows, headers=["Name", "ValueType", "Size", "ID"])) + + rows = [] + for name in sorted(frame.parameters, key=str.casefold): + for par_type, n_pars in frame.get_param_info(name).items(): + rows.append([name, par_type, n_pars]) + print("\nParameters:") + print(tabulate(rows, headers=["Name", "Type", "Elements"])) + + +def print_frame(frame, cat_name, ientry, detailed): + """Print a 
Frame. + + Args: + frame (podio.Frame): The frame to print + cat_name (str): The category name + ientry (int): The entry number of this Frame + detailed (bool): Print just an overview or dump the whole contents + """ + print("{:#^82}".format(f" {cat_name}: {ientry} ")) # pylint: disable=consider-using-f-string + + if detailed: + print_frame_detailed(frame) + else: + print_frame_overview(frame) + + # Additional new line before the next entry + print("\n", flush=True) + + +def dump_model(reader, model_name): + """Dump the model in yaml format""" + if model_name not in reader.datamodel_definitions: + print(f"ERROR: Cannot dump model '{model_name}' (not present in file)") + return False + + model_def = json.loads(reader.get_datamodel_definition(model_name)) + print(yaml.dump(model_def, sort_keys=False, default_flow_style=False)) + + return True + + +def main(args): + """Main""" + from podio.reading import get_reader # pylint: disable=import-outside-toplevel + + try: + reader = get_reader(args.inputfile) + except ValueError as err: + print(f"ERROR: Cannot open file '{args.inputfile}': {err}") + sys.exit(1) + + if args.dump_edm is not None: + if dump_model(reader, args.dump_edm): + sys.exit(0) + else: + sys.exit(1) + + print_general_info(reader, args.inputfile) + if args.category not in reader.categories: + print(f"ERROR: Cannot print category '{args.category}' (not present in file)") + sys.exit(1) + + frames = reader.get(args.category) + for ient in args.entries: + try: + print_frame(frames[ient], args.category, ient, args.detailed) + except IndexError: + print(f'WARNING: Entry no. 
{ient} in "{args.category}" not present in the file!') + + +def parse_entry_range(ent_string): + """Parse which entries to print""" + try: + return [int(ent_string)] + except ValueError: + pass + + try: + return [int(i) for i in ent_string.split(",")] + except ValueError: + pass + + try: + first, last = [int(i) for i in ent_string.split(":")] + return list(range(first, last + 1)) + except ValueError: + pass + + raise argparse.ArgumentTypeError(f"'{ent_string}' cannot be parsed into a list of entries") + + +if __name__ == "__main__": + import argparse + + _EPILOG = ( + "NOTE: This is a legacy implementation in python. A newer version based on a c++ " + "implementation is available. That version is much faster, but does not handle pre-release" + " version files." + ) + + # pylint: disable=invalid-name # before 2.5.0 pylint is too strict with the naming here + parser = argparse.ArgumentParser( + description="Dump contents of a podio file to stdout.", epilog=_EPILOG + ) + parser.add_argument("inputfile", help="Name of the file to dump content from") + parser.add_argument( + "-c", + "--category", + help="Which Frame category to dump", + default="events", + type=str, + ) + parser.add_argument( + "-e", + "--entries", + help="Which entries to print. A single number, comma separated list of numbers" + ' or "first:last" for an inclusive range of entries. 
Defaults to the first entry.', + type=parse_entry_range, + default=[0], + ) + parser.add_argument( + "-d", + "--detailed", + help="Dump the full contents not just the collection info", + action="store_true", + default=False, + ) + parser.add_argument( + "--dump-edm", + help="Dump the specified EDM definition from the file in yaml format", + type=str, + default=None, + ) + parser.add_argument("--version", action="version", version=f"podio {__version__}") + + clargs = parser.parse_args() + main(clargs) diff --git a/tools/src/argparseUtils.h b/tools/src/argparseUtils.h new file mode 100644 index 000000000..8c83aedf4 --- /dev/null +++ b/tools/src/argparseUtils.h @@ -0,0 +1,26 @@ +#ifndef PODIO_TOOLS_ARGPARSEUTILS_H // NOLINT(llvm-header-guard): folder structure not suitable +#define PODIO_TOOLS_ARGPARSEUTILS_H // NOLINT(llvm-header-guard): folder structure not suitable + +#include +#include +#include +#include +#include +#include + +template +auto findFlags(const std::vector& argv, Flags... 
flags) {
+  return std::find_if(argv.begin(), argv.end(), [&](const auto& elem) { return ((elem == flags) || ...); });
+}
+
+inline size_t parseSizeOrExit(std::string_view str) {
+  std::size_t number{};
+  const auto [ptr, err] = std::from_chars(str.data(), str.data() + str.size(), number); // needs const char*
+  if (err != std::errc{} || ptr != str.data() + str.size()) { // conversion error or trailing characters
+    std::cerr << "'" << str << "' cannot be parsed into an integer number" << std::endl;
+    std::exit(1);
+  }
+  return number;
+}
+
+#endif // PODIO_TOOLS_ARGPARSEUTILS_H
diff --git a/tools/src/podio-dump-tool.cpp b/tools/src/podio-dump-tool.cpp
new file mode 100644
index 000000000..6e1b60ff8
--- /dev/null
+++ b/tools/src/podio-dump-tool.cpp
@@ -0,0 +1,272 @@
+#include "argparseUtils.h"
+#include "tabulate.h"
+
+#include "podio/Frame.h"
+#include "podio/Reader.h"
+#include "podio/podioVersion.h"
+#include "podio/utilities/MiscHelpers.h"
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace rv = std::ranges::views;
+
+template <>
+struct fmt::formatter : ostream_formatter {};
+
+struct ParsedArgs {
+  std::string inputFile{};
+  std::string category{"events"};
+  std::vector events{0};
+  std::string dumpEDM{};
+  bool detailed{false};
+};
+
+constexpr auto usageMsg = R"(usage: podio-dump [-h] [-c CATEGORY] [-e ENTRIES] [-d] [--version] inputfile)";
+
+constexpr auto helpMsg = R"(
+Dump contents of a podio file to stdout
+
+positional arguments:
+ inputfile Name of the file to dump content from
+
+options:
+ -h, --help show this help message and exit
+ -c CATEGORY, --category CATEGORY
+ Which Frame category to dump
+ -e ENTRIES, --entries ENTRIES
+ Which entries to print. A single number, comma separated list of numbers or "first:last" for an inclusive range of entries. Defaults to the first entry.
+ -d, --detailed Dump the full contents not just the collection info
+ --dump-edm DUMP_EDM Dump the specified EDM definition from the file in yaml format
+ --version show program's version number and exit
+)";
+
+void printUsageAndExit() {
+  fmt::println(stderr, "{}", usageMsg);
+  std::exit(1);
+}
+
+auto getArgumentValueOrExit(const std::vector& argv, std::vector::const_iterator it) {
+  const int argc = argv.size();
+  const auto index = std::distance(argv.begin(), it);
+  if (index > argc - 2) { // the flag is the last argument, so its value is missing
+    printUsageAndExit();
+  }
+  return argv[index + 1];
+}
+
+std::vector parseEventRange(const std::string_view evtRange) {
+  auto parseError = [evtRange]() {
+    fmt::println(stderr, "error: argument -e/--entries: '{}' cannot be parsed into a list of entries", evtRange);
+    std::exit(1);
+  };
+
+  // Split by ',' and transform into a range of string views
+  auto splitRange = evtRange | rv::split(',') |
+      rv::transform([](auto&& subrange) { return std::string_view(subrange.begin(), subrange.end()); });
+
+  if (std::ranges::distance(splitRange) == 1) {
+    // Only one entry, check if it's a range (start:end)
+    auto colonSplitRange = evtRange | rv::split(':') |
+        rv::transform([](auto&& subrange) { return std::string_view(subrange.begin(), subrange.end()); });
+
+    const auto it = std::ranges::begin(colonSplitRange);
+    const auto nextIt = std::ranges::next(it);
+
+    if (std::ranges::distance(colonSplitRange) == 1) {
+      return {parseSizeOrExit(*it)};
+    } else if (std::ranges::distance(colonSplitRange) == 2 && !(*nextIt).empty()) {
+      const size_t start = parseSizeOrExit(*it);
+      const size_t stop = parseSizeOrExit(*nextIt);
+      std::vector<size_t> events(stop < start ? 0 : stop - start + 1); // reversed range selects nothing
+      std::iota(events.begin(), events.end(), start);
+      return events;
+    } else {
+      parseError();
+    }
+  } else {
+    std::vector events;
+    events.reserve(std::ranges::distance(splitRange));
+    std::ranges::transform(splitRange, std::back_inserter(events), parseSizeOrExit);
+    return events;
+  }
+
+  parseError();
+  return {};
+}
+
+ParsedArgs 
parseArgs(std::vector argv) {
+  // find help or version
+  if (const auto it = findFlags(argv, "-h", "--help", "--version"); it != argv.end()) {
+    if (*it == "--version") {
+      fmt::println("podio {}", podio::version::build_version);
+    } else {
+      fmt::print("{}\n{}", usageMsg, helpMsg);
+    }
+    std::exit(0);
+  }
+
+  ParsedArgs args;
+  // detailed flag
+  if (const auto it = findFlags(argv, "-d", "--detailed"); it != argv.end()) {
+    args.detailed = true;
+    argv.erase(it);
+  }
+  // category
+  if (const auto it = findFlags(argv, "-c", "--category"); it != argv.end()) {
+    args.category = getArgumentValueOrExit(argv, it);
+    argv.erase(it, it + 2);
+  }
+  // event range
+  if (const auto it = findFlags(argv, "-e", "--entries"); it != argv.end()) {
+    args.events = parseEventRange(getArgumentValueOrExit(argv, it)); // exits with usage if the value is missing
+    argv.erase(it, it + 2);
+  }
+  // dump-edm
+  if (const auto it = findFlags(argv, "--dump-edm"); it != argv.end()) {
+    args.dumpEDM = getArgumentValueOrExit(argv, it);
+    argv.erase(it, it + 2);
+  }
+
+  if (argv.size() != 1) { // exactly the input file has to remain after removing all flags
+    printUsageAndExit();
+  }
+  args.inputFile = argv[0];
+
+  return args;
+}
+
+template
+consteval const std::string_view getTypeString() {
+  using namespace std::string_view_literals;
+  if constexpr (std::is_same_v) {
+    return "int"sv;
+  } else if constexpr (std::is_same_v) {
+    return "float"sv;
+  } else if constexpr (std::is_same_v) {
+    return "double"sv;
+  } else if constexpr (std::is_same_v) {
+    return "std::string"sv;
+  }
+
+  return "unknown"sv;
+}
+
+template
+void getParameterOverview(const podio::Frame& frame,
+                          std::vector>& rows) {
+  constexpr auto typeString = getTypeString();
+  for (const auto& parKey : podio::utils::sortAlphabeticaly(frame.getParameterKeys())) {
+    rows.emplace_back(parKey, typeString, frame.getParameter>(parKey)->size());
+  }
+}
+
+void printFrameOverview(const podio::Frame& frame) {
+  fmt::println("Collections:");
+  const auto collNames = frame.getAvailableCollections();
+
+  std::vector> rows;
+  rows.reserve(collNames.size());
+
+  for (const 
auto& name : podio::utils::sortAlphabeticaly(collNames)) { + const auto coll = frame.get(name); + rows.emplace_back(name, coll->getValueTypeName(), coll->size(), fmt::format("{:0>8x}", coll->getID())); + } + printTable(rows, {"Name", "ValueType", "Size", "ID"}); + + fmt::println("\nParameters:"); + std::vector> paramRows{}; + getParameterOverview(frame, paramRows); + getParameterOverview(frame, paramRows); + getParameterOverview(frame, paramRows); + getParameterOverview(frame, paramRows); + + printTable(paramRows, {"Name", "Type", "Elements"}); +} + +void printFrameDetailed(const podio::Frame& frame) { + fmt::println("Collections:"); + const auto collNames = frame.getAvailableCollections(); + for (const auto& name : podio::utils::sortAlphabeticaly(collNames)) { + const auto coll = frame.get(name); + fmt::println("{}", name); + coll->print(); + fmt::println(""); + } + + fmt::println("\nParameters:"); + frame.getParameters().print(); + fmt::println(""); +} + +void printGeneralInfo(const podio::Reader& reader, const std::string& filename) { + fmt::println("input file: {}", filename); + fmt::println(" (written with podio version: {})", reader.currentFileVersion()); + fmt::println("\ndatamodel model definitions stored in this file:"); + for (const auto& modelName : reader.getAvailableDatamodels()) { + const auto modelVersion = reader.currentFileVersion(modelName); + if (modelVersion) { + fmt::println(" - {} ({})", modelName, modelVersion.value()); + } else { + fmt::println(" - {}", modelName); + } + } + + std::vector> rows{}; + for (const auto& cat : reader.getAvailableCategories()) { + rows.emplace_back(cat, reader.getEntries(std::string(cat))); + } + fmt::println("\nFrame categories in this file:"); + printTable(rows, {"Name", "Entries"}); +} + +int dumpEDMDefinition(const podio::Reader& reader, const std::string& modelName) { + const auto availModels = reader.getAvailableDatamodels(); + if (const auto it = std::ranges::find(availModels, modelName); it == 
availModels.end()) { + fmt::println(stderr, "ERROR: cannot dump model '{}' (not present in file)", modelName); + return 1; + } + fmt::println("{}", reader.getDatamodelDefinition(modelName)); + return 0; +} + +void printFrame(const podio::Frame& frame, const std::string& category, size_t iEntry, bool detailed) { + fmt::println("{:#^82}", fmt::format(" {}: {} ", category, iEntry)); + if (detailed) { + printFrameDetailed(frame); + } else { + printFrameOverview(frame); + } +} + +int main(int argc, char* argv[]) { + // We strip the executable name off directly for parsing + const auto args = parseArgs({argv + 1, argv + argc}); + + auto reader = podio::makeReader(args.inputFile); + if (!args.dumpEDM.empty()) { + return dumpEDMDefinition(reader, args.dumpEDM); + } + + printGeneralInfo(reader, args.inputFile); + + for (const auto event : args.events) { + try { + const auto& frame = reader.readFrame(args.category, event); + printFrame(frame, args.category, event, args.detailed); + } catch (std::runtime_error& err) { + fmt::println(stderr, "{}", err.what()); + return 1; + } + } + + return 0; +} diff --git a/tools/src/tabulate.h b/tools/src/tabulate.h new file mode 100644 index 000000000..cc2fd9609 --- /dev/null +++ b/tools/src/tabulate.h @@ -0,0 +1,62 @@ +#ifndef PODIO_TOOLS_TABULATE_H // NOLINT(llvm-header-guard): folder structure not suitable +#define PODIO_TOOLS_TABULATE_H // NOLINT(llvm-header-guard): folder structure not suitable + +#include + +#include +#include +#include +#include +#include +#include + +template +void printTable(const std::vector>& rows, const std::vector& headers) { + // Simply assume that all rows have the same widths + const auto nCols = headers.size(); + constexpr auto nColsFromRows = std::tuple_size_v>; + if (nCols != nColsFromRows) { + throw std::invalid_argument("headers and rows have to have the same number of columns"); + } + + // Transform all elements into strings first to determine column widths + std::vector> stringRows; + 
stringRows.reserve(rows.size()); + std::ranges::transform(rows, std::back_inserter(stringRows), [&nCols](const auto& elem) { + std::vector strs; + strs.reserve(nCols); + std::apply([&strs](auto&&... args) { (strs.emplace_back(fmt::format("{}", args)), ...); }, elem); + return strs; + }); + + // First figure out how large each column has to be to fit all the content + std::vector colWidths(nCols, 0); + for (size_t i = 0; i < nCols; ++i) { + colWidths[i] = headers[i].size(); + } + for (const auto& row : stringRows) { + for (size_t iCol = 0; iCol < nCols; ++iCol) { + colWidths[iCol] = std::max(row[iCol].size(), colWidths[iCol]); + } + } + + // print the table header + for (size_t iCol = 0; iCol < nCols; ++iCol) { + fmt::print("{:<{}} ", headers[iCol], colWidths[iCol]); + } + fmt::print("\n"); + for (size_t iCol = 0; iCol < nCols; ++iCol) { + fmt::print("{:->{}} ", "", colWidths[iCol]); + } + fmt::print("\n"); + + // and the contents + for (const auto& row : stringRows) { + for (size_t iCol = 0; iCol < nCols; ++iCol) { + fmt::print("{:<{}} ", row[iCol], colWidths[iCol]); + } + fmt::print("\n"); + } +} + +#endif // PODIO_TOOLS_TABULATE_H