Skip to content

Commit a609b08

Browse files
committed
Merge remote-tracking branch 'mainline/dev' into topic-pybind11-2.13-py-3.13
2 parents 7c8c704 + 2b255f6 commit a609b08

29 files changed

+804
-158
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -826,6 +826,7 @@ if(openPMD_BUILD_TESTING)
826826
test/Files_SerialIO/close_and_reopen_test.cpp
827827
test/Files_SerialIO/filebased_write_test.cpp
828828
test/Files_SerialIO/issue_1744_unique_ptrs_at_close_time.cpp
829+
test/Files_SerialIO/components_without_extent.cpp
829830
)
830831
elseif(${test_name} STREQUAL "ParallelIO" AND openPMD_HAVE_MPI)
831832
list(APPEND ${out_list}

docs/source/analysis/pandas.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,17 @@ One can also combine all iterations in a single dataframe like this:
5050
# like before but with a new column "iteration" and all particles
5151
print(df)
5252
53+
One can also include further openPMD particle species attributes, e.g.,
54+
from the `ED-PIC <https://github.com/openPMD/openPMD-standard/blob/1.1.0/EXT_ED-PIC.md#particle-records-macroparticles>`__ extension
55+
or `custom code properties <https://impactx.readthedocs.io/en/25.11/dataanalysis/dataanalysis.html#additional-beam-attributes>`__
56+
as extra dataframe columns:
57+
58+
.. code-block:: python
59+
60+
df = s.to_df("electrons", attributes=["s_ref"])
61+
62+
# like before but with a new column "s_ref"
63+
print(df)
5364
5465
.. _analysis-pandas-ascii:
5566

docs/source/details/backendconfig.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,11 @@ Or in a Series constructor JSON/TOML configuration:
122122
"hint_lazy_parsing_timeout": 20
123123
}
124124
125+
As of openPMD-api 0.17.0, the parser verifies that all records within a mesh or within a particle species have consistent shapes / extents.
126+
This is used for filling in the shape for constant components that do not define it.
127+
In order to skip this check in the error case, the key ``{"verify_homogeneous_extents": false}`` may be set (alternatively ``export OPENPMD_VERIFY_HOMOGENEOUS_EXTENTS=0`` will do the same).
128+
This will help read datasets with inconsistent metadata definitions.
129+
125130
The key ``resizable`` can be passed to ``Dataset`` options.
126131
If set to ``{"resizable": true}``, this declares that it shall be allowed to increase the ``Extent`` of a ``Dataset`` via ``resetDataset()`` at a later time, i.e., after it has been first declared (and potentially written).
127132
For HDF5, resizable Datasets come with a performance penalty.

examples/11_particle_dataframe.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,26 +38,27 @@
3838
s = io.Series("../samples/git-sample/data%T.h5", io.Access.read_only)
3939
electrons = s.snapshots()[400].particles["electrons"]
4040

41-
# all particles
42-
df = electrons.to_df()
41+
# all particles, extra column for "particleShape" attribute
42+
# (from ED-PIC extension)
43+
df = electrons.to_df(attributes=["particleShape"])
4344
print(type(df) is pd.DataFrame)
4445
print(df)
4546

4647
# only first 100 particles
47-
df = electrons.to_df(np.s_[:100])
48+
df = electrons.to_df(slice=np.s_[:100])
4849
print(df)
4950

5051
# all particles over all steps
51-
df = s.to_df("electrons")
52+
df = s.to_df("electrons", attributes=["particleShape"])
5253
print(df)
5354

5455
if found_cudf:
5556
# all particles - to GPU
56-
cdf = cudf.from_pandas(electrons.to_df())
57+
cdf = cudf.from_pandas(electrons.to_df(attributes=["particleShape"]))
5758
print(cdf)
5859

5960
# all particles over all steps - to GPU
60-
cdf = s.to_cudf("electrons")
61+
cdf = s.to_cudf("electrons", attributes=["particleShape"])
6162
print(cdf)
6263

6364
# Particles
@@ -67,7 +68,7 @@
6768
# pickle capabilities, so we test this here:
6869
dask.config.set(scheduler='processes')
6970

70-
df = electrons.to_dask()
71+
df = electrons.to_dask(attributes=["particleShape"])
7172
print(df)
7273

7374
# check chunking of a variable

include/openPMD/Dataset.hpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -54,9 +54,21 @@ class Dataset
5454
*/
5555
JOINED_DIMENSION = std::numeric_limits<std::uint64_t>::max(),
5656
/**
57-
* Some backends (i.e. JSON and TOML in template mode) support the
58-
* creation of dataset with undefined datatype and extent.
59-
* The extent should be given as {UNDEFINED_EXTENT} for that.
57+
* In some use cases, the extent need not be specified.
58+
* For these, specify Extent{UNDEFINED_EXTENT}.
59+
* Use cases:
60+
*
61+
* 1. Some backends (i.e. JSON and TOML in template mode) support the
62+
* creation of datasets with undefined datatype and extent.
63+
* The extent should be given as {UNDEFINED_EXTENT} for that.
64+
* 2. With openPMD 2.0, the shape of constant components may be omitted
65+
* in writing if it is defined somewhere else as part
66+
* of the same Mesh / Species.
67+
* (https://github.com/openPMD/openPMD-standard/pull/289)
68+
* When reading such datasets, the openPMD-api will try to fill in
69+
* the missing extents, so the extent for consistently-defined
70+
* datasets should ideally not be reported by the read-side API
71+
* as undefined.
6072
*/
6173
UNDEFINED_EXTENT = std::numeric_limits<std::uint64_t>::max() - 1
6274
};
@@ -87,5 +99,8 @@ class Dataset
8799

88100
std::optional<size_t> joinedDimension() const;
89101
static std::optional<size_t> joinedDimension(Extent const &);
102+
103+
bool undefinedExtent() const;
104+
static bool undefinedExtent(Extent const &);
90105
};
91106
} // namespace openPMD

include/openPMD/IO/AbstractIOHandler.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -314,6 +314,7 @@ class AbstractIOHandler
314314
internal::SeriesStatus m_seriesStatus = internal::SeriesStatus::Default;
315315
IterationEncoding m_encoding = IterationEncoding::groupBased;
316316
OpenpmdStandard m_standard = auxiliary::parseStandard(getStandardDefault());
317+
bool m_verify_homogeneous_extents = true;
317318
}; // AbstractIOHandler
318319

319320
} // namespace openPMD

include/openPMD/IO/IOTask.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
#include "openPMD/ChunkInfo.hpp"
2424
#include "openPMD/Dataset.hpp"
25+
#include "openPMD/Error.hpp"
2526
#include "openPMD/IterationEncoding.hpp"
2627
#include "openPMD/Streaming.hpp"
2728
#include "openPMD/auxiliary/Export.hpp"

include/openPMD/Record.hpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,8 @@ class Record : public BaseRecord<RecordComponent>
5353

5454
void
5555
flush_impl(std::string const &, internal::FlushParams const &) override;
56-
void read();
56+
57+
[[nodiscard]] internal::HomogenizeExtents read();
5758
}; // Record
5859

5960
template <typename T>

include/openPMD/RecordComponent.hpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,8 @@ class RecordComponent : public BaseRecordComponent
158158
* * Shrinking any dimension's extent.
159159
* * Changing the number of dimensions.
160160
*
161+
* The dataset extent may be empty to indicate undefined extents.
162+
*
161163
* Backend support for resizing datasets:
162164
* * JSON: Supported
163165
* * ADIOS2: Supported as of ADIOS2 2.7.0
@@ -540,6 +542,26 @@ OPENPMD_protected
540542
void verifyChunk(Datatype, Offset const &, Extent const &) const;
541543
}; // RecordComponent
542544

545+
namespace internal
546+
{
547+
// Must put this after the definition of RecordComponent due to the
548+
// deque<RecordComponent>
549+
struct HomogenizeExtents
550+
{
551+
std::deque<RecordComponent> without_extent;
552+
std::optional<Extent> retrieved_extent;
553+
bool verify_homogeneous_extents = true;
554+
555+
explicit HomogenizeExtents();
556+
HomogenizeExtents(bool verify_homogeneous_extents);
557+
558+
void check_extent(Attributable const &callsite, RecordComponent &);
559+
auto merge(Attributable const &callsite, HomogenizeExtents)
560+
-> HomogenizeExtents &;
561+
void homogenize(Attributable const &callsite) &&;
562+
};
563+
} // namespace internal
564+
543565
} // namespace openPMD
544566

545567
#include "openPMD/UndefDatatypeMacros.hpp"

include/openPMD/backend/Attributable.hpp

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
*/
2121
#pragma once
2222

23+
#include "openPMD/Error.hpp"
2324
#include "openPMD/IO/AbstractIOHandler.hpp"
2425
#include "openPMD/ThrowError.hpp"
2526
#include "openPMD/auxiliary/OutOfRangeMsg.hpp"
@@ -29,6 +30,7 @@
2930
#include <cstddef>
3031
#include <map>
3132
#include <memory>
33+
#include <optional>
3234
#include <string>
3335
#include <type_traits>
3436
#include <vector>
@@ -54,6 +56,7 @@ namespace internal
5456
{
5557
class IterationData;
5658
class SeriesData;
59+
struct HomogenizeExtents;
5760

5861
class SharedAttributableData
5962
{
@@ -105,6 +108,7 @@ namespace internal
105108
friend class openPMD::Attributable;
106109

107110
using SharedData_t = std::shared_ptr<SharedAttributableData>;
111+
using A_MAP = SharedData_t::element_type::A_MAP;
108112

109113
public:
110114
AttributableData();
@@ -155,6 +159,32 @@ namespace internal
155159
std::shared_ptr<typename T::Data_t>(self, [](auto const *) {}));
156160
return res;
157161
}
162+
163+
inline auto attributes() -> A_MAP &
164+
{
165+
return operator*().m_attributes;
166+
}
167+
[[nodiscard]] inline auto attributes() const -> A_MAP const &
168+
{
169+
return operator*().m_attributes;
170+
}
171+
[[nodiscard]] inline auto readAttribute(std::string const &name) const
172+
-> Attribute const &
173+
{
174+
auto const &attr = attributes();
175+
if (auto it = attr.find(name); it != attr.end())
176+
{
177+
return it->second;
178+
}
179+
else
180+
{
181+
throw error::ReadError(
182+
error::AffectedObject::Attribute,
183+
error::Reason::NotFound,
184+
std::nullopt,
185+
"Not found: '" + name + "'.");
186+
}
187+
}
158188
};
159189

160190
template <typename, typename>
@@ -213,6 +243,7 @@ class Attributable
213243
friend class StatefulSnapshotsContainer;
214244
friend class internal::AttributableData;
215245
friend class Snapshots;
246+
friend struct internal::HomogenizeExtents;
216247

217248
protected:
218249
// tag for internal constructor

0 commit comments

Comments
 (0)