Skip to content

Commit 9b90ba0

Browse files
authored
DataFrame: Add Attribute Columns (#1814)
* DataFrame: Add Attribute Columns Optionally add particle species attributes as extra columns. This is super useful when dealing with openPMD extensions or custom attributes, e.g., for accelerator physics. In the latter case, we store reference particle information on the particle species group (changes per iteration/snapshot). * `to_df()` backwards compatibility Specifically for `to_df(np.s_[...])` calls that did not name an argument
1 parent 57dba13 commit 9b90ba0

File tree

4 files changed

+65
-16
lines changed

4 files changed

+65
-16
lines changed

docs/source/analysis/pandas.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,17 @@ One can also combine all iterations in a single dataframe like this:
5050
# like before but with a new column "iteration" and all particles
5151
print(df)
5252
53+
One can also add openPMD particle species attributes, e.g.,
54+
from the `ED-PIC <https://github.com/openPMD/openPMD-standard/blob/1.1.0/EXT_ED-PIC.md#particle-records-macroparticles>`__ extension
55+
or `custom code properties <https://impactx.readthedocs.io/en/25.11/dataanalysis/dataanalysis.html#additional-beam-attributes>`__
56+
as extra dataframe columns:
57+
58+
.. code-block:: python
59+
60+
df = s.to_df("electrons", attributes=["s_ref"])
61+
62+
# like before but with a new column "s_ref"
63+
print(df)
5364
5465
.. _analysis-pandas-ascii:
5566

examples/11_particle_dataframe.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -38,26 +38,27 @@
3838
s = io.Series("../samples/git-sample/data%T.h5", io.Access.read_only)
3939
electrons = s.snapshots()[400].particles["electrons"]
4040

41-
# all particles
42-
df = electrons.to_df()
41+
# all particles, extra column for "particleShape" attribute
42+
# (from ED-PIC extension)
43+
df = electrons.to_df(attributes=["particleShape"])
4344
print(type(df) is pd.DataFrame)
4445
print(df)
4546

4647
# only first 100 particles
47-
df = electrons.to_df(np.s_[:100])
48+
df = electrons.to_df(slice=np.s_[:100])
4849
print(df)
4950

5051
# all particles over all steps
51-
df = s.to_df("electrons")
52+
df = s.to_df("electrons", attributes=["particleShape"])
5253
print(df)
5354

5455
if found_cudf:
5556
# all particles - to GPU
56-
cdf = cudf.from_pandas(electrons.to_df())
57+
cdf = cudf.from_pandas(electrons.to_df(attributes=["particleShape"]))
5758
print(cdf)
5859

5960
# all particles over all steps - to GPU
60-
cdf = s.to_cudf("electrons")
61+
cdf = s.to_cudf("electrons", attributes=["particleShape"])
6162
print(cdf)
6263

6364
# Particles
@@ -67,7 +68,7 @@
6768
# pickle capabilities, so we test this here:
6869
dask.config.set(scheduler='processes')
6970

70-
df = electrons.to_dask()
71+
df = electrons.to_dask(attributes=["particleShape"])
7172
print(df)
7273

7374
# check chunking of a variable

src/binding/python/openpmd_api/DaskDataFrame.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,22 @@
88
import numpy as np
99

1010

11-
def read_chunk_to_df(species, chunk):
11+
def read_chunk_to_df(species, chunk, attributes=None):
1212
stride = np.s_[chunk.offset[0]:chunk.offset[0]+chunk.extent[0]]
13-
return species.to_df(stride)
13+
return species.to_df(attributes=attributes, slice=stride)
1414

1515

16-
def particles_to_daskdataframe(particle_species):
16+
def particles_to_daskdataframe(particle_species, attributes=None):
1717
"""
1818
Load all records of a particle species into a Dask DataFrame.
1919
2020
Parameters
2121
----------
2222
particle_species : openpmd_api.ParticleSpecies
2323
A ParticleSpecies class in openPMD-api.
24+
attributes : list of strings, optional
25+
A list of attributes of the particle_species that should be read and
26+
added as extra columns.
2427
2528
Returns
2629
-------
@@ -83,7 +86,9 @@ def particles_to_daskdataframe(particle_species):
8386

8487
# merge DataFrames
8588
dfs = [
86-
delayed(read_chunk_to_df)(particle_species, chunk) for chunk in chunks
89+
delayed(read_chunk_to_df)(
90+
particle_species, chunk=chunk, attributes=attributes
91+
) for chunk in chunks
8792
]
8893
df = dd.from_delayed(dfs)
8994

src/binding/python/openpmd_api/DataFrame.py

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,22 @@
1010
import numpy as np
1111

1212

13-
def particles_to_dataframe(particle_species, slice=None):
13+
def particles_to_dataframe(particle_species,
14+
*legacy_args,
15+
attributes=None,
16+
slice=None):
1417
"""
1518
Load all records of a particle species into a Pandas DataFrame.
1619
1720
Parameters
1821
----------
1922
particle_species : openpmd_api.ParticleSpecies
2023
A ParticleSpecies class in openPMD-api.
24+
legacy_args : tuple
25+
DO NOT USE. Catch-all for legacy, unnamed arguments.
26+
attributes : list of strings, optional
27+
A list of attributes of the particle_species that should be read and
28+
added as extra columns.
2129
slice : np.s_, optional
2230
A numpy slice that can be used to load only a sub-selection of
2331
particles.
@@ -40,6 +48,20 @@ def particles_to_dataframe(particle_species, slice=None):
4048
are optimal arguments for the slice parameter
4149
pandas.DataFrame : the central dataframe object created here
4250
"""
51+
# backwards compatibility: in openPMD-api 0.17+, we added the
52+
# additional "attributes" argument and moved slice= to the end.
53+
if legacy_args:
54+
if attributes is None and slice is None and len(legacy_args) == 1:
55+
slice = legacy_args[0]
56+
import warnings
57+
warnings.warn("The to_df() argument order changed in "
58+
"openPMD-api 0.17.0!\nThe slice "
59+
"argument must be passed as a named argument.",
60+
DeprecationWarning
61+
)
62+
else:
63+
raise RuntimeError("to_df() does not support unnamed arguments!")
64+
4365
# import pandas here for a lazy import
4466
try:
4567
import pandas as pd
@@ -69,14 +91,18 @@ def particles_to_dataframe(particle_species, slice=None):
6991

7092
df = pd.DataFrame(columns)
7193

94+
if attributes is not None:
95+
for attribute in attributes:
96+
df[attribute] = particle_species.get_attribute(attribute)
97+
7298
# set a header for the first column (row index)
7399
# note: this is NOT the particle id
74100
df.index.name = "row"
75101

76102
return df
77103

78104

79-
def iterations_to_dataframe(series, species_name):
105+
def iterations_to_dataframe(series, species_name, attributes=None):
80106
"""
81107
Load all iterations of a particle species into a Pandas DataFrame.
82108
@@ -86,6 +112,9 @@ def iterations_to_dataframe(series, species_name):
86112
A Series class in openPMD-api.
87113
species_name : string
88114
The name of a particle species.
115+
attributes : list of strings, optional
116+
A list of attributes of the particle species that should be read and
117+
added as extra columns.
89118
90119
Returns
91120
-------
@@ -115,7 +144,7 @@ def iterations_to_dataframe(series, species_name):
115144
(
116145
iteration
117146
.particles[species_name]
118-
.to_df()
147+
.to_df(attributes=attributes)
119148
.assign(iteration=i)
120149
for i, iteration in series.snapshots().items()
121150
),
@@ -126,7 +155,7 @@ def iterations_to_dataframe(series, species_name):
126155
return df
127156

128157

129-
def iterations_to_cudf(series, species_name):
158+
def iterations_to_cudf(series, species_name, attributes=None):
130159
"""
131160
Load all iterations of a particle species into a cuDF DataFrame.
132161
@@ -136,6 +165,9 @@ def iterations_to_cudf(series, species_name):
136165
A Series class in openPMD-api.
137166
species_name : string
138167
The name of a particle species.
168+
attributes : list of strings, optional
169+
A list of attributes of the particle species that should be read and
170+
added as extra columns.
139171
140172
Returns
141173
-------
@@ -172,7 +204,7 @@ def iterations_to_cudf(series, species_name):
172204
cudf.from_pandas(
173205
iteration
174206
.particles[species_name]
175-
.to_df()
207+
.to_df(attributes=attributes)
176208
.assign(iteration=i)
177209
)
178210
for i, iteration in series.snapshots().items()

0 commit comments

Comments
 (0)