Skip to content

Commit 8faadde

Browse files
Initial support for HDF5 subfiling (#1580)
* Initial support for HDF5 subfiling * Add JSON/TOML parameterization * Turn errors into warnings when subfiling is unavailable * Add documentation
1 parent 17e757f commit 8faadde

File tree

7 files changed

+247
-21
lines changed

7 files changed

+247
-21
lines changed

docs/source/backends/hdf5.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,15 @@ I/O Method
1515

1616
HDF5 internally either writes serially, via ``POSIX`` on Unix systems, or parallel to a single logical file via MPI-I/O.
1717

18+
Virtual File Drivers
19+
********************
20+
21+
Rudimentary support for HDF5 VFDs (`virtual file driver <https://www.hdfgroup.org/wp-content/uploads/2021/10/HDF5-VFD-Plugins-HUG.pdf>`_) is available (currently only the *subfiling* VFD).
22+
Note that the subfiling VFD needs to be enabled explicitly when configuring HDF5 and threaded MPI must be used.
23+
24+
Virtual file drivers are configured via JSON/TOML.
25+
Refer to the page on :ref:`JSON/TOML configuration <backendconfig-hdf5>` for further details.
26+
1827

1928
Backend-Specific Controls
2029
-------------------------

docs/source/details/backendconfig.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -190,6 +190,18 @@ Explanation of the single keys:
190190
The default is ``"auto"`` for a heuristic.
191191
``"none"`` can be used to disable chunking.
192192
Chunking generally improves performance and only needs to be disabled in corner-cases, e.g. when heavily relying on independent, parallel I/O that non-collectively declares data records.
193+
* ``hdf5.vfd.type`` selects the HDF5 virtual file driver.
194+
Currently available are:
195+
196+
* ``"default"``: Equivalent to specifying nothing.
197+
* ``"subfiling"``: Use the `subfiling VFD <https://www.hdfgroup.org/wp-content/uploads/2022/09/HDF5-Subfiling-VFD.pdf>`_.
198+
Note that the subfiling VFD needs to be enabled explicitly when configuring HDF5 and threaded MPI must be used.
199+
When using this VFD, the options described below are additionally available.
200+
They correspond to the fields of ``H5FD_subfiling_params_t``; refer to the HDF5 documentation for their detailed meanings.
201+
202+
* ``hdf5.vfd.ioc_selection``: Must be one of ``["one_per_node", "every_nth_rank", "with_config", "total"]``
203+
* ``hdf5.vfd.stripe_size``: Must be an integer
204+
* ``hdf5.vfd.stripe_count``: Must be an integer
193205

194206
.. _backendconfig-other:
195207

docs/source/details/hdf5.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22
"hdf5": {
33
"dataset": {
44
"chunks": "auto"
5+
},
6+
"vfd": {
7+
"type": "subfiling",
8+
"ioc_selection": "every_nth_rank",
9+
"stripe_size": 33554432,
10+
"stripe_count": -1
511
}
612
}
713
}

examples/5_write_parallel.cpp

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ using namespace openPMD;
3131

3232
int main(int argc, char *argv[])
3333
{
34-
MPI_Init(&argc, &argv);
34+
int provided;
35+
MPI_Init_thread(&argc, &argv, MPI_THREAD_MULTIPLE, &provided);
3536

3637
int mpi_size;
3738
int mpi_rank;
@@ -47,9 +48,20 @@ int main(int argc, char *argv[])
4748
cout << "Set up a 2D array with 10x300 elements per MPI rank ("
4849
<< mpi_size << "x) that will be written to disk\n";
4950

51+
std::string subfiling_config = R"(
52+
[hdf5.vfd]
53+
type = "subfiling"
54+
ioc_selection = "every_nth_rank"
55+
stripe_size = 33554432
56+
stripe_count = -1
57+
)";
58+
5059
// open file for writing
5160
Series series = Series(
52-
"../samples/5_parallel_write.h5", Access::CREATE, MPI_COMM_WORLD);
61+
"../samples/5_parallel_write.h5",
62+
Access::CREATE,
63+
MPI_COMM_WORLD,
64+
subfiling_config);
5365
if (0 == mpi_rank)
5466
cout << "Created an empty series in parallel with " << mpi_size
5567
<< " MPI ranks\n";

include/openPMD/IO/HDF5/HDF5IOHandlerImpl.hpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,10 @@ namespace openPMD
3838
class HDF5IOHandlerImpl : public AbstractIOHandlerImpl
3939
{
4040
public:
41-
HDF5IOHandlerImpl(AbstractIOHandler *, json::TracingJSON config);
41+
HDF5IOHandlerImpl(
42+
AbstractIOHandler *,
43+
json::TracingJSON config,
44+
bool do_warn_unused_params = true);
4245
~HDF5IOHandlerImpl() override;
4346

4447
void
@@ -114,8 +117,9 @@ class HDF5IOHandlerImpl : public AbstractIOHandlerImpl
114117
std::optional<MPI_Comm> m_communicator;
115118
#endif
116119

117-
private:
118120
json::TracingJSON m_config;
121+
122+
private:
119123
std::string m_chunks = "auto";
120124
struct File
121125
{

src/IO/HDF5/HDF5IOHandler.cpp

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,9 @@ namespace openPMD
6666
#endif
6767

6868
HDF5IOHandlerImpl::HDF5IOHandlerImpl(
69-
AbstractIOHandler *handler, json::TracingJSON config)
69+
AbstractIOHandler *handler,
70+
json::TracingJSON config,
71+
bool do_warn_unused_params)
7072
: AbstractIOHandlerImpl(handler)
7173
, m_datasetTransferProperty{H5P_DEFAULT}
7274
, m_fileAccessProperty{H5P_DEFAULT}
@@ -167,23 +169,28 @@ HDF5IOHandlerImpl::HDF5IOHandlerImpl(
167169
}
168170

169171
// unused params
170-
auto shadow = m_config.invertShadow();
171-
if (shadow.size() > 0)
172+
if (do_warn_unused_params)
172173
{
173-
switch (m_config.originallySpecifiedAs)
174+
auto shadow = m_config.invertShadow();
175+
if (shadow.size() > 0)
174176
{
175-
case json::SupportedLanguages::JSON:
176-
std::cerr << "Warning: parts of the backend configuration for "
177-
"HDF5 remain unused:\n"
178-
<< shadow << std::endl;
179-
break;
180-
case json::SupportedLanguages::TOML: {
181-
auto asToml = json::jsonToToml(shadow);
182-
std::cerr << "Warning: parts of the backend configuration for "
183-
"HDF5 remain unused:\n"
184-
<< asToml << std::endl;
185-
break;
186-
}
177+
switch (m_config.originallySpecifiedAs)
178+
{
179+
case json::SupportedLanguages::JSON:
180+
std::cerr
181+
<< "Warning: parts of the backend configuration for "
182+
"HDF5 remain unused:\n"
183+
<< shadow << std::endl;
184+
break;
185+
case json::SupportedLanguages::TOML: {
186+
auto asToml = json::jsonToToml(shadow);
187+
std::cerr
188+
<< "Warning: parts of the backend configuration for "
189+
"HDF5 remain unused:\n"
190+
<< asToml << std::endl;
191+
break;
192+
}
193+
}
187194
}
188195
}
189196
}

src/IO/HDF5/ParallelHDF5IOHandler.cpp

Lines changed: 177 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,17 @@
1919
* If not, see <http://www.gnu.org/licenses/>.
2020
*/
2121
#include "openPMD/IO/HDF5/ParallelHDF5IOHandler.hpp"
22+
#include "openPMD/Error.hpp"
2223
#include "openPMD/IO/HDF5/ParallelHDF5IOHandlerImpl.hpp"
2324
#include "openPMD/auxiliary/Environment.hpp"
25+
#include "openPMD/auxiliary/JSON_internal.hpp"
26+
#include "openPMD/auxiliary/StringManip.hpp"
27+
#include "openPMD/auxiliary/Variant.hpp"
28+
#include <type_traits>
29+
30+
#ifdef H5_HAVE_SUBFILING_VFD
31+
#include <H5FDsubfiling.h>
32+
#endif
2433

2534
#if openPMD_HAVE_MPI
2635
#include <mpi.h>
@@ -61,7 +70,7 @@ std::future<void> ParallelHDF5IOHandler::flush(internal::ParsedFlushParams &)
6170

6271
ParallelHDF5IOHandlerImpl::ParallelHDF5IOHandlerImpl(
6372
AbstractIOHandler *handler, MPI_Comm comm, json::TracingJSON config)
64-
: HDF5IOHandlerImpl{handler, std::move(config)}
73+
: HDF5IOHandlerImpl{handler, std::move(config), /* do_warn_unused_params = */ false}
6574
, m_mpiComm{comm}
6675
, m_mpiInfo{MPI_INFO_NULL} /* MPI 3.0+: MPI_INFO_ENV */
6776
{
@@ -164,6 +173,173 @@ ParallelHDF5IOHandlerImpl::ParallelHDF5IOHandlerImpl(
164173
VERIFY(
165174
status >= 0,
166175
"[HDF5] Internal error: Failed to set HDF5 file access property");
176+
177+
if (!m_config.json().is_null() && m_config.json().contains("vfd"))
178+
{
179+
auto vfd_json_config = m_config["vfd"];
180+
if (!vfd_json_config.json().contains("type"))
181+
{
182+
throw error::BackendConfigSchema(
183+
{"hdf5", "vfd"},
184+
"VFD configuration requires specifying the VFD type.");
185+
}
186+
std::string user_specified_type;
187+
if (auto value =
188+
json::asLowerCaseStringDynamic(vfd_json_config["type"].json());
189+
value.has_value())
190+
{
191+
user_specified_type = *value;
192+
}
193+
else
194+
{
195+
throw error::BackendConfigSchema(
196+
{"hdf5", "vfd", "type"}, "VFD type must be given as a string.");
197+
}
198+
199+
if (user_specified_type == "default")
200+
{ /* no-op */
201+
}
202+
else if (user_specified_type == "subfiling")
203+
{
204+
#ifdef H5_HAVE_SUBFILING_VFD
205+
int thread_level = 0;
206+
MPI_Query_thread(&thread_level);
207+
if (thread_level >= MPI_THREAD_MULTIPLE)
208+
{
209+
H5FD_subfiling_config_t vfd_config;
210+
// query default subfiling parameters
211+
H5Pget_fapl_subfiling(m_fileAccessProperty, &vfd_config);
212+
213+
auto int_accessor =
214+
[&vfd_json_config](
215+
std::string const &key) -> std::optional<long long> {
216+
if (!vfd_json_config.json().contains(key))
217+
{
218+
return std::nullopt;
219+
}
220+
auto const &val = vfd_json_config[key].json();
221+
if (val.is_number_integer())
222+
{
223+
return val.get<long long>();
224+
}
225+
else
226+
{
227+
throw error::BackendConfigSchema(
228+
{"hdf5", "vfd", key},
229+
"Excpecting value of type integer.");
230+
}
231+
};
232+
auto string_accessor =
233+
[&vfd_json_config](
234+
std::string const &key) -> std::optional<std::string> {
235+
if (!vfd_json_config.json().contains(key))
236+
{
237+
return std::nullopt;
238+
}
239+
auto const &val = vfd_json_config[key].json();
240+
if (auto str_val = json::asLowerCaseStringDynamic(val);
241+
str_val.has_value())
242+
{
243+
return *str_val;
244+
}
245+
else
246+
{
247+
throw error::BackendConfigSchema(
248+
{"hdf5", "vfd", key},
249+
"Excpecting value of type string.");
250+
}
251+
};
252+
253+
auto set_param = [](std::string const &key,
254+
auto *target,
255+
auto const &accessor) {
256+
if (auto val = accessor(key); val.has_value())
257+
{
258+
*target = static_cast<
259+
std::remove_reference_t<decltype(*target)>>(*val);
260+
}
261+
};
262+
263+
set_param(
264+
"stripe_size",
265+
&vfd_config.shared_cfg.stripe_size,
266+
int_accessor);
267+
set_param(
268+
"stripe_count",
269+
&vfd_config.shared_cfg.stripe_count,
270+
int_accessor);
271+
std::optional<std::string> ioc_selection_raw;
272+
set_param("ioc_selection", &ioc_selection_raw, string_accessor);
273+
274+
std::map<std::string, H5FD_subfiling_ioc_select_t> const
275+
ioc_selection_map{
276+
{"one_per_node", SELECT_IOC_ONE_PER_NODE},
277+
{"every_nth_rank", SELECT_IOC_EVERY_NTH_RANK},
278+
{"with_config", SELECT_IOC_WITH_CONFIG},
279+
{"total", SELECT_IOC_TOTAL}};
280+
if (ioc_selection_raw.has_value())
281+
{
282+
if (auto ioc_selection =
283+
ioc_selection_map.find(*ioc_selection_raw);
284+
ioc_selection != ioc_selection_map.end())
285+
{
286+
vfd_config.shared_cfg.ioc_selection =
287+
ioc_selection->second;
288+
}
289+
else
290+
{
291+
throw error::BackendConfigSchema(
292+
{"hdf5", "vfd", "ioc_selection"},
293+
"Unexpected value: '" + *ioc_selection_raw + "'.");
294+
}
295+
}
296+
297+
// ... and set them
298+
H5Pset_fapl_subfiling(m_fileAccessProperty, &vfd_config);
299+
}
300+
else
301+
{
302+
std::cerr << "[HDF5 Backend] The requested subfiling VFD of "
303+
"HDF5 requires the use of threaded MPI."
304+
<< std::endl;
305+
}
306+
#else
307+
std::cerr
308+
<< "[HDF5 Backend] No support for the requested subfiling VFD "
309+
"found in the installed version of HDF5. Will continue with "
310+
"default settings. Tip: Configure a recent version of HDF5 "
311+
"with '-DHDF5_ENABLE_SUBFILING_VFD=ON'."
312+
<< std::endl;
313+
#endif
314+
}
315+
else
316+
{
317+
throw error::BackendConfigSchema(
318+
{"hdf5", "vfd", "type"},
319+
"Unknown value: '" + user_specified_type + "'.");
320+
}
321+
322+
// unused params
323+
auto shadow = m_config.invertShadow();
324+
if (shadow.size() > 0)
325+
{
326+
switch (m_config.originallySpecifiedAs)
327+
{
328+
case json::SupportedLanguages::JSON:
329+
std::cerr << "Warning: parts of the backend configuration for "
330+
"HDF5 remain unused:\n"
331+
<< shadow << std::endl;
332+
break;
333+
case json::SupportedLanguages::TOML: {
334+
auto asToml = json::jsonToToml(shadow);
335+
std::cerr << "Warning: parts of the backend configuration for "
336+
"HDF5 remain unused:\n"
337+
<< asToml << std::endl;
338+
break;
339+
}
340+
}
341+
}
342+
}
167343
}
168344

169345
ParallelHDF5IOHandlerImpl::~ParallelHDF5IOHandlerImpl()

0 commit comments

Comments
 (0)