Skip to content

Commit 6e3ba34

Browse files
authored
Merge pull request #163 from SasView/full-hdf-save
Full hdf save
2 parents c61aebe + 04605d9 commit 6e3ba34

38 files changed

+73284
-1616
lines changed

sasdata/data.py

Lines changed: 22 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22

33
import h5py
44
import numpy as np
5+
from h5py._hl.group import Group as HDF5Group
56

67
from sasdata import dataset_types
78
from sasdata.dataset_types import DatasetType
@@ -21,7 +22,7 @@ def __init__(
2122
self.name = name
2223
# validate data contents
2324
if not all([key in dataset_type.optional or key in dataset_type.required for key in data_contents]):
24-
raise ValueError("Columns don't match the dataset type")
25+
raise ValueError(f"Columns don't match the dataset type: {[key for key in data_contents]}")
2526
self._data_contents = data_contents
2627
self._verbose = verbose
2728

@@ -103,13 +104,27 @@ def from_json(obj):
103104
metadata=Metadata.from_json(obj["metadata"]),
104105
)
105106

106-
def save_h5(self, path: str | typing.BinaryIO):
107+
def _save_h5(self, sasentry: HDF5Group):
107108
"""Export data into HDF5 file"""
109+
sasentry.attrs["name"] = self.name
110+
self.metadata.as_h5(sasentry)
111+
112+
# We export each data set into its own entry, so we only ever
113+
# need sasdata01
114+
group = sasentry.create_group("sasdata01")
115+
for idx, (key, sasdata) in enumerate(self._data_contents.items()):
116+
sasdata.as_h5(group, key)
117+
118+
119+
@staticmethod
120+
def save_h5(data: dict[str, typing.Self], path: str | typing.BinaryIO):
108121
with h5py.File(path, "w") as f:
109-
f.attrs["name"] = self.name
110-
for idx, (key, entry) in enumerate(self._data_contents.items()):
111-
group = f.create_group(f"sasentry{idx:02d}")
112-
self.metadata.as_h5(group)
122+
for idx, (key, data) in enumerate(data.items()):
123+
sasentry = f.create_group(f"sasentry{idx+1:02d}")
124+
if not key.startswith("sasentry"):
125+
sasentry.attrs["sasview_key"] = key
126+
data._save_h5(sasentry)
127+
113128

114129

115130
class SasDataEncoder(MetadataEncoder):
@@ -125,7 +140,7 @@ def default(self, obj):
125140
case SasData():
126141
return {
127142
"name": obj.name,
128-
"data_contents": {},
143+
"data_contents": obj._data_contents,
129144
"type": obj.dataset_type,
130145
"mask": obj.mask,
131146
"metadata": obj.metadata,

sasdata/dataset_types.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ class DatasetType:
2727
two_dim = DatasetType(
2828
name="2D I vs Q",
2929
required=["Qx", "Qy", "I"],
30-
optional=["dQx", "dQy", "dI", "Qz", "ShadowFactor"],
30+
optional=["dQx", "dQy", "dI", "Qz", "ShadowFactor", "mask"],
3131
expected_orders=[
3232
["Qx", "Qy", "I"],
3333
["Qx", "Qy", "I", "dI"],

sasdata/metadata.py

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@ class Rot3:
7171
def from_json(obj: dict) -> Quantity | None:
7272
if obj is None:
7373
return None
74-
return Vec3(
74+
return Rot3(
7575
roll=from_json_quantity(obj["roll"]),
7676
pitch=from_json_quantity(obj["pitch"]),
7777
yaw=from_json_quantity(obj["yaw"]),
@@ -165,7 +165,7 @@ def from_json(obj):
165165
size=Vec3.from_json(obj["size"]),
166166
size_name=obj["size_name"],
167167
name=obj["name"],
168-
type_=obj["type_"],
168+
type_=obj["type"],
169169
)
170170

171171

@@ -541,11 +541,11 @@ class Metadata:
541541
raw: MetaNode
542542

543543
def summary(self):
544-
run_string = self.run[0] if len(self.run) == 1 else self.run
544+
run_string = str(self.run[0] if len(self.run) == 1 else self.run)
545545
return (
546546
f" {self.title}, Run: {run_string}\n"
547547
+ " "
548-
+ "=" * len(self.title if self.title else "")
548+
+ "=" * len(str(self.title))
549549
+ "======="
550550
+ "=" * len(run_string)
551551
+ "\n\n"
@@ -558,12 +558,12 @@ def summary(self):
558558
@staticmethod
559559
def from_json(obj):
560560
return Metadata(
561-
title=obj["title"],
561+
title=obj["title"] if obj["title"] else None,
562562
run=obj["run"],
563-
definition=obj["definition"],
563+
definition=obj["definition"] if obj["definition"] else None,
564564
process=[Process.from_json(p) for p in obj["process"]],
565-
sample=Sample.from_json(obj["sample"]),
566-
instrument=Instrument.from_json(obj["instrument"]),
565+
sample=Sample.from_json(obj["sample"]) if obj["sample"] else None,
566+
instrument=Instrument.from_json(obj["instrument"]) if obj["instrument"] else None,
567567
raw=MetaNode.from_json(obj["raw"]),
568568
)
569569

@@ -591,10 +591,12 @@ def default(self, obj):
591591
match obj:
592592
case None:
593593
return None
594+
case bytes():
595+
return obj.decode("utf-8")
594596
case NamedUnit():
595597
return obj.name
596598
case Quantity():
597-
return {"value": obj.value, "units": obj.units}
599+
return {"value": obj.value, "units": obj.units.ascii_symbol}
598600
case ndarray():
599601
return {
600602
"type": "ndarray",

sasdata/quantities/quantity.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1401,8 +1401,9 @@ def string_repr(self):
14011401

14021402
def as_h5(self, group: h5py.Group, name: str):
14031403
"""Add this data onto a group as a dataset under the given name"""
1404-
data = group.create_dataset(name, data=[self.value])
1405-
data.attrs["units"] = self.units.symbol
1404+
boxed = self.value if type(self.value) is np.ndarray else [self.value]
1405+
data = group.create_dataset(name, data=boxed)
1406+
data.attrs["units"] = self.units.ascii_symbol
14061407

14071408

14081409
class NamedQuantity[QuantityType](Quantity[QuantityType]):

sasdata/quantities/unit_parser.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ def split_unit_str(unit_str: str) -> list[str]:
1919
def validate_unit_str(unit_str: str) -> bool:
2020
"""Validate whether unit_str is valid. This doesn't mean that the unit specified in unit_str exists but rather it
2121
only consists of letters, and numbers as a unit string should."""
22-
return fullmatch(r"[A-Za-zΩµ%Å^1-9\-\+/\ \._]+", unit_str) is not None
22+
return fullmatch(r"[A-Za-zΩµ%Å^1-9⁻¹-⁹\-\+/\ \._]+", unit_str) is not None
2323

2424

2525
def parse_single_unit(
@@ -127,7 +127,7 @@ def parse_unit(unit_str: str, longest_unit: bool = True) -> Unit:
127127
return result
128128
try:
129129
if not validate_unit_str(unit_str):
130-
raise ValueError("unit_str contains forbidden characters.")
130+
raise ValueError(f"unit_str ({unit_str}) contains forbidden characters.")
131131
parsed_unit = Unit(1, Dimensions())
132132
unit_stack = parse_unit_stack(unit_str, longest_unit)
133133
for unit in unit_stack:
@@ -190,6 +190,8 @@ def parse_named_unit_from_group(unit_str: str, from_group: UnitGroup) -> NamedUn
190190

191191

192192
def parse(string: str, name_lookup: bool = True, longest_unit: bool = True, lookup_rtol: float = 1e-14):
193+
if type(string) is not str:
194+
string = string.decode("utf-8")
193195
unit = parse_unit(string, longest_unit=longest_unit)
194196
if name_lookup:
195197
named = find_named_unit(unit, rtol=lookup_rtol)

sasdata/temp_hdf5_reader.py

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
from sasdata.data import SasData
1010
from sasdata.data_backing import Dataset as SASDataDataset
1111
from sasdata.data_backing import Group as SASDataGroup
12-
from sasdata.dataset_types import one_dim
12+
from sasdata.dataset_types import one_dim, two_dim
1313
from sasdata.metadata import (
1414
Aperture,
1515
BeamSize,
@@ -86,7 +86,7 @@ def connected_data(node: SASDataGroup, name_prefix="") -> dict[str, Quantity]:
8686
for name in node.children:
8787
child = node.children[name]
8888

89-
if "units" in child.attributes:
89+
if "units" in child.attributes and child.attributes["units"]:
9090
units = parse(child.attributes["units"])
9191
else:
9292
units = GET_UNITS_FROM_ELSEWHERE
@@ -314,24 +314,28 @@ def load_data(filename: str) -> dict[str, SasData]:
314314

315315
data_contents : dict[str, Quantity] = {}
316316

317-
entry_keys = [key for key in entry if "entry" in key]
317+
entry_keys = entry
318318

319-
if "sasdata" not in entry_keys and "data" not in entry_keys:
319+
if not [k for k in entry if k.startswith("sasdata") or k.startswith("data")]:
320320
logger.warning("No sasdata or data key")
321+
logger.warning(f"Known keys: {[k for k in entry_keys]}")
321322

322323
for key in entry_keys:
323324
component = entry[key]
324325
lower_key = key.lower()
325-
if lower_key == "sasdata" or lower_key == "data":
326+
if lower_key.startswith("sasdata") or lower_key.startswith("data"):
326327
datum = recurse_hdf5(component)
327-
# TODO: Use named identifier
328-
data_contents = connected_data(datum, "FILE_ID_HERE")
328+
data_contents = connected_data(datum, str(filename))
329329

330330
metadata = parse_metadata(f[root_key])
331331

332-
loaded_data[root_key] = SasData(
332+
dataset_type = two_dim if "Qy" in data_contents else one_dim
333+
334+
entry_key = entry.attrs["sasview_key"] if "sasview_key" in entry.attrs else root_key
335+
336+
loaded_data[entry_key] = SasData(
333337
name=root_key,
334-
dataset_type=one_dim,
338+
dataset_type=dataset_type,
335339
data_contents=data_contents,
336340
metadata=metadata,
337341
verbose=False,

0 commit comments

Comments
 (0)