Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/PureHDF/VOL/Native/Core.Reading/ReadTypes.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ namespace PureHDF.VOL.Native;
/// <summary>
/// Decodes data from <paramref name="source"/> and stores the results in
/// <paramref name="target"/> (one entry per element).
/// </summary>
internal delegate void DecodeDelegate<T>(IH5ReadStream source, Span<T> target);
/// <summary>
/// Decodes a value from <paramref name="source"/> and returns it boxed as
/// <see cref="object"/>; the result may be <see langword="null"/>.
/// </summary>
internal delegate object? ElementDecodeDelegate(IH5ReadStream source);

/// <summary>
/// Buffered variant of <see cref="ElementDecodeDelegate"/>: in addition to
/// <paramref name="source"/> it receives a <paramref name="buffer"/> holding
/// the raw bytes of the element, which the caller has already read from the
/// stream (callers read all elements in one go and pass one slice per element).
/// </summary>
internal delegate object? ElementDecodeDelegateBuffered(IH5ReadStream source, Span<byte> buffer);

internal readonly record struct DecodeStep(
Action<object, object?>? SetValue,
ulong CompoundMemberOffset,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,21 @@ public static GlobalHeapCollection Decode(NativeReadContext context)

// collection size
var collectionSize = superblock.ReadLength(driver);
if (collectionSize > int.MaxValue)
{
throw new NotSupportedException("The collection size is too big.");
}

var buffer = ArrayPool<byte>.Shared.Rent((int)collectionSize);
driver.ReadDataset(buffer.AsSpan()[..(int)collectionSize]);

var memoryStream = new MemoryStream(buffer);
var subDriver = new H5StreamDriver(memoryStream, false);
var subContext = new NativeReadContext(subDriver, superblock)
{
ReadOptions = context.ReadOptions,
File = context.File,
};

// global heap objects
var globalHeapObjects = new Dictionary<int, GlobalHeapObject>();
Expand All @@ -68,20 +83,22 @@ public static GlobalHeapCollection Decode(NativeReadContext context)

while (remaining > headerSize)
{
var before = driver.Position;
var globalHeapObject = GlobalHeapObject.Decode(context);
var before = subDriver.Position;
var globalHeapObject = GlobalHeapObject.Decode(subContext);

// Global Heap Object 0 (free space) can appear at the end of the collection.
if (globalHeapObject.ObjectIndex == 0)
break;

globalHeapObjects[globalHeapObject.ObjectIndex] = globalHeapObject;
var after = driver.Position;
var after = subDriver.Position;
var consumed = (ulong)(after - before);

remaining -= consumed;
}

ArrayPool<byte>.Shared.Return(buffer);

return new GlobalHeapCollection(
GlobalHeapObjects: globalHeapObjects
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,20 @@ public DecodeDelegate<TElement> GetDecodeInfo<TElement>(
};
}

/// <summary>
/// Returns the buffered element decoder for this datatype, if one exists.
/// </summary>
/// <param name="context">The read context handed on to the decoder factory.</param>
/// <param name="memoryType">
/// The requested in-memory type, or <see langword="null"/> to accept the default.
/// </param>
/// <returns>
/// The resulting type together with its buffered decoder for variable-length
/// strings; <see langword="null"/> for every other datatype.
/// </returns>
/// <exception cref="Exception">
/// The datatype is a variable-length string but <paramref name="memoryType"/>
/// is neither <see langword="null"/> nor <see cref="string"/>.
/// </exception>
private (Type Type, ElementDecodeDelegateBuffered Decode)? GetDecodeInfoForScalarBuffered(NativeReadContext context, Type? memoryType)
{
    // Only variable-length strings have a buffered decoder. The class is
    // tested first so that the bit field cast is only attempted for
    // variable-length datatypes.
    var isVariableLengthString =
        Class is DatatypeMessageClass.VariableLength &&
        ((VariableLengthBitFieldDescription)BitField).Type == InternalVariableLengthType.String;

    if (!isVariableLengthString)
    {
        return null;
    }

    var isCompatible = memoryType is null || memoryType == typeof(string);

    if (!isCompatible)
    {
        throw new Exception($"Variable-length string data can only be decoded as string (incompatible type: {memoryType}).");
    }

    return (typeof(string), GetDecodeInfoForVariableLengthStringBuffered(context));
}

private (Type Type, ElementDecodeDelegate Decode) GetDecodeInfoForScalar(
NativeReadContext context,
Type? memoryType)
Expand Down Expand Up @@ -899,6 +913,70 @@ private ElementDecodeDelegate GetDecodeInfoForVariableLengthString(
return value;
}

else
{
// It would be more correct to just throw an exception
// when the object index is not found in the collection,
// but that would make the following test fail
// - CanRead_Array_nullable_struct.
//
// And it would make the user's life a bit more complicated
// if the library cannot handle missing entries.
return default;
}
}

return decode;
}

private ElementDecodeDelegateBuffered GetDecodeInfoForVariableLengthStringBuffered(
NativeReadContext context)
{
object? decode(IH5ReadStream source, Span<byte> buffer)
{
/* Padding
* https://support.hdfgroup.org/HDF5/doc/H5.format.html#DatatypeMessage
* Search for "null terminate": null terminate and null padding are essentially
* the same when simply reading them from file.
*/

/* String is always split after first \0 when writing data to file.
* In other words, padding type only matters when reading data.
*/

if (BitField is not VariableLengthBitFieldDescription bitField)
throw new Exception("Variable-length bit field description must not be null.");

// see IV.B. Disk Format: Level 2B - Data Object Data Storage
Func<string, string> trim = bitField.PaddingType switch
{
PaddingType.NullTerminate => value => value,
PaddingType.NullPad => value => value,
PaddingType.SpacePad => value => value.TrimEnd(' '),
_ => throw new Exception("Unsupported padding type.")
};

/* skip the length of the sequence (H5Tvlen.c H5T_vlen_disk_read) */
buffer = buffer.Slice(sizeof(uint));

/* decode global heap IDs and get associated data */
var globalHeapId = ReadingGlobalHeapId.Decode(context.Superblock, buffer);

if (globalHeapId.Equals(default))
return default;

var globalHeapCollection = NativeCache.GetGlobalHeapObject(
context,
globalHeapId.CollectionAddress,
restoreAddress: true);

if (globalHeapCollection.GlobalHeapObjects.TryGetValue((int)globalHeapId.ObjectIndex, out var globalHeapObject))
{
var value = Encoding.UTF8.GetString(globalHeapObject.ObjectData);
value = trim(value);
return value;
}

else
{
// It would be more correct to just throw an exception
Expand Down Expand Up @@ -987,6 +1065,26 @@ private DecodeDelegate<T> GetDecodeInfoForReferenceMemory<T>(
NativeReadContext context
)
{
var elementDecodeBuffered = GetDecodeInfoForScalarBuffered(context, typeof(T))?.Decode;
if (elementDecodeBuffered is not null)
{
// Reads the raw on-disk data of all elements of `target` with a single
// ReadDataset call and then decodes each element from its own slice of
// that buffer via `elementDecodeBuffered`.
void decodeBuffered(IH5ReadStream source, Span<T> target)
{
    // Size of one element on disk. The element decoder skips a leading
    // uint (sequence length) and decodes a global heap ID from the rest;
    // presumably that is the collection address (OffsetsSize bytes)
    // followed by a uint object index - TODO confirm against ReadingGlobalHeapId.
    var elementSize = sizeof(uint) + context.Superblock.OffsetsSize + sizeof(uint);

    // checked: without it an int overflow would silently produce a wrong
    // (possibly negative) size, i.e. a wrongly sized rent and read.
    var byteCount = checked(target.Length * elementSize);

    using var memoryOwner = MemoryPool<byte>.Shared.Rent(byteCount);

    // The pool may hand out more memory than requested, so trim the
    // view to exactly the number of bytes that belong to the elements.
    var rawData = memoryOwner.Memory.Slice(0, byteCount).Span;
    source.ReadDataset(rawData);

    for (int i = 0; i < target.Length; i++)
    {
        var elementBuffer = rawData.Slice(i * elementSize, elementSize);
        target[i] = (T)elementDecodeBuffered(source, elementBuffer)!;
    }
}

return decodeBuffered;
}

var elementDecode = GetDecodeInfoForScalar(context, typeof(T)).Decode;

void decode(IH5ReadStream source, Span<T> target)
Expand Down