Documentation
¶
Overview ¶
Package core provides HDF5 file format parsing and manipulation functionality.
Package core provides HDF5 low-level format structures and parsers.
Package core provides low-level HDF5 file format parsing and generation. It handles superblocks, object headers, messages, and other HDF5 structures without CGo dependencies.
Index ¶
- Constants
- Variables
- func AddMessageToObjectHeader(oh *ObjectHeader, msgType MessageType, msgData []byte) error
- func DeleteCompactAttribute(writer io.WriterAt, objectAddr uint64, name string, sb *Superblock) error
- func DeleteDenseAttribute(heap HeapWriter, btree BTreeWriter, name string, rebalance bool) error
- func EncodeArrayDatatypeMessage(baseType []byte, dims []uint64, arraySize uint32) ([]byte, error)
- func EncodeAttributeFromStruct(attr *Attribute, sb *Superblock) ([]byte, error)
- func EncodeAttributeInfoMessage(aim *AttributeInfoMessage, sb *Superblock) ([]byte, error)
- func EncodeAttributeMessage(name string, datatype *DatatypeMessage, dataspace *DataspaceMessage, ...) ([]byte, error)
- func EncodeCompoundDatatypeV1(totalSize uint32, fields []CompoundFieldDef) ([]byte, error)
- func EncodeCompoundDatatypeV3(totalSize uint32, fields []CompoundFieldDef) ([]byte, error)
- func EncodeDataspaceMessage(dims, maxDims []uint64) ([]byte, error)
- func EncodeDatatypeMessage(dt *DatatypeMessage) ([]byte, error)
- func EncodeEnumDatatypeMessage(baseType []byte, names []string, values []byte, enumSize uint32) ([]byte, error)
- func EncodeLayoutMessage(layoutClass DataLayoutClass, dataSize, dataAddress uint64, sb *Superblock, ...) ([]byte, error)
- func EncodeLinkInfoMessage(lim *LinkInfoMessage, sb *Superblock) ([]byte, error)
- func EncodeLinkMessage(lm *LinkMessage, _ *Superblock) ([]byte, error)
- func EncodeSymbolTableMessage(btreeAddr, heapAddr uint64, offsetSize, _ int) []byte
- func ModifyCompactAttribute(writer io.WriterAt, objectAddr uint64, name string, newAttr *Attribute, ...) error
- func ModifyDenseAttribute(heap HeapWriter, btree BTreeWriter, name string, newAttr *Attribute) error
- func ReadDatasetFloat64(r io.ReaderAt, header *ObjectHeader, sb *Superblock) ([]float64, error)
- func ReadDatasetStrings(r io.ReaderAt, header *ObjectHeader, sb *Superblock) ([]string, error)
- func RewriteObjectHeaderV2(w io.WriterAt, r io.ReaderAt, addr uint64, sb *Superblock, ...) error
- func WriteObjectHeader(w io.WriterAt, addr uint64, oh *ObjectHeader, sb *Superblock) error
- type Attribute
- func FindCompactAttribute(oh *ObjectHeader, name string, endianness binary.ByteOrder) (*Attribute, int, error)
- func ParseAttributeMessage(data []byte, endianness binary.ByteOrder) (*Attribute, error)
- func ParseAttributesFromMessages(r io.ReaderAt, messages []*HeaderMessage, sb *Superblock) ([]*Attribute, error)
- type AttributeInfoMessage
- type BFloat16
- type BTreeV1Node
- type BTreeWriter
- type ChunkEntry
- type ChunkKey
- type CompoundFieldDef
- type CompoundMember
- type CompoundType
- type CompoundValue
- type DataLayoutClass
- type DataLayoutMessage
- type DatasetInfo
- type DataspaceMessage
- type DataspaceType
- type DatatypeClass
- type DatatypeMessage
- func (dt *DatatypeMessage) GetByteOrder() binary.ByteOrder
- func (dt *DatatypeMessage) GetEncodedSize() int
- func (dt *DatatypeMessage) GetStringPadding() uint8
- func (dt *DatatypeMessage) IsCompound() bool
- func (dt *DatatypeMessage) IsFixedString() bool
- func (dt *DatatypeMessage) IsFloat32() bool
- func (dt *DatatypeMessage) IsFloat64() bool
- func (dt *DatatypeMessage) IsInt32() bool
- func (dt *DatatypeMessage) IsInt64() bool
- func (dt *DatatypeMessage) IsString() bool
- func (dt *DatatypeMessage) IsVariableString() bool
- func (dt *DatatypeMessage) String() string
- type FP8E4M3
- type FP8E5M2
- type Filter
- type FilterID
- type FilterPipelineMessage
- type GlobalHeapCollection
- type GlobalHeapObject
- type GlobalHeapReference
- type HeaderMessage
- type HeapWriter
- type LinkInfoMessage
- type LinkMessage
- func (lm *LinkMessage) GetExternalLinkInfo() (string, string, error)
- func (lm *LinkMessage) GetHardLinkAddress(sb *Superblock) (uint64, error)
- func (lm *LinkMessage) GetLinkNameLengthSize() int
- func (lm *LinkMessage) GetSoftLinkPath() (string, error)
- func (lm *LinkMessage) HasCharSetField() bool
- func (lm *LinkMessage) HasCreationOrder() bool
- func (lm *LinkMessage) HasLinkTypeField() bool
- type LinkType
- type MessageType
- type MessageWriter
- type ObjectHeader
- type ObjectHeaderWriter
- type ObjectType
- type Superblock
Constants ¶
const (
	LinkFlagSizeOfLengthMask uint8 = 0x03 // Bits 0-1: size of length field (0=1, 1=2, 2=4, 3=8 bytes)
	LinkFlagCreationOrderBit uint8 = 0x04 // Bit 2: creation order field present
	LinkFlagLinkTypeFieldBit uint8 = 0x08 // Bit 3: link type field present
	LinkFlagCharSetBit uint8 = 0x10 // Bit 4: link name character set field present
	LinkFlagLinkNameEncodedBit uint8 = 0x18 // Bits 3-4: both must be set for encoded name
)
Link message flags.
const (
	LinkInfoTrackCreationOrder uint8 = 0x01 // Bit 0: track creation order
	LinkInfoIndexCreationOrder uint8 = 0x02 // Bit 1: index creation order
)
Flags for LinkInfoMessage.
const (
	Signature = "\x89HDF\r\n\x1a\n"
	Version0 = 0
	Version2 = 2
	Version3 = 3
)
HDF5 file signature and supported superblock versions.
Variables ¶
var GlobalHeapSignature = [4]byte{'G', 'C', 'O', 'L'}
GlobalHeapSignature is the magic signature for global heap collections (4 bytes ASCII "GCOL").
Functions ¶
func AddMessageToObjectHeader ¶
func AddMessageToObjectHeader(oh *ObjectHeader, msgType MessageType, msgData []byte) error
AddMessageToObjectHeader adds a message to an object header. For MVP (v0.11.1-beta): Only supports object header v2 without continuation blocks.
Parameters:
- oh: Object header to modify
- msgType: Message type (e.g., MsgAttribute = 0x000C)
- msgData: Encoded message bytes
Returns:
- error: Non-nil if header full or add fails
Limitations:
- No continuation blocks (returns error if header would overflow)
- Only object header v2 supported
- No message flags (always 0)
Reference: H5O.c - H5O_msg_append().
func DeleteCompactAttribute ¶
func DeleteCompactAttribute(writer io.WriterAt, objectAddr uint64, name string, sb *Superblock) error
DeleteCompactAttribute deletes an attribute from compact storage.
This implements attribute deletion for compact storage (object header messages). Following the HDF5 C library approach:
- H5Adelete.c:H5A__delete() - Attribute deletion
- H5O.c:H5O_msg_remove() - Object header message removal
Algorithm:
1. Find attribute message by name
2. Mark message as deleted (or remove from list for MVP)
3. Update object header checksum
4. Write back to file
MVP Limitation: We remove the message entirely instead of marking as deleted. This avoids implementing message deletion flags and header compaction.
Parameters:
- writer: Writer for file I/O
- objectAddr: Address of object header
- name: Attribute name to delete
- sb: Superblock for endianness
Returns:
- error: Non-nil if deletion fails
Reference: H5Adelete.c - H5A__delete().
func DeleteDenseAttribute ¶
func DeleteDenseAttribute(heap HeapWriter, btree BTreeWriter, name string, rebalance bool) error
DeleteDenseAttribute deletes an attribute from dense storage.
This function implements Phase 3 of attribute deletion: removing attributes stored in dense storage (fractal heap + B-tree v2).
Algorithm (matching H5Adense.c:H5A__dense_remove):
1. Search B-tree v2 for attribute name → get heap ID
2. Delete record from B-tree (with optional rebalancing)
3. Delete object from fractal heap
4. Update Attribute Info message (decrement count)
Parameters:
- heap: Writable fractal heap (loaded from file)
- btree: Writable B-tree v2 (loaded from file)
- name: Attribute name to delete
- rebalance: If true, use DeleteRecordWithRebalancing for optimal tree structure
Returns:
- error: Non-nil if deletion fails
Rebalancing behavior:
- When rebalance=true: Maintains optimal B-tree structure (nodes ≥50% full)
- When rebalance=false: Faster deletion, tree may become sparse
Reference: H5Adense.c - H5A__dense_remove(), H5Adelete.c - H5A__delete().
func EncodeArrayDatatypeMessage ¶
func EncodeArrayDatatypeMessage(baseType []byte, dims []uint64, arraySize uint32) ([]byte, error)
EncodeArrayDatatypeMessage encodes an array datatype message. Array datatypes contain a base type and dimensions.
Parameters:
- baseType: Encoded base datatype message
- dims: Array dimensions
Returns:
- Encoded message bytes (full datatype message with array properties)
- Error if encoding fails
Format (version 3, HDF5 1.8+):
- Bytes 0-3: Class (4 bits) | Version (4 bits) | Reserved (24 bits)
- Bytes 4-7: Size (total array size = product of dims * base size)
- Byte 8: Dimensionality (ndims)
- Bytes 9+: Dimension sizes (ndims * 4 bytes each, uint32)
- Following: Base type message (encoded datatype)
Reference: HDF5 spec III.C (Datatype Message - Array class). C Reference: H5Odtype.c - H5O__dtype_encode_helper() for H5T_ARRAY.
func EncodeAttributeFromStruct ¶
func EncodeAttributeFromStruct(attr *Attribute, sb *Superblock) ([]byte, error)
EncodeAttributeFromStruct encodes an Attribute struct to bytes (attribute message format).
This is a convenience wrapper around EncodeAttributeMessage (from messages_write.go) that takes an Attribute struct instead of individual parameters.
Parameters:
- attr: Attribute to encode
- sb: Superblock for endianness (currently unused as we use little-endian)
Returns:
- []byte: Encoded attribute message
- error: Non-nil if encoding fails
Reference: H5Oattr.c - H5O__attr_encode().
func EncodeAttributeInfoMessage ¶
func EncodeAttributeInfoMessage(aim *AttributeInfoMessage, sb *Superblock) ([]byte, error)
EncodeAttributeInfoMessage encodes Attribute Info Message for writing.
This implements the encoding logic matching the C reference H5Oainfo.c:H5O__ainfo_encode().
Format:
- Version (1 byte): 0
- Flags (1 byte): creation order tracking/indexing
- Max Compact (2 bytes): Optional, if bit 0 of flags set
- Min Dense (2 bytes): Optional, if bit 0 of flags set
- Fractal Heap Address (8 bytes)
- B-tree Name Index Address (8 bytes)
- B-tree Order Index Address (8 bytes): Optional, if bit 1 of flags set
Parameters:
- aim: AttributeInfoMessage to encode
- sb: Superblock for endianness and offset size
Returns:
- []byte: Encoded message
- error: Non-nil if encoding fails
Reference: H5Oainfo.c - H5O__ainfo_encode().
func EncodeAttributeMessage ¶
func EncodeAttributeMessage(name string, datatype *DatatypeMessage, dataspace *DataspaceMessage, data []byte) ([]byte, error)
EncodeAttributeMessage encodes an Attribute message for compact storage. This creates a version 3 attribute message (HDF5 1.8+).
Parameters:
- name: Attribute name (null-terminated in encoded form)
- datatype: Datatype message for the attribute value
- dataspace: Dataspace message for the attribute value
- data: Raw attribute data bytes
Returns:
- Encoded message bytes
- Error if encoding fails
Format (version 3):
- Version: 1 byte (3)
- Flags: 1 byte (0 for compact, no special features)
- Name size: 2 bytes (includes null terminator)
- Datatype size: 2 bytes
- Dataspace size: 2 bytes
- Name encoding: 1 byte (0=ASCII, 1=UTF-8)
- Name: variable (null-terminated)
- Datatype: variable (encoded datatype message)
- Dataspace: variable (encoded dataspace message)
- Data: variable (actual attribute value)
Reference: HDF5 spec III.M (Attribute Message). C Reference: H5Oattr.c - H5O__attr_encode().
func EncodeCompoundDatatypeV1 ¶
func EncodeCompoundDatatypeV1(totalSize uint32, fields []CompoundFieldDef) ([]byte, error)
EncodeCompoundDatatypeV1 encodes a version 1 compound datatype message. This is the legacy format for HDF5 1.0-1.6 compatibility.
Format (version 1):
- Header (8 bytes):
  - Byte 0-3: Class (4 bits) | Version (4 bits) | NumMembers (16 bits, low)
  - Byte 4-7: Size (total compound size in bytes)
- For each member:
  - Name (null-terminated, padded to 8-byte boundary)
  - Offset (4 bytes, uint32)
  - Array info (28 bytes, always present even for scalars)
  - Member datatype (recursive, variable length, NO padding between members)
Parameters:
- totalSize: Total size of compound structure in bytes
- fields: List of field definitions
Returns:
- Encoded datatype message bytes
- Error if encoding fails
Reference: H5Odtype.c:360-481.
func EncodeCompoundDatatypeV3 ¶
func EncodeCompoundDatatypeV3(totalSize uint32, fields []CompoundFieldDef) ([]byte, error)
EncodeCompoundDatatypeV3 encodes a version 3 compound datatype message. This is the preferred format for new files (HDF5 1.8+).
Format (version 3):
- Header (8 bytes):
  - Byte 0-3: Class (4 bits) | Version (4 bits) | Reserved (24 bits)
  - Byte 4-7: Size (total compound size in bytes)
- Member count (4 bytes): Number of fields
- For each member:
  - Name (null-terminated, NOT padded)
  - Offset (4 bytes, uint32)
  - Member datatype (recursive, variable length)
Parameters:
- totalSize: Total size of compound structure in bytes
- fields: List of field definitions with names, offsets, and types
Returns:
- Encoded datatype message bytes
- Error if encoding fails
Reference: HDF5 Format Spec III.C, H5Odtype.c:1630-1800.
func EncodeDataspaceMessage ¶
func EncodeDataspaceMessage(dims, maxDims []uint64) ([]byte, error)
EncodeDataspaceMessage encodes a Dataspace message (simple N-dimensional array).
Parameters:
- dims: Dimensions of the dataspace
- maxDims: Maximum dimensions (nil if not resizable)
Returns:
- Encoded message bytes
- Error if encoding fails
Format (version 1):
- Version: 1 byte (1)
- Dimensionality: 1 byte
- Flags: 1 byte (0x01 if maxDims present)
- Reserved: 5 bytes (0)
- Dimensions: dimensionality * 8 bytes (uint64 for each dimension)
- Max Dimensions: dimensionality * 8 bytes (if flags & 0x01)
Reference: HDF5 spec III.A (Dataspace Message). C Reference: H5Osdspace.c - H5O__sdspace_encode().
func EncodeDatatypeMessage ¶
func EncodeDatatypeMessage(dt *DatatypeMessage) ([]byte, error)
EncodeDatatypeMessage encodes a Datatype message. Supports primitive types: int8-64, uint8-64, float32, float64, and fixed-length strings.
Parameters:
- dt: Datatype message to encode
Returns:
- Encoded message bytes
- Error if datatype is not supported
Format:
- Bytes 0-3: Class (4 bits) | Version (4 bits) | Class Bit Field (24 bits)
- Bytes 4-7: Size (4 bytes)
- Bytes 8+: Properties (variable, depends on class)
Reference: HDF5 spec III.C (Datatype Message). C Reference: H5Odtype.c - H5O__dtype_encode().
func EncodeEnumDatatypeMessage ¶
func EncodeEnumDatatypeMessage(baseType []byte, names []string, values []byte, enumSize uint32) ([]byte, error)
EncodeEnumDatatypeMessage encodes an enum datatype message. Enum datatypes map integer values to symbolic names.
Parameters:
- baseType: Encoded base integer datatype message
- names: Enum member names
- values: Enum member values (as bytes, size = nmembs * baseSize)
- enumSize: Size of enum (same as base type size)
Returns:
- Encoded message bytes (full datatype message with enum properties)
- Error if encoding fails
Format (version 3):
- Bytes 0-3: Class (4 bits) | Version (4 bits) | NumMembers (16 bits, in classBitField)
- Bytes 4-7: Size (base type size)
- Following: Base type message
- Following: For each member:
  - Name (null-terminated, padded to multiple of 8)
  - Value (size bytes)
Reference: HDF5 spec III.C (Datatype Message - Enum class). C Reference: H5Odtype.c - H5O__dtype_encode_helper() for H5T_ENUM.
func EncodeLayoutMessage ¶
func EncodeLayoutMessage( layoutClass DataLayoutClass, dataSize, dataAddress uint64, sb *Superblock, chunkDims []uint64, ) ([]byte, error)
EncodeLayoutMessage encodes a Data Layout message. This creates a version 3 layout message (most common format).
Parameters:
- layoutClass: Type of layout (contiguous, compact, chunked)
- dataSize: Size of the dataset data in bytes (for contiguous) or unused (for chunked)
- dataAddress: File address where data is stored (for contiguous) or B-tree root (for chunked)
- sb: Superblock for offset/length size encoding
- chunkDims: Chunk dimensions (required for chunked layout, nil otherwise)
Returns:
- Encoded message bytes
- Error if encoding fails
Format (version 3, contiguous):
- Version: 1 byte (3)
- Class: 1 byte (1 for contiguous, 2 for chunked)
- Data Address: offsetSize bytes
- Data Size: lengthSize bytes
Format (version 3, chunked):
- Version: 1 byte (3)
- Class: 1 byte (2 for chunked)
- Dimensionality: 1 byte
- B-tree Address: offsetSize bytes
- Chunk Dimensions: dimensionality * 4 bytes (uint32 each)
Reference: HDF5 spec III.D (Data Storage - Data Layout Message). C Reference: H5Olayout.c - H5O__layout_encode().
func EncodeLinkInfoMessage ¶
func EncodeLinkInfoMessage(lim *LinkInfoMessage, sb *Superblock) ([]byte, error)
EncodeLinkInfoMessage encodes Link Info message for writing.
This implements the encoding logic matching the C reference H5Olinfo.c:H5O__linfo_encode().
Format:
- Version (1 byte): Always 0
- Flags (1 byte): Bit 0 = track creation order, Bit 1 = index creation order
- Max Creation Order (8 bytes, int64, optional): Present if bit 0 of flags is set
- Fractal Heap Address (offsetSize bytes): Always present
- Name B-tree Address (offsetSize bytes): Always present
- Creation Order B-tree Address (offsetSize bytes, optional): Present if bit 1 of flags is set
Parameters:
- lim: Link Info message to encode
- sb: Superblock for offset size and endianness
Returns:
- Encoded message bytes
- Error if encoding fails
Reference: H5Olinfo.c - H5O__linfo_encode().
func EncodeLinkMessage ¶
func EncodeLinkMessage(lm *LinkMessage, _ *Superblock) ([]byte, error)
EncodeLinkMessage encodes a link message for writing.
This implements the encoding logic matching the C reference H5Olink.c:H5O__link_encode().
Format:
- Version (1 byte): Always 1
- Flags (1 byte): Link type and flags
- Link Type (1 byte, optional): If bit 3 of flags is set
- Creation Order (8 bytes, optional): If bit 2 of flags is set
- Link Name Character Set (1 byte, optional): If bit 4 of flags is set
- Link Name Length (1-8 bytes): Depends on flags bits 0-1
- Link Name (variable): UTF-8 or ASCII
- Link Information (variable): Depends on link type
Parameters:
- lm: Link message to encode
- _ : Superblock (unused, kept for API consistency)
Returns:
- Encoded message bytes
- Error if encoding fails
Reference: H5Olink.c - H5O__link_encode().
func EncodeSymbolTableMessage ¶
func EncodeSymbolTableMessage(btreeAddr, heapAddr uint64, offsetSize, _ int) []byte
EncodeSymbolTableMessage encodes a Symbol Table Message. This message is used in group object headers to point to the symbol table structure.
Message type: 0x11 (17)
Parameters:
- btreeAddr: Address of the B-tree v1 root node
- heapAddr: Address of the local heap
- offsetSize: Size of addresses in bytes (from superblock)
- lengthSize: Size of lengths in bytes (from superblock); currently unused — the parameter is declared as `_` in the signature
Returns:
- Encoded message bytes
Format:
- B-tree address: offsetSize bytes
- Local heap address: offsetSize bytes
Reference: HDF5 spec III.E (Symbol Table Message). C Reference: H5Ostab.c - H5O__stab_encode().
func ModifyCompactAttribute ¶
func ModifyCompactAttribute(writer io.WriterAt, objectAddr uint64, name string, newAttr *Attribute, sb *Superblock) error
ModifyCompactAttribute modifies an existing compact attribute in an object header.
This function implements the attribute modification logic matching the C reference:
- H5Oattribute.c:H5O__attr_write_cb() - Compact attribute modification
- H5Aint.c:H5A__write() - Main write function
Algorithm:
- Find existing attribute message by name
- Encode new attribute value
- Compare sizes:
  a. Same size → Overwrite in-place (modify message data)
  b. Different size → Mark old as deleted, add new message
- Update object header checksum
- Write back to file
Parameters:
- writer: Writer for file I/O
- objectAddr: Address of object header
- name: Attribute name to modify
- newAttr: New attribute structure with updated data
- sb: Superblock for endianness
Returns:
- error: Non-nil if modification fails
Reference: H5Oattribute.c - H5O__attr_write_cb().
func ModifyDenseAttribute ¶
func ModifyDenseAttribute(heap HeapWriter, btree BTreeWriter, name string, newAttr *Attribute) error
ModifyDenseAttribute modifies an existing dense attribute.
This function implements Phase 2 of attribute modification: modifying attributes stored in dense storage (fractal heap + B-tree v2).
Algorithm (matching H5Adense.c:H5A__dense_write):
- Search B-tree v2 for attribute name → get heap ID
- Read old attribute from fractal heap
- Encode new attribute value
- Check sizes:
  a. Same size → Overwrite in heap (in-place, fast path)
  b. Different size → Delete old, insert new, update B-tree
- Write updated heap and B-tree back to file
Parameters:
- heap: Writable fractal heap (loaded from file)
- btree: Writable B-tree v2 (loaded from file)
- name: Attribute name to modify
- newAttr: New attribute structure with updated data
Returns:
- error: Non-nil if modification fails
Reference: H5Adense.c - H5A__dense_write().
func ReadDatasetFloat64 ¶
func ReadDatasetFloat64(r io.ReaderAt, header *ObjectHeader, sb *Superblock) ([]float64, error)
ReadDatasetFloat64 reads a dataset and returns values as float64 array. This is the main entry point for reading numerical datasets.
func ReadDatasetStrings ¶
func ReadDatasetStrings(r io.ReaderAt, header *ObjectHeader, sb *Superblock) ([]string, error)
ReadDatasetStrings reads a string dataset and returns values as string array. Supports both fixed-length and variable-length strings.
func RewriteObjectHeaderV2 ¶
func RewriteObjectHeaderV2(w io.WriterAt, r io.ReaderAt, addr uint64, sb *Superblock, newMessages []*HeaderMessage) error
RewriteObjectHeaderV2 rewrites an object header v2 with updated messages. This handles the case where we need to modify an existing object header by reading it, modifying it, and writing it back.
For MVP (v0.11.1-beta):
- Only supports v2 headers without continuation blocks
- Overwrites header at original location if size permits
- Returns error if new header doesn't fit in original space
Parameters:
- w: Writer with WriteAt capability
- r: Reader for reading current header
- addr: File address of object header
- sb: Superblock
- newMessages: Additional messages to add
Returns:
- error: Non-nil if operation fails
Note: This is a simplified version for MVP. Full implementation would:
- Support continuation blocks
- Handle header relocation if needed
- Support v1 headers
func WriteObjectHeader ¶
func WriteObjectHeader(w io.WriterAt, addr uint64, oh *ObjectHeader, sb *Superblock) error
WriteObjectHeader writes an object header back to disk at a given address. This is used when modifying object headers (e.g., adding attributes).
For MVP (v0.11.1-beta):
- Only object header v2 supported
- No continuation blocks
- Overwrites existing header at the same address
Parameters:
- w: Writer with WriteAt capability
- addr: File address where header is located
- oh: Object header to write
- sb: Superblock for encoding parameters
Returns:
- error: Non-nil if write fails
Reference: H5O.c - H5O_flush().
Types ¶
type Attribute ¶
type Attribute struct {
Name string
Datatype *DatatypeMessage
Dataspace *DataspaceMessage
Data []byte
// contains filtered or unexported fields
}
Attribute represents an HDF5 attribute with metadata and data.
func FindCompactAttribute ¶
func FindCompactAttribute(oh *ObjectHeader, name string, endianness binary.ByteOrder) (*Attribute, int, error)
FindCompactAttribute searches for an attribute by name in compact storage.
This is a helper function that finds an attribute message in an object header without modifying it. Useful for checking existence before modification.
Parameters:
- oh: Object header to search
- name: Attribute name
- endianness: Byte order (binary.ByteOrder) used when parsing attribute messages
Returns:
- *Attribute: Found attribute, or nil if not found
- int: Message index, or -1 if not found
- error: Non-nil if parsing fails
Reference: H5Oattribute.c - attribute iteration callbacks.
func ParseAttributeMessage ¶
func ParseAttributeMessage(data []byte, endianness binary.ByteOrder) (*Attribute, error)
ParseAttributeMessage parses an attribute message (type 0x000C).
Format according to HDF5 spec:
- Version (1 byte).
- Flags (1 byte) - reserved, always 0.
- Name size (2 bytes).
- Datatype size (2 bytes).
- Dataspace size (2 bytes).
- Name encoding (1 byte) - for version 3+.
- Name (variable, null-terminated).
- Datatype message data.
- Dataspace message data.
- Data (variable).
func ParseAttributesFromMessages ¶
func ParseAttributesFromMessages(r io.ReaderAt, messages []*HeaderMessage, sb *Superblock) ([]*Attribute, error)
ParseAttributesFromMessages extracts all attributes from object header messages. Supports both compact attributes (stored in object header) and dense attributes (stored in fractal heap). Reference: H5Adense.c in C library.
type AttributeInfoMessage ¶
type AttributeInfoMessage struct {
Version uint8
Flags uint8
FractalHeapAddr uint64 // Address of fractal heap for dense attribute storage
BTreeNameIndexAddr uint64 // Address of B-tree v2 for indexing attributes by name
// Optional fields for creation order (if tracked):
MaxCreationIndex uint64 // Only present if creation order tracked
BTreeOrderIndexAddr uint64 // Only present if creation order indexed
}
AttributeInfoMessage represents the Attribute Info Message (0x000F). This message contains information about dense attribute storage. Reference: H5Adense.c in C library.
func ParseAttributeInfoMessage ¶
func ParseAttributeInfoMessage(data []byte, sb *Superblock) (*AttributeInfoMessage, error)
ParseAttributeInfoMessage parses an Attribute Info Message (0x000F). Reference: H5Oainfo.c in C library.
Format:
- Version (1 byte)
- Flags (1 byte):
  - Bit 0: Track creation order
  - Bit 1: Index creation order
- Max Creation Index (2 bytes) - only if track creation order flag set
- Fractal Heap Address (variable, based on superblock offset size)
- B-tree Name Index Address (variable)
- B-tree Order Index Address (variable) - only if index creation order flag set.
type BFloat16 ¶
type BFloat16 uint16
BFloat16 represents a 16-bit brain floating point value.
Format (16 bits total):
- Bit 15: Sign (1 bit)
- Bits 14-7: Exponent (8 bits, bias=127) - SAME as float32
- Bits 6-0: Mantissa (7 bits) - truncated from float32's 23 bits
Key property: bfloat16 is just the upper 16 bits of float32. This makes conversion extremely fast (just bit shifting).
Range: ±3.4e38 (same as float32).
Precision: ~2 decimal digits (vs 7 for float32).
Used by: Google TPU, NVIDIA Tensor Cores, Intel AMX.
func DecodeBFloat16 ¶
DecodeBFloat16 decodes bytes to bfloat16 (little-endian).
func Float32ToBFloat16 ¶
Float32ToBFloat16 converts float32 to bfloat16 with rounding to nearest even.
Rounding mode: Round to nearest, ties to even (banker's rounding). This provides better accuracy than simple truncation.
type BTreeV1Node ¶
type BTreeV1Node struct {
Signature [4]byte // Should be "TREE".
NodeType uint8 // Type of B-tree node.
NodeLevel uint8 // Level of node (0 = leaf).
EntriesUsed uint16 // Number of entries currently used.
LeftSibling uint64 // Address of left sibling (or UNDEFINED).
RightSibling uint64 // Address of right sibling (or UNDEFINED).
// For chunk B-tree (type 1), keys are chunk coordinates.
// Each entry has: key + child pointer.
Keys []ChunkKey // Keys for this node.
Children []uint64 // Child node/chunk addresses.
}
BTreeV1Node represents a B-tree version 1 node. Used for indexing chunked dataset storage. Reference: H5Bpkg.h, H5Dbtree.c.
func ParseBTreeV1Node ¶
func ParseBTreeV1Node(r io.ReaderAt, address uint64, offsetSize uint8, ndims int, chunkDims []uint64) (*BTreeV1Node, error)
ParseBTreeV1Node parses a B-tree v1 node from file. chunkDims: chunk dimensions for converting byte offsets to scaled indices.
Note: B-tree coordinates are ALWAYS stored as uint64 in the file format. The chunk dimensions in the layout message can be uint32 or uint64 depending on file version, but the B-tree keys always use uint64 for backward compatibility.
func (*BTreeV1Node) CollectAllChunks ¶
func (node *BTreeV1Node) CollectAllChunks(r io.ReaderAt, offsetSize uint8, chunkDims []uint64) ([]ChunkEntry, error)
CollectAllChunks recursively collects all chunks from B-tree. This handles both leaf and non-leaf nodes.
func (*BTreeV1Node) FindChunk ¶
func (node *BTreeV1Node) FindChunk(r io.ReaderAt, coords []uint64, offsetSize uint8, chunkDims []uint64) (uint64, error)
FindChunk searches B-tree for chunk at given scaled coordinates. coords: scaled chunk indices (not byte offsets).
func (*BTreeV1Node) String ¶
func (node *BTreeV1Node) String() string
String returns human-readable B-tree node description.
type BTreeWriter ¶
type BTreeWriter interface {
SearchRecord(name string) ([]byte, bool)
UpdateRecord(name string, newHeapID uint64) error
DeleteRecord(name string) error
DeleteRecordWithRebalancing(name string) error
DeleteRecordLazy(name string) error
IsLazyRebalancingEnabled() bool
}
BTreeWriter interface for dense attribute modification. This abstracts B-tree v2 operations for testing and modularity.
type ChunkEntry ¶
type ChunkEntry struct {
Key ChunkKey // Chunk coordinates and metadata.
Address uint64 // Address of chunk data.
}
ChunkEntry represents a chunk location in the B-tree.
type ChunkKey ¶
type ChunkKey struct {
Scaled []uint64 // Scaled chunk indices [dim0, dim1, ...].
Nbytes uint32 // Size of stored chunk data in bytes.
FilterMask uint32 // Excluded filters mask.
}
ChunkKey represents coordinates for a chunk in N-dimensional space.
type CompoundFieldDef ¶
type CompoundFieldDef struct {
Name string // Field name (null-terminated in encoding).
Offset uint32 // Byte offset within compound structure.
Type *DatatypeMessage // Field datatype (can be nested compound).
}
CompoundFieldDef defines a field for compound datatype creation. This is used when creating new compound datatypes for writing.
type CompoundMember ¶
type CompoundMember struct {
Name string // Field name.
Offset uint32 // Byte offset within the compound structure.
Type *DatatypeMessage // Member datatype (can be any type, including nested compound).
}
CompoundMember represents a single field in a compound datatype.
type CompoundType ¶
type CompoundType struct {
Size uint32 // Total size of the compound structure in bytes.
Members []CompoundMember // List of members/fields.
}
CompoundType represents a parsed compound datatype with all its members.
func ParseCompoundType ¶
func ParseCompoundType(dt *DatatypeMessage) (*CompoundType, error)
ParseCompoundType parses compound datatype properties to extract member information.
Properties format (version 1):
- Bytes 0-1: Number of members (uint16).
- For each member:
  - Byte offset in structure (uint32 for version 1).
  - Dimensionality (1 byte, usually 0 for scalar).
  - Reserved bytes (3 bytes for version 1).
  - Member name (null-terminated string, padded to multiple of 8 bytes).
  - Member datatype (recursive datatype message, 8+ bytes).
func (*CompoundType) String ¶
func (ct *CompoundType) String() string
String returns human-readable compound type description.
type CompoundValue ¶
type CompoundValue map[string]interface{}
CompoundValue represents a single compound structure instance as a map of field names to values.
func ReadDatasetCompound ¶
func ReadDatasetCompound(r io.ReaderAt, header *ObjectHeader, sb *Superblock) ([]CompoundValue, error)
ReadDatasetCompound reads a dataset with compound datatype and returns array of compound values.
type DataLayoutClass ¶
type DataLayoutClass uint8
DataLayoutClass represents the storage layout type.
const (
	LayoutCompact DataLayoutClass = 0 // Data stored in message.
	LayoutContiguous DataLayoutClass = 1 // Data stored contiguously in file.
	LayoutChunked DataLayoutClass = 2 // Data stored in chunks.
	LayoutVirtual DataLayoutClass = 3 // Virtual dataset (HDF5 1.10+).
)
Data layout class constants define how dataset data is stored.
type DataLayoutMessage ¶
type DataLayoutMessage struct {
Version uint8
Class DataLayoutClass
DataAddress uint64 // Address where data is stored (for contiguous/chunked).
DataSize uint64 // Size of data (for contiguous).
CompactData []byte // Data itself (for compact layout).
ChunkSize []uint64 // Chunk dimensions (for chunked layout) - uint64 for HDF5 2.0.0+ support.
ChunkKeySize uint8 // Size of chunk keys in bytes: 4 (uint32) or 8 (uint64).
}
DataLayoutMessage represents HDF5 data layout message.
func ParseDataLayoutMessage ¶
func ParseDataLayoutMessage(data []byte, sb *Superblock) (*DataLayoutMessage, error)
ParseDataLayoutMessage parses a data layout message from header message data.
func (*DataLayoutMessage) IsChunked ¶
func (dl *DataLayoutMessage) IsChunked() bool
IsChunked returns true if layout is chunked.
func (*DataLayoutMessage) IsCompact ¶
func (dl *DataLayoutMessage) IsCompact() bool
IsCompact returns true if layout is compact (data in message).
func (*DataLayoutMessage) IsContiguous ¶
func (dl *DataLayoutMessage) IsContiguous() bool
IsContiguous returns true if layout is contiguous.
func (*DataLayoutMessage) String ¶
func (dl *DataLayoutMessage) String() string
String returns human-readable layout description.
type DatasetInfo ¶
type DatasetInfo struct {
Datatype *DatatypeMessage
Dataspace *DataspaceMessage
Layout *DataLayoutMessage
}
DatasetInfo holds metadata about a dataset.
func ReadDatasetInfo ¶
func ReadDatasetInfo(header *ObjectHeader, sb *Superblock) (*DatasetInfo, error)
ReadDatasetInfo returns dataset metadata without reading actual data.
func (*DatasetInfo) String ¶
func (di *DatasetInfo) String() string
String returns human-readable dataset info.
type DataspaceMessage ¶
type DataspaceMessage struct {
Version uint8
Type DataspaceType
Dimensions []uint64
MaxDims []uint64 // Maximum dimensions (optional, for resizable datasets).
}
DataspaceMessage represents HDF5 dataspace message.
func ParseDataspaceMessage ¶
func ParseDataspaceMessage(data []byte) (*DataspaceMessage, error)
ParseDataspaceMessage parses a dataspace message from header message data.
func (*DataspaceMessage) Is1D ¶
func (ds *DataspaceMessage) Is1D() bool
Is1D returns true if dataspace is 1-dimensional array.
func (*DataspaceMessage) Is2D ¶
func (ds *DataspaceMessage) Is2D() bool
Is2D returns true if dataspace is 2-dimensional array (matrix).
func (*DataspaceMessage) IsScalar ¶
func (ds *DataspaceMessage) IsScalar() bool
IsScalar returns true if dataspace is scalar (single value).
func (*DataspaceMessage) String ¶
func (ds *DataspaceMessage) String() string
String returns human-readable dataspace description.
func (*DataspaceMessage) TotalElements ¶
func (ds *DataspaceMessage) TotalElements() uint64
TotalElements calculates total number of elements in dataspace.
type DataspaceType ¶
type DataspaceType uint8
DataspaceType represents the type of dataspace.
const (
DataspaceScalar DataspaceType = 0 // Scalar (single value).
DataspaceSimple DataspaceType = 1 // Simple (N-dimensional array).
DataspaceNull DataspaceType = 2 // Null (no data).
)
Dataspace type constants define the dimensionality of datasets.
type DatatypeClass ¶
type DatatypeClass uint8
DatatypeClass represents HDF5 datatype class.
const (
DatatypeFixed DatatypeClass = 0 // Fixed-point (integers).
DatatypeFloat DatatypeClass = 1 // Floating-point.
DatatypeTime DatatypeClass = 2 // Time.
DatatypeString DatatypeClass = 3 // String.
DatatypeBitfield DatatypeClass = 4 // Bitfield.
DatatypeOpaque DatatypeClass = 5 // Opaque.
DatatypeCompound DatatypeClass = 6 // Compound.
DatatypeReference DatatypeClass = 7 // Reference.
DatatypeEnum DatatypeClass = 8 // Enumerated.
DatatypeVarLen DatatypeClass = 9 // Variable-length.
DatatypeArray DatatypeClass = 10 // Array.
DatatypeComplex DatatypeClass = 11 // Complex (HDF5 2.0+).
)
Datatype class constants identify different HDF5 data types for datasets.
type DatatypeMessage ¶
type DatatypeMessage struct {
Class DatatypeClass
Version uint8
Size uint32
ClassBitField uint32
Properties []byte
}
DatatypeMessage represents HDF5 datatype message.
func CreateBasicDatatypeMessage ¶
func CreateBasicDatatypeMessage(class DatatypeClass, size uint32) (*DatatypeMessage, error)
CreateBasicDatatypeMessage creates a simple datatype message for basic types. This is a helper for creating member types in compound datatypes.
For integer types, properties are 4 bytes (bit offset + precision). For float types, properties are 12 bytes (full IEEE 754 info). For string types, properties are minimal (1 byte for padding/charset).
func CreateCompoundTypeFromFields ¶
func CreateCompoundTypeFromFields(fields []CompoundFieldDef) (*DatatypeMessage, error)
CreateCompoundTypeFromFields creates a DatatypeMessage for a compound type. This is a convenience function for creating compound datatypes with automatic offset calculation.
Parameters:
- fields: List of field definitions (offsets will be calculated)
Returns:
- DatatypeMessage ready for writing
- Error if creation fails.
func ParseDatatypeMessage ¶
func ParseDatatypeMessage(data []byte) (*DatatypeMessage, error)
ParseDatatypeMessage parses a datatype message from header message data.
func (*DatatypeMessage) GetByteOrder ¶
func (dt *DatatypeMessage) GetByteOrder() binary.ByteOrder
GetByteOrder returns byte order for numeric types.
func (*DatatypeMessage) GetEncodedSize ¶
func (dt *DatatypeMessage) GetEncodedSize() int
GetEncodedSize returns the total size of this datatype message when encoded. This includes the 8-byte header plus properties.
Property sizes from the HDF5 C library (H5Odtype.c:1630):
- Integer: 4 bytes (offset + precision).
- Float: 12 bytes (byte order, padding, mantissa, exponent info).
- Bitfield: 4 bytes (offset + precision).
- Time: 2 bytes.
- String: variable (character set + padding type).
- Compound: variable (member definitions).
func (*DatatypeMessage) GetStringPadding ¶
func (dt *DatatypeMessage) GetStringPadding() uint8
GetStringPadding returns the string padding type. 0 = null-terminated, 1 = null-padded, 2 = space-padded.
func (*DatatypeMessage) IsCompound ¶
func (dt *DatatypeMessage) IsCompound() bool
IsCompound checks if datatype is a compound type (struct).
func (*DatatypeMessage) IsFixedString ¶
func (dt *DatatypeMessage) IsFixedString() bool
IsFixedString checks if datatype is a fixed-length string.
func (*DatatypeMessage) IsFloat32 ¶
func (dt *DatatypeMessage) IsFloat32() bool
IsFloat32 checks if datatype is IEEE 754 single precision (32-bit).
func (*DatatypeMessage) IsFloat64 ¶
func (dt *DatatypeMessage) IsFloat64() bool
IsFloat64 checks if datatype is IEEE 754 double precision (64-bit).
func (*DatatypeMessage) IsInt32 ¶
func (dt *DatatypeMessage) IsInt32() bool
IsInt32 checks if datatype is 32-bit signed integer.
func (*DatatypeMessage) IsInt64 ¶
func (dt *DatatypeMessage) IsInt64() bool
IsInt64 checks if datatype is 64-bit signed integer.
func (*DatatypeMessage) IsString ¶
func (dt *DatatypeMessage) IsString() bool
IsString checks if datatype is a string type.
func (*DatatypeMessage) IsVariableString ¶
func (dt *DatatypeMessage) IsVariableString() bool
IsVariableString checks if datatype is a variable-length string. Reference: HDF5 Format Specification III.A.2.4.d (Variable-Length Types).
func (*DatatypeMessage) String ¶
func (dt *DatatypeMessage) String() string
String returns human-readable datatype description.
type FP8E4M3 ¶
type FP8E4M3 uint8
FP8E4M3 represents an 8-bit floating point value in E4M3 format.
Format (8 bits total):
- Bit 7: Sign (1 bit)
- Bits 6-3: Exponent (4 bits, bias=7)
- Bits 2-0: Mantissa (3 bits)
Special values:
- Exponent=15, Mantissa=7: Infinity
- Exponent=0, Mantissa=0: Zero
- Exponent=0, Mantissa≠0: Subnormal (denormalized)
Range: ±448 (max normal value). Precision: ~1 decimal digit.
func Float32ToFP8E4M3 ¶
Float32ToFP8E4M3 converts float32 to FP8 E4M3 with rounding.
type FP8E5M2 ¶
type FP8E5M2 uint8
FP8E5M2 represents an 8-bit floating point value in E5M2 format.
Format (8 bits total):
- Bit 7: Sign (1 bit)
- Bits 6-2: Exponent (5 bits, bias=15)
- Bits 1-0: Mantissa (2 bits)
Special values:
- Exponent=31, Mantissa=3: Infinity
- Exponent=0, Mantissa=0: Zero
- Exponent=0, Mantissa≠0: Subnormal (denormalized)
Range: ±57344 (max normal value). Precision: ~1 decimal digit.
func Float32ToFP8E5M2 ¶
Float32ToFP8E5M2 converts float32 to FP8 E5M2 with rounding.
type Filter ¶
type Filter struct {
ID FilterID
NameLength uint16
Flags uint16
NumClientData uint16
Name string
ClientData []uint32
}
Filter represents a single filter in the pipeline.
type FilterID ¶
type FilterID uint16
FilterID represents HDF5 filter identifiers.
const (
FilterDeflate FilterID = 1 // GZIP compression.
FilterShuffle FilterID = 2 // Shuffle filter.
FilterFletcher FilterID = 3 // Fletcher32 checksum.
FilterSZIP FilterID = 4 // SZIP compression.
FilterNBit FilterID = 5 // N-bit compression.
FilterScaleOffset FilterID = 6 // Scale-offset filter.
FilterBZIP2 FilterID = 307 // BZIP2 compression.
FilterLZF FilterID = 32000 // LZF compression (PyTables/h5py).
)
Filter identifier constants define compression and processing filters for datasets.
type FilterPipelineMessage ¶
FilterPipelineMessage represents the filter pipeline for a dataset.
func ParseFilterPipelineMessage ¶
func ParseFilterPipelineMessage(data []byte) (*FilterPipelineMessage, error)
ParseFilterPipelineMessage parses filter pipeline message (type 0x000B).
func (*FilterPipelineMessage) ApplyFilters ¶
func (fp *FilterPipelineMessage) ApplyFilters(data []byte) ([]byte, error)
ApplyFilters applies filter pipeline to decompress/decode chunk data.
type GlobalHeapCollection ¶
type GlobalHeapCollection struct {
Address uint64 // File address of this collection.
Size uint64 // Total size of collection in bytes.
Objects []GlobalHeapObject // Array of heap objects.
CollectionID uint16 // Collection ID for debugging.
}
GlobalHeapCollection represents a global heap collection (H5HG_heap_t in C). Format reference: H5HGpkg.h.
func ReadGlobalHeapCollection ¶
func ReadGlobalHeapCollection(r io.ReaderAt, address uint64, offsetSize int) (*GlobalHeapCollection, error)
ReadGlobalHeapCollection reads a global heap collection from the file. Collection format (H5HG.c:156-180):
- Signature (4 bytes): "GCOL".
- Version (1 byte): always 1.
- Reserved (3 bytes).
- Collection size (offset_size bytes).
- Heap objects then follow, each with:
- Object ID (2 bytes).
- Reference count (2 bytes).
- Reserved (4 bytes).
- Object size (offset_size bytes).
- Object data (size bytes, aligned to 8-byte boundary).
func (*GlobalHeapCollection) GetObject ¶
func (gc *GlobalHeapCollection) GetObject(index uint32) (*GlobalHeapObject, error)
GetObject retrieves an object from the collection by index.
type GlobalHeapObject ¶
type GlobalHeapObject struct {
Index int // Object index within collection.
Size uint64 // Size of object data.
Data []byte // Actual object data.
NRefs uint16 // Reference count.
}
GlobalHeapObject represents a single object in the global heap. Format reference: H5HGpkg.h:105-109.
type GlobalHeapReference ¶
type GlobalHeapReference struct {
HeapAddress uint64 // Address of heap collection.
ObjectIndex uint32 // Index within the collection.
}
GlobalHeapReference represents a reference to a global heap object. This is what's stored in dataset data for variable-length types. Format: heap_address (offset_size bytes) + object_index (4 bytes).
func ParseGlobalHeapReference ¶
func ParseGlobalHeapReference(data []byte, offsetSize int) (*GlobalHeapReference, error)
ParseGlobalHeapReference parses a global heap reference from raw bytes. Format: heap_address (offsetSize bytes) + object_index (4 bytes).
type HeaderMessage ¶
type HeaderMessage struct {
Type MessageType
Offset uint64
Data []byte
}
HeaderMessage represents a single message within an object header.
type HeapWriter ¶
type HeapWriter interface {
GetObject(heapID []byte) ([]byte, error)
OverwriteObject(heapID []byte, newData []byte) error
DeleteObject(heapID []byte) error
InsertObject(data []byte) ([]byte, error)
}
HeapWriter interface for dense attribute modification. This abstracts fractal heap operations for testing and modularity.
type LinkInfoMessage ¶
type LinkInfoMessage struct {
Version uint8 // Message version (always 0 for now)
Flags uint8 // Bit 0: creation order tracked, Bit 1: creation order indexed
// Max creation order value (present if bit 0 of flags is set)
// This is the highest creation order value seen so far in the group
MaxCreationOrder int64
// Addresses for dense link storage (0 if not present)
FractalHeapAddress uint64 // Address of fractal heap for link name storage
NameBTreeAddress uint64 // Address of B-tree v2 for link name index
CreationOrderBTreeAddress uint64 // Address of B-tree v2 for creation order index (optional)
}
LinkInfoMessage represents the Link Info message (HDF5 message type 0x0002). This message provides information about the storage of links in a group.
The Link Info message is used in modern HDF5 groups (1.8+) to indicate:
- Whether creation order is tracked and/or indexed
- The maximum creation order value seen so far
- Addresses of the fractal heap and B-tree v2 structures for "dense" link storage
Format:
- Version (1 byte): Always 0 for current spec
- Flags (1 byte): Bit 0 = track creation order, Bit 1 = index creation order
- Max Creation Order (8 bytes, optional): Present if bit 0 of flags is set
- Fractal Heap Address (offsetSize bytes): Address of fractal heap for link names
- Name B-tree v2 Address (offsetSize bytes): Address of B-tree for name index
- Creation Order B-tree v2 Address (offsetSize bytes, optional): Present if bit 1 of flags is set
Reference: HDF5 Format Spec Section IV.A.2.c (Link Info Message). C Reference: H5Olinfo.c - H5O_linfo_t structure and encoding/decoding functions.
func ParseLinkInfoMessage ¶
func ParseLinkInfoMessage(data []byte, sb *Superblock) (*LinkInfoMessage, error)
ParseLinkInfoMessage parses Link Info message from header message data.
This implements the decoding logic matching the C reference H5Olinfo.c:H5O__linfo_decode().
Format:
- Version (1 byte): Must be 0
- Flags (1 byte): Bit 0 = track creation order, Bit 1 = index creation order
- Max Creation Order (8 bytes, optional): Present if bit 0 of flags is set
- Fractal Heap Address (offsetSize bytes): Always present
- Name B-tree v2 Address (offsetSize bytes): Always present
- Creation Order B-tree v2 Address (offsetSize bytes, optional): Present if bit 1 of flags is set
Reference: H5Olinfo.c - H5O__linfo_decode().
func (*LinkInfoMessage) HasCreationOrderBTree ¶
func (lim *LinkInfoMessage) HasCreationOrderBTree() bool
HasCreationOrderBTree returns true if creation order B-tree address is set.
func (*LinkInfoMessage) HasCreationOrderIndex ¶
func (lim *LinkInfoMessage) HasCreationOrderIndex() bool
HasCreationOrderIndex returns true if creation order is indexed.
func (*LinkInfoMessage) HasCreationOrderTracking ¶
func (lim *LinkInfoMessage) HasCreationOrderTracking() bool
HasCreationOrderTracking returns true if creation order is tracked.
func (*LinkInfoMessage) HasFractalHeap ¶
func (lim *LinkInfoMessage) HasFractalHeap() bool
HasFractalHeap returns true if fractal heap address is valid (not undefined).
func (*LinkInfoMessage) HasNameBTree ¶
func (lim *LinkInfoMessage) HasNameBTree() bool
HasNameBTree returns true if name B-tree address is valid (not undefined).
type LinkMessage ¶
type LinkMessage struct {
Version uint8 // Message version (always 1 for now)
Flags uint8 // Link type and flags
Type LinkType // Link type (hard, soft, external)
CreationOrder uint64 // Creation order value (optional)
CharSet uint8 // Character set encoding (0=ASCII, 1=UTF-8)
Name string // Link name
LinkValue []byte // Link-specific data (depends on type)
}
LinkMessage represents a link message in an HDF5 file. Link messages are used in modern HDF5 groups (dense storage) to store information about links between objects.
Format (HDF5 Spec Section IV.A.2.g):
- Version (1 byte): Always 1 for current spec
- Flags (1 byte): Link type and creation order tracking
- Link Type (1 byte, optional): Present if bit 3 of flags is set
- Creation Order (8 bytes, optional): Present if bit 2 of flags is set
- Link Name Character Set (1 byte, optional): Present if bit 4 of flags is set; 0=ASCII, 1=UTF-8
- Link Name Length (1, 2, 4, or 8 bytes): Size of link name encoding depends on flags
- Link Name (variable): UTF-8 or ASCII encoded name
- Link Information (variable): Format depends on link type
Reference: HDF5 Format Spec Section IV.A.2.g (Link Message). C Reference: H5Olink.c - H5O_link_t structure and encoding/decoding functions.
func ParseLinkMessage ¶
func ParseLinkMessage(data []byte, sb *Superblock) (*LinkMessage, error)
ParseLinkMessage parses a link message from header message data.
This implements the decoding logic matching the C reference H5Olink.c:H5O__link_decode().
Format:
- Version (1 byte): Must be 1
- Flags (1 byte): Link type and flags
- Link Type (1 byte, optional): If bit 3 of flags is set
- Creation Order (8 bytes, optional): If bit 2 of flags is set
- Link Name Character Set (1 byte, optional): If bit 4 of flags is set
- Link Name Length (1-8 bytes): Depends on flags bits 0-1
- Link Name (variable): UTF-8 or ASCII
- Link Information (variable): Depends on link type
Reference: H5Olink.c - H5O__link_decode().
func (*LinkMessage) GetExternalLinkInfo ¶
func (lm *LinkMessage) GetExternalLinkInfo() (string, string, error)
GetExternalLinkInfo extracts the file name and object path from an external link's LinkValue. Returns (fileName, objectPath, error).
func (*LinkMessage) GetHardLinkAddress ¶
func (lm *LinkMessage) GetHardLinkAddress(sb *Superblock) (uint64, error)
GetHardLinkAddress extracts the object address from a hard link's LinkValue. Returns the address and an error if the link is not a hard link or data is invalid.
func (*LinkMessage) GetLinkNameLengthSize ¶
func (lm *LinkMessage) GetLinkNameLengthSize() int
GetLinkNameLengthSize returns the size of the link name length field (1, 2, 4, or 8 bytes).
func (*LinkMessage) GetSoftLinkPath ¶
func (lm *LinkMessage) GetSoftLinkPath() (string, error)
GetSoftLinkPath extracts the target path from a soft link's LinkValue. Returns the path string and an error if the link is not a soft link or data is invalid.
func (*LinkMessage) HasCharSetField ¶
func (lm *LinkMessage) HasCharSetField() bool
HasCharSetField returns true if character set field is present.
func (*LinkMessage) HasCreationOrder ¶
func (lm *LinkMessage) HasCreationOrder() bool
HasCreationOrder returns true if creation order field is present.
func (*LinkMessage) HasLinkTypeField ¶
func (lm *LinkMessage) HasLinkTypeField() bool
HasLinkTypeField returns true if link type field is present.
type LinkType ¶
type LinkType uint8
LinkType defines the type of link (hard, soft, external).
type MessageType ¶
type MessageType uint16
MessageType identifies the type of message in an object header.
const (
MsgNil MessageType = 0
MsgDataspace MessageType = 1
MsgLinkInfo MessageType = 2
MsgDatatype MessageType = 3
MsgFillValueOld MessageType = 4
MsgFillValue MessageType = 5 // Fill Value (0x0005); distinct from the old Fill Value message (0x0004)
MsgDataLayout MessageType = 8 // Corrected: Data Layout is 0x0008
MsgFilterPipeline MessageType = 11 // Filter Pipeline (compression, etc)
MsgAttribute MessageType = 12
MsgName MessageType = 13 // Corrected: Name is 0x000D
MsgAttributeInfo MessageType = 21 // Attribute Info (0x0015) - for dense attribute storage
MsgContinuation MessageType = 16 // Object header continuation (0x0010)
MsgSymbolTable MessageType = 17
MsgLinkMessage MessageType = 6
MsgRefCount MessageType = 22 // Reference Count (0x0016) - for hard links (v2 only)
)
Message type constants identify different types of header messages.
type MessageWriter ¶
type MessageWriter struct {
Type MessageType
Data []byte
}
MessageWriter represents a message that can be written to an object header.
type ObjectHeader ¶
type ObjectHeader struct {
Version uint8
Flags uint8
Type ObjectType
Messages []*HeaderMessage
Name string
Attributes []*Attribute
// ReferenceCount tracks the number of hard links to this object.
// For V1 headers: Stored directly in header (bytes 4-7).
// For V2 headers: Stored in RefCount message (type 0x0016) if >1.
// Default value is 1 (single link). Incremented when hard links are created.
ReferenceCount uint32
}
ObjectHeader represents an HDF5 object header containing metadata messages.
func ReadObjectHeader ¶
func ReadObjectHeader(r io.ReaderAt, address uint64, sb *Superblock) (*ObjectHeader, error)
ReadObjectHeader reads and parses an HDF5 object header from the specified address. It supports both version 1 and version 2 object header formats.
func (*ObjectHeader) DecrementReferenceCount ¶
func (oh *ObjectHeader) DecrementReferenceCount() uint32
DecrementReferenceCount decrements the reference count for this object header. This should be called when removing a hard link to the object.
Returns the new reference count after decrement, or 0 if already at 0.
func (*ObjectHeader) GetReferenceCount ¶
func (oh *ObjectHeader) GetReferenceCount() uint32
GetReferenceCount returns the current reference count for this object header. The reference count indicates how many hard links point to this object.
Returns:
- 0: Object has no links (should be deleted)
- 1: Object has one link (normal case)
- >1: Object has multiple hard links
func (*ObjectHeader) IncrementReferenceCount ¶
func (oh *ObjectHeader) IncrementReferenceCount() uint32
IncrementReferenceCount increments the reference count for this object header. This should be called when creating a new hard link to the object.
Reference counting behavior:
- Each object starts with refcount = 1 (original link)
- Each additional hard link increments the count
- When links are deleted, the count is decremented
- Object is deleted when refcount reaches 0
Returns the new reference count after increment.
type ObjectHeaderWriter ¶
type ObjectHeaderWriter struct {
Version uint8
Flags uint8
Messages []MessageWriter
// V1-specific fields (used only when Version == 1)
RefCount uint32 // Reference count (always 1 for new files)
}
ObjectHeaderWriter provides functionality for writing HDF5 object headers. Supports both v1 (legacy, for superblock v0) and v2 (modern) formats.
func NewMinimalRootGroupHeader ¶
func NewMinimalRootGroupHeader() *ObjectHeaderWriter
NewMinimalRootGroupHeader creates a minimal object header v2 for an empty root group. This is suitable for MVP file creation - just enough to make a valid HDF5 file.
The root group header contains:
- Object Header v2 with minimal flags (no times, no attribute phase change)
- Link Info message (empty, compact storage)
Returns an ObjectHeaderWriter ready to be written to file.
func (*ObjectHeaderWriter) Size ¶
func (ohw *ObjectHeaderWriter) Size() uint64
Size calculates the total size of the object header in bytes. This is used for pre-allocation before writing.
Returns:
- Total size in bytes
For object header v1:
- Header: 16 bytes (version, reserved, num_messages, ref_count, header_size, padding)
- Messages: sum of (2 + 2 + 1 + 3 + len(data)) for each message (8-byte aligned)
For object header v2:
- Header: 4 (signature) + 1 (version) + 1 (flags) + 1 (chunk size) = 7 bytes
- Messages: sum of (1 + 2 + 1 + len(data)) for each message
func (*ObjectHeaderWriter) WriteTo ¶
WriteTo writes the object header to the writer at the specified address. Returns the total size written (useful for allocation tracking).
Object Header v1 format:
- Version (1 byte)
- Reserved (1 byte)
- Number of Messages (2 bytes)
- Object Reference Count (4 bytes)
- Object Header Size (4 bytes)
- Padding to 8-byte alignment (4 bytes)
- Messages (each 8-byte aligned)
Object Header v2 format:
- Signature: "OHDR" (4 bytes)
- Version: 2 (1 byte)
- Flags: (1 byte)
- [Optional fields based on flags]
- Size of Chunk 0: (1, 2, 4, or 8 bytes based on flags bits 0-1)
- Messages: variable size
For MVP v2:
- No timestamp fields (flags bit 5 = 0)
- No attribute phase change (flags bit 4 = 0)
- Chunk size in 1 byte (flags bits 0-1 = 0)
type ObjectType ¶
type ObjectType uint8
ObjectType identifies the type of HDF5 object (group, dataset, datatype).
const (
ObjectTypeGroup ObjectType = iota
ObjectTypeDataset
ObjectTypeDatatype
ObjectTypeUnknown
)
Object type constants identify different HDF5 object types.
type Superblock ¶
type Superblock struct {
Version uint8
OffsetSize uint8
LengthSize uint8
BaseAddress uint64
RootGroup uint64
Endianness binary.ByteOrder
SuperExtension uint64
DriverInfo uint64
// V0-specific: Cached symbol table info for root group
// These are only used when Version == 0
RootBTreeAddr uint64 // B-tree address for root group (v0 only)
RootHeapAddr uint64 // Local heap address for root group (v0 only)
}
Superblock represents the HDF5 file superblock containing file-level metadata.
func ReadSuperblock ¶
func ReadSuperblock(r io.ReaderAt) (*Superblock, error)
ReadSuperblock reads and parses the HDF5 superblock from the file. It supports versions 0, 2, and 3 of the superblock format.
func (*Superblock) WriteTo ¶
func (sb *Superblock) WriteTo(w io.WriterAt, eofAddress uint64) error
WriteTo writes the superblock to the writer at offset 0. For MVP (v0.11.0-beta), only superblock v2 is supported for writing.
Superblock v2 format (48 bytes):
Bytes 0-7: Signature (\x89HDF\r\n\x1a\n)
Byte 8: Version (2)
Byte 9: Size of Offsets (8 bytes)
Byte 10: Size of Lengths (8 bytes)
Byte 11: File Consistency Flags (0)
Bytes 12-19: Base Address (typically 0)
Bytes 20-27: Superblock Extension Address (UNDEF if none)
Bytes 28-35: End-of-File Address (file size)
Bytes 36-43: Root Group Object Header Address
Bytes 44-47: Superblock Checksum (CRC32)
Parameters:
- w: Writer (typically a FileWriter)
- eofAddress: Current end-of-file address
Returns error if write fails or if superblock version is not supported.
Source Files
¶
- attribute.go
- attribute_modify.go
- btree_v1.go
- datalayout.go
- dataset_reader.go
- dataset_reader_compound.go
- dataset_reader_strings.go
- dataspace.go
- datatype.go
- datatype_bfloat16.go
- datatype_compound.go
- datatype_compound_write.go
- datatype_fp8.go
- filterpipeline.go
- globalheap.go
- link_message.go
- linkinfo.go
- messages_write.go
- objectheader.go
- objectheader_v1.go
- objectheader_write.go
- superblock.go