From ae64fb51b6c76a990c8484967028c05d57feae39 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Mon, 13 Apr 2026 17:12:56 -0700 Subject: [PATCH 01/11] add hdf5 client-side detect_config --- python/lib/sift_client/_internal/util/hdf5.py | 174 ++++++++++++++++++ .../lib/sift_client/resources/data_imports.py | 4 + .../lib/sift_client/sift_types/data_import.py | 12 ++ 3 files changed, 190 insertions(+) create mode 100644 python/lib/sift_client/_internal/util/hdf5.py diff --git a/python/lib/sift_client/_internal/util/hdf5.py b/python/lib/sift_client/_internal/util/hdf5.py new file mode 100644 index 000000000..eba35ba0f --- /dev/null +++ b/python/lib/sift_client/_internal/util/hdf5.py @@ -0,0 +1,174 @@ +from __future__ import annotations + +from pathlib import Path + +import h5py +import numpy as np + +from sift_client.sift_types.channel import ChannelDataType +from sift_client.sift_types.data_import import Hdf5DataColumn, Hdf5ImportConfig, TimeFormat + +# Common HDF5 attribute names used to detect channel metadata. +_NAME_ATTRS = ["Name", "name", "Title", "title", "Sensor", "sensor", "Channel", "channel"] +_UNIT_ATTRS = ["Unit", "unit", "Units", "units"] +_DESCRIPTION_ATTRS = ["Description", "description"] + +_NUMPY_TO_SIFT: dict[type, ChannelDataType] = { + np.bool_: ChannelDataType.BOOL, + np.int8: ChannelDataType.INT_32, + np.int16: ChannelDataType.INT_32, + np.int32: ChannelDataType.INT_32, + np.int64: ChannelDataType.INT_64, + np.uint8: ChannelDataType.UINT_32, + np.uint16: ChannelDataType.UINT_32, + np.uint32: ChannelDataType.UINT_32, + np.uint64: ChannelDataType.UINT_64, + np.float32: ChannelDataType.FLOAT, + np.float64: ChannelDataType.DOUBLE, + np.datetime64: ChannelDataType.INT_64, + np.complex64: ChannelDataType.FLOAT, + np.complex128: ChannelDataType.DOUBLE, + np.str_: ChannelDataType.STRING, + np.bytes_: ChannelDataType.STRING, + np.object_: ChannelDataType.STRING, + np.void: ChannelDataType.BYTES, +} + + +def _detect_attr(dataset: h5py.Dataset, candidates: list[str], default: str = "") -> str: + """Return the first matching HDF5 attribute value, or *default*.""" + for attr in candidates: + val = dataset.attrs.get(attr) + if val is not None: + return ( + val.decode() + if isinstance(val, bytes) + else str(val) + if not isinstance(val, str) + else val + ) + return default + + +def _numpy_to_sift_type(dtype: np.dtype) -> ChannelDataType: + """Map a numpy dtype to a Sift ChannelDataType.""" + sift_type = _NUMPY_TO_SIFT.get(dtype.type) + if sift_type is None: + raise ValueError(f"Unsupported numpy dtype: {dtype}") + return sift_type + + +def detect_hdf5_config(file_path: str | Path) -> Hdf5ImportConfig: + """Detect an HDF5 import config by inspecting the file's datasets. + + Traverses the HDF5 file and produces (time dataset, value dataset) pairs. + For compound datasets with multiple fields, the first field is assumed to + be time and remaining fields become value channels. For simple datasets, + a root-level ``time`` dataset is used if present. + """ + path = Path(file_path) + + with h5py.File(path, "r") as h5file: + columns: list[Hdf5DataColumn] = [] + seen_names: set[str] = set() + has_root_time = "time" in h5file + + def _visit(dataset_name: str, obj: object) -> None: + if not isinstance(obj, h5py.Dataset): + return + + leaf_name = dataset_name.rsplit("/", 1)[-1] + + # Skip root "time" dataset — it's used as the time source, not a value channel. + if dataset_name == "time" and obj.parent == h5file: + return + + # Skip "timestamps" datasets — they're time sources, not value channels. + if leaf_name == "timestamps": + return + + n_fields = len(obj.dtype.names) if obj.dtype.names else 0 + + if n_fields > 1: + # Compound type: first field is time, remaining are value channels. + for value_index in range(1, n_fields): + channel_name = _detect_attr(obj, _NAME_ATTRS, dataset_name) + if channel_name in seen_names: + channel_name = f"{channel_name}.{dataset_name}.{value_index}" + + columns.append( + Hdf5DataColumn( + name=channel_name, + data_type=_numpy_to_sift_type(obj.dtype[value_index]), + units=_detect_attr(obj, _UNIT_ATTRS), + description=_detect_attr(obj, _DESCRIPTION_ATTRS), + time_dataset=dataset_name, + value_dataset=dataset_name, + time_index=0, + value_index=0, + time_field=obj.dtype.names[0], + value_field=obj.dtype.names[value_index], + ) + ) + seen_names.add(channel_name) + + elif n_fields in (0, 1): + # Resolve time dataset: prefer sibling "timestamps", fall back to root "time". + group = obj.parent + time_dataset = "" + if "timestamps" in group: + group_name = dataset_name.rsplit("/", 1)[0] if "/" in dataset_name else "" + time_dataset = f"{group_name}/timestamps" if group_name else "timestamps" + elif has_root_time: + time_dataset = "time" + + # For 2D datasets (N x 2), treat column 0 as time and column 1 as value. + if obj.ndim == 2 and obj.shape[1] == 2: + channel_name = _detect_attr(obj, _NAME_ATTRS, dataset_name) + if channel_name in seen_names: + channel_name = f"{channel_name}.{dataset_name}" + + columns.append( + Hdf5DataColumn( + name=channel_name, + data_type=_numpy_to_sift_type(obj.dtype), + units=_detect_attr(obj, _UNIT_ATTRS), + description=_detect_attr(obj, _DESCRIPTION_ATTRS), + time_dataset=dataset_name, + value_dataset=dataset_name, + time_index=0, + value_index=1, + ) + ) + seen_names.add(channel_name) + else: + # Use the group name as channel name for "values" leaf datasets. + default_name = dataset_name + if leaf_name == "values" and "/" in dataset_name: + default_name = dataset_name.rsplit("/", 1)[0] + + channel_name = _detect_attr(obj, _NAME_ATTRS, default_name) + if channel_name in seen_names: + channel_name = f"{channel_name}.{dataset_name}" + + columns.append( + Hdf5DataColumn( + name=channel_name, + data_type=_numpy_to_sift_type(obj.dtype), + units=_detect_attr(obj, _UNIT_ATTRS), + description=_detect_attr(obj, _DESCRIPTION_ATTRS), + time_dataset=time_dataset, + value_dataset=dataset_name, + time_index=0, + value_index=0, + ) + ) + seen_names.add(channel_name) + + h5file.visititems(_visit) + + return Hdf5ImportConfig( + asset_name="", + time_format=TimeFormat.ABSOLUTE_UNIX_NANOSECONDS, + data=columns, + ) diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py index f40876234..1c1a03b4b 100644 --- a/python/lib/sift_client/resources/data_imports.py +++ b/python/lib/sift_client/resources/data_imports.py @@ -6,6 +6,7 @@ from sift_client._internal.low_level_wrappers.data_imports import DataImportsLowLevelClient from sift_client._internal.util.executor import run_sync_function from sift_client._internal.util.file import extract_parquet_footer, upload_file +from sift_client._internal.util.hdf5 import detect_hdf5_config from sift_client.resources._base import ResourceBase from sift_client.sift_types.asset import Asset from sift_client.sift_types.channel import ChannelDataType @@ -243,6 +244,9 @@ async def detect_config( data_type_key = _resolve_data_type_key(path.suffix.lower(), data_type) + if data_type_key == DataTypeKey.HDF5: + return await run_sync_function(lambda: detect_hdf5_config(path)) + is_parquet = data_type_key in ( DataTypeKey.PARQUET_FLATDATASET, DataTypeKey.PARQUET_SINGLE_CHANNEL_PER_ROW, diff --git a/python/lib/sift_client/sift_types/data_import.py b/python/lib/sift_client/sift_types/data_import.py index 62208a678..73bc64454 100644 --- a/python/lib/sift_client/sift_types/data_import.py +++ b/python/lib/sift_client/sift_types/data_import.py @@ -593,6 +593,18 @@ class Hdf5ImportConfig(ImportConfigBase): time_format: TimeFormat relative_start_time: datetime | None = None + def __getitem__(self, name: str) -> Hdf5DataColumn: + """Look up a data column by channel name. + + Example:: + + config["temperature"].data_type = ChannelDataType.FLOAT + """ + for dc in self.data: + if dc.name == name: + return dc + raise KeyError(f"No data column named '{name}'") + @model_validator(mode="after") def _check_relative_start_time(self) -> Hdf5ImportConfig: if self.time_format.name.startswith("RELATIVE_") and self.relative_start_time is None: From 13053b4a4fb1fe01517ffa98bc24048300ea7f3f Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Mon, 13 Apr 2026 17:42:16 -0700 Subject: [PATCH 02/11] mypy fix --- python/lib/sift_client/_internal/util/hdf5.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/python/lib/sift_client/_internal/util/hdf5.py b/python/lib/sift_client/_internal/util/hdf5.py index eba35ba0f..a4861da3d 100644 --- a/python/lib/sift_client/_internal/util/hdf5.py +++ b/python/lib/sift_client/_internal/util/hdf5.py @@ -40,13 +40,7 @@ def _detect_attr(dataset: h5py.Dataset, candidates: list[str], default: str = "" for attr in candidates: val = dataset.attrs.get(attr) if val is not None: - return ( - val.decode() - if isinstance(val, bytes) - else str(val) - if not isinstance(val, str) - else val - ) + return val return default From f6845043ee1b9b2b155be8cbcdcbf3015d378172 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 14 Apr 2026 10:12:29 -0700 Subject: [PATCH 03/11] update detect_config and helpers --- python/lib/sift_client/_internal/util/hdf5.py | 80 +++++-------------- 1 file changed, 19 insertions(+), 61 deletions(-) diff --git a/python/lib/sift_client/_internal/util/hdf5.py b/python/lib/sift_client/_internal/util/hdf5.py index a4861da3d..3ea7fc03d 100644 --- a/python/lib/sift_client/_internal/util/hdf5.py +++ b/python/lib/sift_client/_internal/util/hdf5.py @@ -37,11 +37,8 @@ def _detect_attr(dataset: h5py.Dataset, candidates: list[str], default: str = "") -> str: """Return the first matching HDF5 attribute value, or *default*.""" - for attr in candidates: - val = dataset.attrs.get(attr) - if val is not None: - return val - return default + possible = [dataset.attrs.get(attr) for attr in candidates if dataset.attrs.get(attr)] + return possible[0] if possible else default def _numpy_to_sift_type(dtype: np.dtype) -> ChannelDataType: @@ -71,16 +68,10 @@ def _visit(dataset_name: str, obj: object) -> None: if not isinstance(obj, h5py.Dataset): return - leaf_name = dataset_name.rsplit("/", 1)[-1] - # Skip root "time" dataset — it's used as the time source, not a value channel. if dataset_name == "time" and obj.parent == h5file: return - # Skip "timestamps" datasets — they're time sources, not value channels. - if leaf_name == "timestamps": - return - n_fields = len(obj.dtype.names) if obj.dtype.names else 0 if n_fields > 1: @@ -107,57 +98,24 @@ def _visit(dataset_name: str, obj: object) -> None: seen_names.add(channel_name) elif n_fields in (0, 1): - # Resolve time dataset: prefer sibling "timestamps", fall back to root "time". - group = obj.parent - time_dataset = "" - if "timestamps" in group: - group_name = dataset_name.rsplit("/", 1)[0] if "/" in dataset_name else "" - time_dataset = f"{group_name}/timestamps" if group_name else "timestamps" - elif has_root_time: - time_dataset = "time" - - # For 2D datasets (N x 2), treat column 0 as time and column 1 as value. - if obj.ndim == 2 and obj.shape[1] == 2: - channel_name = _detect_attr(obj, _NAME_ATTRS, dataset_name) - if channel_name in seen_names: - channel_name = f"{channel_name}.{dataset_name}" - - columns.append( - Hdf5DataColumn( - name=channel_name, - data_type=_numpy_to_sift_type(obj.dtype), - units=_detect_attr(obj, _UNIT_ATTRS), - description=_detect_attr(obj, _DESCRIPTION_ATTRS), - time_dataset=dataset_name, - value_dataset=dataset_name, - time_index=0, - value_index=1, - ) + # Single column. Use root "time" as time dataset if available. + channel_name = _detect_attr(obj, _NAME_ATTRS, dataset_name) + if channel_name in seen_names: + channel_name = f"{channel_name}.{dataset_name}" + + columns.append( + Hdf5DataColumn( + name=channel_name, + data_type=_numpy_to_sift_type(obj.dtype), + units=_detect_attr(obj, _UNIT_ATTRS), + description=_detect_attr(obj, _DESCRIPTION_ATTRS), + time_dataset="time" if has_root_time else "", + value_dataset=dataset_name, + time_index=0, + value_index=0, ) - seen_names.add(channel_name) - else: - # Use the group name as channel name for "values" leaf datasets. - default_name = dataset_name - if leaf_name == "values" and "/" in dataset_name: - default_name = dataset_name.rsplit("/", 1)[0] - - channel_name = _detect_attr(obj, _NAME_ATTRS, default_name) - if channel_name in seen_names: - channel_name = f"{channel_name}.{dataset_name}" - - columns.append( - Hdf5DataColumn( - name=channel_name, - data_type=_numpy_to_sift_type(obj.dtype), - units=_detect_attr(obj, _UNIT_ATTRS), - description=_detect_attr(obj, _DESCRIPTION_ATTRS), - time_dataset=time_dataset, - value_dataset=dataset_name, - time_index=0, - value_index=0, - ) - ) - seen_names.add(channel_name) + ) + seen_names.add(channel_name) h5file.visititems(_visit) From d9517a2300196ce547afd1f7d4d1c3a2e72a240c Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 14 Apr 2026 10:19:41 -0700 Subject: [PATCH 04/11] update docs and generate sync stubs --- python/lib/sift_client/resources/data_imports.py | 10 +++++----- .../lib/sift_client/resources/sync_stubs/__init__.pyi | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/python/lib/sift_client/resources/data_imports.py b/python/lib/sift_client/resources/data_imports.py index 1c1a03b4b..6f77260ae 100644 --- a/python/lib/sift_client/resources/data_imports.py +++ b/python/lib/sift_client/resources/data_imports.py @@ -62,8 +62,8 @@ async def import_from_path( completion before proceeding. When ``config`` is omitted the file format is auto-detected via - ``detect_config`` (CSV and Parquet only). For other formats - (TDMS and HDF5), ``config`` must be provided. + ``detect_config`` (CSV, Parquet, and HDF5). For other formats + (TDMS), ``config`` must be provided. When ``asset`` is provided it overrides the config value; otherwise the config's ``asset_name`` is used. If neither ``run`` nor ``run_name`` is provided (and none is @@ -199,9 +199,9 @@ async def detect_config( is inferred from the file extension when ``data_type`` is not provided. - Only CSV and Parquet files are currently supported for auto-detection. - For other formats (TDMS, HDF5), create the config manually - using ``TdmsImportConfig`` or ``Hdf5ImportConfig``. + CSV, Parquet, and HDF5 files are supported for auto-detection. + For other formats (TDMS), create the config manually + using ``TdmsImportConfig``. For CSV files, the server scans the first two rows for an optional JSON metadata row. Row 1 is checked first; row 2 is checked only diff --git a/python/lib/sift_client/resources/sync_stubs/__init__.pyi b/python/lib/sift_client/resources/sync_stubs/__init__.pyi index 62fe9d87a..ea57a41de 100644 --- a/python/lib/sift_client/resources/sync_stubs/__init__.pyi +++ b/python/lib/sift_client/resources/sync_stubs/__init__.pyi @@ -653,9 +653,9 @@ class DataImportAPI: is inferred from the file extension when ``data_type`` is not provided. - Only CSV and Parquet files are currently supported for auto-detection. - For other formats (TDMS, HDF5), create the config manually - using ``TdmsImportConfig`` or ``Hdf5ImportConfig``. + CSV, Parquet, and HDF5 files are supported for auto-detection. + For other formats (TDMS), create the config manually + using ``TdmsImportConfig``. For CSV files, the server scans the first two rows for an optional JSON metadata row. Row 1 is checked first; row 2 is checked only @@ -733,8 +733,8 @@ class DataImportAPI: completion before proceeding. When ``config`` is omitted the file format is auto-detected via - ``detect_config`` (CSV and Parquet only). For other formats - (TDMS and HDF5), ``config`` must be provided. + ``detect_config`` (CSV, Parquet, and HDF5). For other formats + (TDMS), ``config`` must be provided. When ``asset`` is provided it overrides the config value; otherwise the config's ``asset_name`` is used. If neither ``run`` nor ``run_name`` is provided (and none is From 3e15a6ba64607b1a8142264a9f4058b70934a7ad Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 14 Apr 2026 10:35:24 -0700 Subject: [PATCH 05/11] hdf5 tests --- .../lib/sift_client/_tests/util/test_hdf5.py | 140 ++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 python/lib/sift_client/_tests/util/test_hdf5.py diff --git a/python/lib/sift_client/_tests/util/test_hdf5.py b/python/lib/sift_client/_tests/util/test_hdf5.py new file mode 100644 index 000000000..49cda8cd8 --- /dev/null +++ b/python/lib/sift_client/_tests/util/test_hdf5.py @@ -0,0 +1,140 @@ +"""Tests for detect_hdf5_config. + +These tests verify that the client-side detect_hdf5_config matches the +backend hdf5.py detect_config 1-to-1. Any client-specific heuristics +(e.g. sibling "timestamps" resolution, 2D dataset handling, "values" +leaf naming) are intentionally NOT present and should not be added. +""" + +import h5py +import numpy as np +import pytest + +from sift_client._internal.util.hdf5 import detect_hdf5_config +from sift_client.sift_types.channel import ChannelDataType +from sift_client.sift_types.data_import import TimeFormat + + +@pytest.fixture +def create_hdf5_file(tmp_path): + """Return a helper that writes an HDF5 file and returns its path.""" + file_path = tmp_path / "test.h5" + + def _create(populate): + with h5py.File(file_path, "w") as hdf5_file: + populate(hdf5_file) + return file_path + + return _create + + +class TestDetectHdf5Config: + def test_compound_dataset(self, create_hdf5_file): + """Compound type: first field is time, remaining fields become value channels.""" + compound_dtype = np.dtype([("timestamp_ns", " Date: Tue, 14 Apr 2026 10:44:23 -0700 Subject: [PATCH 06/11] add hdf5 to mypy overrides --- python/pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/pyproject.toml b/python/pyproject.toml index d3fc9ab96..baaf34b88 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -282,6 +282,10 @@ exclude = [ # No official typing stubs for Python gRPC libraries yet. # https://github.com/grpc/grpc/issues/29041 +[[tool.mypy.overrides]] +module = "h5py" +ignore_missing_imports = true + [[tool.mypy.overrides]] module = "grpc_testing" ignore_missing_imports = true From 751fa0a3adbcee34544197ba3848f0fc6ca7ce23 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 14 Apr 2026 10:48:21 -0700 Subject: [PATCH 07/11] linting --- python/lib/sift_client/_internal/util/hdf5.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/lib/sift_client/_internal/util/hdf5.py b/python/lib/sift_client/_internal/util/hdf5.py index 3ea7fc03d..f2eef8c68 100644 --- a/python/lib/sift_client/_internal/util/hdf5.py +++ b/python/lib/sift_client/_internal/util/hdf5.py @@ -38,7 +38,7 @@ def _detect_attr(dataset: h5py.Dataset, candidates: list[str], default: str = "") -> str: """Return the first matching HDF5 attribute value, or *default*.""" possible = [dataset.attrs.get(attr) for attr in candidates if dataset.attrs.get(attr)] - return possible[0] if possible else default + return str(possible[0]) if possible else default def _numpy_to_sift_type(dtype: np.dtype) -> ChannelDataType: From d770ac44cddd9a58220fdd9727a6a26a56cac3b9 Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Tue, 14 Apr 2026 11:54:55 -0700 Subject: [PATCH 08/11] update tests --- .../lib/sift_client/_tests/util/test_hdf5.py | 22 ++++++------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/python/lib/sift_client/_tests/util/test_hdf5.py b/python/lib/sift_client/_tests/util/test_hdf5.py index 49cda8cd8..1f633d177 100644 --- a/python/lib/sift_client/_tests/util/test_hdf5.py +++ b/python/lib/sift_client/_tests/util/test_hdf5.py @@ -1,10 +1,4 @@ -"""Tests for detect_hdf5_config. - -These tests verify that the client-side detect_hdf5_config matches the -backend hdf5.py detect_config 1-to-1. Any client-specific heuristics -(e.g. sibling "timestamps" resolution, 2D dataset handling, "values" -leaf naming) are intentionally NOT present and should not be added. -""" +"""Tests for detect_hdf5_config.""" import h5py import numpy as np @@ -12,7 +6,6 @@ from sift_client._internal.util.hdf5 import detect_hdf5_config from sift_client.sift_types.channel import ChannelDataType -from sift_client.sift_types.data_import import TimeFormat @pytest.fixture @@ -128,13 +121,12 @@ def populate(hdf5_file): assert config.data[0].units == "V" assert config.data[0].description == "Supply voltage" - def test_returns_correct_wrapper_type(self, create_hdf5_file): - """Config wrapper uses correct time format and empty asset_name.""" + def test_unsupported_dtype_raises(self, create_hdf5_file): + """Unsupported numpy dtypes raise ValueError rather than silently dropping data.""" def populate(hdf5_file): - hdf5_file.create_dataset("x", data=np.array([1.0, 2.0])) - - config = detect_hdf5_config(create_hdf5_file(populate)) + hdf5_file.create_dataset("time", data=np.arange(5, dtype=" Date: Tue, 14 Apr 2026 14:37:15 -0700 Subject: [PATCH 09/11] moved hdf5 test file to _tests/_internal --- python/lib/sift_client/_tests/{util => _internal}/test_hdf5.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename python/lib/sift_client/_tests/{util => _internal}/test_hdf5.py (100%) diff --git a/python/lib/sift_client/_tests/util/test_hdf5.py b/python/lib/sift_client/_tests/_internal/test_hdf5.py similarity index 100% rename from python/lib/sift_client/_tests/util/test_hdf5.py rename to python/lib/sift_client/_tests/_internal/test_hdf5.py From e805edff0e844719fa038a27d90ca925b615e86d Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Thu, 16 Apr 2026 09:48:54 -0700 Subject: [PATCH 10/11] add explanation for numpy-sift conversions --- python/lib/sift_client/_internal/util/hdf5.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/lib/sift_client/_internal/util/hdf5.py b/python/lib/sift_client/_internal/util/hdf5.py index f2eef8c68..c0edfe820 100644 --- a/python/lib/sift_client/_internal/util/hdf5.py +++ b/python/lib/sift_client/_internal/util/hdf5.py @@ -29,7 +29,11 @@ np.complex64: ChannelDataType.FLOAT, np.complex128: ChannelDataType.DOUBLE, np.str_: ChannelDataType.STRING, + # HDF5/TDMS fixed-length strings are stored as np.bytes_; use STRING, not + # BYTES (np.void below handles truly opaque binary data). np.bytes_: ChannelDataType.STRING, + # Numpy uses object dtype for variable-length strings; TDMS/HDF5 files + # cannot produce non-string object arrays. np.object_: ChannelDataType.STRING, np.void: ChannelDataType.BYTES, } From 889b145456894d796e04cd731626d5639a5e716e Mon Sep 17 00:00:00 2001 From: Wei Qi Lu Date: Thu, 16 Apr 2026 10:11:16 -0700 Subject: [PATCH 11/11] documentation --- python/pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index baaf34b88..7a681373f 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -168,7 +168,7 @@ file-imports = [ ] hdf5 = [ 'h5py~=3.11', - 'polars~=1.8', + 'polars~=1.8', # only used by sift_py; remove once sift_py is fully deprecated ] openssl = [ 'cffi~=1.14', @@ -219,7 +219,7 @@ openssl = ["pyOpenSSL<24.0.0", "types-pyOpenSSL<24.0.0", "cffi~=1.14"] tdms = ["npTDMS~=1.9"] rosbags = ["rosbags~=0.0"] sift-stream = ["sift-stream-bindings==0.2.2"] -hdf5 = ["h5py~=3.11", "polars~=1.8"] +hdf5 = ["h5py~=3.11", "polars~=1.8"] # polars is only used by sift_py; remove once sift_py is fully deprecated data-review = ["pyarrow>=17.0.0"] [tool.sift.extras.combine]