From c6b880199520e81c65050a2712ad540ebcb011eb Mon Sep 17 00:00:00 2001 From: "Jens H. Nielsen" Date: Tue, 26 Aug 2025 16:44:08 +0200 Subject: [PATCH 01/13] Export inferred parameters related to dependencies --- .../dataset/exporters/export_to_xarray.py | 43 ++++++++++++++++--- .../test_inferred_parameters_fix.py | 21 ++++++--- 2 files changed, 53 insertions(+), 11 deletions(-) diff --git a/src/qcodes/dataset/exporters/export_to_xarray.py b/src/qcodes/dataset/exporters/export_to_xarray.py index dd20c988240..7e64f68e828 100644 --- a/src/qcodes/dataset/exporters/export_to_xarray.py +++ b/src/qcodes/dataset/exporters/export_to_xarray.py @@ -161,21 +161,52 @@ def _xarray_data_array_direct( import xarray as xr meas_paramspec = dataset.description.interdeps.graph.nodes[name]["value"] - _, deps, _ = dataset.description.interdeps.all_parameters_in_tree_by_group( + _, deps, inferred = dataset.description.interdeps.all_parameters_in_tree_by_group( meas_paramspec ) - dep_axis = {} + # Build coordinate axes from direct dependencies preserving their order + dep_axis: dict[str, npt.NDArray] = {} for axis, dep in enumerate(deps): dep_array = subdict[dep.name] dep_axis[dep.name] = dep_array[ tuple(slice(None) if i == axis else 0 for i in range(dep_array.ndim)) ] - da = xr.Dataset( + extra_coords: dict[str, tuple[tuple[str, ...], npt.NDArray]] = {} + for inf in inferred: + # skip parameters already used as primary coordinate axes + if inf.name in dep_axis: + continue + # add only if data for this parameter is available + if inf.name not in subdict: + continue + + inf_related = dataset.description.interdeps.find_all_parameters_in_tree(inf) + + related_deps = inf_related.intersection(set(deps)) + related_top_level = inf_related.intersection({meas_paramspec}) + + if len(related_top_level) > 0: + raise NotImplementedError( + "Adding inferred coords related to top level param is not yet supported" + ) + + inf_data = subdict[inf.name][ + tuple(slice(None) if dep in related_deps else 0 for dep in deps) + ] + inf_coords = [dep.name for dep in deps if dep in related_deps] + + extra_coords[inf.name] = (tuple(inf_coords), inf_data) + + # Compose coordinates dict including dependency axes and extra inferred coords + coords: dict[str, tuple[tuple[str, ...], npt.NDArray] | npt.NDArray] + coords = {**dep_axis, **extra_coords} + + ds = xr.Dataset( {name: (tuple(dep_axis.keys()), subdict[name])}, - coords=dep_axis, - )[name] - return da + coords=coords, + ) + return ds[name] def load_to_xarray_dataarray_dict( diff --git a/tests/dataset/measurement/test_inferred_parameters_fix.py b/tests/dataset/measurement/test_inferred_parameters_fix.py index 93ecc324d5c..6e61754dc2f 100644 --- a/tests/dataset/measurement/test_inferred_parameters_fix.py +++ b/tests/dataset/measurement/test_inferred_parameters_fix.py @@ -305,14 +305,25 @@ def test_inferred_parameters_in_actual_measurement_2d( ) assert "meas_parameter" in xarr.data_vars + assert "del_param_1" in xarr.coords + assert xarr.coords["del_param_1"].shape == (num_points_x,) + assert xarr.coords["del_param_1"].dims == ("del_param_1",) + assert "del_param_2" in xarr.coords - # inferred (basis) parameters not exported - assert "dummy_dac_ch1" not in xarr.coords - assert "dummy_dac_ch2" not in xarr.coords + assert xarr.coords["del_param_2"].shape == (num_points_y,) + assert xarr.coords["del_param_2"].dims == ("del_param_2",) + + assert "dummy_dac_ch1" in xarr.coords + assert xarr.coords["dummy_dac_ch1"].shape == (num_points_x,) + assert xarr.coords["dummy_dac_ch1"].dims == 
("del_param_1",) + + assert "dummy_dac_ch2" in xarr.coords + assert xarr.coords["dummy_dac_ch2"].shape == (num_points_y,) + assert xarr.coords["dummy_dac_ch2"].dims == ("del_param_2",) - assert xarr.meas_parameter.dims == ("del_param_1", "del_param_2") - assert xarr.meas_parameter.shape == (num_points_x, num_points_y) + assert xarr["meas_parameter"].dims == ("del_param_1", "del_param_2") + assert xarr["meas_parameter"].shape == (num_points_x, num_points_y) # pandas export df = dataset.to_pandas_dataframe() From ea73ec109971f0b0574dae26245821c9c340dc31 Mon Sep 17 00:00:00 2001 From: "Jens H. Nielsen" Date: Thu, 28 Aug 2025 10:57:45 +0200 Subject: [PATCH 02/13] Add test for inference --- tests/dataset/test_dataset_export.py | 139 +++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) diff --git a/tests/dataset/test_dataset_export.py b/tests/dataset/test_dataset_export.py index 04415fa3c28..d75be27c7f5 100644 --- a/tests/dataset/test_dataset_export.py +++ b/tests/dataset/test_dataset_export.py @@ -1848,3 +1848,142 @@ def test_netcdf_export_with_mixed_timestamp_raw( # Verify timestamp_raw values are correct assert loaded_ds.run_timestamp_raw == ds.run_timestamp_raw assert loaded_ds.completed_timestamp_raw is None + + +@given(data=hst.data()) +@settings( + max_examples=10, + suppress_health_check=(HealthCheck.function_scoped_fixture,), + deadline=None, +) +def test_measurement_hypothesis_nd_grid_with_inferred_param( + data: hst.DataObject, experiment: Experiment, caplog: LogCaptureFixture +) -> None: + """ + Randomized ND sweep using Measurement context manager with an inferred parameter: + - Draw N in [2, 4] + - For each dimension i, draw number of points n_i in [1, 5] + - Sweep each ManualParameter over a linspace of length n_i + - Choose m in [1, N-1] and a subset of m swept parameters for an inferred coord + - Register an inferred parameter depending on that subset and add its values + - Measure a deterministic function of the setpoints + - Assert xarray dims, coords (including inferred), and data match expectation + """ + # number of dimensions and points per dimension + n_dims = data.draw(hst.integers(min_value=2, max_value=4), label="n_dims") + points_per_dim = [ + data.draw(hst.integers(min_value=1, max_value=5), label=f"n_points_dim_{i}") + for i in range(n_dims) + ] + + # build setpoint arrays and names + sp_names = [f"x{i}" for i in range(n_dims)] + sp_values: list[np.ndarray] = [ + np.linspace(0.0, float(npts - 1), npts) for npts in points_per_dim + ] + + # choose subset for inferred parameter (strict subset) + m = data.draw(hst.integers(min_value=1, max_value=n_dims - 1), label="m") + inf_indices = sorted( + data.draw( + hst.lists( + hst.integers(min_value=0, max_value=n_dims - 1), + min_size=m, + max_size=m, + unique=True, + ), + label="inf_indices", + ) + ) + inf_sp_names = [sp_names[i] for i in inf_indices] + + # weights for measured signal + weights = [(i + 1) for i in range(n_dims)] + + # Setup measurement with shapes so xarray direct path is used + meas = Measurement(exp=experiment, name="nd_grid_with_inferred") + # register setpoints + for name in sp_names: + meas.register_custom_parameter(name, paramtype="numeric") + # register inferred parameter (from subset of setpoints) + meas.register_custom_parameter( + "inf", basis=tuple(inf_sp_names), paramtype="numeric" + ) + # register measured parameter depending on all setpoints + meas.register_custom_parameter( + "signal", setpoints=tuple(sp_names), paramtype="numeric" + ) + meas.set_shapes({"signal": 
tuple(points_per_dim)}) + + # run measurement over full grid + with meas.run() as datasaver: + # iterate over grid indices + for idx in np.ndindex(*points_per_dim): + # collect setpoint values for this point + sp_items: list[tuple[str, float]] = [ + (sp_names[k], float(sp_values[k][idx[k]])) for k in range(n_dims) + ] + # measured signal: weighted sum of all setpoints + signal_val = float( + sum(weights[k] * float(sp_values[k][idx[k]]) for k in range(n_dims)) + ) + # inferred value: sum over selected subset of setpoints + inf_val = float(sum(float(sp_values[k][idx[k]]) for k in inf_indices)) + results: list[tuple[str, float]] = [ + *sp_items, + ("inf", inf_val), + ("signal", signal_val), + ] + datasaver.add_result(*results) + + ds = datasaver.dataset + + # export to xarray and ensure direct path used + caplog.clear() + with caplog.at_level(logging.INFO): + xr_ds = ds.to_xarray_dataset() + + assert any( + "Exporting signal to xarray using direct method" in record.message + for record in caplog.records + ) + + # Expected sizes per coordinate (all setpoints) + expected_sizes = {name: len(vals) for name, vals in zip(sp_names, sp_values)} + assert xr_ds.sizes == expected_sizes + + # Check setpoint coords contents and order + for name, vals in zip(sp_names, sp_values): + assert name in xr_ds.coords + np.testing.assert_allclose(xr_ds.coords[name].values, vals) + + # Measured data dims and values + assert "signal" in xr_ds.data_vars + assert xr_ds["signal"].dims == tuple(sp_names) + + grids_all = np.meshgrid(*sp_values, indexing="ij") + expected_signal = np.zeros(tuple(points_per_dim), dtype=float) + for i, grid in enumerate(grids_all): + expected_signal += weights[i] * grid.astype(float) + np.testing.assert_allclose(xr_ds["signal"].values, expected_signal) + + # Inferred coord should be present with dims equal to the subset order + assert "inf" in xr_ds.coords + expected_inf_dims = tuple(inf_sp_names) + assert xr_ds.coords["inf"].dims == expected_inf_dims + + # Build expected inferred grid based only on the subset dims + subset_values = [sp_values[i] for i in inf_indices] + grids_subset = np.meshgrid(*subset_values, indexing="ij") if subset_values else [] + expected_inf = np.zeros(tuple(points_per_dim[i] for i in inf_indices), dtype=float) + for grid in grids_subset: + expected_inf += grid.astype(float) + np.testing.assert_allclose(xr_ds.coords["inf"].values, expected_inf) + + # The indexes of the inferred coord must correspond to the axes it depends on + # i.e., keys should match the inferred-from setpoint names, and each index equal + # to the dataset's index for that dimension + inf_indexes = xr_ds.coords["inf"].indexes + assert set(inf_indexes.keys()) == set(inf_sp_names) + for dim in inf_sp_names: + assert inf_indexes[dim].equals(xr_ds.indexes[dim]) From 14c1e9fb2e3645a85c96757bb23c8c6858031c2c Mon Sep 17 00:00:00 2001 From: "Jens H. 
Nielsen" Date: Thu, 28 Aug 2025 13:29:58 +0200 Subject: [PATCH 03/13] Add test where loop is over infeered parameter --- tests/dataset/test_dataset_export.py | 77 ++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) diff --git a/tests/dataset/test_dataset_export.py b/tests/dataset/test_dataset_export.py index d75be27c7f5..5c9007cfc62 100644 --- a/tests/dataset/test_dataset_export.py +++ b/tests/dataset/test_dataset_export.py @@ -1987,3 +1987,80 @@ def test_measurement_hypothesis_nd_grid_with_inferred_param( assert set(inf_indexes.keys()) == set(inf_sp_names) for dim in inf_sp_names: assert inf_indexes[dim].equals(xr_ds.indexes[dim]) + + +def test_measurement_2d_with_inferred_setpoint( + experiment: Experiment, caplog: LogCaptureFixture +) -> None: + """ + Sweep two parameters (x, y) where y is inferred from one or more basis parameters. + Verify that xarray export uses direct method, signal dims match, and basis + parameters appear as inferred coordinates with indexes corresponding to y. + """ + # Grid sizes + nx, ny = 3, 4 + x_vals = np.linspace(0.0, 2.0, nx) + # Define basis parameters for y and compute y from these + y_b0_vals = np.linspace(10.0, 13.0, ny) + y_b1_vals = np.linspace(-1.0, 2.0, ny) + # y is inferred from (y_b0, y_b1) + y_vals = y_b0_vals + 2.0 * y_b1_vals + + meas = Measurement(exp=experiment, name="2d_with_inferred_setpoint") + # Register setpoint x + meas.register_custom_parameter("x", paramtype="numeric") + # Register basis params for y + meas.register_custom_parameter("y_b0", paramtype="numeric") + meas.register_custom_parameter("y_b1", paramtype="numeric") + # Register y as setpoint inferred from basis + meas.register_custom_parameter("y", basis=("y_b0", "y_b1"), paramtype="numeric") + # Register measured parameter depending on (x, y) + meas.register_custom_parameter("signal", setpoints=("x", "y"), paramtype="numeric") + meas.set_shapes({"signal": (nx, ny)}) + + with meas.run() as datasaver: + for ix in range(nx): + for iy in range(ny): + x = float(x_vals[ix]) + y_b0 = float(y_b0_vals[iy]) + y_b1 = float(y_b1_vals[iy]) + y = float(y_vals[iy]) + signal = x + 3.0 * y # deterministic function + datasaver.add_result( + ("x", x), + ("y_b0", y_b0), + ("y_b1", y_b1), + ("y", y), + ("signal", signal), + ) + + ds = datasaver.dataset + + caplog.clear() + with caplog.at_level(logging.INFO): + xr_ds = ds.to_xarray_dataset() + + assert any( + "Exporting signal to xarray using direct method" in record.message + for record in caplog.records + ) + + # Sizes and coords + assert xr_ds.sizes == {"x": nx, "y": ny} + np.testing.assert_allclose(xr_ds.coords["x"].values, x_vals) + np.testing.assert_allclose(xr_ds.coords["y"].values, y_vals) + + # Signal dims and values + assert xr_ds["signal"].dims == ("x", "y") + expected_signal = x_vals[:, None] + 3.0 * y_vals[None, :] + np.testing.assert_allclose(xr_ds["signal"].values, expected_signal) + + # Inferred coords for y_b0 and y_b1 exist with dims only along y + for name, vals in ("y_b0", y_b0_vals), ("y_b1", y_b1_vals): + assert name in xr_ds.coords + assert xr_ds.coords[name].dims == ("y",) + np.testing.assert_allclose(xr_ds.coords[name].values, vals) + # Indexes of inferred coords should correspond to the y axis index + inf_idx = xr_ds.coords[name].indexes + assert set(inf_idx.keys()) == {"y"} + assert inf_idx["y"].equals(xr_ds.indexes["y"]) From 4a2fa75e55f81fe14b2ff2b2d12e689b6d1d5a2a Mon Sep 17 00:00:00 2001 From: "Jens H. 
Nielsen" Date: Thu, 28 Aug 2025 14:03:06 +0200 Subject: [PATCH 04/13] Insert derived parameters in data --- .../dataset/exporters/export_to_xarray.py | 47 +++++++++++----- tests/dataset/test_dataset_export.py | 56 +++++++++++++++++++ 2 files changed, 89 insertions(+), 14 deletions(-) diff --git a/src/qcodes/dataset/exporters/export_to_xarray.py b/src/qcodes/dataset/exporters/export_to_xarray.py index 7e64f68e828..fcd42704726 100644 --- a/src/qcodes/dataset/exporters/export_to_xarray.py +++ b/src/qcodes/dataset/exporters/export_to_xarray.py @@ -173,6 +173,7 @@ def _xarray_data_array_direct( ] extra_coords: dict[str, tuple[tuple[str, ...], npt.NDArray]] = {} + extra_data_vars: dict[str, tuple[tuple[str, ...], npt.NDArray]] = {} for inf in inferred: # skip parameters already used as primary coordinate axes if inf.name in dep_axis: @@ -187,26 +188,37 @@ def _xarray_data_array_direct( related_top_level = inf_related.intersection({meas_paramspec}) if len(related_top_level) > 0: - raise NotImplementedError( - "Adding inferred coords related to top level param is not yet supported" - ) - - inf_data = subdict[inf.name][ - tuple(slice(None) if dep in related_deps else 0 for dep in deps) - ] - inf_coords = [dep.name for dep in deps if dep in related_deps] + # If inferred param is related to the top-level measurement parameter, + # add it as a data variable with the full dependency dimensions + inf_data_full = subdict[inf.name] + inf_dims_full = tuple(dep_axis.keys()) + extra_data_vars[inf.name] = (inf_dims_full, inf_data_full) + else: + # Otherwise, add as a coordinate along the related dependency axes only + inf_data = subdict[inf.name][ + tuple(slice(None) if dep in related_deps else 0 for dep in deps) + ] + inf_coords = [dep.name for dep in deps if dep in related_deps] - extra_coords[inf.name] = (tuple(inf_coords), inf_data) + extra_coords[inf.name] = (tuple(inf_coords), inf_data) # Compose coordinates dict including dependency axes and extra inferred coords coords: dict[str, tuple[tuple[str, ...], npt.NDArray] | npt.NDArray] coords = {**dep_axis, **extra_coords} - ds = xr.Dataset( - {name: (tuple(dep_axis.keys()), subdict[name])}, - coords=coords, - ) - return ds[name] + # Compose data variables dict including measured var and any inferred data vars + data_vars: dict[str, tuple[tuple[str, ...], npt.NDArray]] = { + name: (tuple(dep_axis.keys()), subdict[name]) + } + data_vars.update(extra_data_vars) + + ds = xr.Dataset(data_vars, coords=coords) + da = ds[name] + if len(extra_data_vars) > 0: + # stash extra data vars to be added at dataset assembly time + # mapping: var_name -> (dims_tuple, numpy array) + da.attrs["_qcodes_extra_data_vars"] = extra_data_vars + return da def load_to_xarray_dataarray_dict( @@ -277,6 +289,13 @@ def load_to_xarray_dataset( # and python/typing#445 are resolved. 
xrdataset = xr.Dataset(cast("dict[Hashable, xr.DataArray]", data_xrdarray_dict)) + # add any stashed extra data variables created during direct export + for _, dataarray in data_xrdarray_dict.items(): + extras = dataarray.attrs.pop("_qcodes_extra_data_vars", None) + if isinstance(extras, dict): + for var_name, (dims, values) in extras.items(): + xrdataset[var_name] = (dims, values) + _add_param_spec_to_xarray_coords(dataset, xrdataset) _add_param_spec_to_xarray_data_vars(dataset, xrdataset) _add_metadata_to_xarray(dataset, xrdataset) diff --git a/tests/dataset/test_dataset_export.py b/tests/dataset/test_dataset_export.py index 5c9007cfc62..a259b726cc6 100644 --- a/tests/dataset/test_dataset_export.py +++ b/tests/dataset/test_dataset_export.py @@ -2064,3 +2064,59 @@ def test_measurement_2d_with_inferred_setpoint( inf_idx = xr_ds.coords[name].indexes assert set(inf_idx.keys()) == {"y"} assert inf_idx["y"].equals(xr_ds.indexes["y"]) + + +def test_measurement_2d_top_level_inferred_is_data_var( + experiment: Experiment, caplog: LogCaptureFixture +) -> None: + """ + If an inferred parameter is related to the top-level measured parameter, + it must be exported as a data variable (not a coordinate) with the full + dependency dimensions. + """ + nx, ny = 2, 3 + x_vals = np.linspace(0.0, 1.0, nx) + y_vals = np.linspace(10.0, 12.0, ny) + + # Define a measured signal and an inferred param both defined on (x, y) + # The inferred param is related to the measured top-level param in the graph + meas = Measurement(exp=experiment, name="2d_top_level_inferred") + meas.register_custom_parameter("x", paramtype="numeric") + meas.register_custom_parameter("y", paramtype="numeric") + # Register measured top-level + meas.register_custom_parameter("signal", setpoints=("x", "y"), paramtype="numeric") + # Register inferred related to top-level (basis includes the measured top-level) + meas.register_custom_parameter("derived", basis=("signal",), paramtype="numeric") + meas.set_shapes({"signal": (nx, ny)}) + + with meas.run() as datasaver: + for ix in range(nx): + for iy in range(ny): + x = float(x_vals[ix]) + y = float(y_vals[iy]) + signal = x + y + derived = 2.0 * signal # inferred from top-level + datasaver.add_result( + ("x", x), ("y", y), ("signal", signal), ("derived", derived) + ) + + ds = datasaver.dataset + caplog.clear() + with caplog.at_level(logging.INFO): + xr_ds = ds.to_xarray_dataset() + + # Direct path log should be present + assert any( + "Exporting signal to xarray using direct method" in record.message + for record in caplog.records + ) + + # The derived param should be a data variable with dims (x, y), not a coord + assert "derived" in xr_ds.data_vars + assert "derived" not in xr_ds.coords + assert xr_ds["derived"].dims == ("x", "y") + + expected_signal = x_vals[:, None] + y_vals[None, :] + expected_derived = 2.0 * expected_signal + np.testing.assert_allclose(xr_ds["signal"].values, expected_signal) + np.testing.assert_allclose(xr_ds["derived"].values, expected_derived) From 7ce4a0251b052d84f10c5913537390420c79f0b6 Mon Sep 17 00:00:00 2001 From: "Jens H. 
Nielsen" Date: Wed, 3 Sep 2025 08:33:55 +0200 Subject: [PATCH 05/13] Merge datasets pr top level param in xarray export --- src/qcodes/dataset/data_set_cache.py | 11 +-- .../dataset/exporters/export_to_xarray.py | 73 +++++++------------ 2 files changed, 34 insertions(+), 50 deletions(-) diff --git a/src/qcodes/dataset/data_set_cache.py b/src/qcodes/dataset/data_set_cache.py index 915195020cf..dc3266bb541 100644 --- a/src/qcodes/dataset/data_set_cache.py +++ b/src/qcodes/dataset/data_set_cache.py @@ -79,7 +79,7 @@ def data(self) -> ParameterData: Loads data from the database on disk if needed and returns the cached data. The cached data is in almost the same format as :py:class:`.DataSet.get_parameter_data`. However if a shape is provided - as part of the dataset metadata and fewer datapoints than expected are + as part of the dataset metadata and fewer data points than expected are returned the missing values will be replaced by `NaN` or zeroes depending on the datatype. @@ -118,7 +118,7 @@ def _empty_data_dict( def prepare(self) -> None: """ - Set up the internal datastructure of the cache. + Set up the internal data structure of the cache. Must be called after the dataset has been setup with interdependencies but before data is added to the dataset. """ @@ -200,9 +200,10 @@ def to_xarray_dataarray_dict( """ data = self.data() - return load_to_xarray_dataarray_dict( + data_dict = load_to_xarray_dataarray_dict( self._dataset, data, use_multi_index=use_multi_index ) + return data_dict def to_xarray_dataset( self, *, use_multi_index: Literal["auto", "always", "never"] = "auto" @@ -503,11 +504,11 @@ def load_data_from_db(self) -> None: ) def _load_xr_dataset(self) -> xr.Dataset: - import cf_xarray as cfxr + import cf_xarray as cf_xr import xarray as xr loaded_data = xr.load_dataset(self._xr_dataset_path, engine="h5netcdf") - loaded_data = cfxr.coding.decode_compress_to_multi_index(loaded_data) + loaded_data = cf_xr.coding.decode_compress_to_multi_index(loaded_data) export_info = ExportInfo.from_str(loaded_data.attrs.get("export_info", "")) export_info.export_paths["nc"] = str(self._xr_dataset_path) loaded_data.attrs["export_info"] = export_info.to_str() diff --git a/src/qcodes/dataset/exporters/export_to_xarray.py b/src/qcodes/dataset/exporters/export_to_xarray.py index fcd42704726..5c0a86f7657 100644 --- a/src/qcodes/dataset/exporters/export_to_xarray.py +++ b/src/qcodes/dataset/exporters/export_to_xarray.py @@ -4,7 +4,7 @@ import warnings from importlib.metadata import version from math import prod -from typing import TYPE_CHECKING, Literal, cast +from typing import TYPE_CHECKING, Literal from packaging import version as pversion @@ -61,20 +61,18 @@ def _calculate_index_shape(idx: pd.Index | pd.MultiIndex) -> dict[Hashable, int] return expanded_shape -def _load_to_xarray_dataarray_dict_no_metadata( +def _load_to_xarray_dataset_dict_no_metadata( dataset: DataSetProtocol, datadict: Mapping[str, Mapping[str, npt.NDArray]], *, use_multi_index: Literal["auto", "always", "never"] = "auto", -) -> dict[str, xr.DataArray]: - import xarray as xr - +) -> dict[str, xr.Dataset]: if use_multi_index not in ("auto", "always", "never"): raise ValueError( f"Invalid value for use_multi_index. 
Expected one of 'auto', 'always', 'never' but got {use_multi_index}" ) - data_xrdarray_dict: dict[str, xr.DataArray] = {} + data_xrdarray_dict: dict[str, xr.Dataset] = {} for name, subdict in datadict.items(): shape_is_consistent = ( @@ -96,11 +94,9 @@ def _load_to_xarray_dataarray_dict_no_metadata( ) if index is None: - xrdarray: xr.DataArray = ( - _data_to_dataframe(subdict, index=index) - .to_xarray() - .get(name, xr.DataArray()) - ) + xrdarray: xr.Dataset = _data_to_dataframe( + subdict, index=index + ).to_xarray() data_xrdarray_dict[name] = xrdarray elif index_is_unique: df = _data_to_dataframe(subdict, index) @@ -109,9 +105,7 @@ def _load_to_xarray_dataarray_dict_no_metadata( ) else: df = _data_to_dataframe(subdict, index) - xrdata_temp = df.reset_index().to_xarray() - for _name in subdict: - data_xrdarray_dict[_name] = xrdata_temp[_name] + data_xrdarray_dict[name] = df.reset_index().to_xarray() return data_xrdarray_dict @@ -122,7 +116,7 @@ def _xarray_data_array_from_pandas_multi_index( name: str, df: pd.DataFrame, index: pd.Index | pd.MultiIndex, -) -> xr.DataArray: +) -> xr.Dataset: import pandas as pd import xarray as xr @@ -148,16 +142,16 @@ def _xarray_data_array_from_pandas_multi_index( ) coords = xr.Coordinates.from_pandas_multiindex(df.index, "multi_index") - xrdarray = xr.DataArray(df[name], coords=coords) + xrdarray = xr.DataArray(df[name], coords=coords).to_dataset(name=name) else: - xrdarray = df.to_xarray().get(name, xr.DataArray()) + xrdarray = df.to_xarray() return xrdarray def _xarray_data_array_direct( dataset: DataSetProtocol, name: str, subdict: Mapping[str, npt.NDArray] -) -> xr.DataArray: +) -> xr.Dataset: import xarray as xr meas_paramspec = dataset.description.interdeps.graph.nodes[name]["value"] @@ -213,12 +207,7 @@ def _xarray_data_array_direct( data_vars.update(extra_data_vars) ds = xr.Dataset(data_vars, coords=coords) - da = ds[name] - if len(extra_data_vars) > 0: - # stash extra data vars to be added at dataset assembly time - # mapping: var_name -> (dims_tuple, numpy array) - da.attrs["_qcodes_extra_data_vars"] = extra_data_vars - return da + return ds def load_to_xarray_dataarray_dict( @@ -227,17 +216,20 @@ def load_to_xarray_dataarray_dict( *, use_multi_index: Literal["auto", "always", "never"] = "auto", ) -> dict[str, xr.DataArray]: - dataarrays = _load_to_xarray_dataarray_dict_no_metadata( + xr_datasets = _load_to_xarray_dataset_dict_no_metadata( dataset, datadict, use_multi_index=use_multi_index ) + data_arrays: dict[str, xr.DataArray] = {} - for dataname, dataarray in dataarrays.items(): - _add_param_spec_to_xarray_coords(dataset, dataarray) + for dataname, xr_dataset in xr_datasets.items(): + data_array = xr_dataset[dataname] + _add_param_spec_to_xarray_coords(dataset, data_array) paramspec_dict = _paramspec_dict_with_extras(dataset, str(dataname)) - dataarray.attrs.update(paramspec_dict.items()) - _add_metadata_to_xarray(dataset, dataarray) + data_array.attrs.update(paramspec_dict.items()) + _add_metadata_to_xarray(dataset, data_array) + data_arrays[dataname] = data_array - return dataarrays + return data_arrays def _add_metadata_to_xarray( @@ -281,26 +273,17 @@ def load_to_xarray_dataset( ) -> xr.Dataset: import xarray as xr - data_xrdarray_dict = _load_to_xarray_dataarray_dict_no_metadata( + xr_dataset_dict = _load_to_xarray_dataset_dict_no_metadata( dataset, data, use_multi_index=use_multi_index ) - # Casting Hashable for the key type until python/mypy#1114 - # and python/typing#445 are resolved. 
- xrdataset = xr.Dataset(cast("dict[Hashable, xr.DataArray]", data_xrdarray_dict)) - - # add any stashed extra data variables created during direct export - for _, dataarray in data_xrdarray_dict.items(): - extras = dataarray.attrs.pop("_qcodes_extra_data_vars", None) - if isinstance(extras, dict): - for var_name, (dims, values) in extras.items(): - xrdataset[var_name] = (dims, values) + xr_dataset = xr.merge(xr_dataset_dict.values(), compat="equals", join="outer") - _add_param_spec_to_xarray_coords(dataset, xrdataset) - _add_param_spec_to_xarray_data_vars(dataset, xrdataset) - _add_metadata_to_xarray(dataset, xrdataset) + _add_param_spec_to_xarray_coords(dataset, xr_dataset) + _add_param_spec_to_xarray_data_vars(dataset, xr_dataset) + _add_metadata_to_xarray(dataset, xr_dataset) - return xrdataset + return xr_dataset def _add_param_spec_to_xarray_coords( From 025f90d275a754cbd494cd3c8daa54ea45ecaf1d Mon Sep 17 00:00:00 2001 From: "Jens H. Nielsen" Date: Wed, 3 Sep 2025 11:58:01 +0200 Subject: [PATCH 06/13] Fix typos in export_to_xarray --- .../dataset/exporters/export_to_xarray.py | 82 ++++++++++--------- 1 file changed, 43 insertions(+), 39 deletions(-) diff --git a/src/qcodes/dataset/exporters/export_to_xarray.py b/src/qcodes/dataset/exporters/export_to_xarray.py index 5c0a86f7657..16c320505c2 100644 --- a/src/qcodes/dataset/exporters/export_to_xarray.py +++ b/src/qcodes/dataset/exporters/export_to_xarray.py @@ -6,7 +6,7 @@ from math import prod from typing import TYPE_CHECKING, Literal -from packaging import version as pversion +from packaging import version as p_version from qcodes.dataset.linked_datasets.links import links_to_str @@ -72,42 +72,42 @@ def _load_to_xarray_dataset_dict_no_metadata( f"Invalid value for use_multi_index. Expected one of 'auto', 'always', 'never' but got {use_multi_index}" ) - data_xrdarray_dict: dict[str, xr.Dataset] = {} + xr_dataset_dict: dict[str, xr.Dataset] = {} - for name, subdict in datadict.items(): + for name, sub_dict in datadict.items(): shape_is_consistent = ( dataset.description.shapes is not None and name in dataset.description.shapes - and subdict[name].shape == dataset.description.shapes[name] + and sub_dict[name].shape == dataset.description.shapes[name] ) if shape_is_consistent and use_multi_index != "always": _LOG.info("Exporting %s to xarray using direct method", name) - data_xrdarray_dict[name] = _xarray_data_array_direct(dataset, name, subdict) + xr_dataset_dict[name] = _xarray_data_array_direct(dataset, name, sub_dict) else: _LOG.info("Exporting %s to xarray via pandas index", name) index = _generate_pandas_index( - subdict, dataset.description.interdeps, top_level_param_name=name + sub_dict, dataset.description.interdeps, top_level_param_name=name ) index_is_unique = ( len(index.unique()) == len(index) if index is not None else False ) if index is None: - xrdarray: xr.Dataset = _data_to_dataframe( - subdict, index=index + xr_dataset: xr.Dataset = _data_to_dataframe( + data=sub_dict, index=index ).to_xarray() - data_xrdarray_dict[name] = xrdarray + xr_dataset_dict[name] = xr_dataset elif index_is_unique: - df = _data_to_dataframe(subdict, index) - data_xrdarray_dict[name] = _xarray_data_array_from_pandas_multi_index( + df = _data_to_dataframe(sub_dict, index) + xr_dataset_dict[name] = _xarray_data_array_from_pandas_multi_index( dataset, use_multi_index, name, df, index ) else: - df = _data_to_dataframe(subdict, index) - data_xrdarray_dict[name] = df.reset_index().to_xarray() + df = _data_to_dataframe(sub_dict, index) + 
xr_dataset_dict[name] = df.reset_index().to_xarray() - return data_xrdarray_dict + return xr_dataset_dict def _xarray_data_array_from_pandas_multi_index( @@ -142,15 +142,15 @@ def _xarray_data_array_from_pandas_multi_index( ) coords = xr.Coordinates.from_pandas_multiindex(df.index, "multi_index") - xrdarray = xr.DataArray(df[name], coords=coords).to_dataset(name=name) + xr_dataset = xr.DataArray(df[name], coords=coords).to_dataset(name=name) else: - xrdarray = df.to_xarray() + xr_dataset = df.to_xarray() - return xrdarray + return xr_dataset def _xarray_data_array_direct( - dataset: DataSetProtocol, name: str, subdict: Mapping[str, npt.NDArray] + dataset: DataSetProtocol, name: str, sub_dict: Mapping[str, npt.NDArray] ) -> xr.Dataset: import xarray as xr @@ -161,7 +161,7 @@ def _xarray_data_array_direct( # Build coordinate axes from direct dependencies preserving their order dep_axis: dict[str, npt.NDArray] = {} for axis, dep in enumerate(deps): - dep_array = subdict[dep.name] + dep_array = sub_dict[dep.name] dep_axis[dep.name] = dep_array[ tuple(slice(None) if i == axis else 0 for i in range(dep_array.ndim)) ] @@ -173,7 +173,7 @@ def _xarray_data_array_direct( if inf.name in dep_axis: continue # add only if data for this parameter is available - if inf.name not in subdict: + if inf.name not in sub_dict: continue inf_related = dataset.description.interdeps.find_all_parameters_in_tree(inf) @@ -184,12 +184,12 @@ def _xarray_data_array_direct( if len(related_top_level) > 0: # If inferred param is related to the top-level measurement parameter, # add it as a data variable with the full dependency dimensions - inf_data_full = subdict[inf.name] + inf_data_full = sub_dict[inf.name] inf_dims_full = tuple(dep_axis.keys()) extra_data_vars[inf.name] = (inf_dims_full, inf_data_full) else: # Otherwise, add as a coordinate along the related dependency axes only - inf_data = subdict[inf.name][ + inf_data = sub_dict[inf.name][ tuple(slice(None) if dep in related_deps else 0 for dep in deps) ] inf_coords = [dep.name for dep in deps if dep in related_deps] @@ -202,7 +202,7 @@ def _xarray_data_array_direct( # Compose data variables dict including measured var and any inferred data vars data_vars: dict[str, tuple[tuple[str, ...], npt.NDArray]] = { - name: (tuple(dep_axis.keys()), subdict[name]) + name: (tuple(dep_axis.keys()), sub_dict[name]) } data_vars.update(extra_data_vars) @@ -233,9 +233,9 @@ def load_to_xarray_dataarray_dict( def _add_metadata_to_xarray( - dataset: DataSetProtocol, xrdataset: xr.Dataset | xr.DataArray + dataset: DataSetProtocol, xr_dataset: xr.Dataset | xr.DataArray ) -> None: - xrdataset.attrs.update( + xr_dataset.attrs.update( { "ds_name": dataset.name, "sample_name": dataset.sample_name, @@ -252,17 +252,17 @@ def _add_metadata_to_xarray( } ) # Use -1 as sentinel value for None timestamps since NetCDF doesn't support None - xrdataset.attrs["run_timestamp_raw"] = ( + xr_dataset.attrs["run_timestamp_raw"] = ( dataset.run_timestamp_raw if dataset.run_timestamp_raw is not None else -1 ) - xrdataset.attrs["completed_timestamp_raw"] = ( + xr_dataset.attrs["completed_timestamp_raw"] = ( dataset.completed_timestamp_raw if dataset.completed_timestamp_raw is not None else -1 ) if len(dataset.metadata) > 0: for metadata_tag, metadata in dataset.metadata.items(): - xrdataset.attrs[metadata_tag] = metadata + xr_dataset.attrs[metadata_tag] = metadata def load_to_xarray_dataset( @@ -287,20 +287,20 @@ def load_to_xarray_dataset( def _add_param_spec_to_xarray_coords( - dataset: DataSetProtocol, 
xrdataset: xr.Dataset | xr.DataArray + dataset: DataSetProtocol, xr_dataset: xr.Dataset | xr.DataArray ) -> None: - for coord in xrdataset.coords: + for coord in xr_dataset.coords: if coord not in ("index", "multi_index"): paramspec_dict = _paramspec_dict_with_extras(dataset, str(coord)) - xrdataset.coords[str(coord)].attrs.update(paramspec_dict.items()) + xr_dataset.coords[str(coord)].attrs.update(paramspec_dict.items()) def _add_param_spec_to_xarray_data_vars( - dataset: DataSetProtocol, xrdataset: xr.Dataset + dataset: DataSetProtocol, xr_dataset: xr.Dataset ) -> None: - for data_var in xrdataset.data_vars: + for data_var in xr_dataset.data_vars: paramspec_dict = _paramspec_dict_with_extras(dataset, str(data_var)) - xrdataset.data_vars[str(data_var)].attrs.update(paramspec_dict.items()) + xr_dataset.data_vars[str(data_var)].attrs.update(paramspec_dict.items()) def _paramspec_dict_with_extras( @@ -318,7 +318,7 @@ def _paramspec_dict_with_extras( def xarray_to_h5netcdf_with_complex_numbers( xarray_dataset: xr.Dataset, file_path: str | Path, compute: bool = True ) -> None: - import cf_xarray as cfxr + import cf_xarray as cf_xr from pandas import MultiIndex has_multi_index = any( @@ -329,7 +329,7 @@ def xarray_to_h5netcdf_with_complex_numbers( if has_multi_index: # as of xarray 2023.8.0 there is no native support # for multi index so use cf_xarray for that - internal_ds = cfxr.coding.encode_multi_index_as_compress( + internal_ds = cf_xr.coding.encode_multi_index_as_compress( xarray_dataset, ) else: @@ -343,11 +343,15 @@ def xarray_to_h5netcdf_with_complex_numbers( # these are the versions of xarray / h5netcdf respectively required to support complex # values without fallback to invalid features. Once these are the min versions supported # we can drop the fallback code here including the warning suppression. - xarry_too_old = pversion.Version(version("xarray")) < pversion.Version("2024.10.0") - h5netcdf_too_old = pversion.Version(version("h5netcdf")) < pversion.Version("1.4.0") + xarray_too_old = p_version.Version(version("xarray")) < p_version.Version( + "2024.10.0" + ) + h5netcdf_too_old = p_version.Version(version("h5netcdf")) < p_version.Version( + "1.4.0" + ) allow_invalid_netcdf = dataset_has_complex_vals and ( - xarry_too_old or h5netcdf_too_old + xarray_too_old or h5netcdf_too_old ) with warnings.catch_warnings(): From af09b1f0d19fa6fecc26447313e36320b9b29a30 Mon Sep 17 00:00:00 2001 From: "Jens H. 
Nielsen" Date: Thu, 4 Sep 2025 11:28:26 +0200 Subject: [PATCH 07/13] Add test that coords are not duplicated --- tests/dataset/test_dataset_export.py | 57 ++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tests/dataset/test_dataset_export.py b/tests/dataset/test_dataset_export.py index a259b726cc6..9331d7a7687 100644 --- a/tests/dataset/test_dataset_export.py +++ b/tests/dataset/test_dataset_export.py @@ -2066,6 +2066,63 @@ def test_measurement_2d_with_inferred_setpoint( assert inf_idx["y"].equals(xr_ds.indexes["y"]) +def test_measurement_2d_with_inferred_setpoint_from_setpoint( + experiment: Experiment, caplog: LogCaptureFixture +) -> None: + """ + This is not a good idea but a user can do this + """ + # Grid sizes + nx, ny = 3, 4 + x_vals = np.linspace(0.0, 2.0, nx) + y_vals = np.linspace(10.0, 13.0, ny) + + meas = Measurement(exp=experiment, name="2d_with_inferred_setpoint") + # Register setpoint x + meas.register_custom_parameter("x", paramtype="numeric") + + # Register y as setpoint inferred from basis + meas.register_custom_parameter("y", basis=("x"), paramtype="numeric") + # Register measured parameter depending on (x, y) + meas.register_custom_parameter("signal", setpoints=("x", "y"), paramtype="numeric") + meas.set_shapes({"signal": (nx, ny)}) + + with meas.run() as datasaver: + for ix in range(nx): + for iy in range(ny): + x = float(x_vals[ix]) + y = float(y_vals[iy]) + signal = x + 3.0 * y # deterministic function + datasaver.add_result( + ("x", x), + ("y", y), + ("signal", signal), + ) + + ds = datasaver.dataset + + caplog.clear() + with caplog.at_level(logging.INFO): + xr_ds = ds.to_xarray_dataset() + + assert any( + "Exporting signal to xarray using direct method" in record.message + for record in caplog.records + ) + + # Sizes and coords + assert xr_ds.sizes == {"x": nx, "y": ny} + np.testing.assert_allclose(xr_ds.coords["x"].values, x_vals) + np.testing.assert_allclose(xr_ds.coords["y"].values, y_vals) + + assert len(xr_ds.coords) == 2 + + # Signal dims and values + assert xr_ds["signal"].dims == ("x", "y") + expected_signal = x_vals[:, None] + 3.0 * y_vals[None, :] + np.testing.assert_allclose(xr_ds["signal"].values, expected_signal) + + def test_measurement_2d_top_level_inferred_is_data_var( experiment: Experiment, caplog: LogCaptureFixture ) -> None: From 0d6590f945b5bd2bab74d27d829809a484cbc093 Mon Sep 17 00:00:00 2001 From: "Jens H. 
Nielsen" Date: Thu, 18 Sep 2025 14:37:21 +0200 Subject: [PATCH 08/13] Add to xarray_dataset_dict methods --- src/qcodes/dataset/data_set.py | 68 +++++++++++++++++++ src/qcodes/dataset/data_set_cache.py | 21 ++++++ src/qcodes/dataset/data_set_in_memory.py | 10 +++ src/qcodes/dataset/data_set_protocol.py | 8 +++ .../dataset/exporters/export_to_xarray.py | 19 ++++++ 5 files changed, 126 insertions(+) diff --git a/src/qcodes/dataset/data_set.py b/src/qcodes/dataset/data_set.py index 8ccd7dd91bc..f4072826698 100644 --- a/src/qcodes/dataset/data_set.py +++ b/src/qcodes/dataset/data_set.py @@ -97,6 +97,7 @@ from .exporters.export_to_xarray import ( load_to_xarray_dataarray_dict, load_to_xarray_dataset, + load_to_xarray_dataset_dict, xarray_to_h5netcdf_with_complex_numbers, ) from .subscriber import _Subscriber @@ -1031,6 +1032,73 @@ def to_xarray_dataarray_dict( return datadict + def to_xarray_dataset_dict( + self, + *params: str | ParamSpec | ParameterBase, + start: int | None = None, + end: int | None = None, + use_multi_index: Literal["auto", "always", "never"] = "auto", + ) -> dict[str, xr.Dataset]: + """ + Returns the values stored in the :class:`.DataSet` for the specified parameters + and their dependencies as a dict of :py:class:`xr.DataSet` s + Each element in the dict is indexed by the names of the requested + parameters. + + If no parameters are supplied data will be be + returned for all parameters in the :class:`.DataSet` that are not them self + dependencies of other parameters. + + If provided, the start and end arguments select a range of results + by result count (index). If the range is empty - that is, if the end is + less than or equal to the start, or if start is after the current end + of the :class:`.DataSet` - then a dict of empty :py:class:`xr.Dataset` s is + returned. + + The dependent parameters of the Dataset are normally used as coordinates of the + XArray dataframe. However if non unique values are found for the dependent parameter + values we will fall back to using an index as coordinates. + + Args: + *params: string parameter names, QCoDeS Parameter objects, and + ParamSpec objects. If no parameters are supplied data for + all parameters that are not a dependency of another + parameter will be returned. + start: start value of selection range (by result count); ignored + if None + end: end value of selection range (by results count); ignored if + None + use_multi_index: Should the data be exported using a multi index + rather than regular cartesian indexes. With regular cartesian + coordinates, the xarray dimensions are calculated from the sets or all + values along the setpoint axis of the QCoDeS dataset. Any position + in this grid not corresponding to a measured value will be filled + with a placeholder (typically NaN) potentially creating a sparse + dataset with significant storage overhead. + Multi index avoids this and is therefor better + suited for data that is known to not be on a grid. + If set to "auto" multi index will be used if projecting the data onto + a grid requires filling non measured values with NaN and the shapes + of the data has not been set in the run description. + + Returns: + Dictionary from requested parameter names to :py:class:`xr.Dataset` s + with the requested parameter(s) as a column(s) and coordinates + formed by the dependencies. 
+ + Example: + Return a dict of xr.Dataset with + + dataset_dict = ds.to_xarray_dataset_dict() + + """ + data = self.get_parameter_data(*params, start=start, end=end) + datadict = load_to_xarray_dataset_dict( + self, data, use_multi_index=use_multi_index + ) + + return datadict + def to_xarray_dataset( self, *params: str | ParamSpec | ParameterBase, diff --git a/src/qcodes/dataset/data_set_cache.py b/src/qcodes/dataset/data_set_cache.py index dc3266bb541..c9f27ec3547 100644 --- a/src/qcodes/dataset/data_set_cache.py +++ b/src/qcodes/dataset/data_set_cache.py @@ -17,6 +17,7 @@ from .exporters.export_to_xarray import ( load_to_xarray_dataarray_dict, load_to_xarray_dataset, + load_to_xarray_dataset_dict, ) if TYPE_CHECKING: @@ -205,6 +206,26 @@ def to_xarray_dataarray_dict( ) return data_dict + def to_xarray_dataset_dict( + self, *, use_multi_index: Literal["auto", "always", "never"] = "auto" + ) -> dict[str, xr.Dataset]: + """ + Returns the values stored in the :class:`.dataset.data_set.DataSet` as a dict of + :py:class:`xr.DataArray` s + Each element in the dict is indexed by the names of the dependent parameters. + + Returns: + Dictionary from requested parameter names to :py:class:`xr.DataArray` s + with the requested parameter(s) as a column(s) and coordinates + formed by the dependencies. + + """ + data = self.data() + data_dict = load_to_xarray_dataset_dict( + self._dataset, data, use_multi_index=use_multi_index + ) + return data_dict + def to_xarray_dataset( self, *, use_multi_index: Literal["auto", "always", "never"] = "auto" ) -> xr.Dataset: diff --git a/src/qcodes/dataset/data_set_in_memory.py b/src/qcodes/dataset/data_set_in_memory.py index 4bd9bc122df..7ecfc3aa9c6 100644 --- a/src/qcodes/dataset/data_set_in_memory.py +++ b/src/qcodes/dataset/data_set_in_memory.py @@ -858,6 +858,16 @@ def to_xarray_dataarray_dict( self._warn_if_set(*params, start=start, end=end) return self.cache.to_xarray_dataarray_dict() + def to_xarray_dataset_dict( + self, + *params: str | ParamSpec | ParameterBase, + start: int | None = None, + end: int | None = None, + use_multi_index: Literal["auto", "always", "never"] = "auto", + ) -> dict[str, xr.Dataset]: + self._warn_if_set(*params, start=start, end=end) + return self.cache.to_xarray_dataset_dict(use_multi_index=use_multi_index) + def to_xarray_dataset( self, *params: str | ParamSpec | ParameterBase, diff --git a/src/qcodes/dataset/data_set_protocol.py b/src/qcodes/dataset/data_set_protocol.py index 3b3fc1e6da6..8c8d94277af 100644 --- a/src/qcodes/dataset/data_set_protocol.py +++ b/src/qcodes/dataset/data_set_protocol.py @@ -233,6 +233,14 @@ def to_xarray_dataarray_dict( use_multi_index: Literal["auto", "always", "never"] = "auto", ) -> dict[str, xr.DataArray]: ... + def to_xarray_dataset_dict( + self, + *params: str | ParamSpec | ParameterBase, + start: int | None = None, + end: int | None = None, + use_multi_index: Literal["auto", "always", "never"] = "auto", + ) -> dict[str, xr.Dataset]: ... 
+ def to_xarray_dataset( self, *params: str | ParamSpec | ParameterBase, diff --git a/src/qcodes/dataset/exporters/export_to_xarray.py b/src/qcodes/dataset/exporters/export_to_xarray.py index 16c320505c2..39430d052c6 100644 --- a/src/qcodes/dataset/exporters/export_to_xarray.py +++ b/src/qcodes/dataset/exporters/export_to_xarray.py @@ -232,6 +232,25 @@ def load_to_xarray_dataarray_dict( return data_arrays +def load_to_xarray_dataset_dict( + dataset: DataSetProtocol, + datadict: Mapping[str, Mapping[str, npt.NDArray]], + *, + use_multi_index: Literal["auto", "always", "never"] = "auto", +) -> dict[str, xr.Dataset]: + xr_datasets = _load_to_xarray_dataset_dict_no_metadata( + dataset, datadict, use_multi_index=use_multi_index + ) + + for dataname, xr_dataset in xr_datasets.items(): + _add_param_spec_to_xarray_coords(dataset, xr_dataset[dataname]) + paramspec_dict = _paramspec_dict_with_extras(dataset, str(dataname)) + xr_dataset[dataname].attrs.update(paramspec_dict.items()) + _add_metadata_to_xarray(dataset, xr_dataset[dataname]) + + return xr_datasets + + def _add_metadata_to_xarray( dataset: DataSetProtocol, xr_dataset: xr.Dataset | xr.DataArray ) -> None: From 157bf63d5438195a831df87ac8abbed9072966f9 Mon Sep 17 00:00:00 2001 From: "Jens H. Nielsen" Date: Thu, 18 Sep 2025 14:47:20 +0200 Subject: [PATCH 09/13] Deprecate to_xarray_dataarray_dict --- src/qcodes/dataset/data_set.py | 10 ++++++++-- src/qcodes/dataset/data_set_cache.py | 10 ++++++++-- src/qcodes/dataset/data_set_in_memory.py | 9 +++++++-- src/qcodes/dataset/exporters/export_to_xarray.py | 6 ++++++ 4 files changed, 29 insertions(+), 6 deletions(-) diff --git a/src/qcodes/dataset/data_set.py b/src/qcodes/dataset/data_set.py index f4072826698..9ca0fdc59e6 100644 --- a/src/qcodes/dataset/data_set.py +++ b/src/qcodes/dataset/data_set.py @@ -15,6 +15,7 @@ import numpy import numpy.typing as npt from tqdm.auto import trange +from typing_extensions import deprecated import qcodes from qcodes.dataset.data_set_protocol import ( @@ -84,6 +85,7 @@ from qcodes.utils import ( NumpyJSONEncoder, ) +from qcodes.utils.deprecate import QCoDeSDeprecationWarning from .data_set_cache import DataSetCacheWithDBBackend from .data_set_in_memory import DataSetInMem, load_from_file @@ -95,7 +97,7 @@ load_to_dataframe_dict, ) from .exporters.export_to_xarray import ( - load_to_xarray_dataarray_dict, + load_to_xarray_dataarray_dict, # pyright: ignore[reportDeprecated] load_to_xarray_dataset, load_to_xarray_dataset_dict, xarray_to_h5netcdf_with_complex_numbers, @@ -965,6 +967,10 @@ def to_pandas_dataframe( datadict = self.get_parameter_data(*params, start=start, end=end) return load_to_concatenated_dataframe(datadict, self.description.interdeps) + @deprecated( + "to_xarray_dataarray_dict is deprecated, use to_xarray_dataset_dict instead", + category=QCoDeSDeprecationWarning, + ) def to_xarray_dataarray_dict( self, *params: str | ParamSpec | ParameterBase, @@ -1026,7 +1032,7 @@ def to_xarray_dataarray_dict( """ data = self.get_parameter_data(*params, start=start, end=end) - datadict = load_to_xarray_dataarray_dict( + datadict = load_to_xarray_dataarray_dict( # pyright: ignore[reportDeprecated] self, data, use_multi_index=use_multi_index ) diff --git a/src/qcodes/dataset/data_set_cache.py b/src/qcodes/dataset/data_set_cache.py index c9f27ec3547..198c156144f 100644 --- a/src/qcodes/dataset/data_set_cache.py +++ b/src/qcodes/dataset/data_set_cache.py @@ -6,16 +6,18 @@ import numpy as np import numpy.typing as npt +from typing_extensions import deprecated from 
qcodes.dataset.exporters.export_info import ExportInfo from qcodes.dataset.sqlite.queries import completed, load_new_data_for_rundescriber +from qcodes.utils import QCoDeSDeprecationWarning from .exporters.export_to_pandas import ( load_to_concatenated_dataframe, load_to_dataframe_dict, ) from .exporters.export_to_xarray import ( - load_to_xarray_dataarray_dict, + load_to_xarray_dataarray_dict, # pyright: ignore[reportDeprecated] load_to_xarray_dataset, load_to_xarray_dataset_dict, ) @@ -186,6 +188,10 @@ def to_pandas_dataframe(self) -> pd.DataFrame: data = self.data() return load_to_concatenated_dataframe(data, self.rundescriber.interdeps) + @deprecated( + "to_xarray_dataarray_dict is deprecated, use to_xarray_dataset_dict instead", + category=QCoDeSDeprecationWarning, + ) def to_xarray_dataarray_dict( self, *, use_multi_index: Literal["auto", "always", "never"] = "auto" ) -> dict[str, xr.DataArray]: @@ -201,7 +207,7 @@ def to_xarray_dataarray_dict( """ data = self.data() - data_dict = load_to_xarray_dataarray_dict( + data_dict = load_to_xarray_dataarray_dict( # pyright: ignore[reportDeprecated] self._dataset, data, use_multi_index=use_multi_index ) return data_dict diff --git a/src/qcodes/dataset/data_set_in_memory.py b/src/qcodes/dataset/data_set_in_memory.py index 7ecfc3aa9c6..89b465e27f0 100644 --- a/src/qcodes/dataset/data_set_in_memory.py +++ b/src/qcodes/dataset/data_set_in_memory.py @@ -11,6 +11,7 @@ import numpy as np import numpy.typing as npt +from typing_extensions import deprecated from qcodes.dataset.data_set_protocol import ( SPECS, @@ -40,7 +41,7 @@ update_parent_datasets, update_run_description, ) -from qcodes.utils import NumpyJSONEncoder +from qcodes.utils import NumpyJSONEncoder, QCoDeSDeprecationWarning from .data_set_cache import DataSetCacheDeferred, DataSetCacheInMem from .dataset_helpers import _add_run_to_runs_table @@ -848,6 +849,10 @@ def _parameters(self) -> str | None: else: return None + @deprecated( + "to_xarray_dataarray_dict is deprecated, use to_xarray_dataset_dict instead", + category=QCoDeSDeprecationWarning, + ) def to_xarray_dataarray_dict( self, *params: str | ParamSpec | ParameterBase, @@ -856,7 +861,7 @@ def to_xarray_dataarray_dict( use_multi_index: Literal["auto", "always", "never"] = "auto", ) -> dict[str, xr.DataArray]: self._warn_if_set(*params, start=start, end=end) - return self.cache.to_xarray_dataarray_dict() + return self.cache.to_xarray_dataarray_dict() # pyright: ignore[reportDeprecated] def to_xarray_dataset_dict( self, diff --git a/src/qcodes/dataset/exporters/export_to_xarray.py b/src/qcodes/dataset/exporters/export_to_xarray.py index 39430d052c6..02ff85712e1 100644 --- a/src/qcodes/dataset/exporters/export_to_xarray.py +++ b/src/qcodes/dataset/exporters/export_to_xarray.py @@ -7,8 +7,10 @@ from typing import TYPE_CHECKING, Literal from packaging import version as p_version +from typing_extensions import deprecated from qcodes.dataset.linked_datasets.links import links_to_str +from qcodes.utils import QCoDeSDeprecationWarning from ..descriptions.versioning import serialization as serial from .export_to_pandas import ( @@ -210,6 +212,10 @@ def _xarray_data_array_direct( return ds +@deprecated( + "load_to_xarray_dataarray_dict is deprecated, use load_to_xarray_dataarray_dict instead", + category=QCoDeSDeprecationWarning, +) def load_to_xarray_dataarray_dict( dataset: DataSetProtocol, datadict: Mapping[str, Mapping[str, npt.NDArray]], From 0cbbb94fe93511d8e5763a0de5c20a981678e19d Mon Sep 17 00:00:00 2001 From: "Jens H. 
Nielsen" Date: Thu, 18 Sep 2025 15:05:50 +0200 Subject: [PATCH 10/13] Add types to export tests --- tests/dataset/test_dataset_export.py | 141 +++++++++++++++++---------- 1 file changed, 88 insertions(+), 53 deletions(-) diff --git a/tests/dataset/test_dataset_export.py b/tests/dataset/test_dataset_export.py index 9331d7a7687..8075146b158 100644 --- a/tests/dataset/test_dataset_export.py +++ b/tests/dataset/test_dataset_export.py @@ -5,7 +5,7 @@ import os import re from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any import numpy as np import pandas as pd @@ -34,6 +34,7 @@ from qcodes.dataset.descriptions.dependencies import InterDependencies_ from qcodes.dataset.descriptions.param_spec import ParamSpecBase from qcodes.dataset.descriptions.versioning import serialization as serial +from qcodes.dataset.experiment_container import Experiment from qcodes.dataset.export_config import DataExportType from qcodes.dataset.exporters.export_to_pandas import _generate_pandas_index from qcodes.dataset.exporters.export_to_xarray import _calculate_index_shape @@ -41,12 +42,16 @@ from qcodes.parameters import ManualParameter, Parameter if TYPE_CHECKING: + from collections.abc import Hashable + + from pytest_mock import MockerFixture + from qcodes.dataset.data_set import DataSet from qcodes.dataset.experiment_container import Experiment @pytest.fixture(name="mock_empty_dataset") -def _make_mock_empty_dataset(experiment) -> DataSet: +def _make_mock_empty_dataset(experiment: Experiment) -> DataSet: dataset = new_data_set("dataset") xparam = ParamSpecBase("x", "numeric") yparam = ParamSpecBase("y", "numeric") @@ -76,7 +81,7 @@ def _make_mock_dataset(experiment) -> DataSet: @pytest.fixture(name="mock_dataset_nonunique") -def _make_mock_dataset_nonunique_index(experiment) -> DataSet: +def _make_mock_dataset_nonunique_index(experiment: Experiment) -> DataSet: dataset = new_data_set("dataset") xparam = ParamSpecBase("x", "numeric") yparam = ParamSpecBase("y", "numeric") @@ -92,7 +97,7 @@ def _make_mock_dataset_nonunique_index(experiment) -> DataSet: @pytest.fixture(name="mock_dataset_label_unit") -def _make_mock_dataset_label_unit(experiment) -> DataSet: +def _make_mock_dataset_label_unit(experiment: Experiment) -> DataSet: dataset = new_data_set("dataset") xparam = ParamSpecBase("x", "numeric", label="x label", unit="x unit") yparam = ParamSpecBase("y", "numeric", label="y label", unit="y unit") @@ -108,7 +113,7 @@ def _make_mock_dataset_label_unit(experiment) -> DataSet: @pytest.fixture(name="mock_dataset_complex") -def _make_mock_dataset_complex(experiment) -> DataSet: +def _make_mock_dataset_complex(experiment: Experiment) -> DataSet: dataset = new_data_set("dataset") xparam = ParamSpecBase("x", "numeric") yparam = ParamSpecBase("y", "complex") @@ -123,7 +128,7 @@ def _make_mock_dataset_complex(experiment) -> DataSet: @pytest.fixture(name="mock_dataset_grid") -def _make_mock_dataset_grid(experiment) -> DataSet: +def _make_mock_dataset_grid(experiment: Experiment) -> DataSet: dataset = new_data_set("dataset") xparam = ParamSpecBase("x", "numeric") yparam = ParamSpecBase("y", "numeric") @@ -141,7 +146,7 @@ def _make_mock_dataset_grid(experiment) -> DataSet: @pytest.fixture(name="mock_dataset_in_mem_grid") -def _make_mock_dataset_in_mem_grid(experiment) -> DataSetProtocol: +def _make_mock_dataset_in_mem_grid(experiment: Experiment) -> DataSetProtocol: meas = Measurement(exp=experiment, name="in_mem_ds") meas.register_custom_parameter("x", paramtype="numeric") 
meas.register_custom_parameter("y", paramtype="numeric") @@ -156,7 +161,7 @@ def _make_mock_dataset_in_mem_grid(experiment) -> DataSetProtocol: @pytest.fixture(name="mock_dataset_grid_with_shapes") -def _make_mock_dataset_grid_with_shapes(experiment) -> DataSet: +def _make_mock_dataset_grid_with_shapes(experiment: Experiment) -> DataSet: dataset = new_data_set("dataset") xparam = ParamSpecBase("x", "numeric") yparam = ParamSpecBase("y", "numeric") @@ -174,7 +179,7 @@ def _make_mock_dataset_grid_with_shapes(experiment) -> DataSet: @pytest.fixture(name="mock_dataset_grid_incomplete") -def _make_mock_dataset_grid_incomplete(experiment) -> DataSet: +def _make_mock_dataset_grid_incomplete(experiment: Experiment) -> DataSet: dataset = new_data_set("dataset") xparam = ParamSpecBase("x", "numeric") yparam = ParamSpecBase("y", "numeric") @@ -199,7 +204,7 @@ def _make_mock_dataset_grid_incomplete(experiment) -> DataSet: @pytest.fixture(name="mock_dataset_grid_incomplete_with_shapes") -def _make_mock_dataset_grid_incomplete_with_shapes(experiment) -> DataSet: +def _make_mock_dataset_grid_incomplete_with_shapes(experiment: Experiment) -> DataSet: dataset = new_data_set("dataset") xparam = ParamSpecBase("x", "numeric") yparam = ParamSpecBase("y", "numeric") @@ -224,7 +229,7 @@ def _make_mock_dataset_grid_incomplete_with_shapes(experiment) -> DataSet: @pytest.fixture(name="mock_dataset_numpy") -def _make_mock_dataset_numpy(experiment) -> DataSet: +def _make_mock_dataset_numpy(experiment: Experiment) -> DataSet: dataset = new_data_set("dataset") xparam = ParamSpecBase("x", "numeric", label="x label", unit="x unit") yparam = ParamSpecBase("y", "array", label="y label", unit="y unit") @@ -242,7 +247,7 @@ def _make_mock_dataset_numpy(experiment) -> DataSet: @pytest.fixture(name="mock_dataset_numpy_complex") -def _make_mock_dataset_numpy_complex(experiment) -> DataSet: +def _make_mock_dataset_numpy_complex(experiment: Experiment) -> DataSet: dataset = new_data_set("dataset") xparam = ParamSpecBase("x", "numeric", label="x label", unit="x unit") yparam = ParamSpecBase("y", "array", label="y label", unit="y unit") @@ -260,7 +265,7 @@ def _make_mock_dataset_numpy_complex(experiment) -> DataSet: @pytest.fixture(name="mock_dataset_non_grid") -def _make_mock_dataset_non_grid(experiment) -> DataSet: +def _make_mock_dataset_non_grid(experiment: Experiment) -> DataSet: dataset = new_data_set("dataset") xparam = ParamSpecBase("x", "numeric") yparam = ParamSpecBase("y", "numeric") @@ -285,7 +290,7 @@ def _make_mock_dataset_non_grid(experiment) -> DataSet: @pytest.fixture(name="mock_dataset_non_grid_in_mem") -def _make_mock_dataset_non_grid_in_mem(experiment) -> DataSetProtocol: +def _make_mock_dataset_non_grid_in_mem(experiment: Experiment) -> DataSetProtocol: meas = Measurement(exp=experiment, name="in_mem_ds") num_samples = 50 @@ -308,7 +313,7 @@ def _make_mock_dataset_non_grid_in_mem(experiment) -> DataSetProtocol: @pytest.fixture(name="mock_dataset_non_grid_in_grid") -def _make_mock_dataset_non_grid_in_grid(experiment) -> DataSet: +def _make_mock_dataset_non_grid_in_grid(experiment: Experiment) -> DataSet: dataset = new_data_set("dataset") xparam = ParamSpecBase("x", "numeric") y1param = ParamSpecBase("y1", "numeric") @@ -333,7 +338,7 @@ def _make_mock_dataset_non_grid_in_grid(experiment) -> DataSet: @pytest.fixture(name="mock_dataset_grid_in_non_grid") -def _make_mock_dataset_grid_in_non_grid(experiment) -> DataSet: +def _make_mock_dataset_grid_in_non_grid(experiment: Experiment) -> DataSet: dataset = 
new_data_set("dataset") x1param = ParamSpecBase("x1", "numeric") x2param = ParamSpecBase("x2", "numeric") @@ -358,7 +363,7 @@ def _make_mock_dataset_grid_in_non_grid(experiment) -> DataSet: @pytest.fixture(name="mock_dataset_non_grid_in_non_grid") -def _make_mock_dataset_non_grid_in_non_grid(experiment) -> DataSet: +def _make_mock_dataset_non_grid_in_non_grid(experiment: Experiment) -> DataSet: dataset = new_data_set("dataset") x1param = ParamSpecBase("x1", "numeric") x2param = ParamSpecBase("x2", "numeric") @@ -389,7 +394,7 @@ def _make_mock_dataset_non_grid_in_non_grid(experiment) -> DataSet: @pytest.fixture(name="mock_dataset_inverted_coords") -def _make_mock_dataset_inverted_coords(experiment) -> DataSet: +def _make_mock_dataset_inverted_coords(experiment: Experiment) -> DataSet: # this dataset is constructed such # that the two z parameters have inverted # coordinates. You almost certainly @@ -416,7 +421,7 @@ def _make_mock_dataset_inverted_coords(experiment) -> DataSet: @pytest.mark.usefixtures("experiment") -def test_write_data_to_text_file_save(tmp_path_factory) -> None: +def test_write_data_to_text_file_save(tmp_path_factory: TempPathFactory) -> None: dataset = new_data_set("dataset") xparam = ParamSpecBase("x", "numeric") yparam = ParamSpecBase("y", "numeric") @@ -436,7 +441,7 @@ def test_write_data_to_text_file_save(tmp_path_factory) -> None: def test_write_data_to_text_file_save_multi_keys( - tmp_path_factory, mock_dataset + tmp_path_factory: TempPathFactory, mock_dataset: DataSet ) -> None: tmp_path = tmp_path_factory.mktemp("data_to_text_file_save_multi_keys") path = str(tmp_path) @@ -449,7 +454,7 @@ def test_write_data_to_text_file_save_multi_keys( def test_write_data_to_text_file_save_single_file( - tmp_path_factory, mock_dataset + tmp_path_factory: TempPathFactory, mock_dataset: DataSet ) -> None: tmp_path = tmp_path_factory.mktemp("to_text_file_save_single_file") path = str(tmp_path) @@ -462,7 +467,7 @@ def test_write_data_to_text_file_save_single_file( @pytest.mark.usefixtures("experiment") -def test_write_data_to_text_file_length_exception(tmp_path) -> None: +def test_write_data_to_text_file_length_exception(tmp_path: Path) -> None: dataset = new_data_set("dataset") xparam = ParamSpecBase("x", "numeric") yparam = ParamSpecBase("y", "numeric") @@ -486,7 +491,9 @@ def test_write_data_to_text_file_length_exception(tmp_path) -> None: ) -def test_write_data_to_text_file_name_exception(tmp_path, mock_dataset) -> None: +def test_write_data_to_text_file_name_exception( + tmp_path: Path, mock_dataset: DataSet +) -> None: temp_dir = str(tmp_path) with pytest.raises(Exception, match="desired file name"): mock_dataset.write_data_to_text_file( @@ -494,7 +501,9 @@ def test_write_data_to_text_file_name_exception(tmp_path, mock_dataset) -> None: ) -def test_export_csv(tmp_path_factory, mock_dataset, caplog: LogCaptureFixture) -> None: +def test_export_csv( + tmp_path_factory: TempPathFactory, mock_dataset: DataSet, caplog: LogCaptureFixture +) -> None: tmp_path = tmp_path_factory.mktemp("export_csv") path = str(tmp_path) with caplog.at_level(logging.INFO): @@ -515,7 +524,7 @@ def test_export_csv(tmp_path_factory, mock_dataset, caplog: LogCaptureFixture) - def test_export_netcdf( - tmp_path_factory, mock_dataset, caplog: LogCaptureFixture + tmp_path_factory: TempPathFactory, mock_dataset: DataSet, caplog: LogCaptureFixture ) -> None: tmp_path = tmp_path_factory.mktemp("export_netcdf") path = str(tmp_path) @@ -546,7 +555,7 @@ def test_export_netcdf( def test_export_netcdf_default_dir( - 
tmp_path_factory: TempPathFactory, mock_dataset + tmp_path_factory: TempPathFactory, mock_dataset: DataSet ) -> None: qcodes.config.dataset.export_path = "{db_location}" mock_dataset.export(export_type="netcdf", prefix="qcodes_") @@ -562,7 +571,9 @@ def test_export_netcdf_default_dir( assert exported_dir == get_data_export_path() -def test_export_netcdf_csv(tmp_path_factory, mock_dataset) -> None: +def test_export_netcdf_csv( + tmp_path_factory: TempPathFactory, mock_dataset: DataSet +) -> None: tmp_path = tmp_path_factory.mktemp("export_netcdf") path = str(tmp_path) csv_path = os.path.join( @@ -602,7 +613,9 @@ def test_export_netcdf_csv(tmp_path_factory, mock_dataset) -> None: assert loaded_new_xr_ds.attrs["metadata_added_after_export_2"] == 696 -def test_export_netcdf_complex_data(tmp_path_factory, mock_dataset_complex) -> None: +def test_export_netcdf_complex_data( + tmp_path_factory: TempPathFactory, mock_dataset_complex: DataSet +) -> None: tmp_path = tmp_path_factory.mktemp("export_netcdf") path = str(tmp_path) mock_dataset_complex.export(export_type="netcdf", path=path, prefix="qcodes_") @@ -620,7 +633,7 @@ def test_export_netcdf_complex_data(tmp_path_factory, mock_dataset_complex) -> N def test_export_no_or_nonexistent_type_specified( - tmp_path_factory, mock_dataset + tmp_path_factory: TempPathFactory, mock_dataset: DataSet ) -> None: with pytest.raises(ValueError, match="No data export type specified"): mock_dataset.export() @@ -629,7 +642,9 @@ def test_export_no_or_nonexistent_type_specified( mock_dataset.export(export_type="foo") -def test_export_from_config(tmp_path_factory, mock_dataset, mocker) -> None: +def test_export_from_config( + tmp_path_factory: TempPathFactory, mock_dataset: DataSet, mocker: MockerFixture +) -> None: tmp_path = tmp_path_factory.mktemp("export_from_config") path = str(tmp_path) mock_type = mocker.patch("qcodes.dataset.data_set_protocol.get_data_export_type") @@ -643,7 +658,7 @@ def test_export_from_config(tmp_path_factory, mock_dataset, mocker) -> None: def test_export_from_config_set_name_elements( - tmp_path_factory, mock_dataset, mocker + tmp_path_factory: TempPathFactory, mock_dataset: DataSet, mocker: MockerFixture ) -> None: tmp_path = tmp_path_factory.mktemp("export_from_config") path = str(tmp_path) @@ -844,7 +859,7 @@ def test_partally_overlapping_setpoint_xarray_export_two_params_partial( assert filtered_data.shape == (5, 2) -def test_export_to_xarray_dataset_empty_ds(mock_empty_dataset) -> None: +def test_export_to_xarray_dataset_empty_ds(mock_empty_dataset: DataSet) -> None: ds = mock_empty_dataset.to_xarray_dataset() assert len(ds) == 2 assert len(ds.coords) == 1 @@ -852,7 +867,7 @@ def test_export_to_xarray_dataset_empty_ds(mock_empty_dataset) -> None: _assert_xarray_metadata_is_as_expected(ds, mock_empty_dataset) -def test_export_to_xarray_dataarray_empty_ds(mock_empty_dataset) -> None: +def test_export_to_xarray_dataarray_empty_ds(mock_empty_dataset: DataSet) -> None: dad = mock_empty_dataset.to_xarray_dataarray_dict() assert len(dad) == 2 assert len(dad["y"].coords) == 1 @@ -861,7 +876,7 @@ def test_export_to_xarray_dataarray_empty_ds(mock_empty_dataset) -> None: assert "x" in dad["z"].coords -def test_export_to_xarray(mock_dataset) -> None: +def test_export_to_xarray(mock_dataset: DataSet) -> None: ds = mock_dataset.to_xarray_dataset() assert len(ds) == 2 assert "index" not in ds.coords @@ -870,7 +885,7 @@ def test_export_to_xarray(mock_dataset) -> None: def test_export_to_xarray_non_unique_dependent_parameter( - 
mock_dataset_nonunique, + mock_dataset_nonunique: DataSet, ) -> None: """When x (the dependent parameter) contains non unique values it cannot be used as coordinates in xarray so check that we fall back to using an counter as index""" @@ -884,7 +899,7 @@ def test_export_to_xarray_non_unique_dependent_parameter( assert "snapshot" not in ds[array_name].attrs.keys() -def test_export_to_xarray_extra_metadata(mock_dataset) -> None: +def test_export_to_xarray_extra_metadata(mock_dataset: DataSet) -> None: mock_dataset.add_metadata("mytag", "somestring") mock_dataset.add_metadata("myothertag", 1) ds = mock_dataset.to_xarray_dataset() @@ -895,7 +910,7 @@ def test_export_to_xarray_extra_metadata(mock_dataset) -> None: assert "snapshot" not in ds[array_name].attrs.keys() -def test_export_to_xarray_ds_dict_extra_metadata(mock_dataset) -> None: +def test_export_to_xarray_ds_dict_extra_metadata(mock_dataset: DataSet) -> None: mock_dataset.add_metadata("mytag", "somestring") mock_dataset.add_metadata("myothertag", 1) da_dict = mock_dataset.to_xarray_dataarray_dict() @@ -904,7 +919,9 @@ def test_export_to_xarray_ds_dict_extra_metadata(mock_dataset) -> None: _assert_xarray_metadata_is_as_expected(datarray, mock_dataset) -def test_export_to_xarray_extra_metadata_can_be_stored(mock_dataset, tmp_path) -> None: +def test_export_to_xarray_extra_metadata_can_be_stored( + mock_dataset: DataSet, tmp_path: Path +) -> None: nt_metadata = { "foo": { "bar": {"baz": "test"}, @@ -938,7 +955,9 @@ def test_export_to_xarray_extra_metadata_can_be_stored(mock_dataset, tmp_path) - assert loaded_data.attrs == data_as_xarray.attrs -def test_to_xarray_ds_paramspec_metadata_is_preserved(mock_dataset_label_unit) -> None: +def test_to_xarray_ds_paramspec_metadata_is_preserved( + mock_dataset_label_unit: DataSet, +) -> None: xr_ds = mock_dataset_label_unit.to_xarray_dataset() assert len(xr_ds.dims) == 1 for param_name in xr_ds.dims: @@ -952,7 +971,7 @@ def test_to_xarray_ds_paramspec_metadata_is_preserved(mock_dataset_label_unit) - def test_to_xarray_da_dict_paramspec_metadata_is_preserved( - mock_dataset_label_unit, + mock_dataset_label_unit: DataSet, ) -> None: xr_das = mock_dataset_label_unit.to_xarray_dataarray_dict() @@ -996,7 +1015,9 @@ def test_export_2d_dataset( def test_export_dataset_small_no_delated( - tmp_path_factory: TempPathFactory, mock_dataset_numpy: DataSet, caplog + tmp_path_factory: TempPathFactory, + mock_dataset_numpy: DataSet, + caplog: LogCaptureFixture, ) -> None: """ Test that a 'small' dataset does not use the delayed export. 
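The delayed-export tests that follow all rely on the same two knobs: qcodes.config.dataset.export_chunked_threshold sets how large a dataset must be before export(export_type="netcdf", ...) switches to the chunked (delayed) writer, so the tests pin it to 0 to make every dataset count as large, and, as the name test_export_dataset_delayed_off_by_default indicates, the delayed path must additionally be enabled in the config. Each test then inspects captured log records to see which path was taken. A minimal sketch of that capture pattern, usable with any of the grid fixtures above (the exact message text is whatever the exporter logs and is asserted verbatim in the tests themselves):

import logging

import qcodes


def _export_and_capture(dataset, tmp_path, caplog) -> list[str]:
    # A threshold of 0 makes every dataset count as "large" for export.
    qcodes.config.dataset.export_chunked_threshold = 0
    with caplog.at_level(logging.INFO):
        dataset.export(export_type="netcdf", path=str(tmp_path), prefix="qcodes_")
    # The exporter logs which code path it took; the tests assert on these.
    return [record.message for record in caplog.records]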
@@ -1009,7 +1030,9 @@ def test_export_dataset_small_no_delated( def test_export_dataset_delayed_off_by_default( - tmp_path_factory: TempPathFactory, mock_dataset_grid: DataSet, caplog + tmp_path_factory: TempPathFactory, + mock_dataset_grid: DataSet, + caplog: LogCaptureFixture, ) -> None: tmp_path = tmp_path_factory.mktemp("export_netcdf") qcodes.config.dataset.export_chunked_threshold = 0 @@ -1021,7 +1044,9 @@ def test_export_dataset_delayed_off_by_default( def test_export_dataset_delayed_numeric( - tmp_path_factory: TempPathFactory, mock_dataset_grid: DataSet, caplog + tmp_path_factory: TempPathFactory, + mock_dataset_grid: DataSet, + caplog: LogCaptureFixture, ) -> None: tmp_path = tmp_path_factory.mktemp("export_netcdf") qcodes.config.dataset.export_chunked_threshold = 0 @@ -1057,7 +1082,9 @@ def test_export_dataset_delayed_numeric( def test_export_dataset_delayed( - tmp_path_factory: TempPathFactory, mock_dataset_numpy: DataSet, caplog + tmp_path_factory: TempPathFactory, + mock_dataset_numpy: DataSet, + caplog: LogCaptureFixture, ) -> None: tmp_path = tmp_path_factory.mktemp("export_netcdf") qcodes.config.dataset.export_chunked_threshold = 0 @@ -1094,7 +1121,9 @@ def test_export_dataset_delayed( def test_export_dataset_delayed_complex( - tmp_path_factory: TempPathFactory, mock_dataset_numpy_complex: DataSet, caplog + tmp_path_factory: TempPathFactory, + mock_dataset_numpy_complex: DataSet, + caplog: LogCaptureFixture, ) -> None: tmp_path = tmp_path_factory.mktemp("export_netcdf") qcodes.config.dataset.export_chunked_threshold = 0 @@ -1357,7 +1386,7 @@ def test_export_non_grid_in_non_grid_dataset( def test_inverted_coords_perserved_on_netcdf_roundtrip( - tmp_path_factory: TempPathFactory, mock_dataset_inverted_coords + tmp_path_factory: TempPathFactory, mock_dataset_inverted_coords: DataSet ) -> None: tmp_path = tmp_path_factory.mktemp("export_netcdf") path = str(tmp_path) @@ -1381,7 +1410,7 @@ def test_inverted_coords_perserved_on_netcdf_roundtrip( assert xr_ds.identical(xr_ds_reimported) -def _get_expected_param_spec_attrs(dataset, dim): +def _get_expected_param_spec_attrs(dataset: DataSet, dim: Hashable) -> dict[str, Any]: expected_attrs = dict(dataset.paramspecs[str(dim)]._to_dict()) expected_attrs["units"] = expected_attrs["unit"] expected_attrs["long_name"] = expected_attrs["label"] @@ -1389,7 +1418,9 @@ def _get_expected_param_spec_attrs(dataset, dim): return expected_attrs -def _assert_xarray_metadata_is_as_expected(xarray_ds, qc_dataset): +def _assert_xarray_metadata_is_as_expected( + xarray_ds: xr.Dataset | xr.DataArray, qc_dataset: DataSet +) -> None: assert xarray_ds.ds_name == qc_dataset.name assert xarray_ds.sample_name == qc_dataset.sample_name assert xarray_ds.exp_name == qc_dataset.exp_name @@ -1412,7 +1443,7 @@ def _assert_xarray_metadata_is_as_expected(xarray_ds, qc_dataset): ) -def test_multi_index_options_grid(mock_dataset_grid) -> None: +def test_multi_index_options_grid(mock_dataset_grid: DataSet) -> None: assert mock_dataset_grid.description.shapes is None xds = mock_dataset_grid.to_xarray_dataset() @@ -1428,7 +1459,9 @@ def test_multi_index_options_grid(mock_dataset_grid) -> None: assert xds_always.sizes == {"multi_index": 50} -def test_multi_index_options_grid_with_shape(mock_dataset_grid_with_shapes) -> None: +def test_multi_index_options_grid_with_shape( + mock_dataset_grid_with_shapes: DataSet, +) -> None: assert mock_dataset_grid_with_shapes.description.shapes == {"z": (10, 5)} xds = mock_dataset_grid_with_shapes.to_xarray_dataset() @@ -1446,7 +1479,9 @@ 
def test_multi_index_options_grid_with_shape(mock_dataset_grid_with_shapes) -> N assert xds_always.sizes == {"multi_index": 50} -def test_multi_index_options_incomplete_grid(mock_dataset_grid_incomplete) -> None: +def test_multi_index_options_incomplete_grid( + mock_dataset_grid_incomplete: DataSet, +) -> None: assert mock_dataset_grid_incomplete.description.shapes is None xds = mock_dataset_grid_incomplete.to_xarray_dataset() @@ -1760,7 +1795,7 @@ def test_dond_hypothesis_nd_grid( def test_netcdf_export_with_none_timestamp_raw( - tmp_path_factory: TempPathFactory, experiment + tmp_path_factory: TempPathFactory, experiment: Experiment ) -> None: """ Test that datasets with None timestamp_raw values export correctly to NetCDF @@ -1803,7 +1838,7 @@ def test_netcdf_export_with_none_timestamp_raw( def test_netcdf_export_with_mixed_timestamp_raw( - tmp_path_factory: TempPathFactory, experiment + tmp_path_factory: TempPathFactory, experiment: Experiment ) -> None: """ Test NetCDF export/import with one timestamp_raw being None and one being set. From a8136682df03d04def34f8a4d4f49054c562824b Mon Sep 17 00:00:00 2001 From: "Jens H. Nielsen" Date: Thu, 18 Sep 2025 15:14:26 +0200 Subject: [PATCH 11/13] Wip update tests --- .../dataset/exporters/export_to_xarray.py | 9 ++-- tests/dataset/test_dataset_export.py | 49 +++++++++++++++++-- 2 files changed, 48 insertions(+), 10 deletions(-) diff --git a/src/qcodes/dataset/exporters/export_to_xarray.py b/src/qcodes/dataset/exporters/export_to_xarray.py index 02ff85712e1..bda5aa77c6a 100644 --- a/src/qcodes/dataset/exporters/export_to_xarray.py +++ b/src/qcodes/dataset/exporters/export_to_xarray.py @@ -248,11 +248,10 @@ def load_to_xarray_dataset_dict( dataset, datadict, use_multi_index=use_multi_index ) - for dataname, xr_dataset in xr_datasets.items(): - _add_param_spec_to_xarray_coords(dataset, xr_dataset[dataname]) - paramspec_dict = _paramspec_dict_with_extras(dataset, str(dataname)) - xr_dataset[dataname].attrs.update(paramspec_dict.items()) - _add_metadata_to_xarray(dataset, xr_dataset[dataname]) + for xr_dataset in xr_datasets.values(): + _add_param_spec_to_xarray_coords(dataset, xr_dataset) + _add_param_spec_to_xarray_data_vars(dataset, xr_dataset) + _add_metadata_to_xarray(dataset, xr_dataset) return xr_datasets diff --git a/tests/dataset/test_dataset_export.py b/tests/dataset/test_dataset_export.py index 8075146b158..59477a87a2b 100644 --- a/tests/dataset/test_dataset_export.py +++ b/tests/dataset/test_dataset_export.py @@ -40,6 +40,7 @@ from qcodes.dataset.exporters.export_to_xarray import _calculate_index_shape from qcodes.dataset.linked_datasets.links import links_to_str from qcodes.parameters import ManualParameter, Parameter +from qcodes.utils.deprecate import QCoDeSDeprecationWarning if TYPE_CHECKING: from collections.abc import Hashable @@ -868,7 +869,17 @@ def test_export_to_xarray_dataset_empty_ds(mock_empty_dataset: DataSet) -> None: def test_export_to_xarray_dataarray_empty_ds(mock_empty_dataset: DataSet) -> None: - dad = mock_empty_dataset.to_xarray_dataarray_dict() + with pytest.warns(QCoDeSDeprecationWarning, match="to_xarray_dataarray_dict"): + dad = mock_empty_dataset.to_xarray_dataarray_dict() # pyright: ignore[reportDeprecated] + assert len(dad) == 2 + assert len(dad["y"].coords) == 1 + assert "x" in dad["y"].coords + assert len(dad["z"].coords) == 1 + assert "x" in dad["z"].coords + + +def test_export_to_xarray_dataset_dict_empty_ds(mock_empty_dataset: DataSet) -> None: + dad = mock_empty_dataset.to_xarray_dataset_dict() assert 
len(dad) == 2 assert len(dad["y"].coords) == 1 assert "x" in dad["y"].coords @@ -910,10 +921,20 @@ def test_export_to_xarray_extra_metadata(mock_dataset: DataSet) -> None: assert "snapshot" not in ds[array_name].attrs.keys() +def test_export_to_xarray_da_dict_extra_metadata(mock_dataset: DataSet) -> None: + mock_dataset.add_metadata("mytag", "somestring") + mock_dataset.add_metadata("myothertag", 1) + with pytest.warns(QCoDeSDeprecationWarning, match="to_xarray_dataarray_dict"): + da_dict = mock_dataset.to_xarray_dataarray_dict() # pyright: ignore[reportDeprecated] + + for datarray in da_dict.values(): + _assert_xarray_metadata_is_as_expected(datarray, mock_dataset) + + def test_export_to_xarray_ds_dict_extra_metadata(mock_dataset: DataSet) -> None: mock_dataset.add_metadata("mytag", "somestring") mock_dataset.add_metadata("myothertag", 1) - da_dict = mock_dataset.to_xarray_dataarray_dict() + da_dict = mock_dataset.to_xarray_dataset_dict() for datarray in da_dict.values(): _assert_xarray_metadata_is_as_expected(datarray, mock_dataset) @@ -973,7 +994,8 @@ def test_to_xarray_ds_paramspec_metadata_is_preserved( def test_to_xarray_da_dict_paramspec_metadata_is_preserved( mock_dataset_label_unit: DataSet, ) -> None: - xr_das = mock_dataset_label_unit.to_xarray_dataarray_dict() + with pytest.warns(QCoDeSDeprecationWarning, match="to_xarray_dataarray_dict"): + xr_das = mock_dataset_label_unit.to_xarray_dataarray_dict() # pyright: ignore[reportDeprecated] for outer_param_name, xr_da in xr_das.items(): for param_name in xr_da.dims: @@ -987,6 +1009,23 @@ def test_to_xarray_da_dict_paramspec_metadata_is_preserved( assert xr_da.attrs[spec_name] == spec_value +def test_to_xarray_ds_dict_paramspec_metadata_is_preserved( + mock_dataset_label_unit: DataSet, +) -> None: + xr_das = mock_dataset_label_unit.to_xarray_dataset_dict() + + for outer_param_name, xr_da in xr_das.items(): + for param_name in xr_da.dims: + assert xr_da.coords[param_name].attrs == _get_expected_param_spec_attrs( + mock_dataset_label_unit, param_name + ) + expected_param_spec_attrs = _get_expected_param_spec_attrs( + mock_dataset_label_unit, outer_param_name + ) + for spec_name, spec_value in expected_param_spec_attrs.items(): + assert xr_da[outer_param_name].attrs[spec_name] == spec_value + + def test_export_2d_dataset( tmp_path_factory: TempPathFactory, mock_dataset_grid: DataSet ) -> None: @@ -1597,7 +1636,7 @@ def test_geneate_pandas_index() -> None: @given( function_name=hst.sampled_from( [ - "to_xarray_dataarray_dict", + "to_xarray_dataset_dict", "to_pandas_dataframe", "to_pandas_dataframe_dict", "get_parameter_data", @@ -1642,7 +1681,7 @@ def test_export_lazy_load( @given( function_name=hst.sampled_from( [ - "to_xarray_dataarray_dict", + "to_xarray_dataset_dict", "to_pandas_dataframe", "to_pandas_dataframe_dict", "get_parameter_data", From 49cb1b6388f80d20f6cf5709662655a051ab4f73 Mon Sep 17 00:00:00 2001 From: "Jens H. 
Nielsen" Date: Tue, 23 Sep 2025 11:04:02 +0200 Subject: [PATCH 12/13] Rename private methods to reflect type --- src/qcodes/dataset/exporters/export_to_xarray.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/qcodes/dataset/exporters/export_to_xarray.py b/src/qcodes/dataset/exporters/export_to_xarray.py index bda5aa77c6a..773c91c3844 100644 --- a/src/qcodes/dataset/exporters/export_to_xarray.py +++ b/src/qcodes/dataset/exporters/export_to_xarray.py @@ -85,7 +85,7 @@ def _load_to_xarray_dataset_dict_no_metadata( if shape_is_consistent and use_multi_index != "always": _LOG.info("Exporting %s to xarray using direct method", name) - xr_dataset_dict[name] = _xarray_data_array_direct(dataset, name, sub_dict) + xr_dataset_dict[name] = _xarray_data_set_direct(dataset, name, sub_dict) else: _LOG.info("Exporting %s to xarray via pandas index", name) index = _generate_pandas_index( @@ -102,7 +102,7 @@ def _load_to_xarray_dataset_dict_no_metadata( xr_dataset_dict[name] = xr_dataset elif index_is_unique: df = _data_to_dataframe(sub_dict, index) - xr_dataset_dict[name] = _xarray_data_array_from_pandas_multi_index( + xr_dataset_dict[name] = _xarray_data_set_from_pandas_multi_index( dataset, use_multi_index, name, df, index ) else: @@ -112,7 +112,7 @@ def _load_to_xarray_dataset_dict_no_metadata( return xr_dataset_dict -def _xarray_data_array_from_pandas_multi_index( +def _xarray_data_set_from_pandas_multi_index( dataset: DataSetProtocol, use_multi_index: Literal["auto", "always", "never"], name: str, @@ -151,7 +151,7 @@ def _xarray_data_array_from_pandas_multi_index( return xr_dataset -def _xarray_data_array_direct( +def _xarray_data_set_direct( dataset: DataSetProtocol, name: str, sub_dict: Mapping[str, npt.NDArray] ) -> xr.Dataset: import xarray as xr From 946f52a788611209c7cf12f44feb8f361a03149d Mon Sep 17 00:00:00 2001 From: "Jens H. Nielsen" Date: Tue, 23 Sep 2025 11:25:28 +0200 Subject: [PATCH 13/13] Add types to tests --- .../test_inferred_parameters_fix.py | 65 +++++++++++++------ 1 file changed, 45 insertions(+), 20 deletions(-) diff --git a/tests/dataset/measurement/test_inferred_parameters_fix.py b/tests/dataset/measurement/test_inferred_parameters_fix.py index 6e61754dc2f..effeb079d64 100644 --- a/tests/dataset/measurement/test_inferred_parameters_fix.py +++ b/tests/dataset/measurement/test_inferred_parameters_fix.py @@ -9,9 +9,12 @@ from typing import TYPE_CHECKING import numpy as np +import pytest from qcodes.dataset import Measurement from qcodes.dataset.descriptions.detect_shapes import detect_shape_of_measurement +from qcodes.dataset.experiment_container import Experiment +from qcodes.instrument_drivers.mock_instruments import DummyInstrument from qcodes.parameters import DelegateParameter, ManualParameter, Parameter if TYPE_CHECKING: @@ -21,7 +24,9 @@ from qcodes.instrument_drivers.mock_instruments import DummyInstrument -def test_inferred_parameters_transitively_collected(experiment, DAC): +def test_inferred_parameters_transitively_collected( + experiment: "Experiment", DAC: "DummyInstrument" +) -> None: """ Test that parameters inferred from dependencies are properly collected when enqueuing results. 
@@ -80,7 +85,9 @@ def test_inferred_parameters_transitively_collected(experiment, DAC): ) -def test_inferred_parameters_in_actual_measurement_0d(experiment, DAC): +def test_inferred_parameters_in_actual_measurement_0d( + experiment: "Experiment", DAC: "DummyInstrument" +) -> None: """ Test the full measurement flow to ensure inferred parameters are saved correctly. """ @@ -136,7 +143,9 @@ def test_inferred_parameters_in_actual_measurement_0d(experiment, DAC): assert len(param_data["del_param_1"]["dummy_dac_ch1"]) == 1 -def test_inferred_parameters_in_actual_measurement_1d(experiment, DAC): +def test_inferred_parameters_in_actual_measurement_1d( + experiment: "Experiment", DAC: "DummyInstrument" +) -> None: """ Test the full measurement flow to ensure inferred parameters are saved correctly. """ @@ -223,8 +232,12 @@ def test_inferred_parameters_in_actual_measurement_1d(experiment, DAC): assert len(df) == num_points +@pytest.mark.parametrize("set_shape", [True, False]) def test_inferred_parameters_in_actual_measurement_2d( - experiment: "Experiment", DAC: "DummyInstrument", caplog: "LogCaptureFixture" + experiment: "Experiment", + DAC: "DummyInstrument", + caplog: "LogCaptureFixture", + set_shape: bool, ) -> None: """ 2D version: both axes are DelegateParameters inferred from DAC channels. @@ -252,9 +265,10 @@ def test_inferred_parameters_in_actual_measurement_2d( # Register measurement parameter with 2D setpoints meas.register_parameter(meas_parameter, setpoints=(del_param_1, del_param_2)) - meas.set_shapes( - detect_shape_of_measurement([meas_parameter], (num_points_x, num_points_y)) - ) + if set_shape: + meas.set_shapes( + detect_shape_of_measurement([meas_parameter], (num_points_x, num_points_y)) + ) with meas.run() as datasaver: for x in np.linspace(0, 1, num_points_x): @@ -299,10 +313,16 @@ def test_inferred_parameters_in_actual_measurement_2d( xarr = dataset.to_xarray_dataset() assert len(caplog.records) == 1 - assert ( - caplog.records[0].message - == "Exporting meas_parameter to xarray using direct method" - ) + if set_shape: + assert ( + caplog.records[0].message + == "Exporting meas_parameter to xarray using direct method" + ) + else: + assert ( + caplog.records[0].message + == "Exporting meas_parameter to xarray via pandas index" + ) assert "meas_parameter" in xarr.data_vars @@ -314,14 +334,17 @@ def test_inferred_parameters_in_actual_measurement_2d( assert xarr.coords["del_param_2"].shape == (num_points_y,) assert xarr.coords["del_param_2"].dims == ("del_param_2",) - assert "dummy_dac_ch1" in xarr.coords - assert xarr.coords["dummy_dac_ch1"].shape == (num_points_x,) - assert xarr.coords["dummy_dac_ch1"].dims == ("del_param_1",) - - assert "dummy_dac_ch2" in xarr.coords - assert xarr.coords["dummy_dac_ch2"].shape == (num_points_y,) - assert xarr.coords["dummy_dac_ch2"].dims == ("del_param_2",) - + if set_shape: + assert "dummy_dac_ch1" in xarr.coords + assert xarr.coords["dummy_dac_ch1"].shape == (num_points_x,) + assert xarr.coords["dummy_dac_ch1"].dims == ("del_param_1",) + + assert "dummy_dac_ch2" in xarr.coords + assert xarr.coords["dummy_dac_ch2"].shape == (num_points_y,) + assert xarr.coords["dummy_dac_ch2"].dims == ("del_param_2",) + else: + assert "dummy_dac_ch1" not in xarr.coords + assert "dummy_dac_ch2" not in xarr.coords assert xarr["meas_parameter"].dims == ("del_param_1", "del_param_2") assert xarr["meas_parameter"].shape == (num_points_x, num_points_y) @@ -334,7 +357,9 @@ def test_inferred_parameters_in_actual_measurement_2d( assert len(df) == total_points -def 
test_multiple_dependent_parameters_no_cross_contamination(experiment): +def test_multiple_dependent_parameters_no_cross_contamination( + experiment: "Experiment", +) -> None: """ Test that multiple dependent parameters that depend on the same independent parameter don't get mixed into each other's dependency trees.
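Taken together, the parametrized 2D test pins down the observable contract of this series: with shapes set, export takes the direct path and the inferred channel values become extra non-index coordinates attached to the matching setpoint dimension; without shapes, export goes via the pandas index and the inferred channels do not appear as coordinates. In plain xarray terms the shaped result has the following structure (a hand-built equivalent for illustration; the coordinate values are arbitrary):

import numpy as np
import xarray as xr

x = np.linspace(0, 1, 5)   # del_param_1 setpoints
y = np.linspace(0, 1, 4)   # del_param_2 setpoints

shaped = xr.Dataset(
    {"meas_parameter": (("del_param_1", "del_param_2"), np.outer(x, y))},
    coords={
        "del_param_1": x,
        "del_param_2": y,
        # inferred channels ride along on the matching setpoint dimension
        "dummy_dac_ch1": ("del_param_1", 2 * x),
        "dummy_dac_ch2": ("del_param_2", 3 * y),
    },
)
assert shaped["meas_parameter"].dims == ("del_param_1", "del_param_2")
assert shaped.coords["dummy_dac_ch1"].dims == ("del_param_1",)

The related user-facing change from earlier in the series: to_xarray_dataarray_dict now emits a QCoDeSDeprecationWarning and to_xarray_dataset_dict is its replacement, returning an xarray Dataset rather than a DataArray per measured parameter, which is what gives the inferred coordinates somewhere to live.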