From d5081d949da30d237f20de34cc8a9b8e1bb9fdd0 Mon Sep 17 00:00:00 2001
From: Pascal Bourgault <bourgault.pascal@ouranos.ca>
Date: Mon, 25 Sep 2023 16:11:51 -0400
Subject: [PATCH 01/12] lazy save_to_netcdf

---
 xscen/io.py | 44 +++++++++++++++++++-------------------------
 1 file changed, 19 insertions(+), 25 deletions(-)

diff --git a/xscen/io.py b/xscen/io.py
index 6bde0cc4..b859ae3f 100644
--- a/xscen/io.py
+++ b/xscen/io.py
@@ -274,12 +274,24 @@ def clean_incomplete(path: Union[str, os.PathLike], complete: Sequence[str]) ->
             sh.rmtree(fold)
 
 
+def _coerce_attrs(attrs):
+    """Ensure no funky objects in attrs: cast to str, in place, all but scalars, arrays and simple non-empty sequences."""
+    for k in list(attrs.keys()):  # iterate over a snapshot of the keys, values are reassigned below
+        if not (
+            isinstance(attrs[k], (str, float, int, np.ndarray))
+            or isinstance(attrs[k], (tuple, list))
+            and any(isinstance(v, (str, float, int)) for v in attrs[k][:1])  # [:1]: empty sequences don't raise
+        ):
+            attrs[k] = str(attrs[k])
+
+
 @parse_config
 def save_to_netcdf(
     ds: xr.Dataset,
     filename: str,
     *,
     rechunk: Optional[dict] = None,
+    compute: bool = True,
     netcdf_kwargs: Optional[dict] = None,
 ) -> None:
     """Save a Dataset to NetCDF, rechunking if requested.
@@ -295,6 +307,8 @@ def save_to_netcdf(
         Spatial dimensions can be generalized as 'X' and 'Y', which will be mapped to the actual grid type's
         dimension names.
         Rechunking is only done on *data* variables sharing dimensions with this argument.
+    compute : bool
+        Whether to start the computation or return a delayed object.
     netcdf_kwargs : dict, optional
         Additional arguments to send to_netcdf()
 
@@ -317,21 +331,11 @@ def save_to_netcdf(
     netcdf_kwargs.setdefault("engine", "h5netcdf")
     netcdf_kwargs.setdefault("format", "NETCDF4")
 
-    # Ensure no funky objects in attrs:
-    def coerce_attrs(attrs):
-        for k in attrs.keys():
-            if not (
-                isinstance(attrs[k], (str, float, int, np.ndarray))
-                or isinstance(attrs[k], (tuple, list))
-                and isinstance(attrs[k][0], (str, float, int))
-            ):
-                attrs[k] = str(attrs[k])
-
-    coerce_attrs(ds.attrs)
+    _coerce_attrs(ds.attrs)
     for var in ds.variables.values():
-        coerce_attrs(var.attrs)
+        _coerce_attrs(var.attrs)
 
-    ds.to_netcdf(filename, **netcdf_kwargs)
+    return ds.to_netcdf(filename, compute=compute, **netcdf_kwargs)
 
 
 @parse_config
@@ -438,19 +442,9 @@ def _skip(var):
     if len(ds.data_vars) == 0:
         return None
 
-    # Ensure no funky objects in attrs:
-    def coerce_attrs(attrs):
-        for k in list(attrs.keys()):
-            if not (
-                isinstance(attrs[k], (str, float, int, np.ndarray))
-                or isinstance(attrs[k], (tuple, list))
-                and isinstance(attrs[k][0], (str, float, int))
-            ):
-                attrs[k] = str(attrs[k])
-
-    coerce_attrs(ds.attrs)
+    _coerce_attrs(ds.attrs)
     for var in ds.variables.values():
-        coerce_attrs(var.attrs)
+        _coerce_attrs(var.attrs)
 
     if itervar:
         zarr_kwargs["compute"] = True

From ba8d219f770a9cb99cee06cce59f213209966f00 Mon Sep 17 00:00:00 2001
From: Pascal Bourgault <bourgault.pascal@ouranos.ca>
Date: Tue, 26 Sep 2023 16:28:52 -0400
Subject: [PATCH 02/12] Complete version

---
 xscen/io.py | 224 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 223 insertions(+), 1 deletion(-)

diff --git a/xscen/io.py b/xscen/io.py
index b859ae3f..06e0575d 100644
--- a/xscen/io.py
+++ b/xscen/io.py
@@ -4,11 +4,12 @@
 import shutil as sh
 from collections.abc import Sequence
 from pathlib import Path
-from typing import Optional, Union
+from typing import List, Optional, Union
 
 import h5py
 import netCDF4
 import numpy as np
+import pandas as pd
 import xarray as xr
 import zarr
 from rechunker import rechunk as _rechunk
@@ -487,6 +488,227 @@ def _skip(var):
             raise
 
 
+def _to_dataframe(
+    data: xr.DataArray,
+    index: list[str],
+    column: list[str],
+    coords: list[str],
+    coords_dims: dict,
+):
+    """Convert a DataArray to a DataFrame with support for MultiColumn."""
+    df = data.to_dataframe()
+    if not column:
+        # Fast track for the easy case where xarray's default is already what we want.
+        return df
+    df_data = (
+        df[[data.name]]
+        .reset_index()
+        .pivot(index=index, columns=column)
+        .droplevel(None, axis=1)
+    )
+    dfs = []
+    for v in coords:
+        drop_cols = [c for c in column if c not in coords_dims[v]]
+        cols = [c for c in column if c in coords_dims[v]]
+        dfc = (
+            df[[v]]
+            .reset_index()
+            .drop(columns=drop_cols)
+            .pivot(index=index, columns=cols)
+        )
+        cols = dfc.columns
+        # The "None" level has the aux coord name we want it either at the same level as variable, or at lowest missing level otherwise.
+        varname_lvl = "variable" if "variable" in drop_cols else drop_cols[-1]
+        cols = cols.rename(
+            varname_lvl
+            if not isinstance(cols, pd.MultiIndex)
+            else [nm or varname_lvl for nm in cols.name]
+        )
+        if isinstance(df_data.columns, pd.MultiIndex) or isinstance(
+            cols, pd.MultiIndex
+        ):
+            # handle different depth of multicolumns, expand MultiCol of coord with None for missing levels.
+            cols = pd.MultiIndex.from_arrays(
+                [
+                    cols.get_level_values(lvl) if lvl in cols.names else [None]
+                    for lvl in df_data.columns.names
+                ]
+            )
+        dfc.columns = cols
+        dfs.append(
+            dfc[~dfc.index.duplicated()]
+        )  # We dropped columns thus the index is not unique anymore
+    dfs.append(df_data)
+    return pd.concat(dfs, axis=1)
+
+
+def to_table(
+    ds: Union[xr.Dataset, xr.DataArray],
+    *,
+    index: Union[None, str, Sequence[str]] = None,
+    column: Union[None, str, Sequence[str]] = None,
+    sheet: Union[None, str, Sequence[str]] = None,
+    coords: Union[bool, Sequence[str]] = True,
+) -> Union[pd.DataFrame, dict]:
+    """Convert a dataset to a pandas DataFrame with support for multicolumns and multisheet.
+
+    This function will trigger a computation of the dataset.
+
+    Parameters
+    ----------
+    ds : xr.Dataset or xr.DataArray
+      Dataset or DataArray to be saved.
+      If a Dataset with more than one variable is given, the dimension "variable"
+      must appear in one of `index`, `column` or `sheet`.
+    index : str or sequence of str, optional
+      Name of the dimension(s) to use as index.
+      Default is all data dimensions.
+    column : str or sequence of str, optional
+      Name of the dimension(s) to use as index.
+      Default is "variable", i.e. the name of the variable(s).
+    sheet : str or sequence of str, optional
+      Name of the dimension(s) to use as sheet names.
+    coords: bool or sequence of str
+      A list of auxiliary coordinates to add to the columns (as would variables).
+      If True, all (if any) are added.
+
+    Returns
+    -------
+    pd.DataFrame or dict
+      DataFrame with a MultiIndex with levels `index` and MultiColumn with levels `column`.
+      If `sheet` is given, the output is dictionary with keys for each unique "sheet" dimensions tuple, values are DataFrames.
+    """
+    if isinstance(ds, xr.Dataset):
+        da = ds.to_array(name="data")
+        if len(ds) == 1:
+            da = da.isel(variable=0).rename(data=da.variable.values[0])
+
+    def _ensure_list(seq):
+        if isinstance(seq, str):
+            return [seq]
+        return list(seq)
+
+    index = _ensure_list(index or (set(da.dims) - {"variable"}))
+    column = _ensure_list(column or (["variable"] if len(ds) > 1 else []))
+    sheet = _ensure_list(sheet or [])
+
+    needed_dims = index + column + sheet
+    if len(set(needed_dims)) != len(needed_dims):
+        raise ValueError(
+            f"Repeated dimension names. Got index={index}, column={column} and sheet={sheet}."
+            "Each dimension should appear only once."
+        )
+    if set(needed_dims) != set(da.dims):
+        raise ValueError(
+            f"Passed index, column and sheet do not match available dimensions. Got {needed_dims}, data has {da.dims}."
+        )
+
+    coords = coords or []
+    if coords is not True:
+        drop = set(ds.coords.keys()) - set(da.dims) - set(coords)
+        da = da.drop_vars(drop)
+    else:
+        coords = list(set(ds.coords.keys()) - set(da.dims))
+    if len(coords) > 1 and "variable" in index:
+        raise NotImplementedError(
+            "Keeping auxiliary coords is implemented when 'variable' is in the index."
+        )
+
+    table_kwargs = dict(
+        index=index,
+        column=column,
+        coords=coords,
+        coords_dims={c: ds[c].dims for c in coords},
+    )
+    if sheet:
+        out = {}
+        das = da.stack(sheet=sheet)
+        for elem in das.sheet:
+            out[elem.item()] = _to_dataframe(
+                das.sel(sheet=elem, drop=True), **table_kwargs
+            )
+        return out
+    return _to_dataframe(da, **table_kwargs)
+
+
+TABLE_FORMATS = {".csv": "csv", ".xls": "excel", ".xlsx": "excel"}
+
+
+def save_to_table(
+    ds: Union[xr.Dataset, xr.DataArray],
+    filename: str,
+    output_format: Optional[str] = None,
+    *,
+    index: Union[None, str, Sequence[str]] = None,
+    column: Union[None, str, Sequence[str]] = "variable",
+    sheet: Union[None, str, Sequence[str]] = None,
+    coords: Union[bool, Sequence[str]] = True,
+    sep: str = "_",
+    **kwargs,
+):
+    """Save the dataset to a tabular file (csv, excel, ...).
+
+    This function will trigger a computation of the dataset.
+
+    Parameters
+    ----------
+    ds : xr.Dataset or xr.DataArray
+      Dataset or DataArray to be saved.
+      If a Dataset with more than one variable is given, the dimension "variable"
+      must appear in one of `index`, `column` or `sheet`.
+    filename : str
+      Name of the file to be saved.
+    output_format: {'csv', 'excel', ...}, optional
+      The output format. If None (default), it is inferred
+      from the extension of `filename`. Not all possible output format are supported for inference.
+      Valid values are any that matches a :py:class:`pandas.DataFrame` method like "df.to_{format}".
+    index : str or sequence of str, optional
+      Name of the dimension(s) to use as index.
+      Default is all data dimensions.
+    column : str or sequence of str, optional
+      Name of the dimension(s) to use as index.
+      Default is "variable", i.e. the name of the variable(s).
+    sheet : str or sequence of str, optional
+      Name of the dimension(s) to use as sheet names.
+      Only valid if the output format is excel.
+    coords: bool or sequence of str
+      A list of auxiliary coordinates to add to the columns (as would variables).
+      If True, all (if any) are added.
+    sep : str
+      For output formats other than excel and for sheet names,
+      index, column and sheet names from multiple dimensions are
+      constructed by concatenating values with this separator.
+    kwargs:
+      Other arguments passed to the panda function.
+    """
+    filename = Path(filename)
+
+    if output_format is None:
+        output_format = TABLE_FORMATS.get(filename.suffix)
+    if output_format is None:
+        raise ValueError(
+            f"Output format could not be inferred from filename {filename.name}. Please pass `output_format`."
+        )
+
+    if sheet is not None and output_format != "excel":
+        raise ValueError(
+            f"Argument `sheet` is only valid with excel as the output format. Got {output_format}."
+        )
+
+    out = to_table(ds, index=index, column=column, sheet=sheet, coords=coords)
+
+    if sheet:
+        with pd.ExcelWriter(filename, engine=kwargs.get("engine")) as writer:
+            for sheet_name, df in out.items():
+                df.to_excel(writer, sheet_name=sep.join(sheet_name), **kwargs)
+    else:
+        if isinstance(out.columns, pd.MultiIndex):
+            out.columns = out.columns.map(lambda lvls: sep.join(map(str, lvls)))
+        if isinstance(out.index, pd.MultiIndex):
+            out.index = out.index.map(lambda lvls: sep.join(map(str, lvls)))
+        getattr(out, f"to_{output_format}")(filename, **kwargs)
+
+
 def rechunk_for_saving(ds, rechunk):
     """Rechunk before saving to .zarr or .nc, generalized as Y/X for different axes lat/lon, rlat/rlon.
 

From e3c9fd81ab213443ea435543ff58d3ad4b456510 Mon Sep 17 00:00:00 2001
From: Pascal Bourgault <bourgault.pascal@ouranos.ca>
Date: Tue, 26 Sep 2023 16:30:21 -0400
Subject: [PATCH 03/12] upd hist

---
 HISTORY.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/HISTORY.rst b/HISTORY.rst
index 56c6c41b..75b1ccef 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -15,6 +15,7 @@ New features and enhancements
 * Added the ability to search for simulations that reach a given warming level. (:pull:`251`).
 * ``xs.spatial_mean`` now accepts the ``region="global"`` keyword to perform a global average (:issue:`94`, :pull:`260`).
 * ``xs.spatial_mean`` with ``method='xESMF'`` will also automatically segmentize polygons (down to a 1° resolution) to ensure a correct average (:pull:`260`).
+* ``xs.io.save_to_table`` and ``xs.io.to_table`` to transform datasets and arrays to DataFrames, but with support for multi-columns and multi-sheets.
 
 Breaking changes
 ^^^^^^^^^^^^^^^^

From ed97cd81476befb20e2ae1b3461378b94803d0ac Mon Sep 17 00:00:00 2001
From: Pascal Bourgault <bourgault.pascal@ouranos.ca>
Date: Thu, 28 Sep 2023 11:52:03 -0400
Subject: [PATCH 04/12] Add a test - season sort key

---
 tests/test_io.py | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 xscen/io.py      | 16 ++++++++++------
 xscen/utils.py   | 36 ++++++++++++++++++++++++++++++++++++
 3 files changed, 93 insertions(+), 6 deletions(-)

diff --git a/tests/test_io.py b/tests/test_io.py
index 6fa6efaa..ef9359f3 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -1,5 +1,6 @@
 import numpy as np
 import pytest
+import xarray as xr
 
 import xscen as xs
 
@@ -66,3 +67,49 @@ def test_variables(self, datablock_3d):
         for v in ds_ch.data_vars:
             for dim, chunks in zip(list(ds.dims), ds_ch[v].chunks):
                 assert chunks[0] == new_chunks[v][dim]
+
+
+class TestToTable:
+    ds = xs.utils.unstack_dates(
+        xr.merge(
+            [
+                xs.testing.datablock_3d(
+                    np.random.random_sample((20, 3, 2)),
+                    v,
+                    "lon",
+                    0,
+                    "lat",
+                    0,
+                    1,
+                    1,
+                    "1993-01-01",
+                    "QS-JAN",
+                )
+                for v in ["tas", "pr", "snw"]
+            ]
+        )
+        .stack(site=["lat", "lon"])
+        .reset_index("site")
+        .assign_coords(site=list("abcdef"))
+    ).transpose("season", "time", "site")
+
+    def test_normal(self):
+        # Default
+        tab = xs.io.to_table(self.ds)
+        assert tab.shape == (120, 5)  # 3 vars + 2 aux coords
+        assert tab.columns.names == ["variable"]
+        assert tab.index.names == ["season", "time", "site"]
+        # Season order is chronological, rather than alphabetical
+        assert tab.xs("1993", level="time").xs(
+            "a", level="site"
+        ).index.get_level_values("season") == ["JFM", "AMJ", "JAS", "OND"]
+
+        # Variable in the index, thus no coords
+        tab = xs.io.to_table(
+            self.ds, index=["time", "variable"], column=["season", "site"], coords=False
+        )
+        assert tab.shape == (15, 24)
+        assert tab.columns.names == ["season", "site"]
+        np.testing.assert_array_equal(
+            tab.loc[("1993", "pr"), ("JFM",)], self.ds.pr.sel(time="1993", season="JFM")
+        )
diff --git a/xscen/io.py b/xscen/io.py
index 06e0575d..11cb8d2f 100644
--- a/xscen/io.py
+++ b/xscen/io.py
@@ -4,7 +4,7 @@
 import shutil as sh
 from collections.abc import Sequence
 from pathlib import Path
-from typing import List, Optional, Union
+from typing import Optional, Union
 
 import h5py
 import netCDF4
@@ -17,7 +17,7 @@
 
 from .config import parse_config
 from .scripting import TimeoutException
-from .utils import translate_time_chunk
+from .utils import season_sort_key, translate_time_chunk
 
 logger = logging.getLogger(__name__)
 
@@ -27,10 +27,12 @@
     "estimate_chunks",
     "get_engine",
     "rechunk",
+    "rechunk_for_saving",
+    "save_to_table",
     "save_to_netcdf",
     "save_to_zarr",
     "subset_maxsize",
-    "rechunk_for_saving",
+    "to_table",
 ]
 
 
@@ -532,14 +534,15 @@ def _to_dataframe(
                 [
                     cols.get_level_values(lvl) if lvl in cols.names else [None]
                     for lvl in df_data.columns.names
-                ]
+                ],
+                names=df_data.columns.names,
             )
         dfc.columns = cols
         dfs.append(
             dfc[~dfc.index.duplicated()]
         )  # We dropped columns thus the index is not unique anymore
     dfs.append(df_data)
-    return pd.concat(dfs, axis=1)
+    return pd.concat(dfs, axis=1).sort_index(level=index, key=season_sort_key)
 
 
 def to_table(
@@ -577,6 +580,7 @@ def to_table(
     pd.DataFrame or dict
       DataFrame with a MultiIndex with levels `index` and MultiColumn with levels `column`.
       If `sheet` is given, the output is dictionary with keys for each unique "sheet" dimensions tuple, values are DataFrames.
+      The DataFrames are always sorted with level priority as given in `index` and in ascending order,.
     """
     if isinstance(ds, xr.Dataset):
         da = ds.to_array(name="data")
@@ -611,7 +615,7 @@ def _ensure_list(seq):
         coords = list(set(ds.coords.keys()) - set(da.dims))
     if len(coords) > 1 and "variable" in index:
         raise NotImplementedError(
-            "Keeping auxiliary coords is implemented when 'variable' is in the index."
+            "Keeping auxiliary coords is not implemented when 'variable' is in the index. Pass `coords=False` or put 'variable' in `column` instead."
         )
 
     table_kwargs = dict(
diff --git a/xscen/utils.py b/xscen/utils.py
index cf1e2571..0ad5a790 100644
--- a/xscen/utils.py
+++ b/xscen/utils.py
@@ -1238,3 +1238,39 @@ def standardize_periods(periods, multiple=True):
                 f"'period' should be a single instance of [start, end], received {len(periods)}."
             )
         return periods[0]
+
+
+def season_sort_key(idx: pd.Index, name: str = None):
+    """Get a proper sort key for a "season" or "month" index to avoid alphabetical sorting.
+
+    If any of the values in the index is not recognized as a 3-letter
+    season code or a 3-letter month abbreviation, the operation is
+    aborted and the index is returned untouched.
+    DJF is the first season of the year.
+
+    Parameters
+    ----------
+    idx : pd.Index
+      Any array that implements a `map` method.
+      If name is "month", index elements are expected to be 3-letter month abbreviations, uppercase (JAN, FEB, etc).
+      If name is "season", index elements are expected to be 3-letter season abbreviations, uppercase (DJF, AMJ, OND, etc.)
+      If anything else, the index is returned untouched.
+    name : str, optional
+      The index name. By default, the `name` attribute of the index is used, if present.
+
+    Returns
+    -------
+    idx : Integer sort key for months and seasons, the input index untouched otherwise.
+    """
+    try:
+        if (name or getattr(idx, "name", None)) == "season":
+            m = "DJFMAMJJASONDJ"
+            return idx.map(m.index)
+        if (name or getattr(idx, "name", None)) == "month":
+            m = list(xr.coding.cftime_offsets._MONTH_ABBREVIATIONS.values())
+            return idx.map(m.index)
+    except (ValueError, TypeError):
+        # ValueError if string not in seasons, or value not in months
+        # TypeError if season element was not a string.
+        pass
+    return idx

From 3d47138b20ef10a12e416425bfc7d5899c6eaf60 Mon Sep 17 00:00:00 2001
From: Pascal Bourgault <bourgault.pascal@ouranos.ca>
Date: Thu, 28 Sep 2023 12:25:31 -0400
Subject: [PATCH 05/12] Rename index to row to avoid name collision with pandas
 args

---
 tests/test_io.py |  2 +-
 xscen/io.py      | 80 ++++++++++++++++++++++++++----------------------
 2 files changed, 44 insertions(+), 38 deletions(-)

diff --git a/tests/test_io.py b/tests/test_io.py
index ef9359f3..5b7f3d1a 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -106,7 +106,7 @@ def test_normal(self):
 
         # Variable in the index, thus no coords
         tab = xs.io.to_table(
-            self.ds, index=["time", "variable"], column=["season", "site"], coords=False
+            self.ds, row=["time", "variable"], column=["season", "site"], coords=False
         )
         assert tab.shape == (15, 24)
         assert tab.columns.names == ["season", "site"]
diff --git a/xscen/io.py b/xscen/io.py
index 11cb8d2f..dcf163e2 100644
--- a/xscen/io.py
+++ b/xscen/io.py
@@ -492,7 +492,7 @@ def _skip(var):
 
 def _to_dataframe(
     data: xr.DataArray,
-    index: list[str],
+    row: list[str],
     column: list[str],
     coords: list[str],
     coords_dims: dict,
@@ -505,7 +505,7 @@ def _to_dataframe(
     df_data = (
         df[[data.name]]
         .reset_index()
-        .pivot(index=index, columns=column)
+        .pivot(index=row, columns=column)
         .droplevel(None, axis=1)
     )
     dfs = []
@@ -513,10 +513,7 @@ def _to_dataframe(
         drop_cols = [c for c in column if c not in coords_dims[v]]
         cols = [c for c in column if c in coords_dims[v]]
         dfc = (
-            df[[v]]
-            .reset_index()
-            .drop(columns=drop_cols)
-            .pivot(index=index, columns=cols)
+            df[[v]].reset_index().drop(columns=drop_cols).pivot(index=row, columns=cols)
         )
         cols = dfc.columns
         # The "None" level has the aux coord name we want it either at the same level as variable, or at lowest missing level otherwise.
@@ -542,13 +539,13 @@ def _to_dataframe(
             dfc[~dfc.index.duplicated()]
         )  # We dropped columns thus the index is not unique anymore
     dfs.append(df_data)
-    return pd.concat(dfs, axis=1).sort_index(level=index, key=season_sort_key)
+    return pd.concat(dfs, axis=1).sort_index(level=row, key=season_sort_key)
 
 
 def to_table(
     ds: Union[xr.Dataset, xr.DataArray],
     *,
-    index: Union[None, str, Sequence[str]] = None,
+    row: Union[None, str, Sequence[str]] = None,
     column: Union[None, str, Sequence[str]] = None,
     sheet: Union[None, str, Sequence[str]] = None,
     coords: Union[bool, Sequence[str]] = True,
@@ -562,12 +559,12 @@ def to_table(
     ds : xr.Dataset or xr.DataArray
       Dataset or DataArray to be saved.
       If a Dataset with more than one variable is given, the dimension "variable"
-      must appear in one of `index`, `column` or `sheet`.
-    index : str or sequence of str, optional
-      Name of the dimension(s) to use as index.
+      must appear in one of `row`, `column` or `sheet`.
+    row : str or sequence of str, optional
+      Name of the dimension(s) to use as indexes (rows).
       Default is all data dimensions.
     column : str or sequence of str, optional
-      Name of the dimension(s) to use as index.
+      Name of the dimension(s) to use as columns.
       Default is "variable", i.e. the name of the variable(s).
     sheet : str or sequence of str, optional
       Name of the dimension(s) to use as sheet names.
@@ -578,9 +575,9 @@ def to_table(
     Returns
     -------
     pd.DataFrame or dict
-      DataFrame with a MultiIndex with levels `index` and MultiColumn with levels `column`.
+      DataFrame with a MultiIndex with levels `row` and MultiColumn with levels `column`.
       If `sheet` is given, the output is dictionary with keys for each unique "sheet" dimensions tuple, values are DataFrames.
-      The DataFrames are always sorted with level priority as given in `index` and in ascending order,.
+      The DataFrames are always sorted with level priority as given in `row` and in ascending order.
     """
     if isinstance(ds, xr.Dataset):
         da = ds.to_array(name="data")
@@ -592,19 +589,19 @@ def _ensure_list(seq):
             return [seq]
         return list(seq)
 
-    index = _ensure_list(index or (set(da.dims) - {"variable"}))
+    row = _ensure_list(row or (set(da.dims) - {"variable"}))
     column = _ensure_list(column or (["variable"] if len(ds) > 1 else []))
     sheet = _ensure_list(sheet or [])
 
-    needed_dims = index + column + sheet
+    needed_dims = row + column + sheet
     if len(set(needed_dims)) != len(needed_dims):
         raise ValueError(
-            f"Repeated dimension names. Got index={index}, column={column} and sheet={sheet}."
+            f"Repeated dimension names. Got row={row}, column={column} and sheet={sheet}."
             "Each dimension should appear only once."
         )
     if set(needed_dims) != set(da.dims):
         raise ValueError(
-            f"Passed index, column and sheet do not match available dimensions. Got {needed_dims}, data has {da.dims}."
+            f"Passed row, column and sheet do not match available dimensions. Got {needed_dims}, data has {da.dims}."
         )
 
     coords = coords or []
@@ -613,13 +610,14 @@ def _ensure_list(seq):
         da = da.drop_vars(drop)
     else:
         coords = list(set(ds.coords.keys()) - set(da.dims))
-    if len(coords) > 1 and "variable" in index:
+    if len(coords) > 1 and ("variable" in row or "variable" in sheet):
         raise NotImplementedError(
-            "Keeping auxiliary coords is not implemented when 'variable' is in the index. Pass `coords=False` or put 'variable' in `column` instead."
+            "Keeping auxiliary coords is not implemented when 'variable' is in the row or in the sheets. "
+            "Pass `coords=False` or put 'variable' in `column` instead."
         )
 
     table_kwargs = dict(
-        index=index,
+        row=row,
         column=column,
         coords=coords,
         coords_dims={c: ds[c].dims for c in coords},
@@ -643,11 +641,12 @@ def save_to_table(
     filename: str,
     output_format: Optional[str] = None,
     *,
-    index: Union[None, str, Sequence[str]] = None,
+    row: Union[None, str, Sequence[str]] = None,
     column: Union[None, str, Sequence[str]] = "variable",
     sheet: Union[None, str, Sequence[str]] = None,
     coords: Union[bool, Sequence[str]] = True,
-    sep: str = "_",
+    col_sep: str = "_",
+    row_sep: str = None,
     **kwargs,
 ):
     """Save the dataset to a tabular file (csv, excel, ...).
@@ -659,18 +658,18 @@ def save_to_table(
     ds : xr.Dataset or xr.DataArray
       Dataset or DataArray to be saved.
       If a Dataset with more than one variable is given, the dimension "variable"
-      must appear in one of `index`, `column` or `sheet`.
+      must appear in one of `row`, `column` or `sheet`.
     filename : str
       Name of the file to be saved.
     output_format: {'csv', 'excel', ...}, optional
       The output format. If None (default), it is inferred
       from the extension of `filename`. Not all possible output format are supported for inference.
       Valid values are any that matches a :py:class:`pandas.DataFrame` method like "df.to_{format}".
-    index : str or sequence of str, optional
-      Name of the dimension(s) to use as index.
+    row : str or sequence of str, optional
+      Name of the dimension(s) to use as indexes (rows).
       Default is all data dimensions.
     column : str or sequence of str, optional
-      Name of the dimension(s) to use as index.
+      Name of the dimension(s) to use as columns.
       Default is "variable", i.e. the name of the variable(s).
     sheet : str or sequence of str, optional
       Name of the dimension(s) to use as sheet names.
@@ -678,10 +677,11 @@ def save_to_table(
     coords: bool or sequence of str
       A list of auxiliary coordinates to add to the columns (as would variables).
       If True, all (if any) are added.
-    sep : str
-      For output formats other than excel and for sheet names,
-      index, column and sheet names from multiple dimensions are
-      constructed by concatenating values with this separator.
+    col_sep : str
+      Multi-columns (except in excel) and sheet names are concatenated with this separator.
+    row_sep : str, optional
+      Multi-index names are concatenated with this separator, except in excel.
+      If None (default), each level is written in its own column.
     kwargs:
       Other arguments passed to the panda function.
     """
@@ -699,17 +699,23 @@ def save_to_table(
             f"Argument `sheet` is only valid with excel as the output format. Got {output_format}."
         )
 
-    out = to_table(ds, index=index, column=column, sheet=sheet, coords=coords)
+    out = to_table(ds, row=row, column=column, sheet=sheet, coords=coords)
 
     if sheet:
         with pd.ExcelWriter(filename, engine=kwargs.get("engine")) as writer:
             for sheet_name, df in out.items():
-                df.to_excel(writer, sheet_name=sep.join(sheet_name), **kwargs)
+                df.to_excel(writer, sheet_name=col_sep.join(sheet_name), **kwargs)
     else:
-        if isinstance(out.columns, pd.MultiIndex):
-            out.columns = out.columns.map(lambda lvls: sep.join(map(str, lvls)))
-        if isinstance(out.index, pd.MultiIndex):
-            out.index = out.index.map(lambda lvls: sep.join(map(str, lvls)))
+        if output_format != "excel" and isinstance(out.columns, pd.MultiIndex):
+            out.columns = out.columns.map(lambda lvls: col_sep.join(map(str, lvls)))
+        if (
+            output_format != "excel"
+            and row_sep is not None
+            and isinstance(out.index, pd.MultiIndex)
+        ):
+            new_name = row_sep.join(out.index.names)
+            out.index = out.index.map(lambda lvls: row_sep.join(map(str, lvls)))
+            out.index.name = new_name
         getattr(out, f"to_{output_format}")(filename, **kwargs)
 
 

From 951a694319bb6fae6eb3f68fe249ff355ca34e4c Mon Sep 17 00:00:00 2001
From: Pascal Bourgault <bourgault.pascal@ouranos.ca>
Date: Thu, 28 Sep 2023 15:09:28 -0400
Subject: [PATCH 06/12] keep order

---
 xscen/io.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xscen/io.py b/xscen/io.py
index dcf163e2..d4c007ea 100644
--- a/xscen/io.py
+++ b/xscen/io.py
@@ -589,7 +589,7 @@ def _ensure_list(seq):
             return [seq]
         return list(seq)
 
-    row = _ensure_list(row or (set(da.dims) - {"variable"}))
+    row = _ensure_list(row or (list(da.dims) - {"variable"}))
     column = _ensure_list(column or (["variable"] if len(ds) > 1 else []))
     sheet = _ensure_list(sheet or [])
 

From c0df03c20d5264a0450540ac7a508e92afd9ca1e Mon Sep 17 00:00:00 2001
From: Pascal Bourgault <bourgault.pascal@ouranos.ca>
Date: Thu, 28 Sep 2023 15:26:44 -0400
Subject: [PATCH 07/12] coding while in a meeting is impolite

---
 xscen/io.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xscen/io.py b/xscen/io.py
index d4c007ea..c090c374 100644
--- a/xscen/io.py
+++ b/xscen/io.py
@@ -589,7 +589,7 @@ def _ensure_list(seq):
             return [seq]
         return list(seq)
 
-    row = _ensure_list(row or (list(da.dims) - {"variable"}))
+    row = _ensure_list(row or ([d for d in da.dims if d != "variable"]))
     column = _ensure_list(column or (["variable"] if len(ds) > 1 else []))
     sheet = _ensure_list(sheet or [])
 

From 99df3e2e080a2a45801cd9f9d7db3b8160dbc709 Mon Sep 17 00:00:00 2001
From: Pascal Bourgault <bourgault.pascal@ouranos.ca>
Date: Thu, 28 Sep 2023 15:35:24 -0400
Subject: [PATCH 08/12] fix test

---
 tests/test_io.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tests/test_io.py b/tests/test_io.py
index 5b7f3d1a..c8083246 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -100,9 +100,12 @@ def test_normal(self):
         assert tab.columns.names == ["variable"]
         assert tab.index.names == ["season", "time", "site"]
         # Season order is chronological, rather than alphabetical
-        assert tab.xs("1993", level="time").xs(
-            "a", level="site"
-        ).index.get_level_values("season") == ["JFM", "AMJ", "JAS", "OND"]
+        np.testing.assert_array_equal(
+            tab.xs("1993", level="time")
+            .xs("a", level="site")
+            .index.get_level_values("season"),
+            ["JFM", "AMJ", "JAS", "OND"],
+        )
 
         # Variable in the index, thus no coords
         tab = xs.io.to_table(

From 9dbb8af7caae13dcc5cbedaf8d67fef20b0a19ed Mon Sep 17 00:00:00 2001
From: Pascal Bourgault <bourgault.pascal@ouranos.ca>
Date: Fri, 29 Sep 2023 11:56:25 -0400
Subject: [PATCH 09/12] Add simple toc generation

---
 xscen/data/fr/LC_MESSAGES/xscen.po | 26 +++++++++++---
 xscen/io.py                        | 58 +++++++++++++++++++++++++++---
 2 files changed, 75 insertions(+), 9 deletions(-)

diff --git a/xscen/data/fr/LC_MESSAGES/xscen.po b/xscen/data/fr/LC_MESSAGES/xscen.po
index 34f5a8cd..e7d76e0a 100644
--- a/xscen/data/fr/LC_MESSAGES/xscen.po
+++ b/xscen/data/fr/LC_MESSAGES/xscen.po
@@ -7,7 +7,7 @@ msgid ""
 msgstr ""
 "Project-Id-Version: xscen 0.6.18b0\n"
 "Report-Msgid-Bugs-To: Rondeau-Genesse.Gabriel@ouranos.ca\n"
-"POT-Creation-Date: 2023-09-08 15:17-0400\n"
+"POT-Creation-Date: 2023-09-29 11:45-0400\n"
 "PO-Revision-Date: 2023-08-15 16:48-0400\n"
 "Last-Translator: Pascal Bourgault <bourgault.pascal@ouranos.ca>\n"
 "Language: fr\n"
@@ -18,18 +18,34 @@ msgstr ""
 "Content-Transfer-Encoding: 8bit\n"
 "Generated-By: Babel 2.12.1\n"
 
-#: xscen/aggregate.py:184
+#: xscen/aggregate.py:185
 msgid "{window}-year mean of {attr}."
 msgstr "Moyenne {window} ans de {attr}."
 
-#: xscen/aggregate.py:318
+#: xscen/aggregate.py:319
 msgid "{attr1}: {kind} delta compared to {refhoriz}."
 msgstr "{attr1}: Delta {kind} comparé à {refhoriz}."
 
-#: xscen/diagnostics.py:500
+#: xscen/diagnostics.py:501
 msgid "Ranking of measure performance"
 msgstr "Classement de performance de la mesure"
 
-#: xscen/diagnostics.py:559
+#: xscen/diagnostics.py:560
 msgid "Fraction of improved grid cells"
 msgstr "Fraction de points de grille améliorés"
+
+#: xscen/io.py:650
+msgid "Variable"
+msgstr "Variable"
+
+#: xscen/io.py:650
+msgid "Description"
+msgstr "Description"
+
+#: xscen/io.py:650
+msgid "Units"
+msgstr "Unités"
+
+#: xscen/io.py:654
+msgid "Content"
+msgstr "Contenu"
diff --git a/xscen/io.py b/xscen/io.py
index c090c374..8ae86f9b 100644
--- a/xscen/io.py
+++ b/xscen/io.py
@@ -14,10 +14,12 @@
 import zarr
 from rechunker import rechunk as _rechunk
 from xclim.core.calendar import get_calendar
+from xclim.core.options import METADATA_LOCALES
+from xclim.core.options import OPTIONS as XC_OPTIONS
 
 from .config import parse_config
 from .scripting import TimeoutException
-from .utils import season_sort_key, translate_time_chunk
+from .utils import TRANSLATOR, season_sort_key, translate_time_chunk
 
 logger = logging.getLogger(__name__)
 
@@ -26,6 +28,7 @@
     "clean_incomplete",
     "estimate_chunks",
     "get_engine",
+    "make_toc",
     "rechunk",
     "rechunk_for_saving",
     "save_to_table",
@@ -633,6 +636,36 @@ def _ensure_list(seq):
     return _to_dataframe(da, **table_kwargs)
 
 
+def make_toc(ds: Union[xr.Dataset, xr.DataArray], loc: str = None) -> pd.DataFrame:
+    """Make a table of contents describing a dataset's variables.
+
+    This returns a simple DataFrame with variable names as index, the long_name as "description" and units.
+    Column names and long names are taken from the activated locale if found, otherwise the English version is taken.
+    """
+    if loc is None:
+        loc = (XC_OPTIONS[METADATA_LOCALES] or ["en"])[0]
+    locsuf = "" if loc == "en" else f"_{loc}"
+    _ = TRANSLATOR[loc]  # Combine translation and gettext parsing (like it usually is)
+
+    if isinstance(ds, xr.DataArray):
+        ds = ds.to_dataset()
+
+    toc = pd.DataFrame.from_records(
+        [
+            {
+                _("Variable"): vv,
+                _("Description"): da.attrs.get(
+                    f"long_name{locsuf}", da.attrs.get("long_name")
+                ),
+                _("Units"): da.attrs.get("units"),
+            }
+            for vv, da in ds.data_vars.items()
+        ],
+    ).set_index(_("Variable"))
+    toc.attrs["name"] = _("Content")
+    return toc
+
+
 TABLE_FORMATS = {".csv": "csv", ".xls": "excel", ".xlsx": "excel"}
 
 
@@ -647,6 +680,7 @@ def save_to_table(
     coords: Union[bool, Sequence[str]] = True,
     col_sep: str = "_",
     row_sep: str = None,
+    add_toc: Union[bool, pd.DataFrame] = False,
     **kwargs,
 ):
     """Save the dataset to a tabular file (csv, excel, ...).
@@ -682,8 +716,13 @@ def save_to_table(
     row_sep : str, optional
       Multi-index names are concatenated with this separator, except in excel.
       If None (default), each level is written in its own column.
+    add_toc : bool or DataFrame
+      A table of contents to add as the first sheet. Only valid if the output format is excel.
+      If True, :py:func:`make_toc` is used to generate the toc.
+      The sheet name of the toc can be given through the "name" attribute of the DataFrame, otherwise "Content" is used.
     kwargs:
-      Other arguments passed to the panda function.
+      Other arguments passed to the pandas function.
+      If the output format is excel and multiple sheets are requested, "engine" will be passed to :py:class:`pandas.ExcelWriter`.
     """
     filename = Path(filename)
 
@@ -698,11 +737,22 @@ def save_to_table(
         raise ValueError(
             f"Argument `sheet` is only valid with excel as the output format. Got {output_format}."
         )
+    if add_toc is not False and output_format != "excel":
+        raise ValueError(
+            f"A TOC was requested, but the output format is not Excel. Got {output_format}."
+        )
 
     out = to_table(ds, row=row, column=column, sheet=sheet, coords=coords)
 
-    if sheet:
-        with pd.ExcelWriter(filename, engine=kwargs.get("engine")) as writer:
+    if add_toc is not False:
+        if not sheet:
+            out = {("data",): out}
+        if add_toc is True:
+            add_toc = make_toc(ds)
+        out = {(add_toc.attrs.get("name", "Content"),): add_toc, **out}
+
+    if sheet or (add_toc is not False):
+        with pd.ExcelWriter(filename, engine=kwargs.pop("engine", None)) as writer:
             for sheet_name, df in out.items():
                 df.to_excel(writer, sheet_name=col_sep.join(sheet_name), **kwargs)
     else:

From 30254590e5a51ac49e8a79d8f4698bcb88ede847 Mon Sep 17 00:00:00 2001
From: Pascal Bourgault <bourgault.pascal@ouranos.ca>
Date: Fri, 29 Sep 2023 12:24:50 -0400
Subject: [PATCH 10/12] Better dim guessing - get engine kwargs from kwargs

---
 xscen/io.py | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/xscen/io.py b/xscen/io.py
index 8ae86f9b..5bb876ae 100644
--- a/xscen/io.py
+++ b/xscen/io.py
@@ -3,6 +3,7 @@
 import os
 import shutil as sh
 from collections.abc import Sequence
+from inspect import signature
 from pathlib import Path
 from typing import Optional, Union
 
@@ -592,9 +593,18 @@ def _ensure_list(seq):
             return [seq]
         return list(seq)
 
-    row = _ensure_list(row or ([d for d in da.dims if d != "variable"]))
-    column = _ensure_list(column or (["variable"] if len(ds) > 1 else []))
-    sheet = _ensure_list(sheet or [])
+    passed_dims = set().union(
+        _ensure_list(row or []), _ensure_list(column or []), _ensure_list(sheet or [])
+    )
+    if row is None:
+        row = [d for d in da.dims if d != "variable" and d not in passed_dims]
+    row = _ensure_list(row)
+    if column is None:
+        column = ["variable"] if len(ds) > 1 and "variable" not in passed_dims else []
+    column = _ensure_list(column)
+    if sheet is None:
+        sheet = []
+    sheet = _ensure_list(sheet)
 
     needed_dims = row + column + sheet
     if len(set(needed_dims)) != len(needed_dims):
@@ -722,7 +732,7 @@ def save_to_table(
       The sheet name of the toc can be given through the "name" attribute of the DataFrame, otherwise "Content" is used.
     kwargs:
       Other arguments passed to the pandas function.
-      If the output format is excel and multiple sheets are requested, "engine" will be passed to :py:class:`pandas.ExcelWriter`.
+      If the output format is excel, kwargs to :py:class:`pandas.ExcelWriter` can be given here as well.
     """
     filename = Path(filename)
 
@@ -752,7 +762,12 @@ def save_to_table(
         out = {(add_toc.attrs.get("name", "Content"),): add_toc, **out}
 
     if sheet or (add_toc is not False):
-        with pd.ExcelWriter(filename, engine=kwargs.pop("engine", None)) as writer:
+        engine_kwargs = {}  # Extract engine kwargs
+        for arg in signature(pd.ExcelWriter).parameters:
+            if arg in kwargs:
+                engine_kwargs[arg] = kwargs.pop(arg)
+
+        with pd.ExcelWriter(filename, **engine_kwargs) as writer:
             for sheet_name, df in out.items():
                 df.to_excel(writer, sheet_name=col_sep.join(sheet_name), **kwargs)
     else:

From 4383bc8dfb689b2e580382f51ed7c59a11b69e06 Mon Sep 17 00:00:00 2001
From: Pascal Bourgault <bourgault.pascal@ouranos.ca>
Date: Fri, 29 Sep 2023 14:15:30 -0400
Subject: [PATCH 11/12] Add save_to_table to top pkg - more details in hist

---
 HISTORY.rst       | 2 +-
 xscen/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index 75b1ccef..24b8f090 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -15,7 +15,7 @@ New features and enhancements
 * Added the ability to search for simulations that reach a given warming level. (:pull:`251`).
 * ``xs.spatial_mean`` now accepts the ``region="global"`` keyword to perform a global average (:issue:`94`, :pull:`260`).
 * ``xs.spatial_mean`` with ``method='xESMF'`` will also automatically segmentize polygons (down to a 1° resolution) to ensure a correct average (:pull:`260`).
-* ``xs.io.save_to_table`` and ``xs.io.to_table`` to transform datasets and arrays to DataFrames, but with support for multi-columns and multi-sheets.
+* Added ``xs.save_to_table`` and ``xs.io.to_table`` to transform datasets and arrays to DataFrames, but with support for multi-columns, multi-sheets and localized table of contents generation.
 
 Breaking changes
 ^^^^^^^^^^^^^^^^
diff --git a/xscen/__init__.py b/xscen/__init__.py
index 6d8b718e..002f2a61 100644
--- a/xscen/__init__.py
+++ b/xscen/__init__.py
@@ -36,7 +36,7 @@
     subset_warming_level,
 )
 from .indicators import compute_indicators  # noqa
-from .io import save_to_netcdf, save_to_zarr  # noqa
+from .io import save_to_netcdf, save_to_table, save_to_zarr  # noqa
 from .reduce import build_reduction_data, reduce_ensemble
 from .regrid import *
 from .scripting import (

From db8786c513eccedde7251799d715d7f3d2223a69 Mon Sep 17 00:00:00 2001
From: Pascal Bourgault <bourgault.pascal@ouranos.ca>
Date: Mon, 2 Oct 2023 11:35:40 -0400
Subject: [PATCH 12/12] Allow single string in `coords`

---
 xscen/io.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/xscen/io.py b/xscen/io.py
index 5bb876ae..87235767 100644
--- a/xscen/io.py
+++ b/xscen/io.py
@@ -552,7 +552,7 @@ def to_table(
     row: Union[None, str, Sequence[str]] = None,
     column: Union[None, str, Sequence[str]] = None,
     sheet: Union[None, str, Sequence[str]] = None,
-    coords: Union[bool, Sequence[str]] = True,
+    coords: Union[bool, str, Sequence[str]] = True,
 ) -> Union[pd.DataFrame, dict]:
     """Convert a dataset to a pandas DataFrame with support for multicolumns and multisheet.
 
@@ -572,7 +572,7 @@ def to_table(
       Default is "variable", i.e. the name of the variable(s).
     sheet : str or sequence of str, optional
       Name of the dimension(s) to use as sheet names.
-    coords: bool or sequence of str
+    coords: bool or str or sequence of str
       A list of auxiliary coordinates to add to the columns (as would variables).
       If True, all (if any) are added.
 
@@ -617,8 +617,8 @@ def _ensure_list(seq):
             f"Passed row, column and sheet do not match available dimensions. Got {needed_dims}, data has {da.dims}."
         )
 
-    coords = coords or []
     if coords is not True:
+        coords = _ensure_list(coords or [])
         drop = set(ds.coords.keys()) - set(da.dims) - set(coords)
         da = da.drop_vars(drop)
     else: