From 7deee5707e437bcde9dd622374834f84565dd8f0 Mon Sep 17 00:00:00 2001
From: DHRUVA KUMAR KAUSHAL <sanjay@MacBook-Air.local>
Date: Mon, 1 Dec 2025 21:07:44 +0530
Subject: [PATCH 1/5] Add create_index parameter to to_dataframe

---
 xarray/core/dataarray.py       | 12 ++++++++++--
 xarray/core/dataset.py         | 23 +++++++++++++++++++----
 xarray/tests/test_dataarray.py | 26 ++++++++++++++++++++++++++
 xarray/tests/test_dataset.py   | 32 ++++++++++++++++++++++++++++++++
 4 files changed, 87 insertions(+), 6 deletions(-)

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 6c8d0617038..f8a42c53faf 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -3954,7 +3954,10 @@ def to_pandas(self) -> Self | pd.Series | pd.DataFrame:
         return pandas_object
 
     def to_dataframe(
-        self, name: Hashable | None = None, dim_order: Sequence[Hashable] | None = None
+        self,
+        name: Hashable | None = None,
+        dim_order: Sequence[Hashable] | None = None,
+        create_index: bool = True,
     ) -> pd.DataFrame:
         """Convert this array and its coordinates into a tidy pandas.DataFrame.
 
@@ -3979,6 +3982,11 @@ def to_dataframe(
 
             If provided, must include all dimensions of this DataArray. By default,
             dimensions are sorted according to the DataArray dimensions order.
+        create_index : bool, default: True
+            If True (default), create a MultiIndex from the Cartesian product
+            of this DataArray's indices. If False, use a RangeIndex instead.
+            This can be useful to avoid the potentially expensive MultiIndex
+            creation.
 
         Returns
         -------
@@ -4013,7 +4021,7 @@ def to_dataframe(
         else:
             ordered_dims = ds._normalize_dim_order(dim_order=dim_order)
 
-        df = ds._to_dataframe(ordered_dims)
+        df = ds._to_dataframe(ordered_dims, create_index=create_index)
         df.columns = [name if c == unique_name else c for c in df.columns]
         return df
 
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 9c2c2f60db1..84d5a26ba8d 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -7200,7 +7200,7 @@ def to_pandas(self) -> pd.Series | pd.DataFrame:
             "Please use Dataset.to_dataframe() instead."
         )
 
-    def _to_dataframe(self, ordered_dims: Mapping[Any, int]):
+    def _to_dataframe(self, ordered_dims: Mapping[Any, int], create_index: bool = True):
         from xarray.core.extension_array import PandasExtensionArray
 
         # All and only non-index arrays (whether data or coordinates) should
@@ -7231,7 +7231,13 @@ def _to_dataframe(self, ordered_dims: Mapping[Any, int]):
             self._variables[k].set_dims(ordered_dims).values.reshape(-1)
             for k in non_extension_array_columns
         ]
-        index = self.coords.to_index([*ordered_dims])
+        if create_index:
+            index = self.coords.to_index([*ordered_dims])
+        else:
+            # Use a simple RangeIndex when create_index=False
+            # Calculate the total size from ordered_dims
+            total_size = np.prod(list(ordered_dims.values())) if ordered_dims else 0
+            index = pd.RangeIndex(total_size)
         broadcasted_df = pd.DataFrame(
             {
                 **dict(zip(non_extension_array_columns, data, strict=True)),
@@ -7259,7 +7265,11 @@ def _to_dataframe(self, ordered_dims: Mapping[Any, int]):
             broadcasted_df = broadcasted_df.join(extension_array_df)
         return broadcasted_df[columns_in_order]
 
-    def to_dataframe(self, dim_order: Sequence[Hashable] | None = None) -> pd.DataFrame:
+    def to_dataframe(
+        self,
+        dim_order: Sequence[Hashable] | None = None,
+        create_index: bool = True,
+    ) -> pd.DataFrame:
         """Convert this dataset into a pandas.DataFrame.
 
         Non-index variables in this dataset form the columns of the
@@ -7278,6 +7288,11 @@ def to_dataframe(self, dim_order: Sequence[Hashable] | None = None) -> pd.DataFr
 
             If provided, must include all dimensions of this dataset. By
             default, dimensions are in the same order as in `Dataset.sizes`.
+        create_index : bool, default: True
+            If True (default), create a MultiIndex from the Cartesian product
+            of this dataset's indices. If False, use a RangeIndex instead.
+            This can be useful to avoid the potentially expensive MultiIndex
+            creation.
 
         Returns
         -------
@@ -7288,7 +7303,7 @@ def to_dataframe(self, dim_order: Sequence[Hashable] | None = None) -> pd.DataFr
 
         ordered_dims = self._normalize_dim_order(dim_order=dim_order)
 
-        return self._to_dataframe(ordered_dims=ordered_dims)
+        return self._to_dataframe(ordered_dims=ordered_dims, create_index=create_index)
 
     def _set_sparse_data_from_dataframe(
         self, idx: pd.Index, arrays: list[tuple[Hashable, np.ndarray]], dims: tuple
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index 5eec7b8a2fd..2cf75fb58e2 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -3553,6 +3553,32 @@ def test_to_dataframe_0length(self) -> None:
         assert len(actual) == 0
         assert_array_equal(actual.index.names, list("ABC"))
 
+    def test_to_dataframe_create_index(self) -> None:
+        # Test create_index parameter
+        arr_np = np.arange(12).reshape(3, 4)
+        arr = DataArray(arr_np, [("x", [1, 2, 3]), ("y", list("abcd"))], name="foo")
+
+        # Default behavior: create MultiIndex
+        df_with_index = arr.to_dataframe()
+        assert isinstance(df_with_index.index, pd.MultiIndex)
+        assert df_with_index.index.names == ["x", "y"]
+        assert len(df_with_index) == 12
+
+        # With create_index=False: use RangeIndex
+        df_without_index = arr.to_dataframe(create_index=False)
+        assert isinstance(df_without_index.index, pd.RangeIndex)
+        assert len(df_without_index) == 12
+
+        # Data should be the same regardless
+        assert_array_equal(df_with_index["foo"].values, df_without_index["foo"].values)
+
+        # Test with coords that have different dimensions
+        arr.coords["z"] = ("x", [-1, -2, -3])
+        df_with_coords = arr.to_dataframe(create_index=False)
+        assert isinstance(df_with_coords.index, pd.RangeIndex)
+        assert "z" in df_with_coords.columns
+        assert len(df_with_coords) == 12
+
     @pytest.mark.parametrize(
         "x_dtype,y_dtype,v_dtype",
         [
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index e677430dfbf..e30fa28bbbb 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -2259,6 +2259,38 @@ def test_to_pandas(self) -> None:
         with pytest.raises(ValueError, match=r"cannot convert Datasets"):
             Dataset({"a": (["t", "r"], x2d), "b": (["t", "r"], y2d)}).to_pandas()
 
+    def test_to_dataframe_create_index(self) -> None:
+        # Test create_index parameter for Dataset
+        x = np.random.randn(3, 4)
+        y = np.random.randn(3, 4)
+        ds = Dataset(
+            {"a": (("x", "y"), x), "b": (("x", "y"), y)},
+            coords={"x": [1, 2, 3], "y": list("abcd")},
+        )
+
+        # Default behavior: create MultiIndex
+        df_with_index = ds.to_dataframe()
+        assert isinstance(df_with_index.index, pd.MultiIndex)
+        assert df_with_index.index.names == ["x", "y"]
+        assert len(df_with_index) == 12
+
+        # With create_index=False: use RangeIndex
+        df_without_index = ds.to_dataframe(create_index=False)
+        assert isinstance(df_without_index.index, pd.RangeIndex)
+        assert len(df_without_index) == 12
+
+        # Data should be the same regardless
+        assert_array_equal(df_with_index["a"].values, df_without_index["a"].values)
+        assert_array_equal(df_with_index["b"].values, df_without_index["b"].values)
+
+        # Test with dim_order and create_index=False
+        df_reordered = ds.to_dataframe(dim_order=["y", "x"], create_index=False)
+        assert isinstance(df_reordered.index, pd.RangeIndex)
+        assert len(df_reordered) == 12
+        # Data ordering must match the indexed result for the same dim_order
+        df_reordered_with_idx = ds.to_dataframe(dim_order=["y", "x"])
+        assert_array_equal(df_reordered["a"].values, df_reordered_with_idx["a"].values)
+
     def test_reindex_like(self) -> None:
         data = create_test_data()
         data["letters"] = ("dim3", 10 * ["a"])

From 139a19b73dc8720fb75128c577a1989167b99071 Mon Sep 17 00:00:00 2001
From: DHRUVA KUMAR KAUSHAL <sanjay@MacBook-Air.local>
Date: Thu, 4 Dec 2025 03:29:44 +0530
Subject: [PATCH 2/5] Cast RangeIndex size to int and reformat

---
 xarray/core/dataset.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 5ba895d4bb0..3c02ef293f5 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -7236,7 +7236,9 @@ def _to_dataframe(self, ordered_dims: Mapping[Any, int], create_index: bool = Tr
         else:
             # Use a simple RangeIndex when create_index=False
             # Calculate the total size from ordered_dims
-            total_size = np.prod(list(ordered_dims.values())) if ordered_dims else 0
+            total_size = (
+                int(np.prod(list(ordered_dims.values()))) if ordered_dims else 0
+            )
             index = pd.RangeIndex(total_size)
         broadcasted_df = pd.DataFrame(
             {

From 843cf7d1311ea4928a54cb5df945386a22c447bc Mon Sep 17 00:00:00 2001
From: DHRUVA KUMAR KAUSHAL <sanjay@MacBook-Air.local>
Date: Sun, 28 Dec 2025 00:25:00 +0530
Subject: [PATCH 3/5] Add create_index to to_dask_dataframe and document it

---
 doc/whats-new.rst              |  7 ++++
 xarray/core/dataarray.py       | 14 ++++++--
 xarray/core/dataset.py         | 31 +++++++++++++----
 xarray/tests/test_dask.py      | 63 ++++++++++++++++++++++++++++++++++
 xarray/tests/test_dataarray.py | 54 +++++++++++++++++++++++++++++
 5 files changed, 160 insertions(+), 9 deletions(-)

diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index 23af750060c..1f97508e1b2 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -23,6 +23,13 @@ New Features
   (:pull:`10849`).
   By `Stephan Hoyer <https://github.com/shoyer>`_.
 
+- Added ``create_index`` parameter to :py:meth:`Dataset.to_dataframe`, :py:meth:`DataArray.to_dataframe`,
+  :py:meth:`Dataset.to_dask_dataframe`, and :py:meth:`DataArray.to_dask_dataframe` methods.
+  With ``create_index=False``, ``to_dataframe`` returns a :py:class:`pandas.RangeIndex` instead of a
+  :py:class:`pandas.MultiIndex`, and ``to_dask_dataframe`` omits the dimension coordinate columns; both
+  can significantly improve performance when the dimension index is not needed.
+  By `Sanjay Kumar <https://github.com/sanjay>`_.
+
 Breaking Changes
 ~~~~~~~~~~~~~~~~
 
diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index c26a3cdf176..132394fa9ad 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -3983,8 +3983,8 @@ def to_dataframe(
             If provided, must include all dimensions of this DataArray. By default,
             dimensions are sorted according to the DataArray dimensions order.
         create_index : bool, default: True
-            If True (default), create a MultiIndex from the Cartesian product
-            of this DataArray's indices. If False, use a RangeIndex instead.
+            If True (default), create a :py:class:`pandas.MultiIndex` from the Cartesian product
+            of this DataArray's indices. If False, use a :py:class:`pandas.RangeIndex` instead.
             This can be useful to avoid the potentially expensive MultiIndex
             creation.
 
@@ -7587,6 +7587,7 @@ def to_dask_dataframe(
         self,
         dim_order: Sequence[Hashable] | None = None,
         set_index: bool = False,
+        create_index: bool = True,
     ) -> DaskDataFrame:
         """Convert this array into a dask.dataframe.DataFrame.
 
@@ -7602,6 +7603,13 @@ def to_dask_dataframe(
             If set_index=True, the dask DataFrame is indexed by this dataset's
             coordinate. Since dask DataFrames do not support multi-indexes,
             set_index only works if the dataset only contains one dimension.
+        create_index : bool, default: True
+            If ``create_index=False``, the resulting DataFrame will use a
+            :py:class:`pandas.RangeIndex` instead of setting dimensions as index columns.
+            This can significantly improve performance when the default index is not needed.
+            ``create_index=False`` is incompatible with ``set_index=True``.
+
+            .. versionadded:: 2025.01.1
 
         Returns
         -------
@@ -7646,7 +7654,7 @@ def to_dask_dataframe(
             )
         name = self.name
         ds = self._to_dataset_whole(name, shallow_copy=False)
-        return ds.to_dask_dataframe(dim_order, set_index)
+        return ds.to_dask_dataframe(dim_order, set_index, create_index)
 
     # this needs to be at the end, or mypy will confuse with `str`
     # https://mypy.readthedocs.io/en/latest/common_issues.html#dealing-with-conflicting-names
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 3c02ef293f5..9fe6b50fb52 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -7291,8 +7291,8 @@ def to_dataframe(
             If provided, must include all dimensions of this dataset. By
             default, dimensions are in the same order as in `Dataset.sizes`.
         create_index : bool, default: True
-            If True (default), create a MultiIndex from the Cartesian product
-            of this dataset's indices. If False, use a RangeIndex instead.
+            If True (default), create a :py:class:`pandas.MultiIndex` from the Cartesian product
+            of this dataset's indices. If False, use a :py:class:`pandas.RangeIndex` instead.
             This can be useful to avoid the potentially expensive MultiIndex
             creation.
 
@@ -7463,7 +7463,10 @@ def from_dataframe(cls, dataframe: pd.DataFrame, sparse: bool = False) -> Self:
         return obj[dataframe.columns] if len(dataframe.columns) else obj
 
     def to_dask_dataframe(
-        self, dim_order: Sequence[Hashable] | None = None, set_index: bool = False
+        self,
+        dim_order: Sequence[Hashable] | None = None,
+        set_index: bool = False,
+        create_index: bool = True,
     ) -> DaskDataFrame:
         """
         Convert this dataset into a dask.dataframe.DataFrame.
@@ -7487,6 +7490,13 @@ def to_dask_dataframe(
             If set_index=True, the dask DataFrame is indexed by this dataset's
             coordinate. Since dask DataFrames do not support multi-indexes,
             set_index only works if the dataset only contains one dimension.
+        create_index : bool, default: True
+            If ``create_index=False``, the resulting DataFrame will use a
+            :py:class:`pandas.RangeIndex` instead of setting dimensions as index columns.
+            This can significantly improve performance when the default index is not needed.
+            ``create_index=False`` is incompatible with ``set_index=True``.
+
+            .. versionadded:: 2025.01.1
 
         Returns
         -------
@@ -7496,11 +7506,20 @@ def to_dask_dataframe(
         import dask.array as da
         import dask.dataframe as dd
 
+        if not create_index and set_index:
+            raise ValueError("create_index=False is incompatible with set_index=True")
+
         ordered_dims = self._normalize_dim_order(dim_order=dim_order)
 
-        columns = list(ordered_dims)
-        columns.extend(k for k in self.coords if k not in self.dims)
-        columns.extend(self.data_vars)
+        if create_index:
+            columns = list(ordered_dims)
+            columns.extend(k for k in self.coords if k not in self.dims)
+            columns.extend(self.data_vars)
+        else:
+            # When create_index=False, exclude dimensions from columns
+            columns = []
+            columns.extend(k for k in self.coords if k not in self.dims)
+            columns.extend(self.data_vars)
 
         ds_chunks = self.chunks
 
diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
index 2d103994410..d55d5fa5143 100644
--- a/xarray/tests/test_dask.py
+++ b/xarray/tests/test_dask.py
@@ -941,6 +941,69 @@ def test_to_dask_dataframe_dim_order(self):
         with pytest.raises(ValueError, match=r"does not match the set of dimensions"):
             ds.to_dask_dataframe(dim_order=["x"])
 
+    def test_to_dask_dataframe_create_index_false(self):
+        # Test that create_index=False uses RangeIndex instead of dimension columns
+        x = np.random.randn(10)
+        y = np.arange(10, dtype="uint8")
+        t = list("abcdefghij")
+
+        ds = Dataset(
+            {"a": ("t", da.from_array(x, chunks=4)), "b": ("t", y), "t": ("t", t)}
+        )
+
+        # With create_index=False, we should get a RangeIndex and no dimension columns
+        actual = ds.to_dask_dataframe(create_index=False)
+        assert isinstance(actual, dd.DataFrame)
+        actual_computed = actual.compute()
+
+        # Check that index is RangeIndex
+        assert isinstance(actual_computed.index, pd.RangeIndex)
+
+        # Check that dimension columns are not present
+        assert "t" not in actual_computed.columns
+
+        # Check that data columns are present
+        assert "a" in actual_computed.columns
+        assert "b" in actual_computed.columns
+
+        # Verify values are correct
+        assert_array_equal(actual_computed["a"].values, x)
+        assert_array_equal(actual_computed["b"].values, y)
+
+    def test_to_dask_dataframe_create_index_incompatible_with_set_index(self):
+        # Test that create_index=False and set_index=True raises an error
+        ds = Dataset({"a": ("t", da.from_array([1, 2, 3], chunks=2))})
+
+        with pytest.raises(
+            ValueError,
+            match="create_index=False is incompatible with set_index=True",
+        ):
+            ds.to_dask_dataframe(create_index=False, set_index=True)
+
+    def test_to_dask_dataframe_create_index_2D(self):
+        # Test create_index=False with 2D dataset
+        w = np.random.randn(2, 3)
+        ds = Dataset({"w": (("x", "y"), da.from_array(w, chunks=(1, 2)))})
+        ds["x"] = ("x", np.array([0, 1], np.int64))
+        ds["y"] = ("y", list("abc"))
+
+        actual = ds.to_dask_dataframe(create_index=False)
+        assert isinstance(actual, dd.DataFrame)
+        actual_computed = actual.compute()
+
+        # Check that index is RangeIndex
+        assert isinstance(actual_computed.index, pd.RangeIndex)
+
+        # Check that dimension columns are not present
+        assert "x" not in actual_computed.columns
+        assert "y" not in actual_computed.columns
+
+        # Check that data column is present
+        assert "w" in actual_computed.columns
+
+        # Verify values are correct (flattened)
+        assert_array_equal(actual_computed["w"].values, w.reshape(-1))
+
 
 @pytest.mark.parametrize("method", ["load", "compute"])
 def test_dask_kwargs_variable(method):
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index 872da934423..bb8568a90df 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -3746,6 +3746,60 @@ def test_to_dask_dataframe(self) -> None:
         ):
             arr.to_dask_dataframe()
 
+    def test_to_dask_dataframe_create_index(self) -> None:
+        # Test create_index parameter for to_dask_dataframe
+        arr_np = np.arange(3 * 4).reshape(3, 4)
+        arr = DataArray(arr_np, [("B", [1, 2, 3]), ("A", list("cdef"))], name="foo")
+
+        # With create_index=False, should use RangeIndex
+        actual = arr.to_dask_dataframe(create_index=False)
+        actual_computed = actual.compute()
+
+        assert isinstance(actual_computed.index, pd.RangeIndex)
+        assert "B" not in actual_computed.columns
+        assert "A" not in actual_computed.columns
+        assert "foo" in actual_computed.columns
+        assert_array_equal(actual_computed["foo"].values, arr_np.reshape(-1))
+
+        # Test incompatibility with set_index=True
+        with pytest.raises(
+            ValueError,
+            match="create_index=False is incompatible with set_index=True",
+        ):
+            arr.to_dask_dataframe(create_index=False, set_index=True)
+        arr_np = np.arange(3 * 4).reshape(3, 4)
+        arr = DataArray(arr_np, [("B", [1, 2, 3]), ("A", list("cdef"))], name="foo")
+        expected_s = arr.to_series()
+        actual = arr.to_dask_dataframe()["foo"]
+
+        assert_array_equal(actual.values, np.asarray(expected_s.values))
+
+        actual = arr.to_dask_dataframe(dim_order=["A", "B"])["foo"]
+        assert_array_equal(arr_np.transpose().reshape(-1), actual.values)
+
+        # regression test for coords with different dimensions
+
+        arr.coords["C"] = ("B", [-1, -2, -3])
+        expected_df = arr.to_series().to_frame()
+        expected_df["C"] = [-1] * 4 + [-2] * 4 + [-3] * 4
+        expected_df = expected_df[["C", "foo"]]
+        actual = arr.to_dask_dataframe()[["C", "foo"]]
+
+        assert_array_equal(expected_df.values, np.asarray(actual.values))
+        assert_array_equal(
+            expected_df.columns.values, np.asarray(actual.columns.values)
+        )
+
+        with pytest.raises(ValueError, match="does not match the set of dimensions"):
+            arr.to_dask_dataframe(dim_order=["B", "A", "C"])
+
+        arr.name = None
+        with pytest.raises(
+            ValueError,
+            match="Cannot convert an unnamed DataArray",
+        ):
+            arr.to_dask_dataframe()
+
     def test_to_pandas_name_matches_coordinate(self) -> None:
         # coordinate with same name as array
         arr = DataArray([1, 2, 3], dims="x", name="x")

From 4bde825de62e7feabc0c639c488dc6ff7fd6d6c3 Mon Sep 17 00:00:00 2001
From: DHRUVA KUMAR KAUSHAL <sanjay@MacBook-Air.local>
Date: Sun, 28 Dec 2025 00:36:05 +0530
Subject: [PATCH 4/5] Move DataArray to_dask_dataframe create_index test to test_dask.py

---
 xarray/tests/test_dask.py      | 27 +++++++++++++++++
 xarray/tests/test_dataarray.py | 54 ----------------------------------
 2 files changed, 27 insertions(+), 54 deletions(-)

diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
index d55d5fa5143..f3018e0eb66 100644
--- a/xarray/tests/test_dask.py
+++ b/xarray/tests/test_dask.py
@@ -1004,6 +1004,33 @@ def test_to_dask_dataframe_create_index_2D(self):
         # Verify values are correct (flattened)
         assert_array_equal(actual_computed["w"].values, w.reshape(-1))
 
+    def test_to_dask_dataframe_create_index_dataarray(self):
+        # Test create_index parameter for DataArray.to_dask_dataframe
+        arr_np = np.arange(3 * 4).reshape(3, 4)
+        arr = DataArray(
+            da.from_array(arr_np, chunks=(2, 2)),
+            [("B", [1, 2, 3]), ("A", list("cdef"))],
+            name="foo",
+        )
+
+        # With create_index=False, should use RangeIndex
+        actual = arr.to_dask_dataframe(create_index=False)
+        assert isinstance(actual, dd.DataFrame)
+        actual_computed = actual.compute()
+
+        assert isinstance(actual_computed.index, pd.RangeIndex)
+        assert "B" not in actual_computed.columns
+        assert "A" not in actual_computed.columns
+        assert "foo" in actual_computed.columns
+        assert_array_equal(actual_computed["foo"].values, arr_np.reshape(-1))
+
+        # Test incompatibility with set_index=True
+        with pytest.raises(
+            ValueError,
+            match="create_index=False is incompatible with set_index=True",
+        ):
+            arr.to_dask_dataframe(create_index=False, set_index=True)
+
 
 @pytest.mark.parametrize("method", ["load", "compute"])
 def test_dask_kwargs_variable(method):
diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py
index bb8568a90df..872da934423 100644
--- a/xarray/tests/test_dataarray.py
+++ b/xarray/tests/test_dataarray.py
@@ -3746,60 +3746,6 @@ def test_to_dask_dataframe(self) -> None:
         ):
             arr.to_dask_dataframe()
 
-    def test_to_dask_dataframe_create_index(self) -> None:
-        # Test create_index parameter for to_dask_dataframe
-        arr_np = np.arange(3 * 4).reshape(3, 4)
-        arr = DataArray(arr_np, [("B", [1, 2, 3]), ("A", list("cdef"))], name="foo")
-
-        # With create_index=False, should use RangeIndex
-        actual = arr.to_dask_dataframe(create_index=False)
-        actual_computed = actual.compute()
-
-        assert isinstance(actual_computed.index, pd.RangeIndex)
-        assert "B" not in actual_computed.columns
-        assert "A" not in actual_computed.columns
-        assert "foo" in actual_computed.columns
-        assert_array_equal(actual_computed["foo"].values, arr_np.reshape(-1))
-
-        # Test incompatibility with set_index=True
-        with pytest.raises(
-            ValueError,
-            match="create_index=False is incompatible with set_index=True",
-        ):
-            arr.to_dask_dataframe(create_index=False, set_index=True)
-        arr_np = np.arange(3 * 4).reshape(3, 4)
-        arr = DataArray(arr_np, [("B", [1, 2, 3]), ("A", list("cdef"))], name="foo")
-        expected_s = arr.to_series()
-        actual = arr.to_dask_dataframe()["foo"]
-
-        assert_array_equal(actual.values, np.asarray(expected_s.values))
-
-        actual = arr.to_dask_dataframe(dim_order=["A", "B"])["foo"]
-        assert_array_equal(arr_np.transpose().reshape(-1), actual.values)
-
-        # regression test for coords with different dimensions
-
-        arr.coords["C"] = ("B", [-1, -2, -3])
-        expected_df = arr.to_series().to_frame()
-        expected_df["C"] = [-1] * 4 + [-2] * 4 + [-3] * 4
-        expected_df = expected_df[["C", "foo"]]
-        actual = arr.to_dask_dataframe()[["C", "foo"]]
-
-        assert_array_equal(expected_df.values, np.asarray(actual.values))
-        assert_array_equal(
-            expected_df.columns.values, np.asarray(actual.columns.values)
-        )
-
-        with pytest.raises(ValueError, match="does not match the set of dimensions"):
-            arr.to_dask_dataframe(dim_order=["B", "A", "C"])
-
-        arr.name = None
-        with pytest.raises(
-            ValueError,
-            match="Cannot convert an unnamed DataArray",
-        ):
-            arr.to_dask_dataframe()
-
     def test_to_pandas_name_matches_coordinate(self) -> None:
         # coordinate with same name as array
         arr = DataArray([1, 2, 3], dims="x", name="x")

From 8bfee0db268ddb85cc70df3084e6418ecda1ded3 Mon Sep 17 00:00:00 2001
From: DHRUVA KUMAR KAUSHAL <sanjay@MacBook-Air.local>
Date: Mon, 29 Dec 2025 11:00:46 +0530
Subject: [PATCH 5/5] Clarify create_index docs for to_dask_dataframe and dedupe column building

---
 xarray/core/dataarray.py  |  9 +++++----
 xarray/core/dataset.py    | 21 ++++++++++++---------
 xarray/tests/test_dask.py |  6 +++---
 3 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py
index 132394fa9ad..cfe8e6f6de8 100644
--- a/xarray/core/dataarray.py
+++ b/xarray/core/dataarray.py
@@ -7604,10 +7604,11 @@ def to_dask_dataframe(
             coordinate. Since dask DataFrames do not support multi-indexes,
             set_index only works if the dataset only contains one dimension.
         create_index : bool, default: True
-            If ``create_index=False``, the resulting DataFrame will use a
-            :py:class:`pandas.RangeIndex` instead of setting dimensions as index columns.
-            This can significantly improve performance when the default index is not needed.
-            ``create_index=False`` is incompatible with ``set_index=True``.
+            If ``create_index=True`` (default), dimension coordinates will be included
+            as columns in the resulting DataFrame. If ``create_index=False``, dimension
+            coordinates will be excluded, leaving only data variables and non-dimension
+            coordinates. This can improve performance and reduce memory usage when dimension
+            information is not needed. ``create_index=False`` is incompatible with ``set_index=True``.
 
             .. versionadded:: 2025.01.1
 
diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py
index 9fe6b50fb52..0a15cf84ee5 100644
--- a/xarray/core/dataset.py
+++ b/xarray/core/dataset.py
@@ -7491,10 +7491,11 @@ def to_dask_dataframe(
             coordinate. Since dask DataFrames do not support multi-indexes,
             set_index only works if the dataset only contains one dimension.
         create_index : bool, default: True
-            If ``create_index=False``, the resulting DataFrame will use a
-            :py:class:`pandas.RangeIndex` instead of setting dimensions as index columns.
-            This can significantly improve performance when the default index is not needed.
-            ``create_index=False`` is incompatible with ``set_index=True``.
+            If ``create_index=True`` (default), dimension coordinates will be included
+            as columns in the resulting DataFrame. If ``create_index=False``, dimension
+            coordinates will be excluded, leaving only data variables and non-dimension
+            coordinates. This can improve performance and reduce memory usage when dimension
+            information is not needed. ``create_index=False`` is incompatible with ``set_index=True``.
 
             .. versionadded:: 2025.01.1
 
@@ -7511,15 +7512,17 @@ def to_dask_dataframe(
 
         ordered_dims = self._normalize_dim_order(dim_order=dim_order)
 
+        # Build column list based on create_index
         if create_index:
+            # Include dimension coordinates as columns
             columns = list(ordered_dims)
-            columns.extend(k for k in self.coords if k not in self.dims)
-            columns.extend(self.data_vars)
         else:
-            # When create_index=False, exclude dimensions from columns
+            # Exclude dimension coordinates
             columns = []
-            columns.extend(k for k in self.coords if k not in self.dims)
-            columns.extend(self.data_vars)
+
+        # Always include non-dimension coordinates and data variables
+        columns.extend(k for k in self.coords if k not in self.dims)
+        columns.extend(self.data_vars)
 
         ds_chunks = self.chunks
 
diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
index f3018e0eb66..21d6f8249aa 100644
--- a/xarray/tests/test_dask.py
+++ b/xarray/tests/test_dask.py
@@ -942,7 +942,7 @@ def test_to_dask_dataframe_dim_order(self):
             ds.to_dask_dataframe(dim_order=["x"])
 
     def test_to_dask_dataframe_create_index_false(self):
-        # Test that create_index=False uses RangeIndex instead of dimension columns
+        # Test that create_index=False excludes dimension columns
         x = np.random.randn(10)
         y = np.arange(10, dtype="uint8")
         t = list("abcdefghij")
@@ -951,7 +951,7 @@ def test_to_dask_dataframe_create_index_false(self):
             {"a": ("t", da.from_array(x, chunks=4)), "b": ("t", y), "t": ("t", t)}
         )
 
-        # With create_index=False, we should get a RangeIndex and no dimension columns
+        # With create_index=False, dimension columns should be excluded
         actual = ds.to_dask_dataframe(create_index=False)
         assert isinstance(actual, dd.DataFrame)
         actual_computed = actual.compute()
@@ -959,7 +959,7 @@ def test_to_dask_dataframe_create_index_false(self):
         # Check that index is RangeIndex
         assert isinstance(actual_computed.index, pd.RangeIndex)
 
-        # Check that dimension columns are not present
+        # Check that dimension columns are NOT present
         assert "t" not in actual_computed.columns
 
         # Check that data columns are present