From 065c9a0422e2c940bfad67416c259df70613dabb Mon Sep 17 00:00:00 2001 From: farrandi Date: Fri, 26 Jan 2024 17:21:31 -0800 Subject: [PATCH 1/3] remove inplace=True --- src/tidyversetopandas/tidyversetopandas.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tidyversetopandas/tidyversetopandas.py b/src/tidyversetopandas/tidyversetopandas.py index 17c5a79..f9ca375 100644 --- a/src/tidyversetopandas/tidyversetopandas.py +++ b/src/tidyversetopandas/tidyversetopandas.py @@ -24,11 +24,11 @@ def mutate(df: pd.DataFrame, expr: str) -> pd.DataFrame: raise ValueError(msg) try: - df.eval(expr, inplace=True) + res = df.eval(expr) except Exception as e: raise ValueError(f"Error evaluating the expression: %s" % e) - return df + return res def select(dataframe, *columns): From f6369528e21ee93a95246dbc781a7be155f2b6c9 Mon Sep 17 00:00:00 2001 From: farrandi Date: Fri, 26 Jan 2024 17:21:47 -0800 Subject: [PATCH 2/3] edit some syntaxes --- docs/example.ipynb | 344 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 310 insertions(+), 34 deletions(-) diff --git a/docs/example.ipynb b/docs/example.ipynb index a5c6b23..c511d9b 100644 --- a/docs/example.ipynb +++ b/docs/example.ipynb @@ -40,20 +40,82 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Initial DataFrame:\n", - " ProductID Sales Region\n", - "0 101 250 East\n", - "1 102 150 West\n", - "2 103 300 East\n", - "3 104 200 South\n" + "Initial DataFrame:\n" ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ProductIDSalesRegion
0101250East
1102150West
2103300East
3104200South
\n", + "
" + ], + "text/plain": [ + " ProductID Sales Region\n", + "0 101 250 East\n", + "1 102 150 West\n", + "2 103 300 East\n", + "3 104 200 South" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -67,7 +129,7 @@ "\n", "# Display the initial DataFrame\n", "print(\"Initial DataFrame:\")\n", - "print(df)" + "df" ] }, { @@ -83,7 +145,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -91,13 +153,80 @@ "output_type": "stream", "text": [ "\n", - "DataFrame after applying 'mutate':\n", - " ProductID Sales Region VAT\n", - "0 101 250 East 37.5\n", - "1 102 150 West 22.5\n", - "2 103 300 East 45.0\n", - "3 104 200 South 30.0\n" + "DataFrame after applying 'mutate':\n" ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ProductIDSalesRegionVAT
0101250East37.5
1102150West22.5
2103300East45.0
3104200South30.0
\n", + "
" + ], + "text/plain": [ + " ProductID Sales Region VAT\n", + "0 101 250 East 37.5\n", + "1 102 150 West 22.5\n", + "2 103 300 East 45.0\n", + "3 104 200 South 30.0" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -105,7 +234,7 @@ "df = ttp.mutate(df, \"VAT = Sales * 0.15\")\n", "\n", "print(\"\\nDataFrame after applying 'mutate':\")\n", - "print(df)" + "df" ] }, { @@ -119,7 +248,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -127,18 +256,71 @@ "output_type": "stream", "text": [ "\n", - "DataFrame after applying 'filter':\n", - " ProductID Sales Region VAT\n", - "0 101 250 East 37.5\n", - "2 103 300 East 45.0\n" + "DataFrame after applying 'filter':\n" ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ProductIDSalesRegionVAT
0101250East37.5
2103300East45.0
\n", + "
" + ], + "text/plain": [ + " ProductID Sales Region VAT\n", + "0 101 250 East 37.5\n", + "2 103 300 East 45.0" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ "# Filtering rows where sales are greater than 200\n", "df = ttp.filter(df, \"Sales > 200\")\n", "print(\"\\nDataFrame after applying 'filter':\")\n", - "print(df)" + "df" ] }, { @@ -152,7 +334,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -160,18 +342,65 @@ "output_type": "stream", "text": [ "\n", - "DataFrame after applying 'select':\n", - " ProductID VAT\n", - "0 101 37.5\n", - "2 103 45.0\n" + "DataFrame after applying 'select':\n" ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ProductIDVAT
010137.5
210345.0
\n", + "
" + ], + "text/plain": [ + " ProductID VAT\n", + "0 101 37.5\n", + "2 103 45.0" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ "# Selecting specific columns\n", "df = ttp.select(df, \"ProductID\", \"VAT\")\n", "print(\"\\nDataFrame after applying 'select':\")\n", - "print(df)" + "df" ] }, { @@ -185,7 +414,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -193,18 +422,65 @@ "output_type": "stream", "text": [ "\n", - "DataFrame after applying 'arrange':\n", - " ProductID VAT\n", - "2 103 45.0\n", - "0 101 37.5\n" + "DataFrame after applying 'arrange':\n" ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
ProductIDVAT
210345.0
010137.5
\n", + "
" + ], + "text/plain": [ + " ProductID VAT\n", + "2 103 45.0\n", + "0 101 37.5" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ "# Sorting the DataFrame\n", "df = ttp.arrange(df, False, \"ProductID\")\n", "print(\"\\nDataFrame after applying 'arrange':\")\n", - "print(df)" + "df" ] }, { From d1fada9a21f38385f9c4f8423152c719bd827da4 Mon Sep 17 00:00:00 2001 From: farrandi Date: Fri, 26 Jan 2024 17:22:01 -0800 Subject: [PATCH 3/3] update tutorial --- docs/tutorial.ipynb | 176 +++++++++++++++++--------------------------- 1 file changed, 67 insertions(+), 109 deletions(-) diff --git a/docs/tutorial.ipynb b/docs/tutorial.ipynb index 85dfbd6..6ad169e 100644 --- a/docs/tutorial.ipynb +++ b/docs/tutorial.ipynb @@ -22,7 +22,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -138,7 +138,7 @@ "4 3450.0 female 2007 " ] }, - "execution_count": 19, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -269,8 +269,7 @@ ], "source": [ "print(newPenguins.shape)\n", - "newPenguins = ttp.filter(\n", - " newPenguins, \"species == 'Adelie' & body_mass_g > 3000\")\n", + "newPenguins = ttp.filter(newPenguins, \"species == 'Adelie' & body_mass_g > 3000\")\n", "print(newPenguins.shape)" ] }, @@ -318,7 +317,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -440,13 +439,13 @@ "4 3450.0 female 2007 3.45 " ] }, - "execution_count": 13, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ttp.mutate(penguins, \"body_mass_kg = body_mass_g / 1000\")\n", + "penguins = ttp.mutate(penguins, \"body_mass_kg = body_mass_g / 1000\")\n", "\n", "penguins.head()" ] @@ -464,7 +463,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -611,15 +610,15 @@ "4 19.3 " ] }, - "execution_count": 14, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ttp.mutate(penguins, \"bill_length_cm = bill_length_mm / 10\")\n", - "ttp.mutate(penguins, \"bill_depth_cm = bill_depth_mm / 10\")\n", - "ttp.mutate(penguins, \"flipper_length_cm = flipper_length_mm / 10\")\n", + "penguins = ttp.mutate(penguins, \"bill_length_cm = bill_length_mm / 10\")\n", + "penguins = ttp.mutate(penguins, \"bill_depth_cm = bill_depth_mm / 10\")\n", + "penguins = ttp.mutate(penguins, \"flipper_length_cm = flipper_length_mm / 10\")\n", "\n", "penguins.head()" ] @@ -653,7 +652,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -714,7 +713,7 @@ "4 Adelie" ] }, - "execution_count": 15, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -744,7 +743,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -823,7 +822,7 @@ "4 Adelie 36.7 19.3 3450.0" ] }, - "execution_count": 7, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -850,7 +849,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -935,7 +934,7 @@ "4 Adelie 3.67 1.93 19.3 3.45" ] }, - "execution_count": 18, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -973,7 +972,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -997,105 +996,74 @@ " \n", " \n", " \n", - " rowid\n", " species\n", - " island\n", - " bill_length_mm\n", - " bill_depth_mm\n", - " flipper_length_mm\n", - " body_mass_g\n", - " sex\n", - " year\n", + " bill_length_cm\n", + " bill_depth_cm\n", + " flipper_length_cm\n", + " body_mass_kg\n", " \n", " \n", " \n", " \n", " 169\n", - " 170\n", " Gentoo\n", - " Biscoe\n", - " 49.2\n", - " 15.2\n", - " 221.0\n", - " 6300.0\n", - " male\n", - " 2007\n", + " 4.92\n", + " 1.52\n", + " 22.1\n", + " 6.30\n", " \n", " \n", " 185\n", - " 186\n", " Gentoo\n", - " Biscoe\n", - " 59.6\n", - " 17.0\n", - " 230.0\n", - " 6050.0\n", - " male\n", - " 2007\n", + " 5.96\n", + " 1.70\n", + " 23.0\n", + " 6.05\n", " \n", " \n", " 269\n", - " 270\n", " Gentoo\n", - " Biscoe\n", - " 48.8\n", - " 16.2\n", - " 222.0\n", - " 6000.0\n", - " male\n", - " 2009\n", + " 4.88\n", + " 1.62\n", + " 22.2\n", + " 6.00\n", " \n", " \n", " 229\n", - " 230\n", " Gentoo\n", - " Biscoe\n", - " 51.1\n", - " 16.3\n", - " 220.0\n", - " 6000.0\n", - " male\n", - " 2008\n", + " 5.11\n", + " 1.63\n", + " 22.0\n", + " 6.00\n", " \n", " \n", " 263\n", - " 264\n", " Gentoo\n", - " Biscoe\n", - " 49.8\n", - " 15.9\n", - " 229.0\n", - " 5950.0\n", - " male\n", - " 2009\n", + " 4.98\n", + " 1.59\n", + " 22.9\n", + " 5.95\n", " \n", " \n", "\n", "" ], "text/plain": [ - " rowid species island bill_length_mm bill_depth_mm flipper_length_mm \\\n", - "169 170 Gentoo Biscoe 49.2 15.2 221.0 \n", - "185 186 Gentoo Biscoe 59.6 17.0 230.0 \n", - "269 270 Gentoo Biscoe 48.8 16.2 222.0 \n", - "229 230 Gentoo Biscoe 51.1 16.3 220.0 \n", - "263 264 Gentoo Biscoe 49.8 15.9 229.0 \n", - "\n", - " body_mass_g sex year \n", - "169 6300.0 male 2007 \n", - "185 6050.0 male 2007 \n", - "269 6000.0 male 2009 \n", - "229 6000.0 male 2008 \n", - "263 5950.0 male 2009 " + " species bill_length_cm bill_depth_cm flipper_length_cm body_mass_kg\n", + "169 Gentoo 4.92 1.52 22.1 6.30\n", + "185 Gentoo 5.96 1.70 23.0 6.05\n", + "269 Gentoo 4.88 1.62 22.2 6.00\n", + "229 Gentoo 5.11 1.63 22.0 6.00\n", + "263 Gentoo 4.98 1.59 22.9 5.95" ] }, - "execution_count": 21, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "penguins_sorted = ttp.arrange(penguins, False, \"body_mass_g\")\n", + "penguins_sorted = ttp.arrange(penguins, False, \"body_mass_kg\")\n", "penguins_sorted.head()" ] }, @@ -1115,7 +1083,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -1139,50 +1107,38 @@ " \n", " \n", " \n", - " rowid\n", " species\n", - " island\n", - " bill_length_mm\n", - " bill_depth_mm\n", - " flipper_length_mm\n", - " body_mass_g\n", - " sex\n", - " year\n", + " bill_length_cm\n", + " bill_depth_cm\n", + " flipper_length_cm\n", + " body_mass_kg\n", " \n", " \n", " \n", " \n", " 142\n", - " 143\n", " Adelie\n", - " Dream\n", - " 32.1\n", - " 15.5\n", - " 188.0\n", - " 3050.0\n", - " female\n", - " 2009\n", + " 3.21\n", + " 1.55\n", + " 18.8\n", + " 3.05\n", " \n", " \n", "\n", "" ], "text/plain": [ - " rowid species island bill_length_mm bill_depth_mm flipper_length_mm \\\n", - "142 143 Adelie Dream 32.1 15.5 188.0 \n", - "\n", - " body_mass_g sex year \n", - "142 3050.0 female 2009 " + " species bill_length_cm bill_depth_cm flipper_length_cm body_mass_kg\n", + "142 Adelie 3.21 1.55 18.8 3.05" ] }, - "execution_count": 22, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "penguins_small_bill = ttp.arrange(\n", - " penguins, True, \"bill_length_mm\", \"bill_depth_mm\")\n", + "penguins_small_bill = ttp.arrange(penguins, True, \"bill_length_cm\", \"bill_depth_cm\")\n", "penguins_small_bill.head(1)" ] }, @@ -1197,7 +1153,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -1270,12 +1226,14 @@ "263 Gentoo Biscoe 5.95" ] }, - "execution_count": 24, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ + "penguins = pd.read_csv(\"penguins.csv\")\n", + "\n", "penguins_subset2 = (\n", " penguins.pipe(ttp.filter, \"~ sex.isnull()\")\n", " .pipe(ttp.mutate, \"body_mass_kg = body_mass_g / 1000\")\n",