diff --git a/docs/example.ipynb b/docs/example.ipynb
index a5c6b23..c511d9b 100644
--- a/docs/example.ipynb
+++ b/docs/example.ipynb
@@ -40,20 +40,82 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Initial DataFrame:\n",
- " ProductID Sales Region\n",
- "0 101 250 East\n",
- "1 102 150 West\n",
- "2 103 300 East\n",
- "3 104 200 South\n"
+ "Initial DataFrame:\n"
]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ProductID | \n",
+ " Sales | \n",
+ " Region | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 101 | \n",
+ " 250 | \n",
+ " East | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 102 | \n",
+ " 150 | \n",
+ " West | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 103 | \n",
+ " 300 | \n",
+ " East | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 104 | \n",
+ " 200 | \n",
+ " South | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ProductID Sales Region\n",
+ "0 101 250 East\n",
+ "1 102 150 West\n",
+ "2 103 300 East\n",
+ "3 104 200 South"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
@@ -67,7 +129,7 @@
"\n",
"# Display the initial DataFrame\n",
"print(\"Initial DataFrame:\")\n",
- "print(df)"
+ "df"
]
},
{
@@ -83,7 +145,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -91,13 +153,80 @@
"output_type": "stream",
"text": [
"\n",
- "DataFrame after applying 'mutate':\n",
- " ProductID Sales Region VAT\n",
- "0 101 250 East 37.5\n",
- "1 102 150 West 22.5\n",
- "2 103 300 East 45.0\n",
- "3 104 200 South 30.0\n"
+ "DataFrame after applying 'mutate':\n"
]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ProductID | \n",
+ " Sales | \n",
+ " Region | \n",
+ " VAT | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 101 | \n",
+ " 250 | \n",
+ " East | \n",
+ " 37.5 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 102 | \n",
+ " 150 | \n",
+ " West | \n",
+ " 22.5 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 103 | \n",
+ " 300 | \n",
+ " East | \n",
+ " 45.0 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 104 | \n",
+ " 200 | \n",
+ " South | \n",
+ " 30.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ProductID Sales Region VAT\n",
+ "0 101 250 East 37.5\n",
+ "1 102 150 West 22.5\n",
+ "2 103 300 East 45.0\n",
+ "3 104 200 South 30.0"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
@@ -105,7 +234,7 @@
"df = ttp.mutate(df, \"VAT = Sales * 0.15\")\n",
"\n",
"print(\"\\nDataFrame after applying 'mutate':\")\n",
- "print(df)"
+ "df"
]
},
{
@@ -119,7 +248,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -127,18 +256,71 @@
"output_type": "stream",
"text": [
"\n",
- "DataFrame after applying 'filter':\n",
- " ProductID Sales Region VAT\n",
- "0 101 250 East 37.5\n",
- "2 103 300 East 45.0\n"
+ "DataFrame after applying 'filter':\n"
]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ProductID | \n",
+ " Sales | \n",
+ " Region | \n",
+ " VAT | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 101 | \n",
+ " 250 | \n",
+ " East | \n",
+ " 37.5 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 103 | \n",
+ " 300 | \n",
+ " East | \n",
+ " 45.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ProductID Sales Region VAT\n",
+ "0 101 250 East 37.5\n",
+ "2 103 300 East 45.0"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
"# Filtering rows where sales are greater than 200\n",
"df = ttp.filter(df, \"Sales > 200\")\n",
"print(\"\\nDataFrame after applying 'filter':\")\n",
- "print(df)"
+ "df"
]
},
{
@@ -152,7 +334,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 10,
"metadata": {},
"outputs": [
{
@@ -160,18 +342,65 @@
"output_type": "stream",
"text": [
"\n",
- "DataFrame after applying 'select':\n",
- " ProductID VAT\n",
- "0 101 37.5\n",
- "2 103 45.0\n"
+ "DataFrame after applying 'select':\n"
]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ProductID | \n",
+ " VAT | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 101 | \n",
+ " 37.5 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 103 | \n",
+ " 45.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ProductID VAT\n",
+ "0 101 37.5\n",
+ "2 103 45.0"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
"# Selecting specific columns\n",
"df = ttp.select(df, \"ProductID\", \"VAT\")\n",
"print(\"\\nDataFrame after applying 'select':\")\n",
- "print(df)"
+ "df"
]
},
{
@@ -185,7 +414,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
@@ -193,18 +422,65 @@
"output_type": "stream",
"text": [
"\n",
- "DataFrame after applying 'arrange':\n",
- " ProductID VAT\n",
- "2 103 45.0\n",
- "0 101 37.5\n"
+ "DataFrame after applying 'arrange':\n"
]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " ProductID | \n",
+ " VAT | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2 | \n",
+ " 103 | \n",
+ " 45.0 | \n",
+ "
\n",
+ " \n",
+ " 0 | \n",
+ " 101 | \n",
+ " 37.5 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " ProductID VAT\n",
+ "2 103 45.0\n",
+ "0 101 37.5"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
}
],
"source": [
"# Sorting the DataFrame\n",
"df = ttp.arrange(df, False, \"ProductID\")\n",
"print(\"\\nDataFrame after applying 'arrange':\")\n",
- "print(df)"
+ "df"
]
},
{
diff --git a/docs/tutorial.ipynb b/docs/tutorial.ipynb
index 85dfbd6..6ad169e 100644
--- a/docs/tutorial.ipynb
+++ b/docs/tutorial.ipynb
@@ -22,7 +22,7 @@
},
{
"cell_type": "code",
- "execution_count": 19,
+ "execution_count": 1,
"metadata": {},
"outputs": [
{
@@ -138,7 +138,7 @@
"4 3450.0 female 2007 "
]
},
- "execution_count": 19,
+ "execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
@@ -269,8 +269,7 @@
],
"source": [
"print(newPenguins.shape)\n",
- "newPenguins = ttp.filter(\n",
- " newPenguins, \"species == 'Adelie' & body_mass_g > 3000\")\n",
+ "newPenguins = ttp.filter(newPenguins, \"species == 'Adelie' & body_mass_g > 3000\")\n",
"print(newPenguins.shape)"
]
},
@@ -318,7 +317,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -440,13 +439,13 @@
"4 3450.0 female 2007 3.45 "
]
},
- "execution_count": 13,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "ttp.mutate(penguins, \"body_mass_kg = body_mass_g / 1000\")\n",
+ "penguins = ttp.mutate(penguins, \"body_mass_kg = body_mass_g / 1000\")\n",
"\n",
"penguins.head()"
]
@@ -464,7 +463,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
@@ -611,15 +610,15 @@
"4 19.3 "
]
},
- "execution_count": 14,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "ttp.mutate(penguins, \"bill_length_cm = bill_length_mm / 10\")\n",
- "ttp.mutate(penguins, \"bill_depth_cm = bill_depth_mm / 10\")\n",
- "ttp.mutate(penguins, \"flipper_length_cm = flipper_length_mm / 10\")\n",
+ "penguins = ttp.mutate(penguins, \"bill_length_cm = bill_length_mm / 10\")\n",
+ "penguins = ttp.mutate(penguins, \"bill_depth_cm = bill_depth_mm / 10\")\n",
+ "penguins = ttp.mutate(penguins, \"flipper_length_cm = flipper_length_mm / 10\")\n",
"\n",
"penguins.head()"
]
@@ -653,7 +652,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": 8,
"metadata": {},
"outputs": [
{
@@ -714,7 +713,7 @@
"4 Adelie"
]
},
- "execution_count": 15,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -744,7 +743,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 9,
"metadata": {},
"outputs": [
{
@@ -823,7 +822,7 @@
"4 Adelie 36.7 19.3 3450.0"
]
},
- "execution_count": 7,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@@ -850,7 +849,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 10,
"metadata": {},
"outputs": [
{
@@ -935,7 +934,7 @@
"4 Adelie 3.67 1.93 19.3 3.45"
]
},
- "execution_count": 18,
+ "execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@@ -973,7 +972,7 @@
},
{
"cell_type": "code",
- "execution_count": 21,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
@@ -997,105 +996,74 @@
" \n",
" \n",
" | \n",
- " rowid | \n",
" species | \n",
- " island | \n",
- " bill_length_mm | \n",
- " bill_depth_mm | \n",
- " flipper_length_mm | \n",
- " body_mass_g | \n",
- " sex | \n",
- " year | \n",
+ " bill_length_cm | \n",
+ " bill_depth_cm | \n",
+ " flipper_length_cm | \n",
+ " body_mass_kg | \n",
"
\n",
" \n",
" \n",
" \n",
" 169 | \n",
- " 170 | \n",
" Gentoo | \n",
- " Biscoe | \n",
- " 49.2 | \n",
- " 15.2 | \n",
- " 221.0 | \n",
- " 6300.0 | \n",
- " male | \n",
- " 2007 | \n",
+ " 4.92 | \n",
+ " 1.52 | \n",
+ " 22.1 | \n",
+ " 6.30 | \n",
"
\n",
" \n",
" 185 | \n",
- " 186 | \n",
" Gentoo | \n",
- " Biscoe | \n",
- " 59.6 | \n",
- " 17.0 | \n",
- " 230.0 | \n",
- " 6050.0 | \n",
- " male | \n",
- " 2007 | \n",
+ " 5.96 | \n",
+ " 1.70 | \n",
+ " 23.0 | \n",
+ " 6.05 | \n",
"
\n",
" \n",
" 269 | \n",
- " 270 | \n",
" Gentoo | \n",
- " Biscoe | \n",
- " 48.8 | \n",
- " 16.2 | \n",
- " 222.0 | \n",
- " 6000.0 | \n",
- " male | \n",
- " 2009 | \n",
+ " 4.88 | \n",
+ " 1.62 | \n",
+ " 22.2 | \n",
+ " 6.00 | \n",
"
\n",
" \n",
" 229 | \n",
- " 230 | \n",
" Gentoo | \n",
- " Biscoe | \n",
- " 51.1 | \n",
- " 16.3 | \n",
- " 220.0 | \n",
- " 6000.0 | \n",
- " male | \n",
- " 2008 | \n",
+ " 5.11 | \n",
+ " 1.63 | \n",
+ " 22.0 | \n",
+ " 6.00 | \n",
"
\n",
" \n",
" 263 | \n",
- " 264 | \n",
" Gentoo | \n",
- " Biscoe | \n",
- " 49.8 | \n",
- " 15.9 | \n",
- " 229.0 | \n",
- " 5950.0 | \n",
- " male | \n",
- " 2009 | \n",
+ " 4.98 | \n",
+ " 1.59 | \n",
+ " 22.9 | \n",
+ " 5.95 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " rowid species island bill_length_mm bill_depth_mm flipper_length_mm \\\n",
- "169 170 Gentoo Biscoe 49.2 15.2 221.0 \n",
- "185 186 Gentoo Biscoe 59.6 17.0 230.0 \n",
- "269 270 Gentoo Biscoe 48.8 16.2 222.0 \n",
- "229 230 Gentoo Biscoe 51.1 16.3 220.0 \n",
- "263 264 Gentoo Biscoe 49.8 15.9 229.0 \n",
- "\n",
- " body_mass_g sex year \n",
- "169 6300.0 male 2007 \n",
- "185 6050.0 male 2007 \n",
- "269 6000.0 male 2009 \n",
- "229 6000.0 male 2008 \n",
- "263 5950.0 male 2009 "
+ " species bill_length_cm bill_depth_cm flipper_length_cm body_mass_kg\n",
+ "169 Gentoo 4.92 1.52 22.1 6.30\n",
+ "185 Gentoo 5.96 1.70 23.0 6.05\n",
+ "269 Gentoo 4.88 1.62 22.2 6.00\n",
+ "229 Gentoo 5.11 1.63 22.0 6.00\n",
+ "263 Gentoo 4.98 1.59 22.9 5.95"
]
},
- "execution_count": 21,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "penguins_sorted = ttp.arrange(penguins, False, \"body_mass_g\")\n",
+ "penguins_sorted = ttp.arrange(penguins, False, \"body_mass_kg\")\n",
"penguins_sorted.head()"
]
},
@@ -1115,7 +1083,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 12,
"metadata": {},
"outputs": [
{
@@ -1139,50 +1107,38 @@
" \n",
" \n",
" | \n",
- " rowid | \n",
" species | \n",
- " island | \n",
- " bill_length_mm | \n",
- " bill_depth_mm | \n",
- " flipper_length_mm | \n",
- " body_mass_g | \n",
- " sex | \n",
- " year | \n",
+ " bill_length_cm | \n",
+ " bill_depth_cm | \n",
+ " flipper_length_cm | \n",
+ " body_mass_kg | \n",
"
\n",
" \n",
" \n",
" \n",
" 142 | \n",
- " 143 | \n",
" Adelie | \n",
- " Dream | \n",
- " 32.1 | \n",
- " 15.5 | \n",
- " 188.0 | \n",
- " 3050.0 | \n",
- " female | \n",
- " 2009 | \n",
+ " 3.21 | \n",
+ " 1.55 | \n",
+ " 18.8 | \n",
+ " 3.05 | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " rowid species island bill_length_mm bill_depth_mm flipper_length_mm \\\n",
- "142 143 Adelie Dream 32.1 15.5 188.0 \n",
- "\n",
- " body_mass_g sex year \n",
- "142 3050.0 female 2009 "
+ " species bill_length_cm bill_depth_cm flipper_length_cm body_mass_kg\n",
+ "142 Adelie 3.21 1.55 18.8 3.05"
]
},
- "execution_count": 22,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
- "penguins_small_bill = ttp.arrange(\n",
- " penguins, True, \"bill_length_mm\", \"bill_depth_mm\")\n",
+ "penguins_small_bill = ttp.arrange(penguins, True, \"bill_length_cm\", \"bill_depth_cm\")\n",
"penguins_small_bill.head(1)"
]
},
@@ -1197,7 +1153,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
@@ -1270,12 +1226,14 @@
"263 Gentoo Biscoe 5.95"
]
},
- "execution_count": 24,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
+ "penguins = pd.read_csv(\"penguins.csv\")\n",
+ "\n",
"penguins_subset2 = (\n",
" penguins.pipe(ttp.filter, \"~ sex.isnull()\")\n",
" .pipe(ttp.mutate, \"body_mass_kg = body_mass_g / 1000\")\n",
diff --git a/src/tidyversetopandas/tidyversetopandas.py b/src/tidyversetopandas/tidyversetopandas.py
index 17c5a79..f9ca375 100644
--- a/src/tidyversetopandas/tidyversetopandas.py
+++ b/src/tidyversetopandas/tidyversetopandas.py
@@ -24,11 +24,11 @@ def mutate(df: pd.DataFrame, expr: str) -> pd.DataFrame:
raise ValueError(msg)
try:
- df.eval(expr, inplace=True)
+ res = df.eval(expr)
except Exception as e:
raise ValueError(f"Error evaluating the expression: %s" % e)
- return df
+ return res
def select(dataframe, *columns):