Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: test exported code executes #159

Merged
merged 6 commits into from
Oct 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions tests/execution_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import base64
import pickle

import nbconvert
import nbformat
import pandas as pd

from edvart.report import ReportBase
from edvart.report_sections.code_string_formatting import code_dedent
from edvart.report_sections.section_base import Section


def check_section_executes(section: Section, df: pd.DataFrame) -> None:
    """Check that the code exported by ``section`` executes in a notebook.

    A fresh notebook is assembled from three parts, in this order:

    1. one cell with ``ReportBase._DEFAULT_IMPORTS`` followed by
       ``section.required_imports()``,
    2. one cell that recreates ``df`` inside the notebook,
    3. the cells that ``section.add_cells`` emits for ``df``.

    The notebook is then run with nbconvert's ``ExecutePreprocessor``.

    Parameters
    ----------
    section : Section
        Section whose exported cells should be executed.
    df : pd.DataFrame
        Data handed to ``section.add_cells`` and made available to the
        executed notebook under the name ``df``.

    Raises
    ------
    Exception
        Nothing is caught here, so any error raised while exporting the cells
        or while executing the notebook propagates to the calling test.
    """
    nb = nbformat.v4.new_notebook()
    section_code_cells: list = []
    section.add_cells(section_code_cells, df)

    # The notebook source is plain text, so the DataFrame is shipped as a
    # pickled, base85-encoded bytes literal embedded in a code cell.
    buffer = pickle.dumps(df, fix_imports=False)
    buffer_base64 = base64.b85encode(buffer)

    # `!r` embeds the bytes literal via an explicit repr(). Plain `{...}` would
    # go through str(bytes), which emits BytesWarning under `python -b` and
    # fails under `python -bb`.
    # NOTE: pickle.loads is only ever applied to data produced just above.
    unpickle_df = code_dedent(
        f"""
        import pickle
        import base64

        data = {buffer_base64!r}
        df = pickle.loads(base64.b85decode(data), fix_imports=False)"""
    )

    all_imports = [
        *ReportBase._DEFAULT_IMPORTS,
        *section.required_imports(),
    ]

    nb["cells"] = [
        nbformat.v4.new_code_cell("\n".join(all_imports)),
        nbformat.v4.new_code_cell(unpickle_df),
        *section_code_cells,
    ]
    # The 60 second timeout is passed to ExecutePreprocessor as-is.
    preprocessor = nbconvert.preprocessors.ExecutePreprocessor(timeout=60)
    preprocessor.preprocess(nb)
38 changes: 38 additions & 0 deletions tests/test_bivariate_analysis.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import warnings
from contextlib import redirect_stdout

import numpy as np
import pandas as pd
import pytest

Expand All @@ -9,6 +10,7 @@
from edvart.report_sections.code_string_formatting import get_code
from edvart.report_sections.section_base import Verbosity

from .execution_utils import check_section_executes
from .pyarrow_utils import pyarrow_parameterize


Expand Down Expand Up @@ -136,6 +138,8 @@ def test_code_export_verbosity_low():
assert len(exported_code) == 1
assert exported_code[0] == expected_code[0], "Exported code mismatch"

check_section_executes(bivariate_section, df=get_test_df())


def test_code_export_verbosity_low_with_subsections():
bivariate_section = bivariate_analysis.BivariateAnalysis(
Expand All @@ -160,12 +164,26 @@ def test_code_export_verbosity_low_with_subsections():
assert len(exported_code) == 1
assert exported_code[0] == expected_code[0], "Exported code mismatch"

check_section_executes(bivariate_section, df=get_test_df())


def test_generated_code_verbosity_low_columns():
columns = [f"col{i}" for i in range(5)]
columns_x = [f"col_x{i}" for i in range(6)]
columns_y = [f"col_y{i}" for i in range(4)]
columns_pairs = [(f"first{i}", f"second{i}") for i in range(8)]
columns_all = (
columns
+ columns_x
+ columns_y
+ [col_pair[0] for col_pair in columns_pairs]
+ [col_pair[1] for col_pair in columns_pairs]
)
test_df = pd.DataFrame(
data=np.random.rand(4, len(columns_all)),
columns=columns_all,
)

bivariate_section = bivariate_analysis.BivariateAnalysis(
columns=columns,
columns_x=columns_x,
Expand All @@ -188,6 +206,8 @@ def test_generated_code_verbosity_low_columns():
assert len(exported_code) == 1
assert exported_code[0] == expected_code[0], "Exported code mismatch"

check_section_executes(bivariate_section, df=test_df)


def test_generated_code_verbosity_medium():
bivariate_section = bivariate_analysis.BivariateAnalysis(
Expand All @@ -213,6 +233,8 @@ def test_generated_code_verbosity_medium():
for expected_line, exported_line in zip(expected_code, exported_code):
assert expected_line == exported_line, "Exported code mismatch"

check_section_executes(bivariate_section, df=get_test_df())


def test_generated_code_verbosity_medium_columns_x_y():
columns_x = ["a", "b"]
Expand All @@ -228,6 +250,10 @@ def test_generated_code_verbosity_medium_columns_x_y():
],
color_col="b",
)
test_df = pd.DataFrame(
columns=columns_x + columns_y,
data=np.random.rand(10, 4),
)

exported_cells = []
bivariate_section.add_cells(exported_cells, df=pd.DataFrame())
Expand All @@ -243,6 +269,8 @@ def test_generated_code_verbosity_medium_columns_x_y():
for expected_line, exported_line in zip(expected_code, exported_code):
assert expected_line == exported_line, "Exported code mismatch"

check_section_executes(bivariate_section, df=test_df)


def test_generated_code_verbosity_medium_columns_pairs():
columns_pairs = [("a", "b"), ("c", "d")]
Expand All @@ -257,6 +285,10 @@ def test_generated_code_verbosity_medium_columns_pairs():
BivariateAnalysisSubsection.ContingencyTable,
],
)
test_df = pd.DataFrame(
columns=columns_x_correct + columns_y_correct,
data=np.random.rand(10, 4),
)

exported_cells = []
bivariate_section.add_cells(exported_cells, df=pd.DataFrame())
Expand All @@ -272,6 +304,8 @@ def test_generated_code_verbosity_medium_columns_pairs():
for expected_line, exported_line in zip(expected_code, exported_code):
assert expected_line == exported_line, "Exported code mismatch"

check_section_executes(bivariate_section, df=test_df)


def test_generated_code_verbosity_high():
bivariate_section = bivariate_analysis.BivariateAnalysis(
Expand Down Expand Up @@ -311,6 +345,8 @@ def test_generated_code_verbosity_high():
for expected_line, exported_line in zip(expected_code, exported_code):
assert expected_line == exported_line, "Exported code mismatch"

check_section_executes(bivariate_section, df=get_test_df())


def test_verbosity_low_different_subsection_verbosities():
bivariate_section = BivariateAnalysis(
Expand Down Expand Up @@ -341,6 +377,8 @@ def test_verbosity_low_different_subsection_verbosities():
for expected_line, exported_line in zip(expected_code, exported_code):
assert expected_line == exported_line, "Exported code mismatch"

check_section_executes(bivariate_section, df=get_test_df())


def test_imports_verbosity_low():
bivariate_section = BivariateAnalysis(verbosity=Verbosity.LOW)
Expand Down
7 changes: 7 additions & 0 deletions tests/test_group_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
)
from edvart.report_sections.section_base import Verbosity

from .execution_utils import check_section_executes
from .pyarrow_utils import pyarrow_parameterize

# Workaround to prevent multiple browser tabs opening with figures
Expand Down Expand Up @@ -105,6 +106,8 @@ def test_code_export_verbosity_low(pyarrow_dtypes: bool):
assert len(exported_code) == 1
assert exported_code[0] == expected_code[0], "Exported code mismatch"

check_section_executes(group_section, df)


@pyarrow_parameterize
def test_code_export_verbosity_medium(pyarrow_dtypes: bool):
Expand Down Expand Up @@ -132,6 +135,8 @@ def test_code_export_verbosity_medium(pyarrow_dtypes: bool):
for expected_line, exported_line in zip(expected_code, exported_code):
assert expected_line == exported_line, "Exported code mismatch"

check_section_executes(group_section, df)


@pyarrow_parameterize
def test_code_export_verbosity_high(pyarrow_dtypes: bool):
Expand Down Expand Up @@ -187,6 +192,8 @@ def test_code_export_verbosity_high(pyarrow_dtypes: bool):
for expected_line, exported_line in zip(expected_code, exported_code):
assert expected_line == exported_line, "Exported code mismatch"

check_section_executes(group_section, df)


@pyarrow_parameterize
def test_columns_parameter(pyarrow_dtypes: bool):
Expand Down
15 changes: 15 additions & 0 deletions tests/test_multivariate_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from edvart.report_sections.section_base import Verbosity
from edvart.utils import select_numeric_columns

from .execution_utils import check_section_executes
from .pyarrow_utils import pyarrow_parameterize


Expand Down Expand Up @@ -147,6 +148,8 @@ def test_code_export_verbosity_low(pyarrow_dtypes: bool):
assert len(exported_code) == 1
assert exported_code[0] == expected_code[0], "Exported code mismatch"

check_section_executes(multivariate_section, df)


@pyarrow_parameterize
def test_code_export_verbosity_low_with_subsections(pyarrow_dtypes: bool):
Expand Down Expand Up @@ -186,6 +189,8 @@ def test_code_export_verbosity_low_with_subsections(pyarrow_dtypes: bool):
assert len(exported_code) == 1
assert exported_code[0] == expected_code[0], "Exported code mismatch"

check_section_executes(multivariate_section, df)


@pyarrow_parameterize
def test_code_export_verbosity_medium_all_cols_valid(pyarrow_dtypes: bool):
Expand Down Expand Up @@ -215,6 +220,8 @@ def test_code_export_verbosity_medium_all_cols_valid(pyarrow_dtypes: bool):
for expected_line, exported_line in zip(expected_code, exported_code):
assert expected_line == exported_line, "Exported code mismatch"

check_section_executes(multivariate_section, all_numeric_df)


@pyarrow_parameterize
def test_generated_code_verbosity_1(pyarrow_dtypes: bool):
Expand Down Expand Up @@ -252,6 +259,8 @@ def test_generated_code_verbosity_1(pyarrow_dtypes: bool):
for expected_line, exported_line in zip(expected_code, exported_code):
assert expected_line == exported_line, "Exported code mismatch"

check_section_executes(multivariate_section, df)


@pyarrow_parameterize
def test_generated_code_verbosity_2(pyarrow_dtypes: bool):
Expand Down Expand Up @@ -314,6 +323,8 @@ def test_generated_code_verbosity_2(pyarrow_dtypes: bool):
for expected_line, exported_line in zip(expected_code, exported_code):
assert expected_line == exported_line, "Exported code mismatch"

check_section_executes(multivariate_section, df)


@pyarrow_parameterize
def test_verbosity_medium_non_categorical_col(pyarrow_dtypes: bool):
Expand All @@ -337,6 +348,8 @@ def test_verbosity_medium_non_categorical_col(pyarrow_dtypes: bool):
for expected_line, exported_line in zip(expected_code, exported_code):
assert expected_line == exported_line, "Exported code mismatch"

check_section_executes(multivariate_section, random_df)


@pyarrow_parameterize
def test_verbosity_low_different_subsection_verbosities(pyarrow_dtypes: bool):
Expand Down Expand Up @@ -382,6 +395,8 @@ def test_verbosity_low_different_subsection_verbosities(pyarrow_dtypes: bool):
for expected_line, exported_line in zip(expected_code, exported_code):
assert expected_line == exported_line, "Exported code mismatch"

check_section_executes(multivariate_section, df)


def test_imports_verbosity_low():
multivariate_section = MultivariateAnalysis(verbosity=Verbosity.LOW)
Expand Down
12 changes: 12 additions & 0 deletions tests/test_overview_section.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
from edvart.report_sections.dataset_overview import Overview, OverviewSubsection
from edvart.report_sections.section_base import Verbosity

from .execution_utils import check_section_executes


def get_test_df() -> pd.DataFrame:
test_df = pd.DataFrame(data=[[1.1, "a"], [2.2, "b"], [3.3, "c"]], columns=["A", "B"])
Expand Down Expand Up @@ -136,6 +138,8 @@ def test_code_export_verbosity_low():
# Test code equivalence
assert exported_code[0] == expected_code[0], "Exported code mismatch"

check_section_executes(overview_section, df=get_test_df())


def test_code_export_verbosity_low_with_subsections():
overview_section = Overview(
Expand All @@ -158,6 +162,8 @@ def test_code_export_verbosity_low_with_subsections():
# Test code equivalence
assert exported_code[0] == expected_code[0], "Exported code mismatch"

check_section_executes(overview_section, df=get_test_df())


def test_code_export_verbosity_medium():
# Construct overview section
Expand Down Expand Up @@ -192,6 +198,8 @@ def test_code_export_verbosity_medium():
for i in range(len(exported_code)):
assert exported_code[i] == expected_code[i], "Exported code mismatch"

check_section_executes(overview_section, df=get_test_df())


def test_code_export_verbosity_high():
# Construct overview section
Expand Down Expand Up @@ -270,6 +278,8 @@ def test_code_export_verbosity_high():
for i in range(len(exported_code)):
assert exported_code[i] == expected_code[i], "Exported code mismatch"

check_section_executes(overview_section, df=get_test_df())


def test_verbosity_low_different_subsection_verbosities():
overview_section = Overview(
Expand Down Expand Up @@ -303,6 +313,8 @@ def test_verbosity_low_different_subsection_verbosities():
for expected_line, exported_line in zip(expected_code, exported_code):
assert expected_line == exported_line, "Exported code mismatch"

check_section_executes(overview_section, df=get_test_df())


def test_imports_verbosity_low():
overview_section = Overview(verbosity=Verbosity.LOW)
Expand Down
Loading