diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index b4aad9943..9974b992a 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -1,8 +1,6 @@
 name: ML Pipeline CI
 
 on:
-  # push:
-  #   branches: [ main, master ]
   pull_request:
     branches: [ main, master ]
 
@@ -10,32 +8,48 @@ jobs:
   test:
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v3
-
-    - name: Set up Python
-      uses: actions/setup-python@v4
-      with:
-        python-version: '3.10'
-
-    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install pytest great_expectations pandas scikit-learn flake8 black mypy pytest-cov
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
-
-    - name: Lint with flake8
-      run: |
-        flake8 day5/演習3 --count --select=E9,F63,F7,F82 --show-source --statistics
-        flake8 day5/演習3 --count --exit-zero --max-complexity=10 --max-line-length=88 --statistics
-
-    - name: Format check with black
-      run: |
-        black --check day5/演習3
-
-    - name: Run data tests
-      run: |
-        pytest day5/演習3/tests/test_data.py -v
-
-    - name: Run model tests
-      run: |
-        pytest day5/演習3/tests/test_model.py -v
+      - uses: actions/checkout@v3
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install pytest great_expectations pandas scikit-learn flake8 black mypy pytest-cov joblib mlflow
+          # now install all Day5-specific deps (includes kedro)
+          if [ -f day5/requirements.txt ]; then pip install -r day5/requirements.txt; fi
+
+      - name: Generate model for tests
+        run: |
+          cd day5/演習1
+          python main.py        # prepare data -> train -> save the model
+          python pipeline.py    # (also run the Kedro pipeline if needed)
+          cd ../..              # return to the repository root
+
+      - name: Lint with flake8
+        run: |
+          flake8 day5/演習3 --count --select=E9,F63,F7,F82 --show-source --statistics
+          flake8 day5/演習3 --count --exit-zero --max-complexity=10 --max-line-length=88 --statistics
+
+      - name: Format check with black
+        run: |
+          black --check day5/演習3
+
+      - name: Run data tests
+        run: |
+          pytest day5/演習3/tests/test_data.py -v
+
+      - name: Run model tests
+        run: |
+          pytest day5/演習3/tests/test_model.py -v
+
+      - name: Run model performance accuracy test
+        run: |
+          pytest day5/演習3/tests/test_model_performance.py::test_model_inference_accuracy -v
+
+      - name: Run model performance inference-time test
+        run: |
+          pytest day5/演習3/tests/test_model_performance.py::test_model_inference_time -v
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..044027e7c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+# Ignore trained model files under day5
+day5/**/models/*.pkl
diff --git "a/day5/\346\274\224\347\277\2221/models/titanic_model.pkl" "b/day5/\346\274\224\347\277\2221/models/titanic_model.pkl"
index 6fec87e47..a1b055d4b 100644
Binary files "a/day5/\346\274\224\347\277\2221/models/titanic_model.pkl" and "b/day5/\346\274\224\347\277\2221/models/titanic_model.pkl" differ
diff --git "a/day5/\346\274\224\347\277\2222/models/titanic_model.pkl" "b/day5/\346\274\224\347\277\2222/models/titanic_model.pkl"
index 9e1859fdf..1659278cd 100644
Binary files "a/day5/\346\274\224\347\277\2222/models/titanic_model.pkl" and "b/day5/\346\274\224\347\277\2222/models/titanic_model.pkl" differ
diff --git "a/day5/\346\274\224\347\277\2223/tests/test_model_performance.py" "b/day5/\346\274\224\347\277\2223/tests/test_model_performance.py"
new file mode 100644
index 000000000..40275ef2b
--- /dev/null
+++ "b/day5/\346\274\224\347\277\2223/tests/test_model_performance.py"
@@ -0,0 +1,56 @@
+import time
+import joblib
+import pandas as pd
+from sklearn.metrics import accuracy_score
+import os
+
+# This file lives in day5/演習3/tests/ inside the repository, so the repository
+# root is three levels up from this directory (four would escape the checkout).
+REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
+EXERCISE1_DIR = os.path.join(REPO_ROOT, "day5", "演習1")
+
+
+def load_test_data():
+    """Read data/titanic_test.csv from 演習1 and split off the label column.
+
+    Returns:
+        tuple: ``(X, y)`` where ``X`` is the frame without ``Survived`` and
+        ``y`` is the ``Survived`` column.
+    """
+    df = pd.read_csv(os.path.join(EXERCISE1_DIR, "data", "titanic_test.csv"))
+    X = df.drop("Survived", axis=1)
+    y = df["Survived"]
+    return X, y
+
+
+def get_model():
+    """Load models/titanic_model.pkl from 演習1 with joblib.
+
+    Asserts that the file exists first so a missing artifact is reported with
+    its full path instead of a bare loader error.
+    """
+    model_path = os.path.join(EXERCISE1_DIR, "models", "titanic_model.pkl")
+    assert os.path.exists(model_path), f"Model file not found at {model_path}"
+    return joblib.load(model_path)
+
+
+def test_model_inference_accuracy():
+    """The saved model must score at least 0.75 accuracy on the test CSV."""
+    model = get_model()
+    X_test, y_test = load_test_data()
+    y_pred = model.predict(X_test)
+    acc = accuracy_score(y_test, y_pred)
+    assert acc >= 0.75, f"Expected accuracy >= 0.75, got {acc:.3f}"
+
+
+def test_model_inference_time():
+    """Mean ``predict`` time over 100 runs must stay below 0.1 seconds."""
+    model = get_model()
+    X_test, _ = load_test_data()
+    n_runs = 100
+    # perf_counter is monotonic, so system clock adjustments cannot skew it.
+    start = time.perf_counter()
+    for _ in range(n_runs):
+        model.predict(X_test)
+    avg_time = (time.perf_counter() - start) / n_runs
+    assert avg_time < 0.1, f"Inference too slow: {avg_time:.3f}s per run"