212 changes: 212 additions & 0 deletions .github/workflows/integration-tests.yaml
@@ -0,0 +1,212 @@
name: Integration Tests
on:
  schedule:
    # Run nightly at 2 AM UTC
    - cron: '0 2 * * *'
Review comment (Collaborator): Instead of nightly, we could just run these on PRs (with approval from a maintainer).
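A minimal sketch of that approach, assuming a repository environment named `integration-tests` configured with maintainers as required reviewers (the environment name and protection rule are assumptions, not part of this PR):

```yaml
# Hypothetical: trigger on PRs and gate the job on a protected environment.
on:
  pull_request:

jobs:
  integration-tests:
    # The job pauses until a reviewer approves the run for the
    # `integration-tests` environment (assumed to be configured with
    # required reviewers under repository Settings > Environments).
    environment: integration-tests
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
```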

  workflow_dispatch:
    # Allow manual triggering for debugging
    inputs:
      cloud:
        description: 'Cloud provider to test (aws, azure, or both)'
        required: false
        default: 'both'
        type: choice
        options:
          - both
          - aws
          - azure

jobs:
  integration-tests:
    runs-on:
Review comment (Author): FYI, this is temporary; we will need dedicated infra for this now that we have integration tests.

      group: databricks-field-eng-protected-runner-group
      labels: [linux-ubuntu-latest]
    timeout-minutes: 45
    strategy:
      fail-fast: false # Don't cancel Azure if AWS fails
      matrix:
        include:
          - cloud: aws
            profile: gha-aws-profile
            host_var: DATABRICKS_HOST_AWS
            token_var: DATABRICKS_TOKEN_AWS
          - cloud: azure
            profile: gha-azure-profile
            host_var: DATABRICKS_HOST_AZURE
            token_var: DATABRICKS_TOKEN_AZURE

    # Run for scheduled and manually dispatched events. The matrix context is
    # not available in a job-level `if`, so per-cloud filtering for manual
    # runs happens at the step level below.
    if: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}

    name: Integration Tests (${{ matrix.cloud }})

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.9'

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '22'

      - name: Install Python dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r dev-requirements.txt
          # Install pytest plugins for retry functionality and HTML reports
          pip install pytest-rerunfailures pytest-html

      - name: Install Node.js dependencies
        run: |
          npm install -g [email protected]

      - name: Install Databricks CLI
        run: |
          curl -fsSL https://raw.githubusercontent.com/databricks/setup-cli/main/install.sh | sh
          echo "$HOME/.databricks/bin" >> $GITHUB_PATH

      - name: Configure Databricks CLI Profile
        env:
          DATABRICKS_HOST: ${{ secrets[matrix.host_var] }}
          DATABRICKS_TOKEN: ${{ secrets[matrix.token_var] }}
        run: |
          # Create databricks config directory
          mkdir -p ~/.databrickscfg.d

          # Configure profile using environment variables
          cat > ~/.databrickscfg << EOF
          [${{ matrix.profile }}]
          host = $DATABRICKS_HOST
          token = $DATABRICKS_TOKEN
          EOF

          # Verify profile is configured correctly
          databricks --profile ${{ matrix.profile }} current-user me

      - name: Run Integration Tests
        # For manual dispatches, only run when this matrix cloud was selected
        # (or when no specific cloud / 'both' was chosen).
        if: ${{ github.event_name == 'schedule' || !github.event.inputs.cloud || github.event.inputs.cloud == 'both' || github.event.inputs.cloud == matrix.cloud }}
        env:
          # Test configuration
          DATABRICKS_CONFIG_PROFILE: ${{ matrix.profile }}
          DATABRICKS_CLOUD: ${{ matrix.cloud }}
          DATABRICKS_HOST: ${{ secrets[matrix.host_var] }}
          DATABRICKS_TOKEN: ${{ secrets[matrix.token_var] }}
          # Use environment variables for catalog/schema names
          TEST_CATALOG_NAME: ${{ vars.TEST_CATALOG_NAME || 'main_integration_tests' }}
          TEST_SCHEMA_NAME: ${{ vars.TEST_SCHEMA_NAME || 'gha_integration_tests' }}
          # GitHub-specific environment
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          # Run all tests including integration tests, retrying failures
          pytest tests/ --integration --large -v \
            --tb=short \
            --reruns=2 \
            --reruns-delay=30 \
            --junit-xml=test-results-${{ matrix.cloud }}.xml \
            --html=test-report-${{ matrix.cloud }}.html \
            --self-contained-html

      - name: Cleanup Credentials
        if: always() # Always run cleanup, even if tests fail
        run: |
          # Securely remove Databricks config files
          shred -vfz -n 3 ~/.databrickscfg 2>/dev/null || rm -f ~/.databrickscfg
          rm -rf ~/.databrickscfg.d
          # Remove any temporary credential files
          find /tmp -name "*databricks*" -type f -delete 2>/dev/null || true

      - name: Upload Test Results
        uses: actions/upload-artifact@v4
        if: always() # Upload even if tests failed
        with:
          name: test-results-${{ matrix.cloud }}
          path: |
            test-results-${{ matrix.cloud }}.xml
            test-report-${{ matrix.cloud }}.html
          retention-days: 30

      - name: Upload Test Logs on Failure
        uses: actions/upload-artifact@v4
        if: failure()
        with:
          name: test-logs-${{ matrix.cloud }}
          path: |
            integration_test_*.log
          retention-days: 7

  notify-failure:
    runs-on:
      group: databricks-field-eng-protected-runner-group
Review comment (Collaborator): Not sure we can access these runner groups from public repos (I haven't been able to in the past).
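If the group turns out to be unreachable, one hedged workaround is to select the runner through a repository variable so no workflow edit is needed later (`USE_PROTECTED_RUNNERS` is a hypothetical variable name):

```yaml
# Hypothetical fallback: switch runners via a repo variable instead of
# hard-coding the protected group.
runs-on: ${{ vars.USE_PROTECTED_RUNNERS == 'true' && 'self-hosted' || 'ubuntu-latest' }}
```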

      labels: [linux-ubuntu-latest]
    needs: integration-tests
    if: failure() && github.event_name == 'schedule'
    steps:
      - name: Send Email Notification
Review comment (Collaborator): I don't think we need email notifications for failures if we end up just running these on PRs; just leave a comment on the PR?
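A rough sketch of that alternative, assuming the workflow would then run in a `pull_request` context so `context.issue.number` resolves to the PR (the step name and gating are illustrative; `actions/github-script` and `issues.createComment` are real APIs):

```yaml
- name: Comment on PR
  if: failure() && github.event_name == 'pull_request'
  uses: actions/github-script@v7
  with:
    script: |
      // Post the failure summary as a PR comment instead of an email.
      await github.rest.issues.createComment({
        owner: context.repo.owner,
        repo: context.repo.repo,
        issue_number: context.issue.number,
        body: `Integration tests failed for ${context.sha}: ` +
          `${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`
      });
```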

        uses: dawidd6/action-send-mail@v3
        with:
          server_address: smtp.gmail.com
          server_port: 587
          username: ${{ secrets.NOTIFICATION_EMAIL_USERNAME }}
          password: ${{ secrets.NOTIFICATION_EMAIL_PASSWORD }}
          subject: "FAILED: MLOps Stacks Integration Tests Failed - ${{ github.sha }}"
          to: ${{ secrets.NOTIFICATION_EMAIL_TO }}
Review comment (Collaborator): Which email is this to?

          from: "GitHub Actions <${{ secrets.NOTIFICATION_EMAIL_USERNAME }}>"
          body: |
            The nightly integration tests have failed for MLOps Stacks.

            **Repository:** ${{ github.repository }}
            **Branch:** ${{ github.ref }}
            **Commit:** ${{ github.sha }}
            **Workflow:** ${{ github.workflow }}
            **Run ID:** ${{ github.run_id }}

            **View Results:** ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}

            Please check the test results and logs for more details.

            ---
            This is an automated message from GitHub Actions.

  # Create a check run that shows the overall status
  integration-status:
    runs-on:
      group: databricks-field-eng-protected-runner-group
      labels: [linux-ubuntu-latest]
    needs: integration-tests
    if: always()
    steps:
      - name: Set Status Check
        uses: actions/github-script@v7
        with:
          script: |
            const conclusion = '${{ needs.integration-tests.result }}' === 'success' ? 'success' : 'failure';
            const title = conclusion === 'success' ?
              'PASSED: All integration tests passed' :
              'FAILED: Integration tests failed';

            await github.rest.checks.create({
              owner: context.repo.owner,
              repo: context.repo.repo,
              name: 'Integration Tests Status',
              head_sha: context.sha,
              status: 'completed',
              conclusion: conclusion,
              output: {
                title: title,
                summary: `Integration tests ${conclusion === 'success' ? 'passed' : 'failed'} for commit ${context.sha.substring(0, 7)}`
              }
            });
10 changes: 9 additions & 1 deletion .github/workflows/run-checks.yaml
@@ -3,12 +3,17 @@ on:
  pull_request:
jobs:
  run-tests:
    runs-on: ubuntu-latest
    runs-on:
      group: databricks-field-eng-protected-runner-group
      labels: [linux-ubuntu-latest]
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.9'
      - uses: actions/setup-node@v4
        with:
          node-version: '22'
      - name: Install act
        run: |
          # Install act
@@ -21,6 +26,9 @@ jobs:
        run: |
          python -m pip install --upgrade pip
          pip install -r dev-requirements.txt
      - name: Install Node.js dependencies
        run: |
          npm install -g [email protected]
      - name: Generate CICD Zip
        run: |
          cd template/{{.input_root_dir}}
16 changes: 16 additions & 0 deletions conftest.py
@@ -22,19 +22,35 @@ def pytest_addoption(parser):
        default=False,
        help="Run tests decorated with 'large' annotation",
    )
    parser.addoption(
        "--integration",
        action="store_true",
        dest="integration",
        default=False,
        help="Run tests decorated with 'integration' annotation (requires Databricks workspace)",
    )


def pytest_configure(config):
    # Register markers to suppress `PytestUnknownMarkWarning`
    config.addinivalue_line("markers", "large")
    config.addinivalue_line("markers", "integration: mark test as integration test requiring Databricks workspace")


def pytest_runtest_setup(item):
    markers = [mark.name for mark in item.iter_markers()]
    marked_as_large = "large" in markers
    marked_as_integration = "integration" in markers
    large_option = item.config.getoption("--large")
    large_only_option = item.config.getoption("--large-only")
    integration_option = item.config.getoption("--integration")

    # Handle large tests
    if marked_as_large and not (large_option or large_only_option):
        pytest.skip("use `--large` or `--large-only` to run this test")
    if not marked_as_large and large_only_option:
        pytest.skip("remove `--large-only` to run this test")

    # Handle integration tests
    if marked_as_integration and not integration_option:
        pytest.skip("use `--integration` to run this test (requires Databricks workspace configuration)")
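For reference, a sketch of how these flag combinations would play out in CI steps (step names are illustrative; only `--integration`, `--large`, and `--large-only` come from the conftest above):

```yaml
- name: Unit tests only (large- and integration-marked tests are skipped)
  run: pytest tests/
- name: Unit and large tests
  run: pytest tests/ --large
- name: Everything, including tests that need a Databricks workspace
  run: pytest tests/ --integration --large
```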
5 changes: 5 additions & 0 deletions template/{{.input_root_dir}}/_params_testing_only.txt.tmpl
@@ -13,6 +13,11 @@ input_include_models_in_unity_catalog={{.input_include_models_in_unity_catalog}}
input_schema_name={{.input_schema_name}}
input_unity_catalog_read_user_group={{.input_unity_catalog_read_user_group}}
input_inference_table_name={{.input_inference_table_name}}
input_staging_catalog_name={{.input_staging_catalog_name}}
input_prod_catalog_name={{.input_prod_catalog_name}}
input_test_catalog_name={{.input_test_catalog_name}}
input_setup_cicd_and_project={{.input_setup_cicd_and_project}}
input_docker_image={{.input_docker_image}}

databricks_staging_workspace_host={{ template `databricks_staging_workspace_host` . }}
databricks_prod_workspace_host={{ template `databricks_prod_workspace_host` . }}
2 changes: 2 additions & 0 deletions tests/integration/__init__.py
@@ -0,0 +1,2 @@
# Integration tests for MLOps Stacks
# These tests require a real Databricks workspace and are configured via CLI profiles
Review comment (Collaborator): A comment explaining why this file is otherwise empty would probably help.
