diff --git a/tests/performance/.helmignore b/tests/performance/.helmignore index db4e51df6b..4400d67bb8 100644 --- a/tests/performance/.helmignore +++ b/tests/performance/.helmignore @@ -3,5 +3,11 @@ # Match any file or path named .git .git +*.sh +*.sh.bak* -tools/ \ No newline at end of file +tools/ +ssh/ +scenarios/ +report/ +monitoring/ \ No newline at end of file diff --git a/tests/performance/README.md b/tests/performance/README.md deleted file mode 100644 index 430c268845..0000000000 --- a/tests/performance/README.md +++ /dev/null @@ -1,447 +0,0 @@ -- [Performance Testing Framework](#performance-testing-framework) - - [🏗️ Architecture](#️-architecture) - - [Core Components](#core-components) - - [Directory Structure](#directory-structure) - - [🚀 Quick Start](#-quick-start) - - [Prerequisites](#prerequisites) - - [Install Dependencies](#install-dependencies) - - [Create Test Resources](#create-test-resources) - - [Remove Resources](#remove-resources) - - [🛠️ Tools](#️-tools) - - [bootstrap](#bootstrap) - - [Taskfile Integration](#taskfile-integration) - - [Shatal - VM Wobbling Tool](#shatal---vm-wobbling-tool) - - [Evicter - Migration Tool](#evicter---migration-tool) - - [Statistics - Statistics Collection](#statistics---statistics-collection) - - [📊 Monitoring](#-monitoring) - - [Grafana Dashboards](#grafana-dashboards) - - [Prometheus Rules](#prometheus-rules) - - [⚙️ Configuration](#️-configuration) - - [values.yaml](#valuesyaml) - - [Resource Types](#resource-types) - - [🎯 Testing Scenarios](#-testing-scenarios) - - [1. Basic Performance Testing](#1-basic-performance-testing) - - [2. Migration Testing](#2-migration-testing) - - [3. VM Access Testing](#3-vm-access-testing) - - [📈 Metrics and Monitoring](#-metrics-and-monitoring) - - [Key Metrics](#key-metrics) - - [Dashboards](#dashboards) - - [🔧 Development](#-development) - - [Building Tools](#building-tools) - - [Adding New Tests](#adding-new-tests) - - [📝 Usage Examples](#-usage-examples) - - [Creating Test Environment](#creating-test-environment) - - [Resource Cleanup](#resource-cleanup) - - [🤝 Contributing](#-contributing) - - [📄 License](#-license) - -# Performance Testing Framework - -A comprehensive framework for virtualization performance testing, including tools for creating, migrating, and monitoring virtual machines in Kubernetes. 
- -## 🏗️ Architecture - -### Core Components - -- **Helm Chart**: Resource management through Helm -- **bootstrap**: Main script for creating/deleting test resources -- **Shatal**: Virtual machine "wobbling" tool -- **Evicter**: Continuous VM migration tool -- **Statistics**: Performance statistics collection -- **Monitoring**: Grafana dashboards and Prometheus rules - -### Directory Structure - -``` -performance/ -├── templates/ # Kubernetes manifests -├── tools/ # Testing tools -│ ├── evicter/ # VM migration -│ ├── shatal/ # VM migration tool via node drain -│ ├── statistic/ # Statistics collection -│ └── status-access-vms/ # VM access and monitoring -├── monitoring/ # Grafana dashboards -├── ssh/ # SSH keys for VM access -├── bootstrap.sh # Main script -├── values.yaml # Configuration -└── Taskfile.yaml # Task automation -``` - -## 🚀 Quick Start - -### Prerequisites - -- Kubernetes cluster with virtualization support -- Helm 3 -- kubectl -- Go (for building tools) - -### Install Dependencies - -```bash -task check_or_install_software -``` - -### Create Test Resources - -```bash -# Create 10 virtual machines -task apply COUNT=10 - -# Create only disks -task apply:disks COUNT=5 - -# Create only VMs -task apply:vms COUNT=5 -``` - -### Remove Resources - -```bash -# Remove all resources -task destroy - -# Remove only VMs -task destroy:vms - -# Remove only disks -task destroy:disks -``` - -## 🛠️ Tools - -### bootstrap - -Main script for managing test resources. - -**Available Flags:** -- `--count, -c`: Number of virtual machines to create (required for apply) -- `--namespace, -n`: Namespace for resources (default: current context namespace) -- `--storage-class, -s`: Storage class for VM disks -- `--name, -r`: Release name (default: performance) -- `--resources, -R`: Resources to manage - 'vds', 'vms', or 'all' (default: all) -- `--resources-prefix, -p`: Prefix for resource names (default: performance) - -```bash -# Create resources (using long flags) -./bootstrap.sh apply --count=10 --namespace=perf --storage-class=ceph-pool-r2-csi-rbd - -# Create resources (using short flags) -./bootstrap.sh apply -c 10 -n perf -s ceph-pool-r2-csi-rbd - -# Create only disks -./bootstrap.sh apply -c 5 -n perf -R vds -r performance-disks - -# Create only VMs (assuming disks exist) -./bootstrap.sh apply -c 5 -n perf -R vms -r performance-vms - -# Remove resources -./bootstrap.sh destroy --namespace=perf --resources=all -# or using short flags -./bootstrap.sh destroy -n perf -R all - -# Remove specific resources -./bootstrap.sh destroy -n perf -R vms -r performance-vms -``` - -### Taskfile Integration - -The framework includes comprehensive Taskfile integration for easy automation: - -**Available Tasks:** -```bash -# Basic operations -task apply COUNT=10 # Create 10 VMs -task destroy # Remove all resources -task apply:disks COUNT=5 # Create only disks -task apply:vms COUNT=5 # Create only VMs -task destroy:disks # Remove only disks -task destroy:vms # Remove only VMs - -# Two-step deployment -task apply:all COUNT=30 # Create disks first, then VMs -task destroy:all # Remove VMs first, then disks - -# Utility tasks -task render # Preview Helm templates -task help # Show bootstrap.sh help -task check_or_install_software # Install dependencies -``` - -**Environment Variables:** -```bash -# Set custom values -COUNT=50 NAMESPACE=test STORAGE_CLASS=ceph-pool-r2-csi-rbd task apply -``` - -### Shatal - VM Wobbling Tool - -Tool for continuous stress testing of virtual machines. 
- -**Features:** -- Node draining with VM migration -- CPU core fraction changes (10% ↔ 25%) -- VM creation/deletion -- Configurable operation weights - -**Usage:** -```bash -cd tools/shatal -KUBECONFIG=$(cat ~/.kube/config | base64 -w 0) -KUBECONFIG_BASE64=$KUBECONFIG task run -``` - -### Evicter - Migration Tool - -Continuous migration of a specified percentage of virtual machines. - -```bash -# Migrate 20% of VMs in namespace 'perf' for 1 hour -./evicter --target=20 --duration=1h --ns=perf -``` - -### Statistics - Statistics Collection - -```bash -cd tools/statistic -task run -``` - -## 📊 Monitoring - -### Grafana Dashboards - -The monitoring directory contains pre-configured Grafana dashboards: - -- **virtualization-dashboard.yaml**: General virtualization statistics -- **virtual-machine-dashboard.yaml**: Detailed VM statistics -- **ceph-dashboard.yaml**: Storage monitoring - -### SSH Access - -The `ssh/` directory contains SSH keys for VM access: -- `id_ed`: Private SSH key -- `id_ed.pub`: Public SSH key - -### Prometheus Rules - -Configured rules for performance monitoring and alerts. - -## ⚙️ Configuration - -### values.yaml - -Main configuration parameters: - -```yaml -# Number of resources -count: 1 - -# Resources to create -resources: - default: all # all, vms, vds, vi - prefix: "performance" - storageClassName: "ceph-pool-r2-csi-rbd" - - # VM configuration - vm: - runPolicy: AlwaysOnUnlessStoppedManually - restartApprovalMode: Dynamic - spec: - cpu: - cores: 1 - coreFraction: 10% - memory: - size: 256Mi - - # Virtual disk configuration - vd: - spec: - type: vd # vi or vd - diskSize: 300Mi - - # Virtual image configuration - vi: - spec: - type: vi # vi or pvc - baseImage: - name: alpine - url: "https://example.com/alpine.qcow2" -``` - -### Resource Types - -**VirtualDisk (vd.spec.type):** -- `vi`: creates VMs with VirtualImage in blockDeviceRefs -- `vd`: creates VMs with corresponding VirtualDisk - -**VirtualImage (vi.spec.type):** -- `vi`: creates image through ContainerRegistry -- `pvc`: creates image through PersistentVolumeClaim - -## 🎯 Testing Scenarios - -### 1. Basic Performance Testing - -```bash -# Create 100 VMs for load testing -task apply COUNT=100 - -# Start statistics collection -cd tools/statistic && task run - -# Start wobbling tool -cd tools/shatal && task run -``` - -### 2. Migration Testing - -```bash -# Start continuous migration of 30% VMs -cd tools/evicter -go run cmd/main.go --target=30 --duration=2h -``` - -### 3. VM Access Testing - -```bash -# Configure VM access through Ansible -cd tools/status-access-vms/ansible -task run - -# Start load testing -cd tools/status-access-vms/tank -task run -``` - -## 📈 Metrics and Monitoring - -### Key Metrics - -- VM creation time -- VM migration time -- Resource usage (CPU, memory, disk) -- VM availability -- Storage performance - -### Dashboards - -All dashboards are automatically deployed when creating resources and are available in Grafana. - -## 🔧 Development - -### Building Tools - -```bash -# Build evicter -cd tools/evicter -go build -o evicter cmd/main.go - -# Build shatal -cd tools/shatal -go build -o shatal cmd/shatal/main.go - -# Build statistic -cd tools/statistic -go build -o stat cmd/stat/main.go -``` - -### Adding New Tests - -1. Create a new template in `templates/` -2. Add configuration to `values.yaml` -3. Update `bootstrap.sh` if necessary -4. Add tasks to `Taskfile.yaml` - -## 📝 Usage Examples - -### Creating Test Environment - -```bash -# 1. Create namespace -kubectl create namespace perf - -# 2. 
Create 50 VMs with disks -task apply COUNT=50 NAMESPACE=perf - -# 3. Start monitoring -cd tools/statistic && task run - -# 4. Start stress testing -cd tools/shatal && task run -``` - -### Resource Cleanup - -```bash -# Remove all resources from namespace -task destroy NAMESPACE=perf -``` - -## 🔧 Troubleshooting - -### Common Issues - -**1. Helm Template Errors** -```bash -# If you get template errors, check the values structure -helm template test . --values values.yaml - -# Debug with verbose output -task apply COUNT=1 --verbose -``` - -**2. Resource Conflicts** -```bash -# If resources are stuck in terminating state -kubectl delete virtualmachines --all -n perf --force --grace-period=0 -kubectl delete virtualdisks --all -n perf --force --grace-period=0 - -# Clean up secrets -kubectl delete secrets --all -n perf -``` - -**3. Namespace Issues** -```bash -# Check current namespace -kubectl config view --minify -o jsonpath='{..namespace}' - -# Switch to correct namespace -kubectl config set-context --current --namespace=perf -``` - -**4. Storage Class Issues** -```bash -# List available storage classes -kubectl get storageclass - -# Use correct storage class -task apply COUNT=5 STORAGE_CLASS=ceph-pool-r2-csi-rbd -``` - -### Debug Commands - -```bash -# Check Helm releases -helm list -n perf - -# Check resource status -kubectl get all -n perf -kubectl get virtualmachines -n perf -kubectl get virtualdisks -n perf - -# Check logs -kubectl logs -n perf -l app=performance -``` - -## 🤝 Contributing - -1. Fork the repository -2. Create a branch for new feature -3. Make changes -4. Add tests -5. Create Pull Request - -## 📄 License - -Copyright 2024 Flant JSC. Licensed under the Apache License, Version 2.0. \ No newline at end of file diff --git a/tests/performance/README_tests.md b/tests/performance/README_tests.md new file mode 100644 index 0000000000..06f11e443f --- /dev/null +++ b/tests/performance/README_tests.md @@ -0,0 +1,449 @@ +# Performance Testing Script - Original Version + +## Overview + +`tests.sh` is the original performance testing script for Kubernetes Virtual Machines (VMs) and Virtual Disks (VDs) operations. It provides comprehensive end-to-end testing scenarios for virtualization workloads. 
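+
+For a quick first run, the sketch below shows a minimal invocation and how to inspect what it created; it assumes a working kubeconfig and the script's default `perf` namespace:
+
+```bash
+# Run scenario 1 with the default 2 VMs end to end
+./tests.sh -s 1 -c 2
+
+# Inspect the VMs and disks the run operates on (default namespace is "perf")
+kubectl get vm,vd -n perf
+```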
+ +## Features + +- **Complete Scenario Execution** - Runs full test scenarios from start to finish +- **Comprehensive Testing** - Tests VM lifecycle, migrations, controller restarts, and node draining +- **Detailed Reporting** - Generates comprehensive reports with timing and statistics +- **Multiple Scenarios** - Supports different virtual image types +- **Batch Deployment** - Supports large-scale deployments (up to 15,000 VMs) with intelligent batching +- **Flexible Deployment Control** - Bootstrap-only mode, continue mode, and resource preservation options +- **Production Ready** - Battle-tested in production environments + +## Usage + +### Basic Commands + +```bash +# Run scenario 1 with 2 resources (default) +./tests.sh + +# Run scenario 1 with 4 resources +./tests.sh -s 1 -c 4 + +# Run scenario 2 with 10 resources +./tests.sh -s 2 -c 10 + +# Clean reports and run +./tests.sh --clean-reports +``` + +### Command Line Options + +| Option | Description | Default | +|--------|-------------|---------| +| `-s, --scenario NUMBER` | Scenario number to run (1 or 2) | 1 | +| `-c, --count NUMBER` | Number of resources to create | 2 | +| `--batch-size NUMBER` | Maximum resources per batch | 1200 | +| `--enable-batch` | Force batch deployment mode | false | +| `--bootstrap-only` | Only deploy resources, skip tests | false | +| `--continue` | Continue tests after bootstrap (use with --bootstrap-only) | false | +| `--keep-resources` | Keep resources after tests (don't cleanup) | false | +| `--clean-reports` | Clean all report directories before running | false | +| `-h, --help` | Show help message | - | + +### Examples + +```bash +# Default execution (scenario 1, 2 resources) +./tests.sh + +# Custom resource count +./tests.sh -c 10 + +# Different scenario +./tests.sh -s 2 -c 5 + +# Clean start +./tests.sh --clean-reports -c 20 + +# Large scale deployment with batch processing +./tests.sh -c 15000 --batch-size 1200 + +# Force batch deployment for smaller numbers +./tests.sh -c 500 --enable-batch + +# Bootstrap-only mode (deploy resources only) +./tests.sh --bootstrap-only -c 1000 + +# Continue tests after bootstrap +./tests.sh --continue -c 1000 + +# Keep resources after tests +./tests.sh --keep-resources -c 50 +``` + +## Batch Deployment + +For large-scale deployments (>1200 resources), the script automatically uses intelligent batch deployment: + +### Features +- **Automatic Batching** - Automatically detects when batch deployment is needed +- **Configurable Batch Size** - Default 1200 resources per batch (customizable) +- **Progress Tracking** - Real-time progress updates with ETA +- **Cluster Resource Checks** - Pre-deployment resource validation +- **Stability Delays** - 30-second delays between batches to prevent cluster overload + +### Configuration +```bash +# Default batch settings +MAX_BATCH_SIZE=1200 +TOTAL_TARGET_RESOURCES=15000 +BATCH_DEPLOYMENT_ENABLED=false +``` + +### Examples +```bash +# Deploy 15,000 VMs in batches of 1200 +./tests.sh -c 15000 --batch-size 1200 + +# Force batch mode for smaller deployments +./tests.sh -c 500 --enable-batch + +# Custom batch size +./tests.sh -c 5000 --batch-size 800 +``` + +## Deployment Control + +The script provides flexible deployment control options for different use cases: + +### Bootstrap-Only Mode + +Use `--bootstrap-only` to deploy resources without running tests: + +```bash +# Deploy 1000 resources without running tests +./tests.sh --bootstrap-only -c 1000 + +# Deploy with batch processing +./tests.sh --bootstrap-only -c 5000 --batch-size 
1000 +``` + +**Use Cases:** +- Pre-deploying resources for later testing +- Resource provisioning without test execution +- Large-scale infrastructure setup + +### Continue Mode + +Use `--continue` to run tests on existing resources: + +```bash +# Continue tests on existing resources +./tests.sh --continue -c 1000 + +# Continue with specific scenario +./tests.sh --continue -s 2 -c 1000 +``` + +**Use Cases:** +- Running tests on pre-deployed resources +- Resuming tests after bootstrap +- Testing on existing infrastructure + +### Keep Resources Mode + +Use `--keep-resources` to preserve resources after test completion: + +```bash +# Keep resources after tests +./tests.sh --keep-resources -c 50 + +# Combine with continue mode +./tests.sh --continue --keep-resources -c 100 +``` + +**Use Cases:** +- Preserving test environment for analysis +- Keeping resources for additional testing +- Debugging and troubleshooting + +### Workflow Examples + +#### Large-Scale Deployment Workflow + +```bash +# Step 1: Bootstrap large deployment +./tests.sh --bootstrap-only -c 15000 --batch-size 1200 + +# Step 2: Continue with tests +./tests.sh --continue -c 15000 + +# Step 3: Keep resources for analysis +./tests.sh --continue --keep-resources -c 15000 +``` + +#### Development Testing Workflow + +```bash +# Quick bootstrap for development +./tests.sh --bootstrap-only -c 10 + +# Run tests on development environment +./tests.sh --continue -c 10 + +# Keep resources for debugging +./tests.sh --continue --keep-resources -c 10 +``` + +#### Production Testing Workflow + +```bash +# Deploy production-scale resources +./tests.sh --bootstrap-only -c 5000 --batch-size 1000 + +# Run comprehensive tests +./tests.sh --continue -c 5000 + +# Clean up after testing +./tests.sh -c 5000 # Normal execution with cleanup +``` + +## Scenarios + +### Scenario 1: persistentVolumeClaim (Default) +- **Virtual Image Type**: persistentVolumeClaim +- **Test Coverage**: VM lifecycle, migrations, controller operations +- **Use Case**: Production workloads with persistent storage + +### Scenario 2: containerRegistry (Currently Disabled) +- **Virtual Image Type**: containerRegistry +- **Test Coverage**: Similar to Scenario 1 but with container images +- **Use Case**: Container-based workloads + +## Test Sequence + +The script runs a comprehensive 22-step test sequence: + +1. **Cleanup** - Remove existing resources +2. **VM Deployment** - Deploy VMs with disks +3. **Statistics Collection** - Gather initial statistics +4. **VM Stop** - Stop all VMs +5. **VM Start** - Start all VMs +6. **Migration Setup** - Start 5% migration in background +7. **VM Undeploy** - Undeploy 10% VMs (keeping disks) +8. **VM Deploy** - Deploy 10% VMs +9. **Statistics Collection** - Gather statistics for 10% VMs +10. **VM Undeploy 10%** - Undeploy 10% VMs (keeping disks) +11. **VM Deploy 10%** - Deploy 10% VMs (keeping disks) +12. **VM Statistics** - Gather statistics for 10% VMs +13. **VM Operations** - Test stop/start operations on 10% VMs +14. **Migration Cleanup** - Stop migration and cleanup +15. **Migration Percentage** - Migrate 10% VMs +16. **Migration Parallel 2x** - Test with 2x parallel migrations +17. **Migration Parallel 4x** - Test with 4x parallel migrations +18. **Migration Parallel 8x** - Test with 8x parallel migrations +19. **Controller Restart** - Test controller restart with VM creation +20. **Final Statistics** - Gather final statistics +21. **Drain Node** - Test node draining +22. 
**Final Cleanup** - Clean up all resources + +## Report Structure + +Reports are generated in the `report/` directory with the following structure: + +``` +report/ +└── scenario_1_persistentVolumeClaim_2vm_20251021_013737/ + ├── test.log # Main test log + ├── vm_operations.log # VM operations log + ├── summary.txt # Summary report + ├── statistics/ # Statistics data + │ ├── *.csv # CSV statistics files + │ └── ... + └── vpa/ # VPA data + ├── vpa_*.yaml # VPA configurations + └── ... +``` + +### Report Naming Convention + +``` +{scenario_name}_{vi_type}_{count}vm_{timestamp} +``` + +Example: `scenario_1_persistentVolumeClaim_2vm_20251021_013737` + +## Configuration + +### Default Values + +```bash +SCENARIO_NUMBER=1 +MAIN_COUNT_RESOURCES=2 +PERCENT_VMS=10 +MIGRATION_DURATION="1m" +MIGRATION_PERCENTAGE_10=10 +MIGRATION_PERCENTAGE_5=5 + +# Batch deployment configuration +MAX_BATCH_SIZE=1200 +TOTAL_TARGET_RESOURCES=15000 +BATCH_DEPLOYMENT_ENABLED=false +``` + +### Resource Calculations + +- **Percent Resources**: 10% of total resources +- **Migration 5% Count**: 5% of total resources (minimum 1) +- **Migration 10% Count**: 10% of total resources (minimum 1) + +## Dependencies + +### Required Tools +- `kubectl` - Kubernetes command-line tool +- `helm` - Package manager for Kubernetes +- `tmux` - Terminal multiplexer for migration testing +- `jq` - JSON processor +- `bc` - Calculator for percentages + +### Kubernetes Requirements +- Kubernetes cluster with virtualization support +- Virtualization controller running +- Proper RBAC permissions +- Storage classes configured + +## Output and Logging + +### Log Levels +- **INFO** - General information +- **SUCCESS** - Successful operations +- **WARNING** - Non-critical issues +- **ERROR** - Error conditions + +### Log Files +- **test.log** - Main test execution log +- **vm_operations.log** - Detailed VM operations log +- **summary.txt** - Comprehensive summary report + +### Console Output +- Real-time progress updates +- Step-by-step execution status +- Duration and timing information +- Error messages and warnings + +## Performance Metrics + +The script measures and reports: + +- **VM Deployment Time** - Time to deploy VMs and disks +- **VM Stop Time** - Time to stop all VMs +- **VM Start Time** - Time to start all VMs +- **Migration Times** - Time for various migration scenarios +- **Controller Restart Time** - Time for controller restart +- **Node Drain Time** - Time for node draining operations + +## Troubleshooting + +### Common Issues + +1. **Permission Denied** + ```bash + # Ensure proper Kubernetes access + kubectl auth can-i create virtualmachines + ``` + +2. **Storage Class Issues** + ```bash + # Check available storage classes + kubectl get storageclass + ``` + +3. **Controller Not Available** + ```bash + # Check controller status + kubectl get pods -n d8-virtualization + ``` + +4. **Migration Failures** + ```bash + # Check migration status + kubectl get vmop -n perf + ``` + +### Debug Mode + +Enable debug output by uncommenting the debug line: +```bash +# set -x # Uncomment this line for debug output +``` + +## Best Practices + +1. **Resource Planning** + - Start with small resource counts for testing + - Increase gradually for production testing + - Monitor cluster resources during execution + +2. **Environment Setup** + - Ensure cluster has sufficient resources + - Configure proper storage classes + - Set up monitoring and logging + +3. 
**Test Execution** + - Run tests during low-traffic periods + - Monitor cluster health during execution + - Keep logs for analysis + +4. **Cleanup** + - Always run cleanup after tests + - Monitor for orphaned resources + - Verify cluster state after completion + +## Examples + +### Development Testing +```bash +# Quick test with minimal resources +./tests.sh -c 2 + +# Test with more resources +./tests.sh -c 10 +``` + +### Production Testing +```bash +# Full production test +./tests.sh -c 50 --clean-reports + +# Long-running test +./tests.sh -c 100 + +# Large scale production test +./tests.sh -c 15000 --batch-size 1200 --clean-reports + +# Bootstrap production resources +./tests.sh --bootstrap-only -c 5000 --batch-size 1000 + +# Continue production tests +./tests.sh --continue -c 5000 +``` + +### Custom Scenarios +```bash +# Test scenario 2 (if enabled) +./tests.sh -s 2 -c 20 + +# Clean environment test +./tests.sh --clean-reports -c 30 + +# Bootstrap and continue workflow +./tests.sh --bootstrap-only -c 100 +./tests.sh --continue -c 100 + +# Keep resources for analysis +./tests.sh --keep-resources -c 50 +``` + +## Support + +For issues and questions: +1. Check the logs in the report directory +2. Verify Kubernetes cluster status +3. Ensure all dependencies are installed +4. Check resource availability diff --git a/tests/performance/README_tests_refactored.md b/tests/performance/README_tests_refactored.md new file mode 100644 index 0000000000..3306d58677 --- /dev/null +++ b/tests/performance/README_tests_refactored.md @@ -0,0 +1,524 @@ +# Performance Testing Script - Refactored Version + +## Overview + +`tests_refactored.sh` is a modular, enhanced version of the original performance testing script. It provides all the functionality of the original script plus advanced features for individual step execution, debugging, and development workflows. 
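+
+Because the test logic is split into sourceable libraries (see "Modular Architecture" below), the shared helpers can also be reused from small ad-hoc scripts. A minimal sketch of a hypothetical helper, assuming it lives in `tests/performance/` next to the `lib/` directory:
+
+```bash
+#!/usr/bin/env bash
+# Hypothetical helper that reuses the framework's shared logging from lib/common.sh.
+source "$(dirname "${BASH_SOURCE[0]}")/lib/common.sh"
+
+init_logging "custom_check" "persistentVolumeClaim" 2   # scenario name, VI type, resource count
+log_info "Listing VMs in namespace $NAMESPACE"
+kubectl get vm -n "$NAMESPACE"
+log_success "Check finished; log written to $LOG_FILE"
+```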
+ +## Key Features + +- **Full Backward Compatibility** - Runs complete scenarios exactly like the original script +- **Individual Step Execution** - Run specific test steps independently +- **From-Step Execution** - Continue execution from any step +- **Modular Architecture** - Clean, maintainable code structure +- **Enhanced Logging** - Step numbers and improved visibility +- **Batch Deployment Support** - Supports large-scale deployments (up to 15,000 VMs) with intelligent batching +- **Flexible Deployment Control** - Bootstrap-only mode, continue mode, and resource preservation options +- **Development Friendly** - Perfect for debugging and development + +## Usage + +### Full Scenario Execution (Original Behavior) + +```bash +# Run scenario 1 with 2 resources (default) +./tests_refactored.sh + +# Run scenario 1 with 4 resources +./tests_refactored.sh -s 1 -c 4 + +# Run scenario 2 with 10 resources +./tests_refactored.sh -s 2 -c 10 + +# Clean reports and run +./tests_refactored.sh --clean-reports + +# Large scale deployment with batch processing +./tests_refactored.sh -c 15000 --batch-size 1200 + +# Force batch deployment for smaller numbers +./tests_refactored.sh -c 500 --enable-batch + +# Bootstrap-only mode (deploy resources only) +./tests_refactored.sh --bootstrap-only -c 1000 + +# Continue tests after bootstrap +./tests_refactored.sh --continue -c 1000 + +# Keep resources after tests +./tests_refactored.sh --keep-resources -c 50 +``` + +### Individual Step Execution (New Feature) + +```bash +# List all available steps +./tests_refactored.sh --list-steps + +# Run a specific step +./tests_refactored.sh --step cleanup --scenario-dir /path/to/scenario --vi-type persistentVolumeClaim +./tests_refactored.sh --step vm-deployment --scenario-dir /path/to/scenario --vi-type persistentVolumeClaim +./tests_refactored.sh --step statistics-collection --scenario-dir /path/to/scenario +./tests_refactored.sh --step vm-operations --scenario-dir /path/to/scenario +./tests_refactored.sh --step migration-tests --scenario-dir /path/to/scenario +./tests_refactored.sh --step controller-restart --scenario-dir /path/to/scenario --vi-type persistentVolumeClaim +./tests_refactored.sh --step final-operations --scenario-dir /path/to/scenario +``` + +### From-Step Execution (New Feature) + +```bash +# Continue from a specific step +./tests_refactored.sh --from-step vm-operations --scenario-dir /path/to/scenario --vi-type persistentVolumeClaim +./tests_refactored.sh --from-step migration-tests --scenario-dir /path/to/scenario --vi-type persistentVolumeClaim +./tests_refactored.sh --from-step controller-restart --scenario-dir /path/to/scenario --vi-type persistentVolumeClaim +``` + +## Command Line Options + +| Option | Description | Required For | Default | +|--------|-------------|--------------|---------| +| `-s, --scenario NUMBER` | Scenario number to run (1 or 2) | Full scenarios | 1 | +| `-c, --count NUMBER` | Number of resources to create | Full scenarios | 2 | +| `--batch-size NUMBER` | Maximum resources per batch | Optional | 1200 | +| `--enable-batch` | Force batch deployment mode | Optional | false | +| `--bootstrap-only` | Only deploy resources, skip tests | Optional | false | +| `--continue` | Continue tests after bootstrap (use with --bootstrap-only) | Optional | false | +| `--keep-resources` | Keep resources after tests (don't cleanup) | Optional | false | +| `--step STEP_NAME` | Run a specific step only | Individual steps | - | +| `--from-step STEP_NAME` | Run all steps starting from STEP_NAME | 
From-step execution | - | +| `--list-steps` | List all available steps | - | - | +| `--scenario-dir DIR` | Directory for scenario data | Individual/From-step | - | +| `--vi-type TYPE` | Virtual image type | Some steps | persistentVolumeClaim | +| `--clean-reports` | Clean all report directories before running | Optional | false | +| `--no-pre-cleanup` | Do not cleanup resources before running | Optional | false | +| `--no-post-cleanup` | Do not cleanup resources after running | Optional | false | +| `-h, --help` | Show help message | - | - | + +## Batch Deployment + +For large-scale deployments (>1200 resources), the script automatically uses intelligent batch deployment: + +### Features +- **Automatic Batching** - Automatically detects when batch deployment is needed +- **Configurable Batch Size** - Default 1200 resources per batch (customizable) +- **Progress Tracking** - Real-time progress updates with ETA +- **Cluster Resource Checks** - Pre-deployment resource validation +- **Stability Delays** - 30-second delays between batches to prevent cluster overload + +### Configuration +```bash +# Default batch settings +MAX_BATCH_SIZE=1200 +TOTAL_TARGET_RESOURCES=15000 +BATCH_DEPLOYMENT_ENABLED=false +``` + +### Examples +```bash +# Deploy 15,000 VMs in batches of 1200 +./tests_refactored.sh -c 15000 --batch-size 1200 + +# Force batch mode for smaller deployments +./tests_refactored.sh -c 500 --enable-batch + +# Custom batch size +./tests_refactored.sh -c 5000 --batch-size 800 +``` + +## Deployment Control + +The refactored script provides flexible deployment control options for different use cases: + +### Bootstrap-Only Mode + +Use `--bootstrap-only` to deploy resources without running tests: + +```bash +# Deploy 1000 resources without running tests +./tests_refactored.sh --bootstrap-only -c 1000 + +# Deploy with batch processing +./tests_refactored.sh --bootstrap-only -c 5000 --batch-size 1000 +``` + +**Use Cases:** +- Pre-deploying resources for later testing +- Resource provisioning without test execution +- Large-scale infrastructure setup + +### Continue Mode + +Use `--continue` to run tests on existing resources: + +```bash +# Continue tests on existing resources +./tests_refactored.sh --continue -c 1000 + +# Continue with specific scenario +./tests_refactored.sh --continue -s 2 -c 1000 +``` + +**Use Cases:** +- Running tests on pre-deployed resources +- Resuming tests after bootstrap +- Testing on existing infrastructure + +### Keep Resources Mode + +Use `--keep-resources` to preserve resources after test completion: + +```bash +# Keep resources after tests +./tests_refactored.sh --keep-resources -c 50 + +# Combine with continue mode +./tests_refactored.sh --continue --keep-resources -c 100 +``` + +**Use Cases:** +- Preserving test environment for analysis +- Keeping resources for additional testing +- Debugging and troubleshooting + +### Workflow Examples + +#### Large-Scale Deployment Workflow + +```bash +# Step 1: Bootstrap large deployment +./tests_refactored.sh --bootstrap-only -c 15000 --batch-size 1200 + +# Step 2: Continue with tests +./tests_refactored.sh --continue -c 15000 + +# Step 3: Keep resources for analysis +./tests_refactored.sh --continue --keep-resources -c 15000 +``` + +#### Development Testing Workflow + +```bash +# Quick bootstrap for development +./tests_refactored.sh --bootstrap-only -c 10 + +# Run tests on development environment +./tests_refactored.sh --continue -c 10 + +# Keep resources for debugging +./tests_refactored.sh --continue --keep-resources -c 10 
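+
+# With --keep-resources the VMs and disks stay in place after the run,
+# so they can be inspected directly (assuming the default "perf" namespace):
+#   kubectl get vm,vd -n perf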
+``` + +#### Production Testing Workflow + +```bash +# Deploy production-scale resources +./tests_refactored.sh --bootstrap-only -c 5000 --batch-size 1000 + +# Run comprehensive tests +./tests_refactored.sh --continue -c 5000 + +# Clean up after testing +./tests_refactored.sh -c 5000 # Normal execution with cleanup +``` + +## Available Steps + +The script supports 13 individual steps that can be executed independently: + +### 1. cleanup +- **Purpose**: Clean up existing resources +- **Required**: `--scenario-dir`, `--vi-type` +- **Use Case**: Prepare clean environment + +### 2. vm-deployment +- **Purpose**: Deploy VMs with disks +- **Required**: `--scenario-dir`, `--vi-type` +- **Use Case**: Test VM deployment functionality + +### 3. statistics-collection +- **Purpose**: Gather initial statistics +- **Required**: `--scenario-dir` +- **Use Case**: Collect baseline metrics + +### 4. vm-operations +- **Purpose**: Stop and start all VMs +- **Required**: `--scenario-dir` +- **Use Case**: Test VM lifecycle operations + +### 5. vm-undeploy-deploy +- **Purpose**: Undeploy and redeploy 10% VMs +- **Required**: `--scenario-dir` +- **Use Case**: Test VM redeployment + +### 6. vm-operations-test +- **Purpose**: Test stop/start operations on 10% VMs +- **Required**: `--scenario-dir` +- **Use Case**: Test partial VM operations + +### 7. migration-tests +- **Purpose**: Run migration tests (5% and 10%) +- **Required**: `--scenario-dir` +- **Use Case**: Test migration functionality + +### 8. migration-parallel-2x +- **Purpose**: Migrate with parallelMigrationsPerCluster at 2x nodes +- **Required**: `--scenario-dir` +- **Use Case**: Test parallel migration scenarios + +### 9. migration-parallel-4x +- **Purpose**: Migrate with parallelMigrationsPerCluster at 4x nodes +- **Required**: `--scenario-dir` +- **Use Case**: Test higher parallel migration scenarios + +### 10. migration-parallel-8x +- **Purpose**: Migrate with parallelMigrationsPerCluster at 8x nodes +- **Required**: `--scenario-dir` +- **Use Case**: Test maximum parallel migration scenarios + +### 11. controller-restart +- **Purpose**: Test controller restart with VM creation +- **Required**: `--scenario-dir`, `--vi-type` +- **Use Case**: Test controller resilience + +### 12. drain-node +- **Purpose**: Run drain node workload +- **Required**: `--scenario-dir` +- **Use Case**: Test node draining operations + +### 13. 
final-operations +- **Purpose**: Final statistics and optional cleanup +- **Required**: `--scenario-dir` +- **Use Case**: Complete test sequence + +## Use Cases + +### Development and Debugging + +```bash +# Test only VM deployment +./tests_refactored.sh --step vm-deployment --scenario-dir ./test-scenario --vi-type persistentVolumeClaim + +# Test migration functionality +./tests_refactored.sh --step migration-tests --scenario-dir ./test-scenario --vi-type persistentVolumeClaim + +# Debug controller restart +./tests_refactored.sh --step controller-restart --scenario-dir ./test-scenario --vi-type persistentVolumeClaim +``` + +### Resuming Interrupted Tests + +```bash +# Continue from VM operations +./tests_refactored.sh --from-step vm-operations --scenario-dir ./existing-scenario --vi-type persistentVolumeClaim + +# Continue from migration tests +./tests_refactored.sh --from-step migration-tests --scenario-dir ./existing-scenario --vi-type persistentVolumeClaim +``` + +### Production Testing + +```bash +# Run full scenario (same as original script) +./tests_refactored.sh -s 1 -c 10 + +# Run with custom parameters +./tests_refactored.sh -s 1 -c 20 --clean-reports + +# Large scale production test +./tests_refactored.sh -c 15000 --batch-size 1200 --clean-reports + +# Bootstrap production resources +./tests_refactored.sh --bootstrap-only -c 5000 --batch-size 1000 + +# Continue production tests +./tests_refactored.sh --continue -c 5000 + +# Keep resources for analysis +./tests_refactored.sh --keep-resources -c 50 +``` + +## Report Structure + +### Full Scenario Reports +``` +report/ +└── scenario_1_persistentVolumeClaim_2vm_20251021_013737/ + ├── test.log # Main test log + ├── vm_operations.log # VM operations log + ├── summary.txt # Summary report + ├── statistics/ # Statistics data + └── vpa/ # VPA data +``` + +### Individual Step Reports +``` +report/ +└── step_vm-deployment_persistentVolumeClaim_2vm_20251021_013653/ + ├── test.log # Step execution log + ├── vm_operations.log # VM operations log + └── statistics/ # Step statistics +``` + +## Enhanced Logging + +The refactored script provides enhanced logging with: + +- **Step Numbers** - Each step shows its number in the sequence +- **Clear Step Boundaries** - Easy to identify step start/end +- **Consistent Format** - Uniform logging across all steps +- **Better Visibility** - Improved debugging and monitoring + +Example log output: +``` +[INFO] === Executing Step 1: cleanup === +[INFO] === Running Step 1: cleanup === +[STEP_START] Cleanup existing resources +[STEP_END] Cleanup existing resources completed in 00:00:15 +[SUCCESS] Cleanup step completed +``` + +## Modular Architecture + +The script is organized into library modules: + +- **`lib/common.sh`** - Common utilities and logging functions +- **`lib/vm_operations.sh`** - VM operation functions +- **`lib/migration.sh`** - Migration testing functions +- **`lib/statistics.sh`** - Statistics collection functions +- **`lib/controller.sh`** - Controller management functions +- **`lib/reporting.sh`** - Report generation functions +- **`lib/scenarios.sh`** - Scenario orchestration functions + +## Migration from Original Script + +The refactored script is fully backward compatible: + +1. **Drop-in Replacement** - Can replace `tests.sh` without changes +2. **Same Output** - Generates identical reports and logs +3. **Same Performance** - No performance overhead +4. 
**Additional Features** - Adds new capabilities without breaking existing workflows + +## Examples + +### Quick Development Test +```bash +# Test VM deployment only +./tests_refactored.sh --step vm-deployment --scenario-dir ./dev-test --vi-type persistentVolumeClaim +``` + +### Debugging Migration Issues +```bash +# Test migration setup +./tests_refactored.sh --step migration-tests --scenario-dir ./debug-scenario --vi-type persistentVolumeClaim + +# Test specific parallel migration +./tests_refactored.sh --step migration-parallel-4x --scenario-dir ./debug-scenario --vi-type persistentVolumeClaim +``` + +### Production Workflow +```bash +# Full production test +./tests_refactored.sh -s 1 -c 50 --clean-reports + +# Resume interrupted test +./tests_refactored.sh --from-step vm-operations --scenario-dir ./production-scenario --vi-type persistentVolumeClaim +``` + +### Controller Testing +```bash +# Test controller restart +./tests_refactored.sh --step controller-restart --scenario-dir ./controller-test --vi-type persistentVolumeClaim + +# Test node draining +./tests_refactored.sh --step drain-node --scenario-dir ./drain-test --vi-type persistentVolumeClaim +``` + +## Troubleshooting + +### Common Issues + +1. **Missing scenario directory** + ```bash + # Error: Scenario directory is required for individual step execution + # Solution: Provide --scenario-dir parameter + ./tests_refactored.sh --step cleanup --scenario-dir ./my-scenario --vi-type persistentVolumeClaim + ``` + +2. **Unknown step** + ```bash + # Error: Unknown step: invalid-step + # Solution: Use --list-steps to see available steps + ./tests_refactored.sh --list-steps + ``` + +3. **Missing VI type** + ```bash + # Error: VI type required for some steps + # Solution: Provide --vi-type parameter + ./tests_refactored.sh --step vm-deployment --scenario-dir ./test --vi-type persistentVolumeClaim + ``` + +### Getting Help + +```bash +# Show help +./tests_refactored.sh --help + +# List available steps +./tests_refactored.sh --list-steps + +# Show step details +./tests_refactored.sh --list-steps | grep -A 5 "cleanup" +``` + +## Best Practices + +### Development Workflow +1. **Start Small** - Test individual steps with minimal resources +2. **Use Scenario Directories** - Create dedicated directories for different test scenarios +3. **Step-by-Step Testing** - Test each step independently before running full scenarios +4. **Monitor Logs** - Check step logs for issues and performance + +### Production Workflow +1. **Full Scenarios** - Use full scenario execution for production testing +2. **Resource Planning** - Plan resources based on test requirements +3. **Monitoring** - Monitor cluster health during execution +4. **Cleanup** - Always run cleanup after tests + +### Debugging +1. **Individual Steps** - Use individual step execution for debugging +2. **From-Step Execution** - Resume from specific steps after fixes +3. **Log Analysis** - Use enhanced logging for better debugging +4. 
**Step Isolation** - Test problematic steps in isolation + +## Advanced Usage + +### Custom Step Sequences +```bash +# Run specific step sequence +./tests_refactored.sh --step cleanup --scenario-dir ./custom --vi-type persistentVolumeClaim +./tests_refactored.sh --step vm-deployment --scenario-dir ./custom --vi-type persistentVolumeClaim +./tests_refactored.sh --step statistics-collection --scenario-dir ./custom +``` + +### Parallel Testing +```bash +# Test different scenarios in parallel +./tests_refactored.sh --step vm-deployment --scenario-dir ./test1 --vi-type persistentVolumeClaim & +./tests_refactored.sh --step vm-deployment --scenario-dir ./test2 --vi-type persistentVolumeClaim & +``` + +### Integration with CI/CD +```bash +# Automated testing pipeline +./tests_refactored.sh --step cleanup --scenario-dir ./ci-test --vi-type persistentVolumeClaim +./tests_refactored.sh --step vm-deployment --scenario-dir ./ci-test --vi-type persistentVolumeClaim +./tests_refactored.sh --step statistics-collection --scenario-dir ./ci-test + +# Bootstrap and continue workflow +./tests_refactored.sh --bootstrap-only -c 100 --scenario-dir ./ci-test +./tests_refactored.sh --continue -c 100 --scenario-dir ./ci-test + +# Keep resources for analysis +./tests_refactored.sh --keep-resources -c 100 --scenario-dir ./ci-test +``` diff --git a/tests/performance/Taskfile.yaml b/tests/performance/Taskfile.yaml index 2e05b3d7a5..ac132fba8f 100644 --- a/tests/performance/Taskfile.yaml +++ b/tests/performance/Taskfile.yaml @@ -24,6 +24,10 @@ includes: taskfile: tools/evicter/Taskfile.yaml dir: tools/evicter optional: true + netchecker: + taskfile: tools/netchecker/Taskfile.dist.yaml + dir: tools/netchecker + optional: true vars: COUNT: '{{ .COUNT | default "1" }}' @@ -32,17 +36,31 @@ vars: RESOURCES: '{{ .RESOURCES | default "all" }}' NAME_PREFIX: '{{ .NAME_PREFIX | default "performance" }}' RESOURCES_PREFIX: '{{ .RESOURCES_PREFIX | default "performance" }}' + VIRTUALIMAGE_TYPE: '{{ .VIRTUALIMAGE_TYPE | default "containerRegistry" }}' + VIRTUALDISK_TYPE: '{{ .VIRTUALDISK_TYPE | default "virtualDisk" }}' tasks: tst: cmds: - echo "{{ .STORAGE_CLASS }}" - render: + helm:render: + desc: "Render templates with default values" + cmds: + - | + nelm template test . --values values.yaml + + helm:render:cr: desc: "Render templates with default values" cmds: - | - helm template test . --values values.yaml + nelm template test . --values values.yaml --set resources.virtualDisk.spec.template.type=containerRegistry + + helm:render:pvc: + desc: "Render templates with default values" + cmds: + - | + nelm template test . --values values.yaml --set resources.virtualDisk.spec.template.type=persistentVolumeClaim help: desc: "Help about bootstrap.sh." @@ -52,7 +70,7 @@ tasks: apply: desc: "Apply disks and virtual machines." cmds: - - ./bootstrap.sh apply --count="{{ .COUNT }}" --namespace="{{ .NAMESPACE }}" --storage-class="{{ .STORAGE_CLASS }}" --resources-prefix="{{ .RESOURCES_PREFIX }}" --resources="{{ .RESOURCES }}" --name="{{ .NAME_PREFIX }}" + - ./bootstrap.sh apply --count="{{ .COUNT }}" --namespace="{{ .NAMESPACE }}" --storage-class="{{ .STORAGE_CLASS }}" --resources-prefix="{{ .RESOURCES_PREFIX }}" --resources="{{ .RESOURCES }}" --name="{{ .NAME_PREFIX }}" --virtualDisk-type="{{ .VIRTUALDISK_TYPE }}" --virtualImage-type="{{ .VIRTUALIMAGE_TYPE }}" destroy: desc: "Destroy disks and virtual machines." 
@@ -69,12 +87,16 @@ tasks: STORAGE_CLASS: "{{ .STORAGE_CLASS }}" NAME_PREFIX: "{{ .NAME_PREFIX }}" RESOURCES_PREFIX: "{{ .RESOURCES_PREFIX }}" + VIRTUALIMAGE_TYPE: "{{ .VIRTUALIMAGE_TYPE }}" + VIRTUALDISK_TYPE: "{{ .VIRTUALDISK_TYPE }}" - task: apply:vms vars: COUNT: "{{ .COUNT }}" NAMESPACE: "{{ .NAMESPACE }}" NAME_PREFIX: "{{ .NAME_PREFIX }}" RESOURCES_PREFIX: "{{ .RESOURCES_PREFIX }}" + VIRTUALIMAGE_TYPE: "{{ .VIRTUALIMAGE_TYPE }}" + VIRTUALDISK_TYPE: "{{ .VIRTUALDISK_TYPE }}" destroy:all: desc: "Destroy disks and virtual machines in two steps (in two different releases)." @@ -93,22 +115,64 @@ tasks: apply:disks: desc: "Apply virtual machine disks." cmds: - - ./bootstrap.sh apply --count="{{ .COUNT }}" --namespace="{{ .NAMESPACE }}" --storage-class="{{ .STORAGE_CLASS }}" --resources-prefix="{{ .RESOURCES_PREFIX }}" --resources="vds" --name="{{ .NAME_PREFIX }}-disks" + - | + ./bootstrap.sh apply --count="{{ .COUNT }}" \ + --namespace="{{ .NAMESPACE }}" \ + --storage-class="{{ .STORAGE_CLASS }}" \ + --resources-prefix="{{ .RESOURCES_PREFIX }}" \ + --resources="vds" \ + --name="{{ .NAME_PREFIX }}-disks" \ + --virtualDisk-type="{{ .VIRTUALDISK_TYPE }}" \ + --virtualImage-type="{{ .VIRTUALIMAGE_TYPE }}" apply:vms: desc: "Apply virtual machines." cmds: - - ./bootstrap.sh apply --count="{{ .COUNT }}" --namespace="{{ .NAMESPACE }}" --resources-prefix="{{ .RESOURCES_PREFIX }}" --resources="vms" --name="{{ .NAME_PREFIX }}-vms" + - | + ./bootstrap.sh apply --count="{{ .COUNT }}" \ + --namespace="{{ .NAMESPACE }}" \ + --resources-prefix="{{ .RESOURCES_PREFIX }}" \ + --resources="vms" \ + --name="{{ .NAME_PREFIX }}-vms" \ + --virtualDisk-type="{{ .VIRTUALDISK_TYPE }}" \ + --virtualImage-type="{{ .VIRTUALIMAGE_TYPE }}" destroy:disks: desc: "Destroy disks." cmds: - - ./bootstrap.sh destroy --namespace="{{ .NAMESPACE }}" --resources-prefix="{{ .RESOURCES_PREFIX }}" --resources="vds" --name="{{ .NAME_PREFIX }}-disks" + - ./bootstrap.sh destroy --namespace="{{ .NAMESPACE }}" --resources-prefix="{{ .RESOURCES_PREFIX }}" --resources="vds" --name="{{ .NAME_PREFIX }}-disks" --virtualDisk-type="{{ .VIRTUALDISK_TYPE }}" --virtualImage-type="{{ .VIRTUALIMAGE_TYPE }}" destroy:vms: desc: "Destroy virtual machines." cmds: - - ./bootstrap.sh destroy --namespace="{{ .NAMESPACE }}" --resources-prefix="{{ .RESOURCES_PREFIX }}" --resources="vms" --name="{{ .NAME_PREFIX }}-vms" + - ./bootstrap.sh destroy --namespace="{{ .NAMESPACE }}" --resources-prefix="{{ .RESOURCES_PREFIX }}" --resources="vms" --name="{{ .NAME_PREFIX }}-vms" --virtualDisk-type="{{ .VIRTUALDISK_TYPE }}" --virtualImage-type="{{ .VIRTUALIMAGE_TYPE }}" + + gather:logs: + desc: "Gather logs from d8 namespaces." + cmds: + - | + ./gather-logs.sh d8-admission-policy-engine \ + d8-admission-policy-engine \ + d8-cert-manager d8-chrony \ + d8-cloud-instance-manager \ + d8-cni-cilium \ + d8-console \ + d8-csi-ceph \ + d8-dashboard \ + d8-descheduler \ + d8-ingress-nginx \ + d8-monitoring \ + d8-multitenancy-manager \ + d8-observability \ + d8-operator-ceph \ + d8-operator-prometheus \ + d8-runtime-audit-engine \ + d8-service-accounts \ + d8-snapshot-controller \ + d8-system \ + d8-user-authn \ + d8-user-authz \ + d8-virtualization check_or_install_software: desc: "Check and install Helm3, Ansible, and K9s" @@ -127,6 +191,18 @@ tasks: else echo "Helm3 is already installed" fi + install:nelm: + desc: "Install Helm if it's not installed" + cmds: + - | + if ! command -v helm &> /dev/null; then + echo "Installing Nelm..." 
+ wget https://tuf.nelm.sh/targets/releases/1.16.0/linux-amd64/bin/nelm + mv nelm /usr/bin/nelm + + else + echo "Helm3 is already installed" + fi install:ansible: desc: "Install Ansible if it's not installed" diff --git a/tests/performance/bootstrap.sh b/tests/performance/bootstrap.sh index c8376d55f4..7e569d0544 100755 --- a/tests/performance/bootstrap.sh +++ b/tests/performance/bootstrap.sh @@ -71,7 +71,16 @@ function validate_apply_args() { function apply() { echo "Apply resources: ${RESOURCES}" - args=( upgrade --install "${RELEASE_NAME}" . -n "${NAMESPACE}" --create-namespace --set "count=${COUNT}" --set "resourcesPrefix=${RESOURCES_PREFIX}" --set "resources.default=${RESOURCES}" ) + args=( upgrade \ + --install "${RELEASE_NAME}" . \ + -n "${NAMESPACE}" \ + --create-namespace \ + --set "count=${COUNT}" \ + --set "resourcesPrefix=${RESOURCES_PREFIX}" \ + --set "resources.default=${RESOURCES}" \ + --set "resources.virtualDisk.spec.template.type=${VIRTUALDISK_TYPE}" \ + --set "resources.virtualImage.spec.template.type=${VIRTUALIMAGE_TYPE}" ) + if [ -n "${STORAGE_CLASS}" ]; then args+=( --set "resources.storageClassName=${STORAGE_CLASS}" ) fi @@ -145,6 +154,14 @@ while [[ $# -gt 0 ]]; do RESOURCES="$2" shift 2 ;; + --virtualDisk-type=*) + VIRTUALDISK_TYPE="${1#*=}" + shift + ;; + --virtualImage-type=*) + VIRTUALIMAGE_TYPE="${1#*=}" + shift + ;; --resources-prefix=*|-p=*) RESOURCES_PREFIX="${1#*=}" shift diff --git a/tests/performance/config/default.conf b/tests/performance/config/default.conf new file mode 100644 index 0000000000..3daae0e34d --- /dev/null +++ b/tests/performance/config/default.conf @@ -0,0 +1,33 @@ +# Default configuration for performance testing +# This file contains default values that can be overridden by command line arguments + +# Test configuration +NAMESPACE="perf" +STORAGE_CLASS="" +VI_TYPE="persistentVolumeClaim" +COUNT=2 +SLEEP_TIME=5 +REPORT_DIR="report" +MIGRATION_DURATION="5m" +MIGRATION_PERCENTAGE=10 +ACTIVE_CLUSTER_PERCENTAGE=90 +CONTROLLER_NAMESPACE="d8-virtualization" + +# Scenario configuration +SCENARIO_NUMBER=1 +MAIN_COUNT_RESOURCES=2 +PERCENT_VMS=10 +MIGRATION_PERCENTAGE_10=10 +MIGRATION_PERCENTAGE_5=5 + +# Logging configuration +LOG_LEVEL="INFO" +ENABLE_VM_OPERATIONS_LOG=true +ENABLE_DURATION_LOGGING=true + +# Performance thresholds (in seconds) +MAX_DEPLOYMENT_TIME=300 +MAX_STOP_TIME=60 +MAX_START_TIME=60 +MAX_MIGRATION_TIME=120 +MAX_CONTROLLER_RESTART_TIME=180 \ No newline at end of file diff --git a/tests/performance/gather-logs.sh b/tests/performance/gather-logs.sh new file mode 100644 index 0000000000..3df2255425 --- /dev/null +++ b/tests/performance/gather-logs.sh @@ -0,0 +1,146 @@ +#!/bin/bash + +set -euo pipefail + +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' + +log() { + echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')]${NC} $1" +} + +error() { + echo -e "${RED}[ERROR]${NC} $1" >&2 +} + +warning() { + echo -e "${YELLOW}[WARNING]${NC} $1" +} + +check_kubectl() { + if ! command -v kubectl &> /dev/null; then + error "kubectl not found. Install kubectl to use this script." + exit 1 + fi +} + +check_kubectl_connection() { + if ! kubectl cluster-info &> /dev/null; then + error "Cannot connect to Kubernetes cluster. Check kubectl configuration." 
+ exit 1 + fi +} + +create_log_directory() { + local timestamp=$(date +'%Y%m%d_%H%M%S') + LOG_DIR="k8s_logs_${timestamp}" + mkdir -p "$LOG_DIR" + log "Created log directory: $LOG_DIR" +} + +collect_pod_logs() { + local namespace="$1" + local pod_name="$2" + local log_file="$3" + + log "Collecting logs for pod: $pod_name in namespace: $namespace" + + if kubectl logs "$pod_name" -n "$namespace" > "$log_file" 2>/dev/null; then + log "Current pod logs saved to: $log_file" + else + warning "Failed to get logs for pod: $pod_name" + echo "Logs unavailable for pod: $pod_name" > "$log_file" + fi + + local previous_log_file="${log_file%.log}_previous.log" + if kubectl logs "$pod_name" -n "$namespace" --previous > "$previous_log_file" 2>/dev/null; then + log "Previous pod logs saved to: $previous_log_file" + else + warning "Previous logs unavailable for pod: $pod_name" + echo "Previous logs unavailable for pod: $pod_name" > "$previous_log_file" + fi +} + +collect_namespace_logs() { + local namespace="$1" + + log "Processing namespace: $namespace" + + if ! kubectl get namespace "$namespace" &> /dev/null; then + warning "Namespace '$namespace' not found, skipping..." + return + fi + + local ns_dir="$LOG_DIR/$namespace" + mkdir -p "$ns_dir" + + local pods=$(kubectl get pods -n "$namespace" --no-headers -o custom-columns=":metadata.name" 2>/dev/null || true) + + if [ -z "$pods" ]; then + warning "No pods found in namespace '$namespace'" + return + fi + + while IFS= read -r pod_name; do + if [ -n "$pod_name" ]; then + local log_file="$ns_dir/${pod_name}.log" + collect_pod_logs "$namespace" "$pod_name" "$log_file" + fi + done <<< "$pods" + + log "Completed processing namespace: $namespace" +} + +create_archive() { + log "Creating archive..." + + local archive_name="${LOG_DIR}.tar.gz" + + if tar -czf "$archive_name" "$LOG_DIR"; then + log "Archive created: $archive_name" + + local archive_size=$(du -h "$archive_name" | cut -f1) + log "Archive size: $archive_size" + + rm -rf "$LOG_DIR" + log "Temporary directory removed" + + echo "" + log "Done! Log archive: $archive_name" + else + error "Error creating archive" + exit 1 + fi +} + +main() { + echo "==========================================" + echo " Kubernetes Log Collector" + echo "==========================================" + echo "" + + if [ $# -eq 0 ]; then + error "At least one namespace must be specified" + echo "Usage: $0 [namespace2] [namespace3] ..." + echo "Example: $0 default kube-system monitoring" + exit 1 + fi + + check_kubectl + check_kubectl_connection + + create_log_directory + + for namespace in "$@"; do + collect_namespace_logs "$namespace" + done + + create_archive + + echo "" + log "Log collection completed successfully!" 
+} + +main "$@" \ No newline at end of file diff --git a/tests/performance/lib/common.sh b/tests/performance/lib/common.sh new file mode 100755 index 0000000000..03864e9a63 --- /dev/null +++ b/tests/performance/lib/common.sh @@ -0,0 +1,253 @@ +#!/usr/bin/env bash + +# Common utilities and configuration for performance testing +# This module provides shared functionality used across all other modules + +# Detect operating system +detect_os() { + if [[ "$OSTYPE" == "darwin"* ]] || [[ "$(uname)" == "Darwin" ]]; then + echo "macOS" + elif [[ "$OSTYPE" == "linux-gnu"* ]] || [[ "$(uname)" == "Linux" ]]; then + echo "Linux" + else + echo "Unknown" + fi +} + +# Set OS-specific variables +OS_TYPE=$(detect_os) + +# Global configuration +NAMESPACE="perf" +STORAGE_CLASS="" +VI_TYPE="persistentVolumeClaim" # containerRegistry, persistentVolumeClaim +COUNT=2 +SLEEP_TIME=5 +REPORT_DIR="report" +MIGRATION_DURATION="5m" +MIGRATION_PERCENTAGE=10 +ACTIVE_CLUSTER_PERCENTAGE=90 +CONTROLLER_NAMESPACE="d8-virtualization" +# Store original controller replicas count +ORIGINAL_CONTROLLER_REPLICAS="" +# Centralized logging +LOG_FILE="" +CURRENT_SCENARIO="" +VM_OPERATIONS_LOG="" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +CYAN='\033[0;36m' +NC='\033[0m' # No Color + +# Centralized logging functions +init_logging() { + local scenario_name=$1 + local vi_type=$2 + local count=${3:-$MAIN_COUNT_RESOURCES} + local timestamp=$(date +"%Y%m%d_%H%M%S") + local scenario_dir="$REPORT_DIR/${scenario_name}_${vi_type}_${count}vm_${timestamp}" + LOG_FILE="$scenario_dir/test.log" + VM_OPERATIONS_LOG="$scenario_dir/vm_operations.log" + CURRENT_SCENARIO="${scenario_name}_${vi_type}_${count}vm_${timestamp}" + mkdir -p "$(dirname "$LOG_FILE")" + echo "=== Test started at $(get_current_date) ===" > "$LOG_FILE" + echo "=== VM Operations Report started at $(get_current_date) ===" > "$VM_OPERATIONS_LOG" +} + +log_info() { + local message="$1" + local timestamp=$(get_current_date) + echo -e "${BLUE}[INFO]${NC} $message" + if [ -n "$LOG_FILE" ]; then + echo "[$timestamp] [INFO] $message" >> "$LOG_FILE" + fi +} + +log_success() { + local message="$1" + local timestamp=$(get_current_date) + echo -e "${GREEN}[SUCCESS]${NC} $message" + if [ -n "$LOG_FILE" ]; then + echo "[$timestamp] [SUCCESS] $message" >> "$LOG_FILE" + fi +} + +log_warning() { + local message="$1" + local timestamp=$(get_current_date) + echo -e "${YELLOW}[WARNING]${NC} $message" + if [ -n "$LOG_FILE" ]; then + echo "[$timestamp] [WARNING] $message" >> "$LOG_FILE" + fi +} + +log_error() { + local message="$1" + local timestamp=$(get_current_date) + echo -e "${RED}[ERROR]${NC} $message" + if [ -n "$LOG_FILE" ]; then + echo "[$timestamp] [ERROR] $message" >> "$LOG_FILE" + fi +} + +# VM Operations logging functions +log_vm_operation() { + local message="$1" + local timestamp=$(get_current_date) + if [ -n "$VM_OPERATIONS_LOG" ]; then + echo "[$timestamp] [VM_OP] $message" >> "$VM_OPERATIONS_LOG" + fi +} + +log_vmop_operation() { + local message="$1" + local timestamp=$(get_current_date) + if [ -n "$VM_OPERATIONS_LOG" ]; then + echo "[$timestamp] [VMOP] $message" >> "$VM_OPERATIONS_LOG" + fi +} + +# Function to log duration details to file +log_duration() { + local step_name="$1" + local duration="$2" + local timestamp=$(get_current_date) + local formatted_duration=$(format_duration "$duration") + if [ -n "$LOG_FILE" ]; then + echo "[$timestamp] [DURATION] $step_name: $formatted_duration" >> "$LOG_FILE" + fi +} + +# Function to 
log step start with timestamp +log_step_start() { + local step_name="$1" + local timestamp=$(get_current_date) + echo -e "${CYAN}[STEP_START] $step_name${NC}" + if [ -n "$LOG_FILE" ]; then + echo "[$timestamp] [STEP_START] $step_name" >> "$LOG_FILE" + fi +} + +# Function to log step end with duration +log_step_end() { + local step_name="$1" + local duration="$2" + local timestamp=$(get_current_date) + local formatted_duration=$(format_duration "$duration") + echo -e "${CYAN}[STEP_END] $step_name${NC}" + if [ -n "$LOG_FILE" ]; then + echo "[$timestamp] [STEP_END] $step_name completed in $formatted_duration" >> "$LOG_FILE" + fi +} + +# Function to calculate percentage safely +calculate_percentage() { + local duration="$1" + local total="$2" + + # Check if values are valid numbers and not zero + if [[ -z "$duration" || -z "$total" || "$duration" -eq 0 || "$total" -eq 0 ]]; then + echo "0.0" + return + fi + + # Use bc with error handling + local result=$(echo "scale=1; $duration * 100 / $total" | bc 2>/dev/null || echo "0.0") + echo "$result" +} + +format_duration() { + local total_seconds=$1 + local hours=$((total_seconds / 3600)) + local minutes=$(( (total_seconds % 3600) / 60 )) + local seconds=$((total_seconds % 60)) + printf "%02d:%02d:%02d\n" "$hours" "$minutes" "$seconds" +} + +formatted_date() { + local timestamp="$1" + + # Check if timestamp is valid (not empty and is a number) + if [ -z "$timestamp" ] || ! [[ "$timestamp" =~ ^[0-9]+$ ]]; then + # Use current time if timestamp is invalid + date +"%H:%M:%S %d-%m-%Y" + return + fi + + # Use OS-specific date command + case "$OS_TYPE" in + "macOS") + date -r "$timestamp" +"%H:%M:%S %d-%m-%Y" 2>/dev/null || date +"%H:%M:%S %d-%m-%Y" + ;; + "Linux") + date -d "@$timestamp" +"%H:%M:%S %d-%m-%Y" 2>/dev/null || date +"%H:%M:%S %d-%m-%Y" + ;; + *) + # Fallback - try both methods + if date -r "$timestamp" +"%H:%M:%S %d-%m-%Y" 2>/dev/null; then + # macOS style worked + date -r "$timestamp" +"%H:%M:%S %d-%m-%Y" + elif date -d "@$timestamp" +"%H:%M:%S %d-%m-%Y" 2>/dev/null; then + # Linux style worked + date -d "@$timestamp" +"%H:%M:%S %d-%m-%Y" + else + # Last resort - use current time + date +"%H:%M:%S %d-%m-%Y" + fi + ;; + esac +} + +get_current_date() { + date +"%H:%M:%S %d-%m-%Y" +} + +get_timestamp() { + date +%s +} + +exit_trap() { + echo "" + echo "Cleanup" + echo "Exiting..." 
+ exit 0 +} + +trap exit_trap SIGINT SIGTERM + +get_default_storage_class() { + if [ -n "${STORAGE_CLASS:-}" ]; then + echo "$STORAGE_CLASS" + else + kubectl get storageclass -o json \ + | jq -r '.items[] | select(.metadata.annotations."storageclass.kubernetes.io/is-default-class" == "true") | .metadata.name' + fi +} + +create_report_dir() { + local scenario_name=$1 + local vi_type=$2 + local count=${3:-$MAIN_COUNT_RESOURCES} + local timestamp=$(date +"%Y%m%d_%H%M%S") + local base_dir="$REPORT_DIR/${scenario_name}_${vi_type}_${count}vm_${timestamp}" + mkdir -p "$base_dir/statistics" + mkdir -p "$base_dir/vpa" + echo "$base_dir" +} + +remove_report_dir() { + local dir=${1:-$REPORT_DIR} + rm -rf $dir +} + +# Function to prepare for tests +prepare_for_tests() { + log_info "Preparing for tests" + log_info "Operating System: $OS_TYPE" +} + + diff --git a/tests/performance/lib/controller.sh b/tests/performance/lib/controller.sh new file mode 100755 index 0000000000..8f2f0e4703 --- /dev/null +++ b/tests/performance/lib/controller.sh @@ -0,0 +1,124 @@ +#!/usr/bin/env bash + +# Controller management library for performance testing +# This module handles controller lifecycle management + +# Source common utilities +source "$(dirname "${BASH_SOURCE[0]}")/common.sh" + +stop_virtualization_controller() { + local start_time=$(get_timestamp) + + log_info "Stopping virtualization controller" + # Get original replicas count before stopping + ORIGINAL_CONTROLLER_REPLICAS=$(kubectl -n d8-virtualization get deployment virtualization-controller -o jsonpath="{.spec.replicas}" 2>/dev/null || echo "1") + log_info "Original controller replicas: $ORIGINAL_CONTROLLER_REPLICAS" + log_info "Start time: $(formatted_date $start_time)" + + local scale_start=$(get_timestamp) + kubectl -n d8-virtualization scale --replicas 0 deployment virtualization-controller + local scale_end=$(get_timestamp) + local scale_duration=$((scale_end - scale_start)) + log_info "Scale down command completed in $(format_duration $scale_duration)" + + local wait_start=$(get_timestamp) + while true; do + local count_pods=$(kubectl -n d8-virtualization get pods | grep virtualization-controller | wc -l) + + if [ $count_pods -eq 0 ]; then + local wait_end=$(get_timestamp) + local wait_duration=$((wait_end - wait_start)) + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + local formatted_duration=$(format_duration "$duration") + + log_info "Wait for controller stop completed in $(format_duration $wait_duration)" + log_info "Controller stopped - End time: $(formatted_date $end_time)" + log_info "Scale command: $(format_duration $scale_duration), Wait time: $(format_duration $wait_duration)" + log_success "Controller stopped in $formatted_duration" + break + fi + + log_info "Waiting for virtualization-controller to be stopped... Pods: $count_pods" + sleep 2 + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." 
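+    # Final guard: kubectl wait blocks (up to 300s) until the Deployment reports the Available condition.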
+ kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s +} + +start_virtualization_controller() { + local start_time=$(get_timestamp) + + log_info "Starting Virtualization-controller" + log_info "Restoring controller to original replicas: ${ORIGINAL_CONTROLLER_REPLICAS:-1}" + log_info "Start time: $(formatted_date $start_time)" + + local scale_start=$(get_timestamp) + kubectl -n d8-virtualization scale --replicas ${ORIGINAL_CONTROLLER_REPLICAS:-1} deployment virtualization-controller + local scale_end=$(get_timestamp) + local scale_duration=$((scale_end - scale_start)) + log_info "Scale up command completed in $(format_duration $scale_duration)" + + log_info "Wait for deployment for Virtualization-controller to be fully available..." + kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + local formatted_duration=$(format_duration "$duration") + + log_info "Virtualization-controller started - End time: $(formatted_date $end_time)" + log_success "Virtualization-controller started in $formatted_duration" +} + +# FIXED: Create VM while controller is stopped using task apply:all +create_vm_while_controller_stopped() { + local vi_type=$1 + local start_time=$(get_timestamp) + + log_info "Creating 1 VM and disk while controller is stopped using task apply:all" + log_info "Start time: $(formatted_date $start_time)" + log_vm_operation "Creating 1 VM and disk while controller is stopped using task apply:all" + + # Deploy MAIN_COUNT_RESOURCES + 1 VMs using task apply:all + log_info "Deploying 1 new VM" + + local task_start=$(get_timestamp) + task apply:all \ + COUNT=$((MAIN_COUNT_RESOURCES + 1)) \ + NAMESPACE=$NAMESPACE \ + STORAGE_CLASS=$(get_default_storage_class) \ + VIRTUALDISK_TYPE=virtualDisk \ + VIRTUALIMAGE_TYPE=$vi_type || true + local task_end=$(get_timestamp) + local task_duration=$((task_end - task_start)) + log_info "Task apply:all completed in $(format_duration $task_duration)" +} + +wait_for_new_vm_after_controller_start() { + # Wait for the last VM and VD to be ready + log_info "Waiting for the last VM and VD to be ready" + local wait_start=$(get_timestamp) + + # Get the name of the last VM and VD + local last_vm=$(kubectl -n $NAMESPACE get vm --no-headers | tail -n 1 | awk '{print $1}') + local last_vd=$(kubectl -n $NAMESPACE get vd --no-headers | tail -n 1 | awk '{print $1}') + + log_info "Waiting for last VM: $last_vm and last VD: $last_vd" + + # Wait for the last VM to be Running + while true; do + local vm_status=$(kubectl -n $NAMESPACE get vm $last_vm -o jsonpath="{.status.phase}" 2>/dev/null || echo "NotFound") + local vd_status=$(kubectl -n $NAMESPACE get vd $last_vd -o jsonpath="{.status.phase}" 2>/dev/null || echo "NotFound") + + if [ "$vm_status" == "Running" ] && [ "$vd_status" == "Ready" ]; then + local wait_end=$(get_timestamp) + local wait_duration=$((wait_end - wait_start)) + log_info "Last VM and VD are ready in $(format_duration $wait_duration)" + break + fi + + log_info "Waiting for last VM ($last_vm): $vm_status, last VD ($last_vd): $vd_status" + sleep 5 + done +} \ No newline at end of file diff --git a/tests/performance/lib/migration.sh b/tests/performance/lib/migration.sh new file mode 100755 index 0000000000..cb8a6d916c --- /dev/null +++ b/tests/performance/lib/migration.sh @@ -0,0 +1,374 @@ +#!/usr/bin/env bash + +# Migration testing library for performance 
testing
+# This module handles migration testing functionality
+
+# Source common utilities
+source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
+
+start_migration_old() {
+    # Supported duration formats: 0m - infinite, 30s - 30 seconds, 1h - 1 hour, 2h30m - 2 hours and 30 minutes
+    local duration=${1:-"0m"}
+    local target=${2:-"5"}
+    local session="test-perf"
+
+    echo "Create tmux session: $session"
+    tmux -2 new-session -d -s "${session}"
+
+    tmux new-window -t "$session:1" -n "$NAMESPACE"
+    tmux split-window -h -t 0   # Pane 0 (left), Pane 1 (right)
+    tmux split-window -v -t 1   # Pane 1 (top), Pane 2 (bottom)
+
+    tmux select-pane -t 0
+    tmux send-keys "k9s -n $NAMESPACE" C-m
+    tmux resize-pane -t 1 -x 50%
+
+    echo "Start migration in $session, pane 1"
+    tmux select-pane -t 1
+    tmux send-keys "NS=$NAMESPACE TARGET=${target} DURATION=${duration} task evicter:run:migration" C-m
+    tmux resize-pane -t 1 -x 50%
+
+    tmux select-pane -t 2
+    tmux resize-pane -t 2 -x 50%
+    echo "For watching migration in $session, connect to the session with:"
+    echo "tmux -2 attach -t ${session}"
+
+    echo ""
+
+}
+
+start_migration() {
+    local duration=${1:-"0m"}
+    local target=${2:-"5"}
+    local session="test-perf"
+    local ns="${NAMESPACE:-perf}"
+
+    echo "Create tmux session: $session"
+    tmux new-session -d -s "${session}" -n "${ns}"   # window named after the namespace
+
+    # Split the window
+    tmux split-window -h -t "${session}:0"     # Pane 0 (left), Pane 1 (right)
+    tmux split-window -v -t "${session}:0.1"   # Split the right pane into .1 (top) and .2 (bottom)
+
+    # Send the commands to the target panes explicitly
+    tmux select-pane -t "${session}:0.0"
+    tmux send-keys -t "${session}:0.0" "k9s -n ${ns}" C-m
+    tmux resize-pane -t "${session}:0.1" -x 50%
+
+    echo "Start migration in $session, pane 1"
+    tmux select-pane -t "${session}:0.1"
+    tmux send-keys -t "${session}:0.1" "NS=${ns} TARGET=${target} DURATION=${duration} task evicter:run:migration" C-m
+    tmux resize-pane -t "${session}:0.1" -x 50%
+
+    tmux select-pane -t "${session}:0.2"
+    tmux resize-pane -t "${session}:0.2" -x 50%
+
+    echo "For watching migration in $session, attach with:"
+    echo "tmux -2 attach -t ${session}"
+
+    # Optional:
+    # if [ -n "${TMUX:-}" ]; then
+    #     tmux switch-client -t "${session}"   # already inside tmux: switch the client to the created session
+    # else
+    #     tmux -2 attach -t "${session}"       # outside tmux: just attach to the created session
+    # fi
+}
+
+stop_migration() {
+    local SESSION="test-perf"
+    tmux send-keys -t "${SESSION}:1.1" C-c || true
+    sleep 1
+    tmux -2 kill-session -t "${SESSION}" || true
+}
+
+wait_migration() {
+    local timeout=${1:-"5m"}
+    local wait_migration=$( echo "$timeout" | sed 's/m//' )
+    local start_time=$(get_timestamp)
+
+    log_info "Waiting for migration to complete"
+    log_info "Timeout: $timeout"
+
+    while true; do
+        current_time=$(get_timestamp)
+        duration=$((current_time - start_time))
+        if [ $duration -ge $(( $wait_migration*60 )) ]; then
+            log_info "Migration timeout reached, stopping migrator"
+            stop_migration
+            log_success "Migration completed"
+            break
+        fi
+        log_info "Waiting for migration to complete"
+        log_info "Elapsed: $duration seconds of $(( $wait_migration*60 ))"
+        sleep 1
+    done
+
+    # Additional wait using kubectl wait
+    log_info "Additional wait for deployment to be fully available..."
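+    # The migrator has been stopped; as a final guard, confirm the virtualization controller deployment still reports Available.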
+ kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s +} + +# NEW: Wait for migration completion before proceeding +wait_migration_completion() { + local start_time=$(get_timestamp) + + log_info "Waiting for migration to complete" + log_vmop_operation "Waiting for migration to complete" + + # Wait for all vmops to complete + wait_vmops_complete + + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + log_info "Migration completion wait finished in $(format_duration $duration)" + log_vmop_operation "Migration completion wait finished in $(format_duration $duration)" +} + +remove_vmops() { + local namespace=${1:-$NAMESPACE} + + while true; do + log_info "Check if all vmops are removed" + log_vmop_operation "Checking vmops for removal" + local vmop_total=$(( $(kubectl -n $namespace get vmop | wc -l )-1 )) + local vmop_list=$(kubectl -n $namespace get vmop | grep "Completed" | awk '{print $1}') + local vmop_failed_list=$(kubectl -n $namespace get vmop | grep "Failed" | awk '{print $1}') + log_warning "VMOP failed list: $vmop_failed_list" + log_vmop_operation "VMOP failed list: $vmop_failed_list" + + vmop_list+=" $vmop_failed_list" + + log_info "VMOP total: $( if [ $vmop_total -le 0 ]; then echo "0"; else echo $vmop_total; fi )" + log_vmop_operation "VMOP total: $( if [ $vmop_total -le 0 ]; then echo "0"; else echo $vmop_total; fi )" + if [ $vmop_total -le 0 ]; then + log_success "All vmops are removed" + log_vmop_operation "All vmops are removed" + break + fi + + for vmop in $vmop_list; do + kubectl -n $namespace delete vmop $vmop --wait=false || true + log_vmop_operation "Deleted vmop: $vmop" + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." + kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s + + log_info "Wait 2 sec" + sleep 2 + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." + kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s +} + +wait_vmops() { + local sleep_time=${1:-2} + + while true; do + local VMOPInProgress=$(kubectl -n $NAMESPACE get vmop | grep "InProgress" | wc -l) + + if [ $VMOPInProgress -eq 0 ]; then + echo "All vmops are ready" + echo "$(formatted_date $(get_timestamp))" + echo "" + break + fi + + echo "" + echo "Waiting for vmops to be ready..." + echo "VMOP InProgress: $VMOPInProgress" + echo "" + echo "Waiting for $sleep_time seconds..." + sleep $sleep_time + echo "" + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." 
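+    # Note: wait_vmops only waits until no VMOPs are InProgress; Completed/Failed objects are left for remove_vmops to clean up.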
+ kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s +} + +# FIXED: Wait for vmops to complete (including Failed status) and check VMs are Running +wait_vmops_complete() { + local sleep_time=${1:-2} + local start_time=$(get_timestamp) + + while true; do + local vmop_total=$(( $(kubectl -n $NAMESPACE get vmop | wc -l)-1 )) + local VMOPCompleted=$(kubectl -n $NAMESPACE get vmop | grep "Completed" | wc -l) + local VMOPFailed=$(kubectl -n $NAMESPACE get vmop | grep "Failed" | wc -l) + local VMOPInProgress=$(kubectl -n $NAMESPACE get vmop | grep "InProgress" | wc -l) + + if [ $vmop_total -eq -1 ]; then + vmop_total=0 + fi + + # Consider vmops complete if they are either Completed or Failed (not InProgress) + local VMOPFinished=$((VMOPCompleted + VMOPFailed)) + + if [ $VMOPFinished -eq $vmop_total ] && [ $VMOPInProgress -eq 0 ]; then + # Additional check: ensure all VMs are Running + local VMRunning=$(kubectl -n $NAMESPACE get vm | grep "Running" | wc -l) + local VMTotal=$(kubectl -n $NAMESPACE get vm -o name | wc -l) + + if [ $VMRunning -eq $VMTotal ]; then + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + formatted_duration=$(format_duration "$duration") + + log_info "VMOPs completed - Duration: $duration seconds" + log_info "Execution time: $formatted_duration" + log_info "Completed: $VMOPCompleted, Failed: $VMOPFailed, Total: $vmop_total" + log_info "All VMs are Running: $VMRunning/$VMTotal" + log_vmop_operation "VMOPs completed - Duration: $duration seconds" + log_vmop_operation "Completed: $VMOPCompleted, Failed: $VMOPFailed, Total: $vmop_total" + log_vmop_operation "All VMs are Running: $VMRunning/$VMTotal" + break + else + log_info "VMOPs finished but VMs not all Running yet: $VMRunning/$VMTotal" + log_vmop_operation "VMOPs finished but VMs not all Running yet: $VMRunning/$VMTotal" + fi + fi + + log_info "Waiting for vmops to be ready... Completed: $VMOPCompleted, Failed: $VMOPFailed, InProgress: $VMOPInProgress, Total: $vmop_total" + log_vmop_operation "Waiting for vmops to be ready... Completed: $VMOPCompleted, Failed: $VMOPFailed, InProgress: $VMOPInProgress, Total: $vmop_total" + sleep $sleep_time + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." + kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s +} + +migration_percent_vms() { + local target_count=${1:-$PERCENT_RESOURCES} + local namespace=${2:-$NAMESPACE} + local start_time=$(get_timestamp) + + log_info "Starting migration of $target_count VMs" + log_info "Start time: $(formatted_date $start_time)" + log_vm_operation "Starting migration of $target_count VMs" + + local vms=( $(kubectl -n $NAMESPACE get vm --no-headers | awk '$2 == "Running" {print $1}' | tail -n $target_count) ) + + for vm in "${vms[@]}"; do + log_info "Migrating VM [$vm] via evict" + log_vm_operation "Migrating VM [$vm] via evict" + d8 v -n $NAMESPACE evict $vm --wait=false + done + + # Additional wait using kubectl wait + # log_info "Additional wait for deployment to be fully available..." 
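+    # The controller re-check is disabled at this point; wait_vmops_complete below performs the same check once all VMOPs have settled.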
+ # kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s + + wait_vmops_complete + + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + local formatted_duration=$(format_duration "$duration") + + log_info "Migration completed - End time: $(formatted_date $end_time)" + log_success "Migrated $target_count VMs in $formatted_duration" + log_vm_operation "Migration completed - Migrated $target_count VMs in $formatted_duration" +} + +drain_node() { + local start_time=$(get_timestamp) + + log_info "Start draining node" + log_info "Start time: $(formatted_date $start_time)" + + local task_start=$(get_timestamp) + + local KUBECONFIG_MERGE=$(kubectl config view --merge --flatten | base64 -w 0) + KUBECONFIG_BASE64=$KUBECONFIG_MERGE task shatal:run + + local task_end=$(get_timestamp) + local task_duration=$((task_end - task_start)) + local end_time=$(get_timestamp) + local formatted_duration=$(format_duration "$task_duration") + + log_info "Duration node completed - End time: $(formatted_date $end_time)" + log_info "Task Duration node execution: $(format_duration $task_duration)" + log_success "Duration node completed in $formatted_duration" + echo "$task_duration" +} + +scale_deckhouse() { + local replicas=${1} + ORIGINAL_DECHOUSE_CONTROLLER_REPLICAS=$(kubectl -n d8-system get deployment deckhouse -o jsonpath="{.spec.replicas}" 2>/dev/null || echo "1") + log_info "Deckhouse controller replicas: $ORIGINAL_DECHOUSE_CONTROLLER_REPLICAS" + log_info "Deckhouse controller scaled to $replicas" + kubectl -n d8-system scale --replicas $replicas deployment deckhouse + log_success "Deckhouse controller scaled to $replicas" +} + +migration_config() { + # default values + # { + # "bandwidthPerMigration": "640Mi", + # "completionTimeoutPerGiB": 800, + # "parallelMigrationsPerCluster": 8, # count all nodes + # "parallelOutboundMigrationsPerNode": 1, + # "progressTimeout": 150 + # } + local amountNodes=$(kubectl get nodes --no-headers -o name | wc -l) + + local bandwidthPerMigration=${1:-"640Mi"} + local completionTimeoutPerGiB=${2:-"800"} + local parallelMigrationsPerCluster=${3:-$amountNodes} + local parallelOutboundMigrationsPerNode=${4:-"1"} + local progressTimeout=${5:-"150"} + + echo "====== configure patch ======" + echo "bandwidthPerMigration: $bandwidthPerMigration" + echo "completionTimeoutPerGiB: $completionTimeoutPerGiB" + echo "parallelMigrationsPerCluster: $parallelMigrationsPerCluster" + echo "parallelOutboundMigrationsPerNode: $parallelOutboundMigrationsPerNode" + echo "progressTimeout: $progressTimeout" + + patch_json=$( + jq -n \ + --arg bpm "$bandwidthPerMigration" \ + --argjson ct $completionTimeoutPerGiB \ + --argjson pmc $parallelMigrationsPerCluster \ + --argjson pmon $parallelOutboundMigrationsPerNode \ + --argjson pt $progressTimeout \ + '{ + spec: { + configuration: { + migrations: { + bandwidthPerMigration: $bpm, + completionTimeoutPerGiB: $ct, + parallelMigrationsPerCluster: $pmc, + parallelOutboundMigrationsPerNode: $pmon, + progressTimeout: $pt + } + } + } + }' + ) + log_info "Checking restricted access policy" + + if kubectl get validatingadmissionpolicies.admissionregistration.k8s.io virtualization-restricted-access-policy >/dev/null 2>&1; then + log_info "Deleting restricted access policy" + kubectl delete validatingadmissionpolicies.admissionregistration.k8s.io virtualization-restricted-access-policy + else + log_info "No restricted access policy" + fi + + sleep 1 + + log_info "Patching 
kubevirt config" + + kubectl -n d8-virtualization patch \ + --as=system:sudouser \ + internalvirtualizationkubevirts.internal.virtualization.deckhouse.io config \ + --type=merge -p "$patch_json" + + log_success "Migration settings applyed" +} + diff --git a/tests/performance/lib/reporting.sh b/tests/performance/lib/reporting.sh new file mode 100755 index 0000000000..5b5655d6b6 --- /dev/null +++ b/tests/performance/lib/reporting.sh @@ -0,0 +1,137 @@ +#!/usr/bin/env bash + +# Reporting library for performance testing +# This module handles report generation and summary + +# Source common utilities +source "$(dirname "${BASH_SOURCE[0]}")/common.sh" + +# Function to create summary report +create_summary_report() { + local scenario_name="$1" + local vi_type="$2" + local scenario_dir="$3" + local start_time="$4" + local end_time="$5" + local total_duration="$6" + local cleanup_duration="${7:-0}" + local deploy_duration="${8:-0}" + local stats_duration="${9:-0}" + local stop_duration="${10:-0}" + local start_vm_duration="${11:-0}" + local undeploy_duration="${12:-0}" + local deploy_remaining_duration="${13:-0}" + local vm_stats_duration="${14:-0}" + local vm_ops_duration="${15:-0}" + local vm_ops_stop_duration="${16:-0}" + local vm_ops_start_duration="${17:-0}" + local migration_duration="${18:-0}" + local cleanup_ops_duration="${19:-0}" + local migration_percent_duration="${20:-0}" + local controller_duration="${21:-0}" + local final_stats_duration="${22:-0}" + local drain_stats_duration="${23:-0}" + local final_cleanup_duration="${24:-0}" + local migration_parallel_2x_duration="${25:-0}" + local migration_parallel_4x_duration="${26:-0}" + local migration_parallel_8x_duration="${27:-0}" + + local summary_file="$scenario_dir/summary.txt" + + # Calculate percentages safely + local cleanup_percent=$(calculate_percentage "$cleanup_duration" "$total_duration") + local deploy_percent=$(calculate_percentage "$deploy_duration" "$total_duration") + local stats_percent=$(calculate_percentage "$stats_duration" "$total_duration") + local stop_percent=$(calculate_percentage "$stop_duration" "$total_duration") + local start_vm_percent=$(calculate_percentage "$start_vm_duration" "$total_duration") + local undeploy_percent=$(calculate_percentage "$undeploy_duration" "$total_duration") + local deploy_remaining_percent=$(calculate_percentage "$deploy_remaining_duration" "$total_duration") + local vm_stats_percent=$(calculate_percentage "$vm_stats_duration" "$total_duration") + local vm_ops_percent=$(calculate_percentage "$vm_ops_duration" "$total_duration") + local vm_ops_stop_percent=$(calculate_percentage "$vm_ops_stop_duration" "$total_duration") + local vm_ops_start_percent=$(calculate_percentage "$vm_ops_start_duration" "$total_duration") + local migration_percent=$(calculate_percentage "$migration_duration" "$total_duration") + local cleanup_ops_percent=$(calculate_percentage "$cleanup_ops_duration" "$total_duration") + local migration_percent_percent=$(calculate_percentage "$migration_percent_duration" "$total_duration") + local controller_percent=$(calculate_percentage "$controller_duration" "$total_duration") + local final_stats_percent=$(calculate_percentage "$final_stats_duration" "$total_duration") + local drain_stats_percent=$(calculate_percentage "$drain_stats_duration" "$total_duration") + local final_cleanup_percent=$(calculate_percentage "$final_cleanup_duration" "$total_duration") + local migration_parallel_2x_percent=$(calculate_percentage "$migration_parallel_2x_duration" "$total_duration") + 
local migration_parallel_4x_percent=$(calculate_percentage "$migration_parallel_4x_duration" "$total_duration") + local migration_parallel_8x_percent=$(calculate_percentage "$migration_parallel_8x_duration" "$total_duration") + + cat > "$summary_file" << EOF +================================================================================ + PERFORMANCE TEST SUMMARY REPORT +================================================================================ + +Scenario: $scenario_name +Virtual Image Type: $vi_type +Test Date: $(formatted_date $start_time) +Duration: $(format_duration $total_duration) + +================================================================================ + EXECUTION TIMELINE +================================================================================ + +Start Time: $(formatted_date $start_time) +End Time: $(formatted_date $end_time) +Total Duration: $(format_duration $total_duration) + +================================================================================ + STEP DURATION BREAKDOWN +================================================================================ + +$(printf "%-55s %10s %10s\n" "Phase" "Duration" "Percentage") +$(printf "%-55s %10s %10s\n" "-------------------------------------------------------" "----------" "----------") +$(printf "%-55s %10s %10s\n" "Cleanup" "$(format_duration $cleanup_duration)" "$(printf "%5.1f" $cleanup_percent)%") +$(printf "%-55s %10s %10s\n" "Deploy VMs [$MAIN_COUNT_RESOURCES]" "$(format_duration $deploy_duration)" "$(printf "%5.1f" $deploy_percent)%") +$(printf "%-55s %10s %10s\n" "Statistics Collection" "$(format_duration $stats_duration)" "$(printf "%5.1f" $stats_percent)%") +$(printf "%-55s %10s %10s\n" "VM Stop [$MAIN_COUNT_RESOURCES]" "$(format_duration $stop_duration)" "$(printf "%5.1f" $stop_percent)%") +$(printf "%-55s %10s %10s\n" "VM Start [$MAIN_COUNT_RESOURCES]" "$(format_duration $start_vm_duration)" "$(printf "%5.1f" $start_vm_percent)%") +$(printf "%-55s %10s %10s\n" "VM Undeploy 10% VMs [$PERCENT_RESOURCES] (keeping disks)" "$(format_duration $undeploy_duration)" "$(printf "%5.1f" $undeploy_percent)%") +$(printf "%-55s %10s %10s\n" "Deploying 10% VMs [$PERCENT_RESOURCES] (keeping disks)" "$(format_duration $deploy_remaining_duration)" "$(printf "%5.1f" $deploy_remaining_percent)%") +$(printf "%-55s %10s %10s\n" "VM Statistics: Deploying 10% VMs ([$PERCENT_RESOURCES] VMs)" "$(format_duration $vm_stats_duration)" "$(printf "%5.1f" $vm_stats_percent)%") +$(printf "%-55s %10s %10s\n" "VM Operations: Stopping VMs [$PERCENT_RESOURCES]" "$(format_duration $vm_ops_stop_duration)" "$(printf "%5.1f" $vm_ops_stop_percent)%") +$(printf "%-55s %10s %10s\n" "VM Operations: Start VMs [$PERCENT_RESOURCES]" "$(format_duration $vm_ops_start_duration)" "$(printf "%5.1f" $vm_ops_start_percent)%") +$(printf "%-55s %10s %10s\n" "Migration Setup (${MIGRATION_PERCENTAGE_5}% - ${MIGRATION_5_COUNT} VMs)" "$(format_duration $migration_duration)" "$(printf "%5.1f" $migration_percent)%") +$(printf "%-55s %10s %10s\n" "Stop Migration ${MIGRATION_PERCENTAGE_5}% (${MIGRATION_5_COUNT} VMs)" "$(format_duration $cleanup_ops_duration)" "$(printf "%5.1f" $cleanup_ops_percent)%") +$(printf "%-55s %10s %10s\n" "Migration Percentage ${MIGRATION_10_COUNT} VMs (10%)" "$(format_duration $migration_percent_duration)" "$(printf "%5.1f" $migration_percent_percent)%") +$(printf "%-55s %10s %10s\n" "Migration parallelMigrationsPerCluster 2x nodes" "$(format_duration $migration_parallel_2x_duration)" "$(printf "%5.1f" 
$migration_parallel_2x_percent)%") +$(printf "%-55s %10s %10s\n" "Migration parallelMigrationsPerCluster 4x nodes" "$(format_duration $migration_parallel_4x_duration)" "$(printf "%5.1f" $migration_parallel_4x_percent)%") +$(printf "%-55s %10s %10s\n" "Migration parallelMigrationsPerCluster 8x nodes" "$(format_duration $migration_parallel_8x_duration)" "$(printf "%5.1f" $migration_parallel_8x_percent)%") +$(printf "%-55s %10s %10s\n" "Controller Restart" "$(format_duration $controller_duration)" "$(printf "%5.1f" $controller_percent)%") +$(printf "%-55s %10s %10s\n" "Final Statistics" "$(format_duration $final_stats_duration)" "$(printf "%5.1f" $final_stats_percent)%") +$(printf "%-55s %10s %10s\n" "Drain node" "$(format_duration $drain_stats_duration)" "$(printf "%5.1f" $drain_stats_percent)%") +$(printf "%-55s %10s %10s\n" "Final Cleanup" "$(format_duration $final_cleanup_duration)" "$(printf "%5.1f" $final_cleanup_percent)%") + +================================================================================ + PERFORMANCE METRICS +================================================================================ + +$(printf "%-25s %10s\n" "Total VMs Tested:" "$MAIN_COUNT_RESOURCES") +$(printf "%-25s %10s\n" "VM Deployment Time:" "$(format_duration $deploy_duration)") +$(printf "%-25s %10s\n" "VM Stop Time:" "$(format_duration $stop_duration)") +$(printf "%-25s %10s\n" "VM Start Time:" "$(format_duration $start_vm_duration)") +$(printf "%-25s %10s\n" "Controller Restart Time:" "$(format_duration $controller_duration)") +$(printf "%-25s %10s\n" "Migration 5% Time:" "$(format_duration $migration_duration)") +$(printf "%-25s %10s\n" "Migration 10% Time:" "$(format_duration $migration_percent_duration)") +$(printf "%-25s %10s\n" "Drain Node Time:" "$(format_duration $drain_stats_duration)") +================================================================================ + FILES GENERATED +================================================================================ + +$(printf "%-25s %s\n" "Log File:" "$scenario_dir/test.log") +$(printf "%-25s %s\n" "VM Operations Log:" "$scenario_dir/vm_operations.log") +$(printf "%-25s %s\n" "Statistics Directory:" "$scenario_dir/statistics/") +$(printf "%-25s %s\n" "VPA Data Directory:" "$scenario_dir/vpa/") +$(printf "%-25s %s\n" "Summary Report:" "$scenario_dir/summary.txt") + +================================================================================ +EOF + + log_info "Summary report created: $summary_file" +} + + diff --git a/tests/performance/lib/scenarios.sh b/tests/performance/lib/scenarios.sh new file mode 100755 index 0000000000..a329922715 --- /dev/null +++ b/tests/performance/lib/scenarios.sh @@ -0,0 +1,358 @@ +#!/usr/bin/env bash + +# Scenarios library for performance testing +# This module handles scenario execution orchestration + +# Source all other libraries +source "$(dirname "${BASH_SOURCE[0]}")/common.sh" +source "$(dirname "${BASH_SOURCE[0]}")/vm_operations.sh" +source "$(dirname "${BASH_SOURCE[0]}")/migration.sh" +source "$(dirname "${BASH_SOURCE[0]}")/statistics.sh" +source "$(dirname "${BASH_SOURCE[0]}")/controller.sh" +source "$(dirname "${BASH_SOURCE[0]}")/reporting.sh" + +# Function to run a single scenario +run_scenario() { + local scenario_name=$1 + local vi_type=$2 + + log_info "=== Starting scenario: $scenario_name with $vi_type ===" + + # Initialize logging and create report directory + init_logging "$scenario_name" "$vi_type" "$MAIN_COUNT_RESOURCES" + remove_report_dir 
"$REPORT_DIR/${scenario_name}_${vi_type}_${MAIN_COUNT_RESOURCES}vm_*" + local scenario_dir=$(create_report_dir "$scenario_name" "$vi_type" "$MAIN_COUNT_RESOURCES") + + # Step 1: Clean up any existing resources + log_info "Step 1: Cleaning up existing resources" + log_step_start "Step 1: Cleanup up existing resources" + local cleanup_start=$(get_timestamp) + stop_migration + remove_vmops + undeploy_resources + local cleanup_end=$(get_timestamp) + local cleanup_duration=$((cleanup_end - cleanup_start)) + log_info "Cleanup completed in $(format_duration $cleanup_duration)" + log_step_end "Step 1: Cleanup up existing resources" "$cleanup_duration" + + local start_time=$(get_timestamp) + log_info "== Scenario started at $(formatted_date $start_time) ==" + + # Step 2: Main test sequence + log_step_start "Step 2: Deploy VMs [$MAIN_COUNT_RESOURCES]" + local deploy_start=$(get_timestamp) + deploy_vms_with_disks $MAIN_COUNT_RESOURCES $vi_type + local deploy_end=$(get_timestamp) + local deploy_duration=$((deploy_end - deploy_start)) + log_info "VM [$MAIN_COUNT_RESOURCES] deploy completed in $(format_duration $deploy_duration)" + log_step_end "Step 2: End VM Deployment [$MAIN_COUNT_RESOURCES]" "$deploy_duration" + + # Step 3: Statistics Collection + log_step_start "Step 3: Start Statistics Collection" + local stats_start=$(get_timestamp) + gather_all_statistics "$scenario_dir/statistics" + collect_vpa "$scenario_dir" + local stats_end=$(get_timestamp) + local stats_duration=$((stats_end - stats_start)) + log_info "Statistics collection completed in $(format_duration $stats_duration)" + log_step_end "Step 3: End Statistics Collection" "$stats_duration" + + log_info "Waiting 10 seconds before stopping VMs" + sleep 10 + + # Step 4: VM Stop + log_info "Step 4: Stopping all VMs [$MAIN_COUNT_RESOURCES]" + log_step_start "Step 4: VM Stop" + local stop_start=$(get_timestamp) + stop_vm + local stop_end=$(get_timestamp) + local stop_duration=$((stop_end - stop_start)) + log_info "VM stop completed in $(format_duration $stop_duration)" + log_step_end "Step 4: End Stopping VMs [$MAIN_COUNT_RESOURCES]" "$stop_duration" + + log_info "Waiting 10 seconds before starting VMs" + sleep 10 + + # Step 5: VM Start + log_info "Step 5: Starting all VMs [$MAIN_COUNT_RESOURCES]" + log_step_start "Step 5: VM Start [$MAIN_COUNT_RESOURCES]" + local start_vm_start=$(get_timestamp) + start_vm + local start_vm_end=$(get_timestamp) + local start_vm_duration=$((start_vm_end - start_vm_start)) + log_info "VM start completed in $(format_duration $start_vm_duration)" + log_step_end "Step 5: End VM Start [$MAIN_COUNT_RESOURCES]" "$start_vm_duration" + + # Step 6: VM Undeploy 10% VMs + log_info "Step 6: Undeploying 10% VMs [$PERCENT_RESOURCES] (keeping disks)" + log_step_start "Step 6: VM Undeploy 10% VMs [$PERCENT_RESOURCES] (keeping disks)" + local undeploy_start=$(get_timestamp) + undeploy_vms_only $PERCENT_RESOURCES + local undeploy_end=$(get_timestamp) + local undeploy_duration=$((undeploy_end - undeploy_start)) + log_info "VM Undeploy 10% VMs [$PERCENT_RESOURCES] completed in $(format_duration $undeploy_duration)" + log_step_end "Step 6: VM Undeploy 10% VMs [$PERCENT_RESOURCES] (keeping disks)" "$undeploy_duration" + + # Step 7: CORRECTED ORDER: Deploy 10% VMs and gather statistics (пункт 8) + log_info "Step 7: Deploying 10% VMs ([$PERCENT_RESOURCES] VMs) and gathering statistics" + log_step_start "Step 7: Deploying 10% VMs [$PERCENT_RESOURCES]" + local deploy_remaining_start=$(get_timestamp) + deploy_vms_only $MAIN_COUNT_RESOURCES + 
local deploy_remaining_end=$(get_timestamp)
+    local deploy_remaining_duration=$((deploy_remaining_end - deploy_remaining_start))
+    log_info "10% VMs deployment completed in $(format_duration $deploy_remaining_duration)"
+    log_step_end "Step 7: End Deploying 10% VMs ([$PERCENT_RESOURCES] VMs)" "$deploy_remaining_duration"
+
+    # Step 8: Gather statistics for the 10% VMs (plan item 8.1)
+    log_step_start "Step 8: VM Statistics: Deploying 10% VMs ([$PERCENT_RESOURCES] VMs)"
+    local vm_stats_start=$(get_timestamp)
+    gather_specific_vm_statistics "$scenario_dir/statistics" "$NAMESPACE" "$PERCENT_RESOURCES"
+    local vm_stats_end=$(get_timestamp)
+    local vm_stats_duration=$((vm_stats_end - vm_stats_start))
+    log_info "VM statistics collection completed in $(format_duration $vm_stats_duration)"
+    log_step_end "End VM Statistics: Deploying 10% VMs ([$PERCENT_RESOURCES] VMs)" "$vm_stats_duration"
+
+    # Start the 5% migration in the background (plan item 7)
+    local migration_duration_time="0m"
+    log_info "Starting migration test ${MIGRATION_PERCENTAGE_5}% (${MIGRATION_5_COUNT} VMs)"
+    log_step_start "Step 9: Migration Setup"
+    local migration_start=$(get_timestamp)
+    start_migration $migration_duration_time $MIGRATION_PERCENTAGE_5
+    local migration_end=$(get_timestamp)
+    local migration_duration=$((migration_end - migration_start))
+    log_info "Migration test ${MIGRATION_PERCENTAGE_5}% VMs setup completed in $(format_duration $migration_duration)"
+    log_step_end "Step 9: Migration Setup ${MIGRATION_PERCENTAGE_5}% (${MIGRATION_5_COUNT} VMs) Started" "$migration_duration"
+
+    # VM operations test - stop/start 10% of the VMs (plan items 9-10)
+    log_info "Testing VM stop/start operations for 10% VMs"
+    log_step_start "Step 10: VM Operations"
+    local vm_ops_start=$(get_timestamp)
+
+    log_step_start "VM Operations: Stopping VMs [$PERCENT_RESOURCES]"
+    local vm_ops_stop_start=$(get_timestamp)
+    stop_vm $PERCENT_RESOURCES
+    local vm_ops_stop_end=$(get_timestamp)
+    local vm_ops_stop_duration=$((vm_ops_stop_end - vm_ops_stop_start))
+    log_step_end "VM Operations: Stopping VMs [$PERCENT_RESOURCES]" "$vm_ops_stop_duration"
+
+    sleep 2
+
+    log_step_start "VM Operations: Start VMs [$PERCENT_RESOURCES]"
+    local vm_ops_start_vm_start=$(get_timestamp)
+    start_vm $PERCENT_RESOURCES
+    local vm_ops_start_vm_end=$(get_timestamp)
+    local vm_ops_start_vm_duration=$((vm_ops_start_vm_end - vm_ops_start_vm_start))
+    log_step_end "VM Operations: Start VMs [$PERCENT_RESOURCES]" "$vm_ops_start_vm_duration"
+
+    local vm_ops_end=$(get_timestamp)
+    local vm_ops_duration=$((vm_ops_end - vm_ops_start))
+    log_info "VM operations test completed in $(format_duration $vm_ops_duration)"
+    log_step_end "Step 10: VM Operations: Stop/Start VMs [$PERCENT_RESOURCES]" "$vm_ops_duration"
+
+    # Stop the background migration and wait for it to finish (plan item 11)
+    log_step_start "Step 11: Stop Migration ${MIGRATION_PERCENTAGE_5}% (${MIGRATION_5_COUNT} VMs)"
+    local cleanup_ops_start=$(get_timestamp)
+    stop_migration
+    wait_migration_completion
+    remove_vmops
+    local cleanup_ops_end=$(get_timestamp)
+    local cleanup_ops_duration=$((cleanup_ops_end - cleanup_ops_start))
+    log_info "Migration stop and cleanup completed in $(format_duration $cleanup_ops_duration)"
+    log_step_end "Step 11: Stop Migration ${MIGRATION_PERCENTAGE_5}% (${MIGRATION_5_COUNT} VMs)" "$cleanup_ops_duration"
+
+    # Migration percentage test - migrate 10% of the VMs (plan item 12)
+    log_info "Testing migration of ${MIGRATION_10_COUNT} VMs (10%)"
+    log_step_start "Step 12: Migration Percentage ${MIGRATION_10_COUNT} VMs (10%)"
+    local
migration_percent_start=$(get_timestamp) + migration_percent_vms $MIGRATION_10_COUNT + local migration_percent_end=$(get_timestamp) + local migration_percent_duration=$((migration_percent_end - migration_percent_start)) + log_info "Migration percentage test completed in $(format_duration $migration_percent_duration)" + log_step_end "Step 12: End Migration Percentage ${MIGRATION_10_COUNT} VMs (10%)" "$migration_percent_duration" + + remove_vmops + + log_info "Waiting 5 seconds" + sleep 5 + + # Migration parallelMigrationsPerCluster tests + log_info "Set deckhouse controller replicas to [0]" + scale_deckhouse 0 + local amountNodes=$(kubectl get nodes --no-headers -o name | wc -l) + sleep 5 + + local migration_parallel_2x=$(( $amountNodes*2 )) + local migration_parallel_2x_start=$(get_timestamp) + log_info "Testing migration with parallelMigrationsPerCluster [$migration_parallel_2x (2x)]" + log_step_start "Step 13: Testing migration with parallelMigrationsPerCluster [$migration_parallel_2x (2x)]" + migration_config "640Mi" "800" "$migration_parallel_2x" "1" "150" + migration_percent_vms $MIGRATION_10_COUNT + local migration_parallel_2x_end=$(get_timestamp) + local migration_parallel_2x_duration=$((migration_parallel_2x_end - migration_parallel_2x_start)) + log_step_end "Step 13: Testing migration with parallelMigrationsPerCluster [$migration_parallel_2x (2x)]" "$migration_parallel_2x_duration" + + log_info "Waiting 2 seconds before Cleanup vmops" + sleep 2 + remove_vmops + + log_info "Waiting 5 seconds" + sleep 5 + + local migration_parallel_4x=$(( $amountNodes*4 )) + local migration_parallel_4x_start=$(get_timestamp) + log_info "Testing migration with parallelMigrationsPerCluster [$migration_parallel_4x] (4x)" + log_step_start "Step 14: Testing migration with parallelMigrationsPerCluster [$migration_parallel_4x] (4x)" + migration_config "640Mi" "800" "$migration_parallel_4x" "1" "150" + migration_percent_vms $MIGRATION_10_COUNT + local migration_parallel_4x_end=$(get_timestamp) + local migration_parallel_4x_duration=$((migration_parallel_4x_end - migration_parallel_4x_start)) + log_step_end "Step 14: Testing migration with parallelMigrationsPerCluster [$migration_parallel_4x] (4x)" "$migration_parallel_4x_duration" + + log_info "Waiting 2 seconds before Cleanup vmops" + sleep 2 + remove_vmops + + log_info "Waiting 5 seconds" + sleep 5 + + local migration_parallel_8x=$(( $amountNodes*8 )) + local migration_parallel_8x_start=$(get_timestamp) + log_info "Testing migration with parallelMigrationsPerCluster [$migration_parallel_8x] (8x)" + log_step_start "Step 15: Testing migration with parallelMigrationsPerCluster [$migration_parallel_8x] (8x)" + migration_config "640Mi" "800" "$migration_parallel_8x" "1" "150" + migration_percent_vms $MIGRATION_10_COUNT + local migration_parallel_8x_end=$(get_timestamp) + local migration_parallel_8x_duration=$((migration_parallel_8x_end - migration_parallel_8x_start)) + log_step_end "Step 15: Testing migration with parallelMigrationsPerCluster [$migration_parallel_8x] (8x)" "$migration_parallel_8x_duration" + + log_info "Waiting 2 seconds before Cleanup vmops" + sleep 2 + remove_vmops + + log_info "Back configuration migration back to original" + migration_config + log_info "Restoring original deckhouse controller replicas to [$ORIGINAL_DECHOUSE_CONTROLLER_REPLICAS]" + scale_deckhouse $ORIGINAL_DECHOUSE_CONTROLLER_REPLICAS + + log_info "Waiting 5 seconds" + sleep 5 + + # Controller restart test + log_info "Testing controller restart with 1 VM creation" + 
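+    # Step 16: stop and restart the virtualization controller, apply one extra VM and disk, then measure how long the new VM takes to become Ready.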
log_step_start "Step 16: Controller Restart" + local controller_start=$(get_timestamp) + + # Stop controller first + stop_virtualization_controller + + # Create 1 VM and disk while controller is stopped + log_info "Creating 1 VM and disk while controller is stopped [$((MAIN_COUNT_RESOURCES + 1)) VMs total]" + local vm_creation_start=$(get_timestamp) + local vm_creation_end=$(get_timestamp) + local vm_creation_duration=$((vm_creation_end - vm_creation_start)) + log_info "VM creation while controller stopped completed in $(format_duration $vm_creation_duration)" + + # Start controller and measure time for VM to become ready + log_info "Starting controller and waiting for VM to become ready" + local controller_start_time=$(get_timestamp) + start_virtualization_controller + create_vm_while_controller_stopped $vi_type + wait_for_new_vm_after_controller_start + local controller_end_time=$(get_timestamp) + local controller_duration=$((controller_end_time - controller_start)) + local vm_ready_duration=$((controller_end_time - controller_start_time)) + + log_info "Controller restart test completed in $(format_duration $controller_duration)" + log_info "VM became ready after controller start in $(format_duration $vm_ready_duration)" + log_step_end "Step 16: Controller Restart" "$controller_duration" + + # Final deployment and statistics + # log_info "Final deployment and statistics collection" + # log_step_start "Final Deployment" + # local final_deploy_start=$(get_timestamp) + # deploy_vms_with_disks $MAIN_COUNT_RESOURCES $vi_type + # wait_for_resources "all" + # local final_deploy_end=$(get_timestamp) + # local final_deploy_duration=$((final_deploy_end - final_deploy_start)) + # log_info "Final deployment completed in $(format_duration $final_deploy_duration)" + # log_step_end "Final Deployment" "$final_deploy_duration" + + log_step_start "Step 17: Final Statistics" + local final_stats_start=$(get_timestamp) + gather_all_statistics "$scenario_dir/statistics" + collect_vpa "$scenario_dir" + local final_stats_end=$(get_timestamp) + local final_stats_duration=$((final_stats_end - final_stats_start)) + log_info "Final statistics collection completed in $(format_duration $final_stats_duration)" + log_step_end "Step 17: Final Statistics" "$final_stats_duration" + + log_info "Waiting 30 second before drain node" + sleep 30 + + log_step_start "Step 18: Drain node" + local drain_node_start=$(get_timestamp) + drain_node + local drain_stats_end=$(get_timestamp) + local drain_stats_duration=$((drain_stats_end - drain_node_start)) + log_info "Drain node completed in $(format_duration $drain_stats_duration)" + log_step_end "Step 18: Drain node" "$drain_stats_duration" + + # Skip final cleanup if keep-resources is enabled + if [ "$KEEP_RESOURCES" = "true" ]; then + log_info "Skipping final cleanup (--keep-resources enabled, resources preserved)" + else + log_info "Waiting 30 second before cleanup" + sleep 30 + + log_step_start "Step 19: Final Cleanup" + local final_cleanup_start=$(get_timestamp) + undeploy_resources + local final_cleanup_end=$(get_timestamp) + local final_cleanup_duration=$((final_cleanup_end - final_cleanup_start)) + log_info "Final cleanup completed in $(format_duration $final_cleanup_duration)" + log_step_end "Step 19: Final Cleanup" "$final_cleanup_duration" + fi + + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + local formatted_duration=$(format_duration "$duration") + + log_success "Scenario $scenario_name completed in $formatted_duration" + log_info 
"Scenario ended at $(formatted_date $end_time)" + + # Create summary report + create_summary_report "$scenario_name" "$vi_type" "$scenario_dir" "$start_time" "$end_time" "$duration" \ + "$cleanup_duration" "$deploy_duration" "$stats_duration" "$stop_duration" "$start_vm_duration" \ + "$undeploy_duration" "$deploy_remaining_duration" "$vm_stats_duration" \ + "$vm_ops_duration" "$vm_ops_stop_duration" "$vm_ops_start_vm_duration" "$migration_duration" "$cleanup_ops_duration" "$migration_percent_duration" \ + "$controller_duration" "$final_stats_duration" "$drain_stats_duration" "$final_cleanup_duration" \ + "$migration_parallel_2x_duration" "$migration_parallel_4x_duration" "$migration_parallel_8x_duration" + + # Summary of all step durations + log_info "=== Scenario $scenario_name Duration Summary ===" + log_duration "Step 1: Cleanup" "$cleanup_duration" + log_duration "Step 2: VM Deployment" "$deploy_duration" + log_duration "Step 3: Statistics Collection" "$stats_duration" + log_duration "Step 4: VM Stop" "$stop_duration" + log_duration "Step 5: VM Start" "$start_vm_duration" + log_duration "Step 6: VM Undeploy 10% VMs" "$undeploy_duration" + log_duration "Step 7: Deploying 10% VMs" "$deploy_remaining_duration" + log_duration "Step 8: VM Statistics" "$vm_stats_duration" + log_duration "Step 9: Migration Setup" "$migration_duration" + log_duration "Step 10: VM Operations" "$vm_ops_duration" + log_duration "Step 10: VM Operations: Stopping VMs" "$vm_ops_stop_duration" + log_duration "Step 10: VM Operations: Start VMs" "$vm_ops_start_vm_duration" + log_duration "Step 11: Migration Cleanup" "$cleanup_ops_duration" + log_duration "Step 12: Migration Percentage" "$migration_percent_duration" + log_duration "Step 13: Migration parallelMigrationsPerCluster 2x nodes" "$migration_parallel_2x_duration" + log_duration "Step 14: Migration parallelMigrationsPerCluster 4x nodes" "$migration_parallel_4x_duration" + log_duration "Step 15: Migration parallelMigrationsPerCluster 8x nodes" "$migration_parallel_8x_duration" + log_duration "Step 16: Controller Restart" "$controller_duration" + log_duration "Step 17: Final Statistics" "$final_stats_duration" + log_duration "Step 18: Drain node" "$drain_stats_duration" + log_duration "Step 19: Final Cleanup" "$final_cleanup_duration" + log_duration "Total Scenario Duration" "$duration" + log_info "=== End Duration Summary ===" +} + + diff --git a/tests/performance/lib/statistics.sh b/tests/performance/lib/statistics.sh new file mode 100755 index 0000000000..1747bc913e --- /dev/null +++ b/tests/performance/lib/statistics.sh @@ -0,0 +1,142 @@ +#!/usr/bin/env bash + +# Statistics collection library for performance testing +# This module handles statistics gathering and analysis + +# Source common utilities +source "$(dirname "${BASH_SOURCE[0]}")/common.sh" + +gather_all_statistics() { + local report_dir=${1:-"$REPORT_DIR/statistics"} + local namespace=${2:-$NAMESPACE} + local start_time=$(get_timestamp) + + log_info "Gathering all statistics to $report_dir" + log_info "Start time: $(formatted_date $start_time)" + + local task_start=$(get_timestamp) + task statistic:get-stat:all NAMESPACE=$namespace OUTPUT_DIR=$(realpath $report_dir) + local task_end=$(get_timestamp) + local task_duration=$((task_end - task_start)) + log_info "Task statistic:get-stat:all completed in $(format_duration $task_duration)" + log_duration "Task statistic:get-stat:all" "$task_duration" + + mv tools/statistic/*.csv ${report_dir} 2>/dev/null || true + + local end_time=$(get_timestamp) + local 
duration=$((end_time - start_time)) + log_info "All statistics gathering completed in $(format_duration $duration)" + log_success "All statistics gathered" +} + +gather_vm_statistics() { + local report_dir=${1:-"$REPORT_DIR/statistics"} + local namespace=${2:-$NAMESPACE} + local start_time=$(get_timestamp) + + log_info "Gathering VM statistics to $report_dir" + log_info "Start time: $(formatted_date $start_time)" + + local task_start=$(get_timestamp) + task statistic:get-stat:vm NAMESPACE=$namespace OUTPUT_DIR=$(realpath $report_dir) + local task_end=$(get_timestamp) + local task_duration=$((task_end - task_start)) + log_info "Task statistic:get-stat:vm completed in $(format_duration $task_duration)" + log_duration "Task statistic:get-stat:vm" "$task_duration" + + mv tools/statistic/*.csv ${report_dir} 2>/dev/null || true + + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + log_info "VM statistics gathering completed in $(format_duration $duration)" + log_success "VM statistics gathered" +} + +gather_vd_statistics() { + local report_dir=${1:-"$REPORT_DIR/statistics"} + local namespace=${2:-$NAMESPACE} + local start_time=$(get_timestamp) + + log_info "Gathering VD statistics to $report_dir" + log_info "Start time: $(formatted_date $start_time)" + + local task_start=$(get_timestamp) + task statistic:get-stat:vd NAMESPACE=$namespace OUTPUT_DIR=$(realpath $report_dir) + local task_end=$(get_timestamp) + local task_duration=$((task_end - task_start)) + log_info "Task statistic:get-stat:vd completed in $(format_duration $task_duration)" + log_duration "Task statistic:get-stat:vd" "$task_duration" + + mv tools/statistic/*.csv ${report_dir} 2>/dev/null || true + + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + log_info "VD statistics gathering completed in $(format_duration $duration)" + log_success "VD statistics gathered" +} + +gather_specific_vm_statistics() { + local report_dir=${1:-"$REPORT_DIR/statistics"} + local namespace=${2:-$NAMESPACE} + local vm_count=${3:-0} + local start_time=$(get_timestamp) + + log_info "Gathering statistics for specific VMs (count: $vm_count) to $report_dir" + log_info "Start time: $(formatted_date $start_time)" + + local task_start=$(get_timestamp) + task statistic:get-stat:vm NAMESPACE=$namespace OUTPUT_DIR=$(realpath $report_dir) VM_COUNT=$vm_count + local task_end=$(get_timestamp) + local task_duration=$((task_end - task_start)) + log_info "Task statistic:get-stat:vm for specific VMs completed in $(format_duration $task_duration)" + log_duration "Task statistic:get-stat:vm specific" "$task_duration" + + mv tools/statistic/*.csv ${report_dir} 2>/dev/null || true + + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + log_info "Specific VM statistics gathering completed in $(format_duration $duration)" + log_success "Specific VM statistics gathered" +} + +collect_vpa() { + local scenario_dir=$1 + local vpa_dir="$scenario_dir/vpa" + local start_time=$(get_timestamp) + + mkdir -p ${vpa_dir} + log_info "Collecting VPA data to $vpa_dir" + log_info "Start time: $(formatted_date $start_time)" + + local list_start=$(get_timestamp) + local VPAS=( $(kubectl -n d8-virtualization get vpa -o name 2>/dev/null || true) ) + local list_end=$(get_timestamp) + local list_duration=$((list_end - list_start)) + log_info "VPA list retrieval completed in $(format_duration $list_duration)" + log_duration "VPA list retrieval" "$list_duration" + + if [ ${#VPAS[@]} -eq 0 ]; then + log_warning "No VPA 
resources found" + return + fi + + local collect_start=$(get_timestamp) + for vpa in $VPAS; do + vpa_name=$(echo $vpa | cut -d "/" -f2) + file="vpa_${vpa_name}.yaml" + kubectl -n d8-virtualization get $vpa -o yaml > "${vpa_dir}/${file}_$(formatted_date $(get_timestamp))" 2>/dev/null || true + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." + kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s + local collect_end=$(get_timestamp) + local collect_duration=$((collect_end - collect_start)) + log_info "VPA data collection completed in $(format_duration $collect_duration)" + log_duration "VPA data collection" "$collect_duration" + + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + log_info "VPA collection completed in $(format_duration $duration)" + log_success "VPA data collected" +} \ No newline at end of file diff --git a/tests/performance/lib/vm_operations.sh b/tests/performance/lib/vm_operations.sh new file mode 100755 index 0000000000..b7cf2a02ab --- /dev/null +++ b/tests/performance/lib/vm_operations.sh @@ -0,0 +1,513 @@ +#!/usr/bin/env bash + +# VM operations library for performance testing +# This module handles VM lifecycle management operations + +# Source common utilities +source "$(dirname "${BASH_SOURCE[0]}")/common.sh" + +wait_vm_vd() { + local sleep_time=${1:-10} + + while true; do + local VDReady=$(kubectl -n $NAMESPACE get vd | grep "Ready" | wc -l) + local VDTotal=$(kubectl -n $NAMESPACE get vd -o name | wc -l) + + local VMReady=$(kubectl -n $NAMESPACE get vm | grep "Running" | wc -l) + local VMTotal=$(kubectl -n $NAMESPACE get vm -o name | wc -l) + + if [ $VDReady -eq $VDTotal ] && [ $VMReady -eq $VMTotal ]; then + echo "All vms and vds are ready" + echo "$(formatted_date $(get_timestamp))" + echo "" + break + fi + + echo "" + echo "Waiting for vms and vds to be ready..." + echo "VM Running: $VMReady/$VMTotal" + echo "VD Ready: $VDReady/$VDTotal" + echo "" + echo "Waiting for $sleep_time seconds..." + sleep $sleep_time + echo "" + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." + kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s +} + +wait_vm() { + local sleep_time=${1:-10} + local expected_count=$2 + local VMTotal + local VMRunning + + while true; do + VMRunning=$(kubectl -n $NAMESPACE get vm | grep "Running" | wc -l) + + if [ -n "$expected_count" ]; then + VMTotal=$expected_count + else + VMTotal=$(kubectl -n $NAMESPACE get vm -o name | wc -l) + fi + + if [ $VMRunning -eq $VMTotal ]; then + echo "All vms are ready" + echo "$(formatted_date $(get_timestamp))" + echo "" + break + fi + + echo "" + echo "Waiting for vms to be running..." + echo "VM Running: $VMRunning/$VMTotal" + echo "" + echo "Waiting for $sleep_time seconds..." + sleep $sleep_time + echo "" + + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." 
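+    # When expected_count is given, wait_vm compares the number of Running VMs against it instead of counting all VM objects in the namespace.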
+ kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s +} + +wait_vd() { + local sleep_time=${1:-10} + local expected_count=$2 + local VDReady + local VDTotal + + while true; do + VDReady=$(kubectl -n $NAMESPACE get vd | grep "Ready" | wc -l) + + if [ -n "$expected_count" ]; then + VDTotal=$expected_count + else + VDTotal=$(kubectl -n $NAMESPACE get vd -o name | wc -l) + fi + + if [ $VDReady -eq $VDTotal ]; then + echo "All vds are ready" + echo "$(formatted_date $(get_timestamp))" + echo "" + break + fi + + echo "" + echo "Waiting for vds to be ready..." + echo "VD ready: $VDReady/$VDTotal" + echo "" + echo "Waiting for $sleep_time seconds..." + sleep $sleep_time + echo "" + + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." + kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s +} + +wait_for_resources() { + local resource_type=$1 + local expected_count=$2 + local start_time=$(get_timestamp) + local check_interval=5 # seconds + + case $resource_type in + "all") + log_info "Waiting for VMs and VDs to be ready" + wait_vm_vd $check_interval + ;; + "vm") + log_info "Waiting for VMs to be ready" + wait_vm $check_interval $expected_count + ;; + "vd") + log_info "Waiting for VDs to be ready" + wait_vd $check_interval $expected_count + ;; + *) + log_error "Unknown resource type: $resource_type" + return 1 + ;; + esac +} + +stop_vm() { + local count=$1 + local sleep_time=${2:-5} + local start_time=$(get_timestamp) + local stopped_vm + + if [ -z "$count" ]; then + local vms=($(kubectl -n $NAMESPACE get vm | grep "Running" | awk '{print $1}')) + else + # Stop vm from the end + local vms=($(kubectl -n $NAMESPACE get vm | grep "Running" | awk '{print $1}' | tail -n $count)) + fi + + if [ ${#vms[@]} -eq 0 ]; then + log_warning "No running VMs found to stop" + echo "0" + return 0 + fi + + log_info "Stopping ${#vms[@]} VMs" + log_vm_operation "Stopping ${#vms[@]} VMs: ${vms[*]}" + for vm in "${vms[@]}"; do + log_info "Stopping VM $vm" + log_vm_operation "Stopping VM $vm" + d8 v -n $NAMESPACE stop $vm --wait=false + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." + kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s + + local total=${#vms[@]} + + # Wait for vms to stop + while true; do + stopped_vm=0 + + for vm in "${vms[@]}"; do + local status=$(kubectl -n $NAMESPACE get vm $vm -o jsonpath='{.status.phase}') + if [ "$status" == "Stopped" ]; then + (( stopped_vm+=1 )) + fi + done + + # Additional wait using kubectl wait + # log_info "Additional wait for deployment to be fully available..." + # kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s + + stopped=${#stopped_vm[@]} + + if [ $stopped_vm -eq $total ]; then + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + formatted_duration=$(format_duration "$duration") + + log_success "All VMs stopped - Duration: $duration seconds" + log_info "Execution time: $formatted_duration" + log_vm_operation "All VMs stopped - Duration: $duration seconds" + break + fi + + log_info "Waiting for VMs to be stopped... VM stopped: $stopped_vm/$total" + log_vm_operation "Waiting for VMs to be stopped... 
VM stopped: $stopped_vm/$total" + sleep $sleep_time + done + + # Additional wait using kubectl wait + # log_info "Additional wait for deployment to be fully available..." + # kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s +} + +# FIXED: Properly wait for VMs to be Running +start_vm() { + local count=$1 + local sleep_time=${2:-5} + local start_time=$(get_timestamp) + + if [ -z "$count" ]; then + local vms=($(kubectl -n $NAMESPACE get vm | grep "Stopped" | awk '{print $1}')) + else + # Start vm from the end + local vms=($(kubectl -n $NAMESPACE get vm | grep "Stopped" | awk '{print $1}' | tail -n $count)) + fi + + if [ ${#vms[@]} -eq 0 ]; then + log_warning "No stopped VMs found to start" + echo "0" + return + fi + + log_info "Starting ${#vms[@]} VMs" + log_vm_operation "Starting ${#vms[@]} VMs: ${vms[*]}" + for vm in "${vms[@]}"; do + log_info "Starting VM $vm" + log_vm_operation "Starting VM $vm" + d8 v -n $NAMESPACE start $vm + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." + kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s + + # Store the VMs we started for monitoring + local started_vms=("${vms[@]}") + local total=${#started_vms[@]} + + while true; do + local running_vm=0 + + for vm in "${started_vms[@]}"; do + local status=$(kubectl -n $NAMESPACE get vm $vm -o jsonpath='{.status.phase}' 2>/dev/null || echo "NotFound") + if [ "$status" == "Running" ]; then + (( running_vm+=1 )) + fi + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." + kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s + + if [ $running_vm -eq $total ]; then + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + formatted_duration=$(format_duration "$duration") + + log_success "All VMs started - Duration: $duration seconds" + log_info "Execution time: $formatted_duration" + log_vm_operation "All VMs started - Duration: $duration seconds" + break + fi + + log_info "Waiting for VMs to be running... VM running: $running_vm/$total" + log_vm_operation "Waiting for VMs to be running... VM running: $running_vm/$total" + sleep $sleep_time + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." 
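+ # Note: the polling loop above tracks only the VMs started by this call
+ # (started_vms); VMs started by anything else do not affect the exit condition.
+ # Illustrative usage sketch: start the last 5 stopped VMs, polling every 10 seconds:
+ #   start_vm 5 10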
+ kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s +} + +deploy_vms_with_disks() { + local count=$1 + local vi_type=$2 + local start_time=$(get_timestamp) + + log_info "Deploying $count VMs with disks from $vi_type" + log_info "Start time: $(formatted_date $start_time)" + + local task_start=$(get_timestamp) + task apply:all \ + COUNT=$count \ + NAMESPACE=$NAMESPACE \ + STORAGE_CLASS=$(get_default_storage_class) \ + VIRTUALDISK_TYPE=virtualDisk \ + VIRTUALIMAGE_TYPE=$vi_type + + local task_end=$(get_timestamp) + local task_duration=$((task_end - task_start)) + log_info "Task apply:all completed in $(format_duration $task_duration)" + log_duration "Task apply:all" "$task_duration" + + local wait_start=$(get_timestamp) + wait_vm_vd $SLEEP_TIME + local wait_end=$(get_timestamp) + local wait_duration=$((wait_end - wait_start)) + log_info "Wait for VMs and VDs completed in $(format_duration $wait_duration)" + log_duration "Wait for VMs and VDs" "$wait_duration" + + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + local formatted_duration=$(format_duration "$duration") + + log_info "Deployment completed - End time: $(formatted_date $end_time)" + log_info "Task execution: $(format_duration $task_duration), Wait time: $(format_duration $wait_duration)" + log_success "Deployed $count VMs with disks in $formatted_duration" +} + +deploy_disks_only() { + local count=$1 + local vi_type=$2 + local start_time=$(get_timestamp) + + log_info "Deploying $count disks from $vi_type" + log_info "Start time: $(formatted_date $start_time)" + + task apply:disks \ + COUNT=$count \ + NAMESPACE=$NAMESPACE \ + STORAGE_CLASS=$(get_default_storage_class) \ + VIRTUALDISK_TYPE=virtualDisk \ + VIRTUALIMAGE_TYPE=$vi_type + + wait_vd $SLEEP_TIME + + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + local formatted_duration=$(format_duration "$duration") + + log_info "Disk deployment completed - End time: $(formatted_date $end_time)" + log_success "Deployed $count disks in $formatted_duration" + echo "$duration" +} + +deploy_vms_only() { + local count=$1 + local namespace=${2:-$NAMESPACE} + local start_time=$(get_timestamp) + + log_info "Deploying $count VMs (disks already exist)" + log_info "Start time: $(formatted_date $start_time)" + + local task_start=$(get_timestamp) + task apply:vms \ + COUNT=$count \ + NAMESPACE=$NAMESPACE + local task_end=$(get_timestamp) + local task_duration=$((task_end - task_start)) + log_info "Task apply:vms completed in $(format_duration $task_duration)" + log_duration "Task apply:vms" "$task_duration" + + local wait_start=$(get_timestamp) + wait_vm $SLEEP_TIME + local wait_end=$(get_timestamp) + local wait_duration=$((wait_end - wait_start)) + log_info "Wait for VMs completed in $(format_duration $wait_duration)" + log_duration "Wait for VMs" "$wait_duration" + + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + local formatted_duration=$(format_duration "$duration") + + log_info "VM deployment completed - End time: $(formatted_date $end_time)" + log_info "Task execution: $(format_duration $task_duration), Wait time: $(format_duration $wait_duration)" + log_success "Deployed $count VMs in $formatted_duration" + echo "$duration" +} + +# FIXED: Properly undeploy VMs from the end +undeploy_vms_only() { + local count=${1:-0} + local namespace=${2:-$NAMESPACE} + local start_time=$(get_timestamp) + + log_info "Undeploying $count VMs from the end 
(disks will remain)" + log_info "Start time: $(formatted_date $start_time)" + + # Get list of VMs and select the last 'count' ones + local vms=($(kubectl -n $NAMESPACE get vm -o name | tail -n $count)) + + if [ ${#vms[@]} -eq 0 ]; then + log_warning "No VMs found to undeploy" + echo "0" + return 0 + fi + + log_info "Undeploying ${#vms[@]} VMs: ${vms[*]}" + log_vm_operation "Undeploying ${#vms[@]} VMs from the end: ${vms[*]}" + + local delete_start=$(get_timestamp) + for vm in "${vms[@]}"; do + log_info "Deleting VM $vm" + log_vm_operation "Deleting VM $vm" + kubectl -n $NAMESPACE delete $vm --wait=false || true + done + + local delete_end=$(get_timestamp) + local delete_duration=$((delete_end - delete_start)) + log_info "VM deletion commands completed in $(format_duration $delete_duration)" + log_vm_operation "VM deletion commands completed in $(format_duration $delete_duration)" + + local wait_start=$(get_timestamp) + while true; do + local remaining_vms=0 + local current_time=$(get_timestamp) + + log_info "Deleting remaining VMs..." + for vm in "${vms[@]}"; do + if kubectl -n $NAMESPACE get $vm >/dev/null 2>&1; then + log_info "Deleting VM $vm" + kubectl -n $NAMESPACE delete $vm --wait=false || true + fi + done + + for vm in "${vms[@]}"; do + # Check if VM exists and is not in Terminating state + local vm_status=$(kubectl -n $NAMESPACE get $vm -o jsonpath='{.status.phase}' 2>/dev/null || echo "NotFound") + if [ "$vm_status" != "NotFound" ] && [ "$vm_status" != "Terminating" ]; then + ((remaining_vms++)) + log_info "VM $vm still exists with status: $vm_status" + fi + done + + if [ $remaining_vms -eq 0 ]; then + local wait_end=$(get_timestamp) + local wait_duration=$((wait_end - wait_start)) + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + local formatted_duration=$(format_duration "$duration") + + log_info "Wait for VMs undeploy completed in $(format_duration $wait_duration)" + log_info "All $count VMs undeployed - End time: $(formatted_date $end_time)" + log_info "Delete commands: $(format_duration $delete_duration), Wait time: $(format_duration $wait_duration)" + log_success "Undeployed $count VMs in $formatted_duration" + log_vm_operation "Undeployed $count VMs in $formatted_duration" + break + fi + + log_info "Waiting for VMs to be undeployed... Remaining: $remaining_vms/$count" + log_vm_operation "Waiting for VMs to be undeployed... Remaining: $remaining_vms/$count" + sleep $SLEEP_TIME + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." 
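+ # Note: unlike deploy_vms_only, this function does not echo its duration to
+ # stdout (the echo below is commented out), so callers should rely on the logs.
+ # Illustrative usage sketch: undeploy the last 3 VMs while keeping their disks:
+ #   undeploy_vms_only 3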
+ kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s + + # echo "$duration" +} + +undeploy_resources() { + local sleep_time=${1:-5} + local start_time=$(get_timestamp) + local VDTotal + local VMTotal + local VMITotal + + log_info "Undeploying all VMs and disks" + log_info "Start time: $(formatted_date $start_time)" + + task destroy:all \ + NAMESPACE=$NAMESPACE + # Wait a bit for Helm to process the deletion + sleep 5 + + # # Force delete any remaining resources + # kubectl -n $NAMESPACE delete vm --all --ignore-not-found=true --force --grace-period=0 + # kubectl -n $NAMESPACE delete vd --all --ignore-not-found=true --force --grace-period=0 + # kubectl -n $NAMESPACE delete vi --all --ignore-not-found=true --force --grace-period=0 + # local max_wait_time=600 # Maximum wait time in seconds (10 minutes) + # local wait_timeout=$((start_time + max_wait_time)) + + while true; do + # local current_time=$(get_timestamp) + + # Check for timeout + # if [ $current_time -gt $wait_timeout ]; then + # log_warning "Timeout reached while waiting for resources to be destroyed" + # break + # fi + + VDTotal=$(kubectl -n $NAMESPACE get vd -o name | wc -l) + VMTotal=$(kubectl -n $NAMESPACE get vm -o name | wc -l) + VMITotal=$(kubectl -n $NAMESPACE get vi -o name | wc -l) + + if [ $VDTotal -eq 0 ] && [ $VMTotal -eq 0 ] && [ $VMITotal -eq 0 ]; then + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + local formatted_duration=$(format_duration "$duration") + + log_info "All VMs and VDs destroyed - End time: $(formatted_date $end_time)" + log_success "Undeploy completed in $formatted_duration" + break + fi + + log_info "Waiting for VMs and VDs to be destroyed... VM: $VMTotal, VD: $VDTotal, VI: $VMITotal" + sleep $sleep_time + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." 
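+ # Note: the max_wait_time / wait_timeout guard above is commented out, so the
+ # polling loop waits indefinitely if any VM, VD, or VI never disappears
+ # (for example, a resource held back by a finalizer).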
+ kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s +} diff --git a/tests/performance/lib/vm_operations_batch.sh b/tests/performance/lib/vm_operations_batch.sh new file mode 100644 index 0000000000..1a844ea12d --- /dev/null +++ b/tests/performance/lib/vm_operations_batch.sh @@ -0,0 +1,142 @@ +# Batch deployment functions for VM operations + +# Function to check if batch deployment should be used +should_use_batch_deployment() { + local count=$1 + # Don't use batch deployment if batch size is too small (less than 10% of total) + local min_batch_size=$((count / 10)) + if [ $min_batch_size -lt 1 ]; then + min_batch_size=1 + fi + + # Warn if batch size is too small + if [ $MAX_BATCH_SIZE -lt $min_batch_size ]; then + log_warning "Batch size ($MAX_BATCH_SIZE) is too small for $count resources" + log_warning "Minimum recommended batch size: $min_batch_size" + log_warning "Using regular deployment instead of batch deployment" + return 1 # false + fi + + if [ "$BATCH_DEPLOYMENT_ENABLED" = "true" ] || [ $count -gt $MAX_BATCH_SIZE ]; then + return 0 # true + else + return 1 # false + fi +} + +# Function to show deployment progress +show_deployment_progress() { + local current_count=$1 + local total_count=$2 + local batch_number=$3 + local total_batches=$4 + local start_time=$5 + + local current_time=$(get_timestamp) + local elapsed_time=$((current_time - start_time)) + local progress_percent=$(( (current_count * 100) / total_count )) + + # Calculate estimated time remaining + local estimated_total_time=0 + local estimated_remaining_time=0 + if [ $current_count -gt 0 ]; then + estimated_total_time=$(( (elapsed_time * total_count) / current_count )) + estimated_remaining_time=$((estimated_total_time - elapsed_time)) + fi + + log_info "Progress: $current_count/$total_count ($progress_percent%)" + log_info "Batch: $batch_number/$total_batches" + log_info "Elapsed: $(format_duration $elapsed_time)" + if [ $estimated_remaining_time -gt 0 ]; then + log_info "Estimated remaining: $(format_duration $estimated_remaining_time)" + fi +} + +# New function for batch deployment of large numbers of resources +deploy_vms_with_disks_batch() { + local total_count=$1 + local vi_type=$2 + local batch_size=${3:-$MAX_BATCH_SIZE} + local start_time=$(get_timestamp) + + log_info "Starting batch deployment of $total_count VMs with disks from $vi_type" + log_info "Batch size: $batch_size resources per batch" + log_info "Start time: $(formatted_date $start_time)" + + local deployed_count=0 + local batch_number=1 + local total_batches=$(( (total_count + batch_size - 1) / batch_size )) + + log_info "Total batches to deploy: $total_batches" + + while [ $deployed_count -lt $total_count ]; do + local remaining_count=$((total_count - deployed_count)) + local current_batch_size=$batch_size + + # Adjust batch size for the last batch if needed + if [ $remaining_count -lt $batch_size ]; then + current_batch_size=$remaining_count + fi + + log_info "=== Batch $batch_number/$total_batches ===" + show_deployment_progress "$deployed_count" "$total_count" "$batch_number" "$total_batches" "$start_time" + + local batch_start=$(get_timestamp) + + # Deploy current batch (COUNT should be cumulative, not absolute) + local cumulative_count=$((deployed_count + current_batch_size)) + log_info "Deploying batch $batch_number: $current_batch_size new resources (total will be: $cumulative_count)" + task apply:all \ + COUNT=$cumulative_count \ + NAMESPACE=$NAMESPACE \ + 
STORAGE_CLASS=$(get_default_storage_class) \ + VIRTUALDISK_TYPE=virtualDisk \ + VIRTUALIMAGE_TYPE=$vi_type + + # Wait for current batch to be ready + wait_vm_vd $SLEEP_TIME + + local batch_end=$(get_timestamp) + local batch_duration=$((batch_end - batch_start)) + deployed_count=$((deployed_count + current_batch_size)) + + log_success "Batch $batch_number completed in $(format_duration $batch_duration)" + log_info "Total deployed so far: $deployed_count/$total_count" + + # Add delay between batches to avoid overwhelming the system + if [ $batch_number -lt $total_batches ]; then + log_info "Waiting 30 seconds before next batch..." + sleep 30 + fi + + ((batch_number++)) + done + + local end_time=$(get_timestamp) + local total_duration=$((end_time - start_time)) + local formatted_duration=$(format_duration "$total_duration") + + log_success "Batch deployment completed: $deployed_count VMs with disks in $formatted_duration" + log_info "Average time per resource: $(( total_duration / deployed_count )) seconds" + + echo "$total_duration" +} + +# Universal deployment function that automatically chooses between regular and batch deployment +deploy_vms_with_disks_smart() { + local count=$1 + local vi_type=$2 + local batch_size=${3:-$MAX_BATCH_SIZE} + + log_info "Deployment decision for $count resources:" + log_info " - Batch size: $batch_size" + log_info " - Batch deployment enabled: $BATCH_DEPLOYMENT_ENABLED" + + if should_use_batch_deployment "$count"; then + log_info "Using batch deployment for $count resources (batch size: $batch_size)" + deploy_vms_with_disks_batch "$count" "$vi_type" "$batch_size" + else + log_info "Using regular deployment for $count resources" + deploy_vms_with_disks "$count" "$vi_type" + fi +} diff --git a/tests/performance/manage_vms.sh b/tests/performance/manage_vms.sh new file mode 100644 index 0000000000..edc27cd320 --- /dev/null +++ b/tests/performance/manage_vms.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +kubectl get vm -A | grep Running | awk '{print $1, $2}' | while read -r namespace vm; do + kubectl -n "$namespace" patch vm "$vm" --type=merge -p '{"spec":{"runPolicy":"AlwaysOff"}}' +done + diff --git a/tests/performance/monitoring/virtualization-dashboard.yaml b/tests/performance/monitoring/virtualization-dashboard.yaml index 96c01f5ad7..fe88598ac5 100644 --- a/tests/performance/monitoring/virtualization-dashboard.yaml +++ b/tests/performance/monitoring/virtualization-dashboard.yaml @@ -30,6 +30,7 @@ spec: "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, + "id": 3732, "links": [], "liveNow": false, "panels": [ @@ -108,7 +109,7 @@ spec: "sizing": "auto", "valueMode": "color" }, - "pluginVersion": "10.4.5", + "pluginVersion": "v10.4.19+security-01", "targets": [ { "datasource": { @@ -179,7 +180,7 @@ spec: "sizing": "auto", "valueMode": "color" }, - "pluginVersion": "10.4.5", + "pluginVersion": "v10.4.19+security-01", "targets": [ { "datasource": { @@ -251,7 +252,7 @@ spec: "sizing": "auto", "valueMode": "color" }, - "pluginVersion": "10.4.5", + "pluginVersion": "v10.4.19+security-01", "targets": [ { "datasource": { @@ -259,7 +260,7 @@ spec: "uid": "${ds_prometheus}" }, "editorMode": "code", - "expr": "sum(d8_virtualization_virtualdisk_status_phase{prometheus!=\"deckhouse\",namespace=~\"$namespace\"}) by (phase)", + "expr": "sum(d8_virtualization_virtualdisk_status_phase{namespace=~\"$namespace\"}) by (phase)", "hide": false, "instant": false, "legendFormat": "__auto", @@ -323,7 +324,7 @@ spec: "sizing": "auto", "valueMode": "color" }, - "pluginVersion": 
"10.4.5", + "pluginVersion": "v10.4.19+security-01", "targets": [ { "datasource": { @@ -331,7 +332,7 @@ spec: "uid": "${ds_prometheus}" }, "editorMode": "code", - "expr": "sum(d8_virtualization_virtualmachineblockdeviceattachment_status_phase{prometheus!=\"deckhouse\",namespace=~\"$namespace\"}) by (phase)", + "expr": "sum(d8_virtualization_virtualmachineblockdeviceattachment_status_phase{namespace=~\"$namespace\"}) by (phase)", "hide": false, "instant": false, "legendFormat": "__auto", @@ -393,7 +394,7 @@ spec: "type": "linear" }, "showPoints": "always", - "spanNulls": true, + "spanNulls": false, "stacking": { "group": "A", "mode": "none" @@ -589,7 +590,7 @@ spec: "type": "linear" }, "showPoints": "always", - "spanNulls": true, + "spanNulls": false, "stacking": { "group": "A", "mode": "none" @@ -639,7 +640,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "sum(d8_virtualization_virtualmachine_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\"}==1) by (phase)", + "expr": "sum(d8_virtualization_virtualmachine_status_phase{namespace=~\"$namespace\"}==1) by (phase)", "format": "time_series", "hide": false, "instant": false, @@ -741,7 +742,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "count(d8_virtualization_virtualmachine_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\",phase=~\"Migrating|Running\"}==1) by (node)", + "expr": "count(d8_virtualization_virtualmachine_status_phase{namespace=~\"$namespace\",phase=~\"Migrating|Running\"}==1) by (node)", "format": "time_series", "hide": false, "instant": false, @@ -873,8 +874,7 @@ spec: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "light-red", @@ -947,7 +947,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "(d8_virtualization_virtualmachine_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\",phase=\"Failed\"} > 0) - 1 + 0", + "expr": "(d8_virtualization_virtualmachine_status_phase{namespace=~\"$namespace\",phase=\"Failed\"} > 0) - 1 + 0", "format": "time_series", "hide": false, "instant": false, @@ -962,7 +962,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "(d8_virtualization_virtualmachine_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\",phase=\"Stopping\"} > 0) - 1 + 1", + "expr": "(d8_virtualization_virtualmachine_status_phase{namespace=~\"$namespace\",phase=\"Stopping\"} > 0) - 1 + 1", "format": "time_series", "hide": false, "instant": false, @@ -977,7 +977,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "(d8_virtualization_virtualmachine_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\",phase=\"Stopped\"} > 0) - 1 + 2", + "expr": "(d8_virtualization_virtualmachine_status_phase{namespace=~\"$namespace\",phase=\"Stopped\"} > 0) - 1 + 2", "format": "time_series", "hide": false, "instant": false, @@ -992,7 +992,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "(d8_virtualization_virtualmachine_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\",phase=\"Terminating\"} > 0) - 1 + 3", + "expr": "(d8_virtualization_virtualmachine_status_phase{namespace=~\"$namespace\",phase=\"Terminating\"} > 0) - 1 + 3", "format": "time_series", "hide": false, "instant": false, @@ -1007,7 +1007,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "(d8_virtualization_virtualmachine_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\",phase=\"Pending\"} > 0) - 1 + 4", + "expr": 
"(d8_virtualization_virtualmachine_status_phase{namespace=~\"$namespace\",phase=\"Pending\"} > 0) - 1 + 4", "format": "time_series", "hide": false, "instant": false, @@ -1022,7 +1022,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "(d8_virtualization_virtualmachine_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\",phase=\"Pause\"} > 0) - 1 + 5", + "expr": "(d8_virtualization_virtualmachine_status_phase{namespace=~\"$namespace\",phase=\"Pause\"} > 0) - 1 + 5", "format": "time_series", "hide": false, "instant": false, @@ -1037,7 +1037,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "(d8_virtualization_virtualmachine_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\",phase=\"Scheduling\"} > 0) - 1 + 6", + "expr": "(d8_virtualization_virtualmachine_status_phase{namespace=~\"$namespace\",phase=\"Scheduling\"} > 0) - 1 + 6", "format": "time_series", "hide": false, "instant": false, @@ -1052,7 +1052,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "(d8_virtualization_virtualmachine_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\",phase=\"Starting\"} > 0) - 1 + 7", + "expr": "(d8_virtualization_virtualmachine_status_phase{namespace=~\"$namespace\",phase=\"Starting\"} > 0) - 1 + 7", "format": "time_series", "hide": false, "instant": false, @@ -1067,7 +1067,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "(d8_virtualization_virtualmachine_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\",phase=\"Running\"} > 0) - 1 + 8", + "expr": "(d8_virtualization_virtualmachine_status_phase{namespace=~\"$namespace\",phase=\"Running\"} > 0) - 1 + 8", "format": "time_series", "hide": false, "instant": false, @@ -1082,7 +1082,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "(d8_virtualization_virtualmachine_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\",phase=\"Migrating\"} > 0) - 1 + 9", + "expr": "(d8_virtualization_virtualmachine_status_phase{namespace=~\"$namespace\",phase=\"Migrating\"} > 0) - 1 + 9", "format": "time_series", "hide": false, "instant": false, @@ -1117,8 +1117,7 @@ spec: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1167,7 +1166,7 @@ spec: } ] }, - "pluginVersion": "10.4.5", + "pluginVersion": "v10.4.19+security-01", "targets": [ { "datasource": { @@ -1244,8 +1243,7 @@ spec: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1294,7 +1292,7 @@ spec: } ] }, - "pluginVersion": "10.4.5", + "pluginVersion": "v10.4.19+security-01", "targets": [ { "datasource": { @@ -1303,7 +1301,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "d8_virtualization_virtualmachine_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\", phase!=\"Running\"} > 0", + "expr": "d8_virtualization_virtualmachine_status_phase{namespace=~\"$namespace\", phase!=\"Running\"} > 0", "format": "table", "instant": true, "legendFormat": "__auto", @@ -1395,7 +1393,7 @@ spec: "type": "linear" }, "showPoints": "always", - "spanNulls": true, + "spanNulls": false, "stacking": { "group": "A", "mode": "none" @@ -1409,8 +1407,7 @@ spec: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] } @@ -1445,7 +1442,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "count(d8_virtualization_virtualdisk_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\"}==1) by (phase)", + 
"expr": "count(d8_virtualization_virtualdisk_status_phase{namespace=~\"$namespace\"}==1) by (phase)", "format": "time_series", "hide": false, "instant": false, @@ -1460,7 +1457,7 @@ spec: { "datasource": { "type": "prometheus", - "uid": "P0D6E4079E36703EB" + "uid": "${ds_prometheus}" }, "description": "", "fieldConfig": { @@ -1562,8 +1559,7 @@ spec: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "light-red", @@ -1631,11 +1627,11 @@ spec: { "datasource": { "type": "prometheus", - "uid": "P0D6E4079E36703EB" + "uid": "${ds_prometheus}" }, "editorMode": "code", "exemplar": false, - "expr": "(d8_virtualization_virtualdisk_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\",phase=\"Unknown\"} > 0) - 1 + 0", + "expr": "(d8_virtualization_virtualdisk_status_phase{namespace=~\"$namespace\",phase=\"Unknown\"} > 0) - 1 + 0", "format": "time_series", "hide": false, "instant": false, @@ -1646,11 +1642,11 @@ spec: { "datasource": { "type": "prometheus", - "uid": "P0D6E4079E36703EB" + "uid": "${ds_prometheus}" }, "editorMode": "code", "exemplar": false, - "expr": "(d8_virtualization_virtualdisk_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\",phase=\"PVCLost\"} > 0) - 1 + 1", + "expr": "(d8_virtualization_virtualdisk_status_phase{namespace=~\"$namespace\",phase=\"PVCLost\"} > 0) - 1 + 1", "format": "time_series", "hide": false, "instant": false, @@ -1661,11 +1657,11 @@ spec: { "datasource": { "type": "prometheus", - "uid": "P0D6E4079E36703EB" + "uid": "${ds_prometheus}" }, "editorMode": "code", "exemplar": false, - "expr": "(d8_virtualization_virtualdisk_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\",phase=\"Failed\"} > 0) - 1 + 2", + "expr": "(d8_virtualization_virtualdisk_status_phase{namespace=~\"$namespace\",phase=\"Failed\"} > 0) - 1 + 2", "format": "time_series", "hide": false, "instant": false, @@ -1676,11 +1672,11 @@ spec: { "datasource": { "type": "prometheus", - "uid": "P0D6E4079E36703EB" + "uid": "${ds_prometheus}" }, "editorMode": "code", "exemplar": false, - "expr": "(d8_virtualization_virtualdisk_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\",phase=\"Pending\"} > 0) - 1 + 3", + "expr": "(d8_virtualization_virtualdisk_status_phase{namespace=~\"$namespace\",phase=\"Pending\"} > 0) - 1 + 3", "format": "time_series", "hide": false, "instant": false, @@ -1691,11 +1687,11 @@ spec: { "datasource": { "type": "prometheus", - "uid": "P0D6E4079E36703EB" + "uid": "${ds_prometheus}" }, "editorMode": "code", "exemplar": false, - "expr": "(d8_virtualization_virtualdisk_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\",phase=\"WaitForUserUpload\"} > 0) - 1 + 4", + "expr": "(d8_virtualization_virtualdisk_status_phase{namespace=~\"$namespace\",phase=\"WaitForUserUpload\"} > 0) - 1 + 4", "format": "time_series", "hide": false, "instant": false, @@ -1706,11 +1702,11 @@ spec: { "datasource": { "type": "prometheus", - "uid": "P0D6E4079E36703EB" + "uid": "${ds_prometheus}" }, "editorMode": "code", "exemplar": false, - "expr": "(d8_virtualization_virtualdisk_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\",phase=\"Provisioning\"} > 0) - 1 + 5", + "expr": "(d8_virtualization_virtualdisk_status_phase{namespace=~\"$namespace\",phase=\"Provisioning\"} > 0) - 1 + 5", "format": "time_series", "hide": false, "instant": false, @@ -1721,11 +1717,11 @@ spec: { "datasource": { "type": "prometheus", - "uid": "P0D6E4079E36703EB" + "uid": "${ds_prometheus}" }, "editorMode": 
"code", "exemplar": false, - "expr": "(d8_virtualization_virtualdisk_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\",phase=\"Ready\"} > 0) -1 + 6", + "expr": "(d8_virtualization_virtualdisk_status_phase{namespace=~\"$namespace\",phase=\"Ready\"} > 0) -1 + 6", "format": "time_series", "hide": false, "instant": false, @@ -1760,8 +1756,7 @@ spec: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" } ] } @@ -1806,7 +1801,7 @@ spec: } ] }, - "pluginVersion": "10.4.5", + "pluginVersion": "v10.4.19+security-01", "targets": [ { "datasource": { @@ -1815,7 +1810,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "d8_virtualization_virtualdisk_status_phase{prometheus!=\"deckhouse\",namespace=~\"$namespace\"}==1", + "expr": "d8_virtualization_virtualdisk_status_phase{namespace=~\"$namespace\"}==1", "format": "table", "hide": false, "instant": true, @@ -1880,8 +1875,7 @@ spec: "mode": "absolute", "steps": [ { - "color": "green", - "value": null + "color": "green" }, { "color": "red", @@ -1930,7 +1924,7 @@ spec: } ] }, - "pluginVersion": "10.4.5", + "pluginVersion": "v10.4.19+security-01", "targets": [ { "datasource": { @@ -1939,7 +1933,7 @@ spec: }, "editorMode": "code", "exemplar": false, - "expr": "d8_virtualization_virtualdisk_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\", phase!=\"Ready\"} > 0", + "expr": "d8_virtualization_virtualdisk_status_phase{namespace=~\"$namespace\", phase!=\"Ready\"} > 0", "format": "table", "instant": true, "legendFormat": "__auto", @@ -1981,7 +1975,7 @@ spec: "type": "table" }, { - "collapsed": true, + "collapsed": false, "gridPos": { "h": 1, "w": 24, @@ -1989,544 +1983,543 @@ spec: "y": 64 }, "id": 54, - "panels": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" + "panels": [], + "title": "VMBDA", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMin": 0, - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "smooth", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "always", - "spanNulls": true, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - } + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "overrides": [] - }, - "gridPos": { - "h": 10, - "w": 24, - "x": 0, - "y": 12 - }, - "id": 55, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "insertNulls": false, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": 
"solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "always", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" }, - "tooltip": { - "mode": "single", - "sort": "none" + "thresholdsStyle": { + "mode": "off" } }, - "pluginVersion": "8.5.13", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "count(d8_virtualization_virtualmachineblockdeviceattachment_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\"}==1) by (phase)", - "format": "time_series", - "hide": false, - "instant": false, - "legendFormat": "{{ phase }}", - "range": true, - "refId": "F" - } - ], - "title": "Count VMBDA Phases", - "type": "timeseries" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 65 + }, + "id": 55, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.5.13", + "targets": [ { "datasource": { "type": "prometheus", - "uid": "P0D6E4079E36703EB" - }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "axisSoftMax": 3, - "axisSoftMin": 1, - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, - "lineWidth": 3, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "area" - } - }, - "mappings": [ - { - "options": { - "0": { - "color": "red", - "index": 1, - "text": "Failed" - }, - "1": { - "color": "red", - "index": 2, - "text": "InProgress" - }, - "2": { - "color": "red", - "index": 3, - "text": "Attached" - }, - "3": { - "color": "red", - "index": 4, - "text": "-" - }, - "-1": { - "color": "red", - "index": 0, - "text": "-" - } - }, - "type": "value" - } - ], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "light-red", - "value": -1 - }, - { - "color": "light-green", - "value": 0.5 - } - ] - } - }, - "overrides": [] + "uid": "${ds_prometheus}" }, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 22 + "editorMode": "code", + "exemplar": false, + "expr": "count(d8_virtualization_virtualmachineblockdeviceattachment_status_phase{namespace=~\"$namespace\"}==1) by (phase)", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "{{ phase }}", + "range": true, + "refId": "F" + } + ], + "title": "Count VMBDA Phases", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "id": 9, - "options": { - "legend": { - "calcs": [], - "displayMode": "table", - "placement": "right", - "showLegend": true + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + 
"axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 3, + "axisSoftMin": 1, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "pluginVersion": "8.5.13", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "P0D6E4079E36703EB" - }, - "editorMode": "code", - "exemplar": false, - "expr": "d8_virtualization_virtualmachineblockdeviceattachment_status_phase{prometheus!=\"deckhouse\",namespace=~\"$namespace\",phase=\"Failed\"} > 0", - "format": "time_series", - "hide": false, - "instant": false, - "legendFormat": "{{ name }}", - "range": true, - "refId": "F" + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" }, - { - "datasource": { - "type": "prometheus", - "uid": "P0D6E4079E36703EB" - }, - "editorMode": "code", - "exemplar": false, - "expr": "(d8_virtualization_virtualmachineblockdeviceattachment_status_phase{prometheus!=\"deckhouse\",namespace=~\"$namespace\",phase=\"InProgress\"} > 0) + 1", - "format": "time_series", - "hide": false, - "instant": false, - "legendFormat": "{{ name }}", - "range": true, - "refId": "A" + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - { - "datasource": { - "type": "prometheus", - "uid": "P0D6E4079E36703EB" - }, - "editorMode": "code", - "exemplar": false, - "expr": "(d8_virtualization_virtualmachineblockdeviceattachment_status_phase{prometheus!=\"deckhouse\",namespace=~\"$namespace\",phase=\"Attached\"} > 0) + 2", - "format": "time_series", - "hide": false, - "instant": false, - "legendFormat": "{{ name }}", - "range": true, - "refId": "B" + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - { - "datasource": { - "type": "prometheus", - "uid": "P0D6E4079E36703EB" - }, - "editorMode": "code", - "exemplar": false, - "expr": "(d8_virtualization_virtualmachineblockdeviceattachment_status_phase{prometheus!=\"deckhouse\",namespace=~\"$namespace\",phase=\"Pending\"} > 0) + 3", - "format": "time_series", - "hide": false, - "instant": false, - "legendFormat": "{{ name }}", - "range": true, - "refId": "C" + "thresholdsStyle": { + "mode": "area" } - ], - "title": "VMBDA Phases TimeLine", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "left", - "cellOptions": { - "type": "auto" + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Failed" + }, + "1": { + "color": "red", + "index": 2, + "text": "InProgress" + }, + "2": { + "color": "red", + "index": 3, + "text": "Attached" + }, + "3": { + "color": "red", + "index": 4, + "text": "-" }, - "inspect": false + "-1": { + "color": "red", + "index": 0, + "text": "-" + } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] - } - }, - "overrides": [ + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ { - "matcher": { - "id": "byName", - "options": "Time" - }, - "properties": [ - { - "id": "custom.width", - "value": 377 - } - ] + "color": "green" }, { - "matcher": { - "id": "byName", - "options": "Name" - }, - "properties": [ - { - "id": "custom.width", - "value": 199 - } - ] - } - ] - }, - "gridPos": { - "h": 8, - "w": 6, - "x": 
12, - "y": 22 - }, - "id": 56, - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false - }, - "showHeader": true, - "sortBy": [ + "color": "light-red", + "value": -1 + }, { - "desc": false, - "displayName": "Name" + "color": "light-green", + "value": 0.5 } ] - }, - "pluginVersion": "10.4.5", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "d8_virtualization_virtualmachineblockdeviceattachment_status_phase{prometheus!=\"deckhouse\",namespace=~\"$namespace\"}==1", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "A" - } - ], - "title": "VMBDA ALL Phases", - "transformations": [ - { - "id": "merge", - "options": {} - }, - { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - "exported_namespace", - "name", - "phase" - ] - } - } - }, - { - "id": "organize", - "options": { - "excludeByName": {}, - "indexByName": {}, - "renameByName": { - "exported_namespace": "Namespace", - "name": "Name", - "phase": "Phase" - } - } - } - ], - "type": "table" + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 75 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.5.13", + "targets": [ { "datasource": { "type": "prometheus", "uid": "${ds_prometheus}" }, - "description": "", - "fieldConfig": { - "defaults": { - "color": { - "mode": "thresholds" - }, - "custom": { - "align": "left", - "cellOptions": { - "type": "auto" - }, - "inspect": false - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - } - ] + "editorMode": "code", + "exemplar": false, + "expr": "d8_virtualization_virtualmachineblockdeviceattachment_status_phase{namespace=~\"$namespace\",phase=\"Failed\"} > 0", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "{{ name }}", + "range": true, + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "(d8_virtualization_virtualmachineblockdeviceattachment_status_phase{namespace=~\"$namespace\",phase=\"InProgress\"} > 0) + 1", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "{{ name }}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "(d8_virtualization_virtualmachineblockdeviceattachment_status_phase{namespace=~\"$namespace\",phase=\"Attached\"} > 0) + 2", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "{{ name }}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "(d8_virtualization_virtualmachineblockdeviceattachment_status_phase{namespace=~\"$namespace\",phase=\"Pending\"} > 0) + 3", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "{{ name }}", + "range": true, + "refId": "C" + } + ], + "title": "VMBDA Phases TimeLine", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "description": "", + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "type": "auto" + }, + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" }, - "overrides": [ + "properties": [ { - "matcher": { - "id": "byName", - "options": "Time" - }, - "properties": [ - { - "id": "custom.width", - "value": 377 - } - ] + "id": "custom.width", + "value": 377 } ] }, - "gridPos": { - "h": 8, - "w": 6, - "x": 18, - "y": 22 - }, - "id": 57, - "options": { - "cellHeight": "sm", - "footer": { - "countRows": false, - "fields": "", - "reducer": [ - "sum" - ], - "show": false + { + "matcher": { + "id": "byName", + "options": "Name" }, - "showHeader": true, - "sortBy": [ + "properties": [ { - "desc": false, - "displayName": "Name" + "id": "custom.width", + "value": 199 } ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 6, + "x": 12, + "y": 75 + }, + "id": 56, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "Name" + } + ] + }, + "pluginVersion": "v10.4.19+security-01", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" }, - "pluginVersion": "10.4.5", - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${ds_prometheus}" - }, - "editorMode": "code", - "exemplar": false, - "expr": "d8_virtualization_virtualmachineblockdeviceattachment_status_phase{prometheus!=\"deckhouse\",namespace=~\"$namespace\",phase!=\"Attached\"}==1", - "format": "table", - "instant": true, - "legendFormat": "__auto", - "range": false, - "refId": "A" + "editorMode": "code", + "exemplar": false, + "expr": "d8_virtualization_virtualmachineblockdeviceattachment_status_phase{namespace=~\"$namespace\"}==1", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" + } + ], + "title": "VMBDA ALL Phases", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "exported_namespace", + "name", + "phase" + ] } - ], - "title": "VMBDA NotAttached Phases", - "transformations": [ - { - "id": "merge", - "options": {} + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "exported_namespace": "Namespace", + "name": "Name", + "phase": "Phase" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "custom": { + "align": "left", + "cellOptions": { + "type": "auto" }, - { - "id": "filterFieldsByName", - "options": { - "include": { - "names": [ - "exported_namespace", - "name", - "phase" - ] - } + "inspect": false + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" } + ] + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Time" }, - { - "id": "organize", - "options": { - "excludeByName": {}, - "indexByName": {}, - "renameByName": { - "exported_namespace": "Namespace", - "name": "Name", - "phase": "Phase" - } + "properties": [ + { + "id": "custom.width", + "value": 377 } - } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 
6, + "x": 18, + "y": 75 + }, + "id": 57, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" ], - "type": "table" + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "Name" + } + ] + }, + "pluginVersion": "v10.4.19+security-01", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "d8_virtualization_virtualmachineblockdeviceattachment_status_phase{namespace=~\"$namespace\",phase!=\"Attached\"}==1", + "format": "table", + "instant": true, + "legendFormat": "__auto", + "range": false, + "refId": "A" } ], - "title": "VMBDA", - "type": "row" + "title": "VMBDA NotAttached Phases", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "exported_namespace", + "name", + "phase" + ] + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": {}, + "renameByName": { + "exported_namespace": "Namespace", + "name": "Name", + "phase": "Phase" + } + } + } + ], + "type": "table" } ], - "refresh": "1m", + "refresh": "", "schemaVersion": 39, "tags": [], "templating": { @@ -2534,8 +2527,8 @@ spec: { "current": { "selected": false, - "text": "main", - "value": "P0D6E4079E36703EB" + "text": "default-metrics", + "value": "default-metrics" }, "hide": 0, "includeAll": false, @@ -2552,12 +2545,12 @@ spec: }, { "current": { - "selected": true, + "selected": false, "text": [ - "perf" + "All" ], "value": [ - "perf" + "$__all" ] }, "datasource": { @@ -2611,6 +2604,38 @@ spec: "skipUrlSync": false, "sort": 1, "type": "query" + }, + { + "current": { + "selected": false, + "text": [ + "e2e-15k-vms" + ], + "value": [ + "e2e-15k-vms" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "default-metrics" + }, + "definition": "label_values(dop_ha_cluster)", + "hide": 0, + "includeAll": false, + "label": "virt-cluster", + "multi": true, + "name": "cluster", + "options": [], + "query": { + "qryType": 1, + "query": "label_values(dop_ha_cluster)", + "refId": "PrometheusVariableQueryEditor-VariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "type": "query" } ] }, @@ -2620,9 +2645,9 @@ spec: }, "timepicker": {}, "timezone": "browser", - "title": "Virtualization", - "uid": "d10H1nfSz", - "version": 1, + "title": "Virtualization_old", + "uid": "d10H1nfSzDVP", + "version": 9, "weekStart": "" } folder: virtualization diff --git a/tests/performance/monitoring/virtualization-db-perf.json b/tests/performance/monitoring/virtualization-db-perf.json new file mode 100644 index 0000000000..59123b85bb --- /dev/null +++ b/tests/performance/monitoring/virtualization-db-perf.json @@ -0,0 +1,2275 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 71, + "panels": [], + "title": "Control plane", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"$ds_prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 1 + }, + "id": 70, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.5.13", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "sum by (namespace) (rate(container_cpu_usage_seconds_total{namespace=~\"(d8-.*|kube-system)\",container!=\"POD\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ namespace }}", + "range": true, + "refId": "A" + } + ], + "title": "CPU Usage by namespace", + "transformations": [ + { + "id": "calculateField", + "options": {} + } + ], + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, 
+ "y": 1 + }, + "id": 72, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.5.13", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "sum by (namespace) (avg_over_time(container_memory_working_set_bytes:without_kmem{namespace=~\"(d8-.*|kube-system)\", container!=\"POD\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ namespace }}", + "range": true, + "refId": "A" + } + ], + "title": "Memory usage by namespace", + "transformations": [ + { + "id": "calculateField", + "options": {} + } + ], + "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 10 + }, + "id": 75, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "cores", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 11 + }, + "id": 74, + "maxPerRow": 6, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": false, + "sortBy": "Mean", + "sortDesc": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.5.13", + "repeat": "node", + "repeatDirection": "h", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "sum by (namespace,node) (rate(container_cpu_usage_seconds_total{namespace=~\"(d8-.*|kube-system)\", node=\"$node\", container!=\"POD\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ namespace }}", + "range": true, + "refId": "A" + } + ], + "title": "$node", + "transformations": [ + { + "id": "calculateField", + "options": {} + } + ], + "type": "timeseries" + } + ], + "title": "CPU usage per node", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 11 + }, + "id": 83, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 30, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "links": [], + "mappings": [], + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "Total" + }, + "properties": [ + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.stacking", + "value": { + "group": false, + "mode": "none" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "color", + "value": { + "fixedColor": "yellow", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 12 + }, + "id": 84, + "maxPerRow": 6, + "options": { + "legend": { + "calcs": [ + "mean" + ], + "displayMode": "table", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.5.13", + "repeat": "node", + "repeatDirection": "h", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "sum by (namespace) (avg_over_time(container_memory_working_set_bytes:without_kmem{namespace=~\"(d8-.*|kube-system)\", node=~\"$node\", container!=\"POD\"}[$__rate_interval]))", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ namepsace }}", + "range": true, + "refId": "A" + } + ], + "title": "$node", + "transformations": [ + { + "id": "calculateField", + "options": {} + } + ], + "type": "timeseries" + } + ], + "title": "Memory usage per node", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 67, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "fieldMinMax": false, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 0, + "y": 13 + }, + "id": 68, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.19", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "netchecker_active_clients", + "format": "time_series", + "instant": true, + "legendFormat": "Active", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "netchecker_total_clients", + "hide": false, + "instant": true, + "legendFormat": "Total", + "range": false, + "refId": "B" + } + ], + "title": "Panel 
Title", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "stepBefore", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 22, + "x": 2, + "y": 13 + }, + "id": 69, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "avg(netchecker_active_clients)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "expr": "avg(netchecker_total_clients)", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Total / Active clients", + "type": "timeseries" + } + ], + "title": "NetChecker", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 13 + }, + "id": 66, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "fixed" + }, + "mappings": [ + { + "options": { + "pattern": "phase==Running", + "result": { + "color": "purple", + "index": 1, + "text": "Running" + } + }, + "type": "regex" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 5, + "x": 0, + "y": 14 + }, + "id": 45, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 10, + "minVizWidth": 0, + "namePlacement": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "10.4.19", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(d8_virtualization_virtualmachine_status_phase{namespace=~\"$namespace\"}) by (phase)", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "{{ phase }}", + "range": true, + "refId": "F" + } + ], + "title": "Count VMs", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "fixed" + }, + 
"mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 5, + "x": 5, + "y": 14 + }, + "id": 65, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 10, + "minVizWidth": 0, + "namePlacement": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "10.4.19", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "expr": "sum by (phase) ((kube_pod_status_phase{namespace=~\"$namespace\"} * on (pod) group_left(label_vm_kubevirt_internal_virtualization_deckhouse_io_name) kube_pod_labels{label_vm_kubevirt_internal_virtualization_deckhouse_io_name!=\"\"}))", + "hide": false, + "instant": false, + "legendFormat": "{{phase}}", + "range": true, + "refId": "A" + } + ], + "title": "Count VM Pods", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 5, + "x": 10, + "y": 14 + }, + "id": 52, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 10, + "minVizWidth": 0, + "namePlacement": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "10.4.19", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "expr": "sum(kube_persistentvolumeclaim_status_phase{prometheus!=\"deckhouse\",namespace=~\"$namespace\"}) by (phase)", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Count PVCs", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 5, + "x": 15, + "y": 14 + }, + "id": 100, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 10, + "minVizWidth": 0, + "namePlacement": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "10.4.19", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "expr": "sum(d8_virtualization_virtualdisk_status_phase{prometheus!=\"deckhouse\",namespace=~\"$namespace\"}) by (phase)", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Count VirtualDISKs", + "type": "bargauge" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + 
}, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "green", + "value": 0 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 4, + "x": 20, + "y": 14 + }, + "id": 58, + "options": { + "displayMode": "gradient", + "maxVizHeight": 300, + "minVizHeight": 10, + "minVizWidth": 0, + "namePlacement": "auto", + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showUnfilled": true, + "sizing": "auto", + "valueMode": "color" + }, + "pluginVersion": "10.4.19", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "expr": "sum(d8_virtualization_virtualmachineblockdeviceattachment_status_phase{prometheus!=\"deckhouse\",namespace=~\"$namespace\"}) by (phase)", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Count VMBDAs", + "type": "bargauge" + } + ], + "title": "Common", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 38, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "always", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 63, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.5.13", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "expr": "sum by (phase) ((kube_pod_status_phase{namespace=~\"$namespace\"} * on (pod) group_left(label_vm_kubevirt_internal_virtualization_deckhouse_io_name) kube_pod_labels{label_vm_kubevirt_internal_virtualization_deckhouse_io_name!=\"\"})==1)", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "VM pods phases count", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": 
false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 15 + }, + "id": 64, + "options": { + "legend": { + "calcs": [], + "displayMode": "hidden", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.5.13", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "expr": "sum by (label_kubevirt_internal_virtualization_deckhouse_io_node_name) ((kube_pod_status_phase{namespace=~\"$namespace\"} * on (pod) group_left(label_kubevirt_internal_virtualization_deckhouse_io_node_name) kube_pod_labels{label_kubevirt_internal_virtualization_deckhouse_io_node_name!=\"\"})==1)", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "VM pods by nodes count", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "always", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 23 + }, + "id": 34, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.5.13", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(d8_virtualization_virtualmachine_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\"}==1) by (phase)", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "{{ phase }}", + "range": true, + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "sum(d8_virtualization_virtualmachine_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\"}==1)", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "Total", + "range": true, + "refId": "A" + } + ], + "title": "VM phases count", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, 
+ "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 25, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 23 + }, + "id": 59, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": false + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.5.13", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(d8_virtualization_virtualmachine_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\",phase=~\"Migrating|Running\"}==1) by (node)", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "{{ phase }}", + "range": true, + "refId": "F" + } + ], + "title": "VM by nodes count", + "type": "timeseries" + } + ], + "title": "VirtualMachines", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 42, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "always", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 46, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.5.13", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(d8_virtualization_virtualdisk_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\"}==1) by (phase)", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "{{ phase }}", + "range": true, + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", 
+ "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(d8_virtualization_virtualdisk_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\"}==1)", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "Total", + "range": true, + "refId": "A" + } + ], + "title": "Count VirtualDISK Phases", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "always", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 99, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "pluginVersion": "8.5.13", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(kube_persistentvolumeclaim_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\"}==1) by (phase)", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "{{ phase }}", + "range": true, + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(kube_persistentvolumeclaim_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\"}==1)", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "Total", + "range": true, + "refId": "A" + } + ], + "title": "Count PVC Phases", + "type": "timeseries" + } + ], + "title": "VirtualDIsks", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 16 + }, + "id": 54, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "smooth", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "always", + "spanNulls": true, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + 
"steps": [ + { + "color": "green" + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 55, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.5.13", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "editorMode": "code", + "exemplar": false, + "expr": "count(d8_virtualization_virtualmachineblockdeviceattachment_status_phase{prometheus!=\"deckhouse\", namespace=~\"$namespace\"}==1) by (phase)", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "{{ phase }}", + "range": true, + "refId": "F" + } + ], + "title": "Count VMBDA Phases", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMax": 3, + "axisSoftMin": 1, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 3, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "area" + } + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 1, + "text": "Failed" + }, + "1": { + "color": "red", + "index": 2, + "text": "InProgress" + }, + "2": { + "color": "red", + "index": 3, + "text": "Attached" + }, + "3": { + "color": "red", + "index": 4, + "text": "-" + }, + "-1": { + "color": "red", + "index": 0, + "text": "-" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "light-red", + "value": -1 + }, + { + "color": "light-green", + "value": 0.5 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "8.5.13", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "d8_virtualization_virtualmachineblockdeviceattachment_status_phase{prometheus!=\"deckhouse\",namespace=~\"$namespace\",phase=\"Failed\"} > 0", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "{{ name }}", + "range": true, + "refId": "F" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "(d8_virtualization_virtualmachineblockdeviceattachment_status_phase{prometheus!=\"deckhouse\",namespace=~\"$namespace\",phase=\"InProgress\"} > 0) + 1", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "{{ name }}", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "exemplar": 
false, + "expr": "(d8_virtualization_virtualmachineblockdeviceattachment_status_phase{prometheus!=\"deckhouse\",namespace=~\"$namespace\",phase=\"Attached\"} > 0) + 2", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "{{ name }}", + "range": true, + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "$ds_prometheus" + }, + "editorMode": "code", + "exemplar": false, + "expr": "(d8_virtualization_virtualmachineblockdeviceattachment_status_phase{prometheus!=\"deckhouse\",namespace=~\"$namespace\",phase=\"Pending\"} > 0) + 3", + "format": "time_series", + "hide": false, + "instant": false, + "legendFormat": "{{ name }}", + "range": true, + "refId": "C" + } + ], + "title": "VMBDA Phases TimeLine", + "type": "timeseries" + } + ], + "title": "VMBDA", + "type": "row" + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [ + { + "current": { + "selected": false, + "text": "main", + "value": "P0D6E4079E36703EB" + }, + "hide": 0, + "includeAll": false, + "label": "datasource", + "multi": false, + "name": "ds_prometheus", + "options": [], + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "definition": "label_values(namespace)", + "hide": 0, + "includeAll": true, + "label": "Namespace:", + "multi": true, + "name": "namespace", + "options": [], + "query": { + "query": "label_values(namespace)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "${ds_prometheus}" + }, + "definition": "label_values(node_os_info,node)", + "hide": 1, + "includeAll": true, + "label": "Nodes:", + "multi": true, + "name": "node", + "options": [], + "query": { + "query": "label_values(node_os_info,node)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-30m", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Virtualization-perf", + "uid": "virtualizationperf", + "version": 3, + "weekStart": "" +} \ No newline at end of file diff --git a/tests/performance/new-custom-ceph-sc.sh b/tests/performance/new-custom-ceph-sc.sh new file mode 100755 index 0000000000..558d875ac3 --- /dev/null +++ b/tests/performance/new-custom-ceph-sc.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +kubectl delete validatingwebhookconfigurations.admissionregistration.k8s.io d8-csi-ceph-sc-validation +kubectl apply -f - < ${dir_virtualDisk}/report_vm_virtualDisk.txt + echo "Start time: $(formatted_date $start_time)" >> ${dir_virtualDisk}/report_vm_virtualDisk.txt + + while [[ $up == false ]]; do + VDReady=$(kubectl -n perf get vd | grep "Ready" | wc -l) + VDTotal=$(kubectl -n perf get vd -o name | wc -l) + + VMReady=$(kubectl -n perf get vm | grep "Running" | wc -l) + VMTotal=$(kubectl -n perf get vm -o name | wc -l) + + if [ $VDReady -eq $VDTotal ] && [ $VMReady -eq $VMTotal ]; then + up=true + echo "All vms and vds are ready" + end_time=$(date +%s) + break + fi + + echo "" + echo "Waiting for vms and vds to be ready..." 
+ echo "VM Running: $VMReady/$VMTotal" + echo "VD Ready: $VDReady/$VDTotal" + echo "" + echo "Waiting for $sleep_time seconds..." + sleep $sleep_time + echo "" + done + + duration=$((end_time - start_time)) + formatted_duration=$(format_duration "$duration") + echo "Duration: $duration seconds" + echo "Execution time: $formatted_duration" + + echo "Execution time: $formatted_duration" >> ${dir_virtualDisk}/report_vm_virtualDisk.txt + echo "End time: $(formatted_date $end_time)" >> ${dir_virtualDisk}/report_vm_virtualDisk.txt + + echo "" + +} + +gather_all_statistics() { + local report_dir=${1:-"report/statistics"} + task statistic:get-stat:all + + mv tools/statistic/*.csv ${report_dir} +} + +gather_vm_statistics() { + local report_dir=${1:-"report/statistics"} + task statistic:get-stat:vm + + mv tools/statistic/*.csv ${report_dir} +} + +gather_vd_statistics() { + local report_dir=${1:-"report/statistics"} + task statistic:get-stat:vd + + mv tools/statistic/*.csv ${report_dir} +} + +start_migration() { + # supoprt duration format: 0m - infinite, 30s - 30 seconds, 1h - 1 hour, 2h30m - 2 hours and 30 minutes + local duration=${1:-"5m"} + local SESSION="test-perf" + + echo "Create tmux session: $SESSION" + tmux -2 new-session -d -s "${SESSION}" + + tmux new-window -t "$SESSION:1" -n "perf" + tmux split-window -h -t 0 # Pane 0 (left), Pane 1 (right) + tmux split-window -v -t 1 # Pane 1 (top), Pane 2 (bottom) + + tmux select-pane -t 0 + tmux send-keys "k9s -n perf" C-m + tmux resize-pane -t 1 -x 50% + + echo "Start migration in $SESSION, pane 1" + tmux select-pane -t 1 + tmux send-keys "NS=perf TARGET=5 DURATION=${duration} task evicter:run:migration" C-m + tmux resize-pane -t 1 -x 50% + + tmux select-pane -t 2 + tmux resize-pane -t 2 -x 50% + echo "For watching migration in $SESSION, connect to session with command:" + echo "tmux -2 attach -t ${SESSION}" + + echo "" + +} + +stop_migration() { + local SESSION="test-perf" + tmux -2 kill-session -t "${SESSION}" || true +} + +restart_virtualization_components() { + kubectl -n d8-virtualization rollout restart deployment virtualization-controller +} + +collect_vpa() { + local vpa_dir="report/vpa" + mkdir -p ${vpa_dir} + local VPAS=( $(kubectl -n d8-virtualization get vpa -o name) ) + + for vpa in $VPAS; do + vpa_name=$(echo $vpa | cut -d "/" -f2) + file="vpa_${vpa_name}.yaml" + kubectl -n d8-virtualization get $vpa -o yaml > "${vpa_dir}/${file}_$(formatted_date $(date +%s))" + done +} + +# test only +for vitype in "containerRegistry" "persistentVolumeClaim"; do + cleanup_dir "report" +done + +# all resources +vi_type="containerRegistry" +migration_duration="1m" +main_sleep=$( echo "$migration_duration" | sed 's/m//' ) +count=5 + +# undeploy_vm +# sleep 5 +# deploy_resources "all" $vi_type $count +# sleep 1 +# start_migration "$migration_duration" +# sleep $(( $main_sleep * 60 )) +# stop_migration +# sleep 5 +# gather_all_statistics "report/statistics/vm_vi_${vi_type}" +# collect_vpa + + +vi_type="persistentVolumeClaim" +undeploy_vm +sleep 1 +deploy_resources "all" $vi_type $count +sleep 1 + +start_migration "$migration_duration" +sleep $(( $main_sleep * 60 )) +stop_migration +sleep 5 + +gather_all_statistics "report/statistics/vm_vi_${vi_type}" +collect_vpa + +# vd only diff --git a/tests/performance/scenarios/tst.sh b/tests/performance/scenarios/tst.sh new file mode 100755 index 0000000000..c8105cd66b --- /dev/null +++ b/tests/performance/scenarios/tst.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +echo "Tst return" + +exit_trap() { + echo "" + echo 
"Cleanup" + echo "Exiting..." + exit 0 +} + +trap exit_trap SIGINT SIGTERM + +while true; do + for i in {1..5}; do + echo "Iteration $i" + if [ $i -eq 3 ]; then + echo "$i = 3" + echo "Break" + return 0 + fi + done +done \ No newline at end of file diff --git a/tests/performance/templates/_cloud-config.tpl b/tests/performance/templates/_cloud-config.tpl index d8311ce8f7..b1ed7f3132 100644 --- a/tests/performance/templates/_cloud-config.tpl +++ b/tests/performance/templates/_cloud-config.tpl @@ -2,8 +2,8 @@ #cloud-config ssh_pwauth: true chpasswd: { expire: false } -user: ubuntu -password: ubuntu +user: {{ .Values.resources.virtualImage.spec.template.image.name }} +password: {{ .Values.resources.virtualImage.spec.template.image.name }} users: - name: cloud #cloud @@ -54,6 +54,59 @@ write_files: EOF +{{- if ne .Values.resources.virtualImage.spec.template.image.name "ubuntu" }} +packages: + - curl + - nginx +write_files: + - path: /usr/local/bin/curl-ping.sh + permissions: '0755' + content: | + #!/bin/sh + while true; do + /usr/bin/curl -s -f -H "X-Client-Name: $(hostname)" {{ .Values.curlUrl }} + sleep 3 + done >> /var/log/curl-ping.log 2>&1 + - path: /etc/init.d/curl-ping + permissions: '0755' + content: | + #!/sbin/openrc-run + name="curl-ping" + command="/usr/local/bin/curl-ping.sh" + command_args="" + pidfile="/run/curl-ping.pid" + + start() { + start-stop-daemon --start --background --make-pidfile --pidfile "$pidfile" --exec "$command" + } + + stop() { + start-stop-daemon --stop --pidfile "$pidfile" + } runcmd: + - rc-update add curl-ping default + - /etc/init.d/curl-ping start +{{- else }} + - path: /etc/systemd/system/curl-ping.service + permissions: "0755" + content: | + Unit] + Description=Curl Ping Service + After=network.target + + [Service] + Type=oneshot + ExecStart=/usr/bin/curl --connect-timeout 0.5 -H "X-Client-Name: $(hostname)" {{ .Values.curlUrl }} + RemainAfterExit=yes + Restart=on-failure + RestartSec=3 + + [Install] + WantedBy=multi-user.target +runcmd: + - systemctl daemon-reload + - systemctl enable curl-ping.service + - systemctl start curl-ping.service +{{- end }} - /usr/local/bin/generate.sh {{- end }} diff --git a/tests/performance/templates/vds.yaml b/tests/performance/templates/vds.yaml index 911192aef6..b885a4319d 100644 --- a/tests/performance/templates/vds.yaml +++ b/tests/performance/templates/vds.yaml @@ -1,13 +1,18 @@ {{- if or (eq .Values.resources.default "vds") (eq .Values.resources.default "all") }} +--- apiVersion: virtualization.deckhouse.io/v1alpha2 kind: VirtualImage metadata: - name: {{ $.Values.resources.prefix }} +{{- if $.Values.resources.virtualImage.annotation }} + annotations: + virtualization.deckhouse.io/use-volume-snapshot: "" +{{- end }} + name: {{ $.Values.resources.prefix }}-{{ $.Values.resources.virtualImage.spec.template.type | lower }} namespace: {{ $.Release.Namespace }} labels: vms: {{ $.Values.resources.prefix }} spec: -{{- if eq .Values.resources.virtualImage.spec.template.type "persistentVolumeClaim" }} +{{- if eq $.Values.resources.virtualImage.spec.template.type "persistentVolumeClaim" }} storage: PersistentVolumeClaim persistentVolumeClaim: storageClassName: {{ $.Values.resources.storageClassName }} @@ -22,6 +27,33 @@ spec: http: url: {{ $.Values.resources.virtualImage.spec.template.image.url }} {{- end }} +{{- $count := (.Values.resources.virtualImage.spec.count | int) }} +{{- range until $count }} +--- +apiVersion: virtualization.deckhouse.io/v1alpha2 +kind: VirtualImage +metadata: + name: {{ $.Values.resources.prefix 
}}-{{ $.Values.resources.virtualImage.spec.template.type | lower }}-{{ . }} + namespace: {{ $.Release.Namespace }} + labels: + vms: {{ $.Values.resources.prefix }} +spec: +{{- if eq $.Values.resources.virtualImage.spec.template.type "persistentVolumeClaim" }} + storage: PersistentVolumeClaim + persistentVolumeClaim: + storageClassName: {{ $.Values.resources.storageClassName }} + dataSource: + type: "HTTP" + http: + url: {{ $.Values.resources.virtualImage.spec.template.image.url }} +{{- else }} + storage: ContainerRegistry + dataSource: + type: "HTTP" + http: + url: {{ $.Values.resources.virtualImage.spec.template.image.url }} +{{- end }} +{{- end }} {{- if ne .Values.resources.virtualDisk.spec.template.type "virtualImage" }} {{- $count := (.Values.count | int) }} {{- range until $count }} @@ -29,7 +61,11 @@ spec: apiVersion: virtualization.deckhouse.io/v1alpha2 kind: VirtualDisk metadata: - name: {{ $.Values.resources.prefix }}-{{ . }} +{{- if $.Values.resources.virtualDisk.annotation }} + annotations: + virtualization.deckhouse.io/use-volume-snapshot: "" +{{- end }} + name: {{ printf "%s-%05d" $.Values.resources.prefix . }} namespace: {{ $.Release.Namespace }} labels: vms: {{ $.Values.resources.prefix }} @@ -43,7 +79,11 @@ spec: type: "ObjectRef" objectRef: kind: "VirtualImage" - name: {{ $.Values.resources.prefix }} + {{- if gt . ($.Values.start_count | int) }} + name: {{ $.Values.resources.prefix }}-{{ $.Values.resources.virtualImage.spec.template.type | lower }}-{{ mod . $.Values.resources.virtualImage.spec.count }} + {{- else }} + name: {{ $.Values.resources.prefix }}-{{ $.Values.resources.virtualImage.spec.template.type | lower }} + {{- end }} {{- end }} {{- end }} {{- end }} diff --git a/tests/performance/templates/vms.yaml b/tests/performance/templates/vms.yaml index 4118bf7d06..714baead4e 100644 --- a/tests/performance/templates/vms.yaml +++ b/tests/performance/templates/vms.yaml @@ -5,7 +5,7 @@ apiVersion: virtualization.deckhouse.io/v1alpha2 kind: VirtualMachine metadata: - name: {{ $.Values.resources.prefix }}-{{ . }} + name: {{ printf "%s-%05d" $.Values.resources.prefix . }} namespace: {{ $.Release.Namespace }} labels: vms: {{ $.Values.resources.prefix }} @@ -38,10 +38,10 @@ spec: blockDeviceRefs: {{- if eq $.Values.resources.virtualDisk.spec.template.type "virtualImage" }} - kind: VirtualImage - name: {{ $.Values.resources.prefix }} + name: {{ $.Values.resources.prefix }}-{{ $.Values.resources.virtualImage.spec.template.type | lower }}-{{ mod . $.Values.resources.virtualImage.spec.count }} {{- else }} - kind: VirtualDisk - name: {{ $.Values.resources.prefix }}-{{ . }} + name: {{ printf "%s-%05d" $.Values.resources.prefix . 
}} {{- end }} provisioning: type: UserDataRef diff --git a/tests/performance/tests.sh b/tests/performance/tests.sh new file mode 100755 index 0000000000..8f7ba525c6 --- /dev/null +++ b/tests/performance/tests.sh @@ -0,0 +1,2652 @@ +#!/usr/bin/env bash + +set -eEo pipefail +# set -x + +# Parse command line arguments +parse_arguments() { + while [[ $# -gt 0 ]]; do + case $1 in + -s|--scenario) + SCENARIO_NUMBER="$2" + shift 2 + ;; + -c|--count) + MAIN_COUNT_RESOURCES="$2" + shift 2 + ;; + --batch-size) + MAX_BATCH_SIZE="$2" + shift 2 + ;; + --enable-batch) + BATCH_DEPLOYMENT_ENABLED=true + shift + ;; + --bootstrap-only) + BOOTSTRAP_ONLY=true + shift + ;; + --continue) + CONTINUE_AFTER_BOOTSTRAP=true + shift + ;; + --skip-cleanup) + SKIP_CLEANUP=true + shift + ;; + --keep-resources) + KEEP_RESOURCES=true + shift + ;; + --clean-reports) + CLEAN_REPORTS=true + shift + ;; + -h|--help) + show_help + exit 0 + ;; + *) + echo "Unknown option: $1" + show_help + exit 1 + ;; + esac + done +} + +show_help() { + cat << EOF +Usage: $0 [OPTIONS] + +Performance testing script for Kubernetes Virtual Machines + +OPTIONS: + -s, --scenario NUMBER Scenario number to run (1 or 2, default: 1) + -c, --count NUMBER Number of resources to create (default: 2) + --batch-size NUMBER Maximum resources per batch (default: 1200) + --enable-batch Force batch deployment mode + --bootstrap-only Only deploy resources, skip tests + --continue Continue tests after bootstrap (use with --bootstrap-only) + --keep-resources Keep resources after tests (don't cleanup) + --clean-reports Clean all report directories before running + -h, --help Show this help message + +EXAMPLES: + $0 # Run scenario 1 with 2 resources (default) + $0 -s 1 -c 4 # Run scenario 1 with 4 resources + $0 -s 2 -c 10 # Run scenario 2 with 10 resources + $0 --scenario 1 --count 6 # Run scenario 1 with 6 resources + $0 -c 15000 --batch-size 1200 # Deploy 15000 resources in batches of 1200 + $0 --bootstrap-only -c 1000 # Only deploy 1000 resources, skip tests + $0 --continue -c 1000 # Continue tests after bootstrap + $0 --keep-resources # Keep resources after tests (don't cleanup) + $0 --clean-reports # Clean all reports and run default scenario + +SCENARIOS: + 1 - persistentVolumeClaim (default) + 2 - containerRegistry (currently disabled) + +BATCH DEPLOYMENT: + For large deployments (>1200 resources), the script automatically uses batch deployment. + Each batch deploys up to 1200 resources with 30-second delays between batches. + Use --batch-size to customize batch size and --enable-batch to force batch mode. + +DEPLOYMENT CONTROL: + --bootstrap-only: Only deploy resources, skip all tests (useful for large deployments) + --continue: Continue tests after bootstrap (use with --bootstrap-only) + --keep-resources: Keep resources after tests (don't cleanup at the end) + + Workflow examples: + 1. Deploy only: ./tests.sh --bootstrap-only -c 5000 + 2. Continue tests: ./tests.sh --continue -c 5000 + 3. 
Keep resources: ./tests.sh --keep-resources -c 1000 + +EOF +} + +# Detect operating system +detect_os() { + if [[ "$OSTYPE" == "darwin"* ]] || [[ "$(uname)" == "Darwin" ]]; then + echo "macOS" + elif [[ "$OSTYPE" == "linux-gnu"* ]] || [[ "$(uname)" == "Linux" ]]; then + echo "Linux" + else + echo "Unknown" + fi +} + +# Set OS-specific variables +OS_TYPE=$(detect_os) + +# Global configuration +NAMESPACE="perf" +STORAGE_CLASS="" +VI_TYPE="persistentVolumeClaim" # containerRegistry, persistentVolumeClaim +COUNT=2 +SLEEP_TIME=5 +REPORT_DIR="report" +MIGRATION_DURATION="5m" +MIGRATION_PERCENTAGE=10 +ACTIVE_CLUSTER_PERCENTAGE=90 +CONTROLLER_NAMESPACE="d8-virtualization" +# Store original controller replicas count +ORIGINAL_CONTROLLER_REPLICAS="" +# Centralized logging +ORIGINAL_DECHOUSE_CONTROLLER_REPLICAS="" +LOG_FILE="" +CURRENT_SCENARIO="" +VM_OPERATIONS_LOG="" + +# Large scale deployment configuration +MAX_BATCH_SIZE=1200 # Maximum resources per batch +TOTAL_TARGET_RESOURCES=15000 # Total target resources +BATCH_DEPLOYMENT_ENABLED=false # Enable batch deployment for large numbers + +# New deployment control options +BOOTSTRAP_ONLY=false # Only deploy resources, skip tests +CONTINUE_AFTER_BOOTSTRAP=false # Continue tests after bootstrap +SKIP_CLEANUP=false +KEEP_RESOURCES=false # Keep resources after tests (don't cleanup) + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +CYAN='\033[0;36m' +NC='\033[0m' # No Color + +log_info() { + local message="$1" + local timestamp=$(get_current_date) + echo -e "${BLUE}[INFO]${NC} $message" + if [ -n "$LOG_FILE" ]; then + echo "[$timestamp] [INFO] $message" >> "$LOG_FILE" + fi +} + +log_success() { + local message="$1" + local timestamp=$(get_current_date) + echo -e "${GREEN}[SUCCESS]${NC} $message" + if [ -n "$LOG_FILE" ]; then + echo "[$timestamp] [SUCCESS] $message" >> "$LOG_FILE" + fi +} + +log_warning() { + local message="$1" + local timestamp=$(get_current_date) + echo -e "${YELLOW}[WARNING]${NC} $message" + if [ -n "$LOG_FILE" ]; then + echo "[$timestamp] [WARNING] $message" >> "$LOG_FILE" + fi +} + +log_error() { + local message="$1" + local timestamp=$(get_current_date) + echo -e "${RED}[ERROR]${NC} $message" + if [ -n "$LOG_FILE" ]; then + echo "[$timestamp] [ERROR] $message" >> "$LOG_FILE" + fi +} + +# VM Operations logging functions +log_vm_operation() { + local message="$1" + local timestamp=$(get_current_date) + if [ -n "$VM_OPERATIONS_LOG" ]; then + echo "[$timestamp] [VM_OP] $message" >> "$VM_OPERATIONS_LOG" + fi +} + +log_vmop_operation() { + local message="$1" + local timestamp=$(get_current_date) + if [ -n "$VM_OPERATIONS_LOG" ]; then + echo "[$timestamp] [VMOP] $message" >> "$VM_OPERATIONS_LOG" + fi +} + +# Function to log duration details to file +log_duration() { + local step_name="$1" + local duration="$2" + local timestamp=$(get_current_date) + local formatted_duration=$(format_duration "$duration") + if [ -n "$LOG_FILE" ]; then + echo "[$timestamp] [DURATION] $step_name: $formatted_duration" >> "$LOG_FILE" + fi +} + +# Function to log step start with timestamp +log_step_start() { + local step_name="$1" + local timestamp=$(get_current_date) + echo -e "${CYAN}[STEP_START] $step_name${NC}" + if [ -n "$LOG_FILE" ]; then + echo "[$timestamp] [STEP_START] $step_name" >> "$LOG_FILE" + fi +} + +# Function to log step end with duration +log_step_end() { + local step_name="$1" + local duration="$2" + local timestamp=$(get_current_date) + local formatted_duration=$(format_duration 
"$duration") + echo -e "${CYAN}[STEP_END] $step_name${NC}" + if [ -n "$LOG_FILE" ]; then + echo "[$timestamp] [STEP_END] $step_name completed in $formatted_duration" >> "$LOG_FILE" + fi +} + +exit_trap() { + echo "" + echo "Cleanup" + echo "Exiting..." + exit 0 +} + +format_duration() { + local total_seconds=$1 + local hours=$((total_seconds / 3600)) + local minutes=$(( (total_seconds % 3600) / 60 )) + local seconds=$((total_seconds % 60)) + printf "%02d:%02d:%02d\n" "$hours" "$minutes" "$seconds" +} + +formatted_date() { + local timestamp="$1" + + # Check if timestamp is valid (not empty and is a number) + if [ -z "$timestamp" ] || ! [[ "$timestamp" =~ ^[0-9]+$ ]]; then + # Use current time if timestamp is invalid + date +"%H:%M:%S %d-%m-%Y" + return + fi + + # Use OS-specific date command + case "$OS_TYPE" in + "macOS") + date -r "$timestamp" +"%H:%M:%S %d-%m-%Y" 2>/dev/null || date +"%H:%M:%S %d-%m-%Y" + ;; + "Linux") + date -d "@$timestamp" +"%H:%M:%S %d-%m-%Y" 2>/dev/null || date +"%H:%M:%S %d-%m-%Y" + ;; + *) + # Fallback - try both methods + if date -r "$timestamp" +"%H:%M:%S %d-%m-%Y" 2>/dev/null; then + # macOS style worked + date -r "$timestamp" +"%H:%M:%S %d-%m-%Y" + elif date -d "@$timestamp" +"%H:%M:%S %d-%m-%Y" 2>/dev/null; then + # Linux style worked + date -d "@$timestamp" +"%H:%M:%S %d-%m-%Y" + else + # Last resort - use current time + date +"%H:%M:%S %d-%m-%Y" + fi + ;; + esac +} + +get_current_date() { + date +"%H:%M:%S %d-%m-%Y" +} + +get_timestamp() { + date +%s +} + +trap exit_trap SIGINT SIGTERM + +get_default_storage_class() { + if [ -n "${STORAGE_CLASS:-}" ]; then + echo "$STORAGE_CLASS" + else + kubectl get storageclass -o json \ + | jq -r '.items[] | select(.metadata.annotations."storageclass.kubernetes.io/is-default-class" == "true") | .metadata.name' + fi +} + + +# Centralized logging functions +init_logging() { + local scenario_name=$1 + local vi_type=$2 + local count=$3 + local timestamp=$(date +"%Y%m%d_%H%M%S") + local scenario_dir="$REPORT_DIR/${scenario_name}_${vi_type}_${count}vm_${timestamp}" + LOG_FILE="$scenario_dir/test.log" + VM_OPERATIONS_LOG="$scenario_dir/vm_operations.log" + CURRENT_SCENARIO="${scenario_name}_${vi_type}_${count}vm_${timestamp}" + mkdir -p "$(dirname "$LOG_FILE")" + echo "=== Test started at $(get_current_date) ===" > "$LOG_FILE" + echo "=== VM Operations Report started at $(get_current_date) ===" > "$VM_OPERATIONS_LOG" +} + + +# Function to calculate percentage safely +calculate_percentage() { + local duration="$1" + local total="$2" + + # Check if values are valid numbers and not zero + if [[ -z "$duration" || -z "$total" || "$duration" -eq 0 || "$total" -eq 0 ]]; then + echo "0.0" + return + fi + + # Use bc with error handling + local result=$(echo "scale=1; $duration * 100 / $total" | bc 2>/dev/null || echo "0.0") + echo "$result" +} + +# Function to create summary report +create_summary_report() { + local scenario_name="$1" + local vi_type="$2" + local scenario_dir="$3" + local start_time="$4" + local end_time="$5" + local total_duration="$6" + local cleanup_duration="${7:-0}" + local deploy_duration="${8:-0}" + local stats_duration="${9:-0}" + local stop_duration="${10:-0}" + local start_vm_duration="${11:-0}" + local undeploy_duration="${12:-0}" + local deploy_remaining_duration="${13:-0}" + local vm_stats_duration="${14:-0}" + local vm_ops_duration="${15:-0}" + local vm_ops_stop_duration="${16:-0}" + local vm_ops_start_duration="${17:-0}" + local migration_duration="${18:-0}" + local cleanup_ops_duration="${19:-0}" + local 
migration_percent_duration="${20:-0}" + local controller_duration="${21:-0}" + local final_stats_duration="${22:-0}" + local drain_stats_duration="${23:-0}" + local final_cleanup_duration="${24:-0}" + local migration_parallel_2x_duration="${25:-0}" + local migration_parallel_4x_duration="${26:-0}" + local migration_parallel_8x_duration="${27:-0}" + + local summary_file="$scenario_dir/summary.txt" + + # Calculate percentages safely + local cleanup_percent=$(calculate_percentage "$cleanup_duration" "$total_duration") + local deploy_percent=$(calculate_percentage "$deploy_duration" "$total_duration") + local stats_percent=$(calculate_percentage "$stats_duration" "$total_duration") + local stop_percent=$(calculate_percentage "$stop_duration" "$total_duration") + local start_vm_percent=$(calculate_percentage "$start_vm_duration" "$total_duration") + local undeploy_percent=$(calculate_percentage "$undeploy_duration" "$total_duration") + local deploy_remaining_percent=$(calculate_percentage "$deploy_remaining_duration" "$total_duration") + local vm_stats_percent=$(calculate_percentage "$vm_stats_duration" "$total_duration") + local vm_ops_percent=$(calculate_percentage "$vm_ops_duration" "$total_duration") + local vm_ops_stop_percent=$(calculate_percentage "$vm_ops_stop_duration" "$total_duration") + local vm_ops_start_percent=$(calculate_percentage "$vm_ops_start_duration" "$total_duration") + local migration_percent=$(calculate_percentage "$migration_duration" "$total_duration") + local cleanup_ops_percent=$(calculate_percentage "$cleanup_ops_duration" "$total_duration") + local migration_percent_percent=$(calculate_percentage "$migration_percent_duration" "$total_duration") + local controller_percent=$(calculate_percentage "$controller_duration" "$total_duration") + local final_stats_percent=$(calculate_percentage "$final_stats_duration" "$total_duration") + local drain_stats_percent=$(calculate_percentage "$drain_stats_duration" "$total_duration") + local final_cleanup_percent=$(calculate_percentage "$final_cleanup_duration" "$total_duration") + local migration_parallel_2x_percent=$(calculate_percentage "$migration_parallel_2x_duration" "$total_duration") + local migration_parallel_4x_percent=$(calculate_percentage "$migration_parallel_4x_duration" "$total_duration") + local migration_parallel_8x_percent=$(calculate_percentage "$migration_parallel_8x_duration" "$total_duration") + + cat > "$summary_file" << EOF +================================================================================ + PERFORMANCE TEST SUMMARY REPORT +================================================================================ + +Scenario: $scenario_name +Virtual Image Type: $vi_type +Test Date: $(formatted_date $start_time) +Duration: $(format_duration $total_duration) + +================================================================================ + EXECUTION TIMELINE +================================================================================ + +Start Time: $(formatted_date $start_time) +End Time: $(formatted_date $end_time) +Total Duration: $(format_duration $total_duration) + +================================================================================ + STEP DURATION BREAKDOWN +================================================================================ + +$(printf "%-55s %10s %10s\n" "Phase" "Duration" "Percentage") +$(printf "%-55s %10s %10s\n" "-------------------------------------------------------" "----------" "----------") +$(printf "%-55s %10s %10s\n" "Cleanup" "$(format_duration 
$cleanup_duration)" "$(printf "%5.1f" $cleanup_percent)%") +$(printf "%-55s %10s %10s\n" "Deploy VMs [$MAIN_COUNT_RESOURCES]" "$(format_duration $deploy_duration)" "$(printf "%5.1f" $deploy_percent)%") +$(printf "%-55s %10s %10s\n" "Statistics Collection" "$(format_duration $stats_duration)" "$(printf "%5.1f" $stats_percent)%") +$(printf "%-55s %10s %10s\n" "VM Stop [$MAIN_COUNT_RESOURCES]" "$(format_duration $stop_duration)" "$(printf "%5.1f" $stop_percent)%") +$(printf "%-55s %10s %10s\n" "VM Start [$MAIN_COUNT_RESOURCES]" "$(format_duration $start_vm_duration)" "$(printf "%5.1f" $start_vm_percent)%") +$(printf "%-55s %10s %10s\n" "VM Undeploy 10% VMs [$PERCENT_RESOURCES] (keeping disks)" "$(format_duration $undeploy_duration)" "$(printf "%5.1f" $undeploy_percent)%") +$(printf "%-55s %10s %10s\n" "Deploying 10% VMs [$PERCENT_RESOURCES] (keeping disks)" "$(format_duration $deploy_remaining_duration)" "$(printf "%5.1f" $deploy_remaining_percent)%") +$(printf "%-55s %10s %10s\n" "VM Statistics: Deploying 10% VMs ([$PERCENT_RESOURCES] VMs)" "$(format_duration $vm_stats_duration)" "$(printf "%5.1f" $vm_stats_percent)%") +$(printf "%-55s %10s %10s\n" "Migration Setup (${MIGRATION_PERCENTAGE_5}% - ${MIGRATION_5_COUNT} VMs)" "$(format_duration $migration_duration)" "$(printf "%5.1f" $migration_percent)%") +$(printf "%-55s %10s %10s\n" "VM Operations: Stopping VMs [$PERCENT_RESOURCES]" "$(format_duration $vm_ops_stop_duration)" "$(printf "%5.1f" $vm_ops_stop_percent)%") +$(printf "%-55s %10s %10s\n" "VM Operations: Start VMs [$PERCENT_RESOURCES]" "$(format_duration $vm_ops_start_duration)" "$(printf "%5.1f" $vm_ops_start_percent)%") +$(printf "%-55s %10s %10s\n" "Stop Migration ${MIGRATION_PERCENTAGE_5}% (${MIGRATION_5_COUNT} VMs)" "$(format_duration $cleanup_ops_duration)" "$(printf "%5.1f" $cleanup_ops_percent)%") +$(printf "%-55s %10s %10s\n" "Migration Percentage ${MIGRATION_10_COUNT} VMs (10%)" "$(format_duration $migration_percent_duration)" "$(printf "%5.1f" $migration_percent_percent)%") +$(printf "%-55s %10s %10s\n" "Migration parallelMigrationsPerCluster 2x nodes" "$(format_duration $migration_parallel_2x_duration)" "$(printf "%5.1f" $migration_parallel_2x_percent)%") +$(printf "%-55s %10s %10s\n" "Migration parallelMigrationsPerCluster 4x nodes" "$(format_duration $migration_parallel_4x_duration)" "$(printf "%5.1f" $migration_parallel_4x_percent)%") +$(printf "%-55s %10s %10s\n" "Migration parallelMigrationsPerCluster 8x nodes" "$(format_duration $migration_parallel_8x_duration)" "$(printf "%5.1f" $migration_parallel_8x_percent)%") +$(printf "%-55s %10s %10s\n" "Controller Restart" "$(format_duration $controller_duration)" "$(printf "%5.1f" $controller_percent)%") +$(printf "%-55s %10s %10s\n" "Final Statistics" "$(format_duration $final_stats_duration)" "$(printf "%5.1f" $final_stats_percent)%") +$(printf "%-55s %10s %10s\n" "Drain node" "$(format_duration $drain_stats_duration)" "$(printf "%5.1f" $drain_stats_percent)%") +$(printf "%-55s %10s %10s\n" "Final Cleanup" "$(format_duration $final_cleanup_duration)" "$(printf "%5.1f" $final_cleanup_percent)%") + +================================================================================ + PERFORMANCE METRICS +================================================================================ + +$(printf "%-25s %10s\n" "Total VMs Tested:" "$MAIN_COUNT_RESOURCES") +$(printf "%-25s %10s\n" "VM Deployment Time:" "$(format_duration $deploy_duration)") +$(printf "%-25s %10s\n" "VM Stop Time:" "$(format_duration $stop_duration)") +$(printf 
"%-25s %10s\n" "VM Start Time:" "$(format_duration $start_vm_duration)") +$(printf "%-25s %10s\n" "Controller Restart Time:" "$(format_duration $controller_duration)") +$(printf "%-25s %10s\n" "Migration 5% Time:" "$(format_duration $migration_duration)") +$(printf "%-25s %10s\n" "Migration 10% Time:" "$(format_duration $migration_percent_duration)") +$(printf "%-25s %10s\n" "Drain Node Time:" "$(format_duration $drain_stats_duration)") +================================================================================ + FILES GENERATED +================================================================================ + +$(printf "%-25s %s\n" "Log File:" "$scenario_dir/test.log") +$(printf "%-25s %s\n" "VM Operations Log:" "$scenario_dir/vm_operations.log") +$(printf "%-25s %s\n" "Statistics Directory:" "$scenario_dir/statistics/") +$(printf "%-25s %s\n" "VPA Data Directory:" "$scenario_dir/vpa/") +$(printf "%-25s %s\n" "Summary Report:" "$scenario_dir/summary.txt") + +================================================================================ +EOF + + log_info "Summary report created: $summary_file" +} + + +create_report_dir() { + local scenario_name=$1 + local vi_type=$2 + local count=$3 + local timestamp=$(date +"%Y%m%d_%H%M%S") + local base_dir="$REPORT_DIR/${scenario_name}_${vi_type}_${count}vm_${timestamp}" + mkdir -p "$base_dir/statistics" + mkdir -p "$base_dir/vpa" + echo "$base_dir" +} + +remove_report_dir() { + local dir=${1:-$REPORT_DIR} + rm -rf $dir +} + +clean_all_reports() { + if [ -d "$REPORT_DIR" ]; then + log_info "Cleaning all report directories in $REPORT_DIR" + rm -rf "$REPORT_DIR"/* + log_success "All report directories cleaned" + else + log_info "Report directory $REPORT_DIR does not exist, nothing to clean" + fi +} + +gather_all_statistics() { + local report_dir=${1:-"$REPORT_DIR/statistics"} + local namespace=${2:-$NAMESPACE} + local start_time=$(get_timestamp) + + log_info "Gathering all statistics to $report_dir" + log_info "Start time: $(formatted_date $start_time)" + + local task_start=$(get_timestamp) + task statistic:get-stat:all NAMESPACE=$namespace OUTPUT_DIR=$(realpath $report_dir) + local task_end=$(get_timestamp) + local task_duration=$((task_end - task_start)) + log_info "Task statistic:get-stat:all completed in $(format_duration $task_duration)" + log_duration "Task statistic:get-stat:all" "$task_duration" + + mv tools/statistic/*.csv ${report_dir} 2>/dev/null || true + + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + log_info "All statistics gathering completed in $(format_duration $duration)" + log_success "All statistics gathered" +} + +gather_vm_statistics() { + local report_dir=${1:-"$REPORT_DIR/statistics"} + local namespace=${2:-$NAMESPACE} + local start_time=$(get_timestamp) + + log_info "Gathering VM statistics to $report_dir" + log_info "Start time: $(formatted_date $start_time)" + + local task_start=$(get_timestamp) + task statistic:get-stat:vm NAMESPACE=$namespace OUTPUT_DIR=$(realpath $report_dir) + local task_end=$(get_timestamp) + local task_duration=$((task_end - task_start)) + log_info "Task statistic:get-stat:vm completed in $(format_duration $task_duration)" + log_duration "Task statistic:get-stat:vm" "$task_duration" + + mv tools/statistic/*.csv ${report_dir} 2>/dev/null || true + + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + log_info "VM statistics gathering completed in $(format_duration $duration)" + log_success "VM statistics gathered" +} + 
+gather_vd_statistics() { + local report_dir=${1:-"$REPORT_DIR/statistics"} + local namespace=${2:-$NAMESPACE} + local start_time=$(get_timestamp) + + log_info "Gathering VD statistics to $report_dir" + log_info "Start time: $(formatted_date $start_time)" + + local task_start=$(get_timestamp) + task statistic:get-stat:vd NAMESPACE=$namespace OUTPUT_DIR=$(realpath $report_dir) + local task_end=$(get_timestamp) + local task_duration=$((task_end - task_start)) + log_info "Task statistic:get-stat:vd completed in $(format_duration $task_duration)" + log_duration "Task statistic:get-stat:vd" "$task_duration" + + mv tools/statistic/*.csv ${report_dir} 2>/dev/null || true + + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + log_info "VD statistics gathering completed in $(format_duration $duration)" + log_success "VD statistics gathered" +} + + +gather_specific_vm_statistics() { + local report_dir=${1:-"$REPORT_DIR/statistics"} + local namespace=${2:-$NAMESPACE} + local vm_count=${3:-0} + local start_time=$(get_timestamp) + + log_info "Gathering statistics for specific VMs (count: $vm_count) to $report_dir" + log_info "Start time: $(formatted_date $start_time)" + + local task_start=$(get_timestamp) + task statistic:get-stat:vm NAMESPACE=$namespace OUTPUT_DIR=$(realpath $report_dir) VM_COUNT=$vm_count + local task_end=$(get_timestamp) + local task_duration=$((task_end - task_start)) + log_info "Task statistic:get-stat:vm for specific VMs completed in $(format_duration $task_duration)" + log_duration "Task statistic:get-stat:vm specific" "$task_duration" + + mv tools/statistic/*.csv ${report_dir} 2>/dev/null || true + + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + log_info "Specific VM statistics gathering completed in $(format_duration $duration)" + log_success "Specific VM statistics gathered" +} + +collect_vpa() { + local scenario_dir=$1 + local vpa_dir="$scenario_dir/vpa" + local start_time=$(get_timestamp) + + mkdir -p ${vpa_dir} + log_info "Collecting VPA data to $vpa_dir" + log_info "Start time: $(formatted_date $start_time)" + + local list_start=$(get_timestamp) + local VPAS=( $(kubectl -n d8-virtualization get vpa -o name 2>/dev/null || true) ) + local list_end=$(get_timestamp) + local list_duration=$((list_end - list_start)) + log_info "VPA list retrieval completed in $(format_duration $list_duration)" + log_duration "VPA list retrieval" "$list_duration" + + if [ ${#VPAS[@]} -eq 0 ]; then + log_warning "No VPA resources found" + return + fi + + local collect_start=$(get_timestamp) + for vpa in "${VPAS[@]}"; do + vpa_name=$(echo $vpa | cut -d "/" -f2) + file="vpa_${vpa_name}.yaml" + kubectl -n d8-virtualization get $vpa -o yaml > "${vpa_dir}/${file}_$(formatted_date $(get_timestamp))" 2>/dev/null || true + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." 
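collect_vpa snapshots every VPA object in d8-virtualization as timestamped YAML. For a quicker interactive look at the same objects, a custom-columns query such as the sketch below can be used; it assumes the standard autoscaling.k8s.io VPA schema (spec.targetRef, spec.updatePolicy).

```bash
kubectl -n d8-virtualization get vpa \
  -o custom-columns=NAME:.metadata.name,TARGET:.spec.targetRef.name,MODE:.spec.updatePolicy.updateMode
```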
+ kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s + local collect_end=$(get_timestamp) + local collect_duration=$((collect_end - collect_start)) + log_info "VPA data collection completed in $(format_duration $collect_duration)" + log_duration "VPA data collection" "$collect_duration" + + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + log_info "VPA collection completed in $(format_duration $duration)" + log_success "VPA data collected" +} + +wait_vm_vd() { + local sleep_time=${1:-10} + + while true; do + local VDReady=$(kubectl -n $NAMESPACE get vd | grep "Ready" | wc -l) + local VDTotal=$(kubectl -n $NAMESPACE get vd -o name | wc -l) + + local VMReady=$(kubectl -n $NAMESPACE get vm | grep "Running" | wc -l) + local VMTotal=$(kubectl -n $NAMESPACE get vm -o name | wc -l) + + if [ $VDReady -eq $VDTotal ] && [ $VMReady -eq $VMTotal ]; then + echo "All vms and vds are ready" + echo "$(formatted_date $(get_timestamp))" + echo "" + break + fi + + echo "" + echo "Waiting for vms and vds to be ready..." + echo "VM Running: $VMReady/$VMTotal" + echo "VD Ready: $VDReady/$VDTotal" + echo "" + echo "Waiting for $sleep_time seconds..." + sleep $sleep_time + echo "" + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." + kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s +} + +wait_vm() { + local sleep_time=${1:-10} + local expected_count=$2 + local VMTotal + local VMRunning + + while true; do + VMRunning=$(kubectl -n $NAMESPACE get vm | grep -c "Running" || echo 0 ) + + if [ -n "$expected_count" ]; then + VMTotal=$expected_count + else + VMTotal=$(kubectl -n $NAMESPACE get vm -o name | wc -l) + fi + + if [ $VMRunning -eq $VMTotal ]; then + echo "All vms are ready" + echo "$(formatted_date $(get_timestamp))" + echo "" + break + fi + + echo "" + echo "Waiting for vms to be running..." + echo "VM Running: $VMRunning/$VMTotal" + echo "" + echo "Waiting for $sleep_time seconds..." + sleep $sleep_time + echo "" + + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." + kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s +} + +wait_vd() { + local sleep_time=${1:-10} + local expected_count=$2 + local VDReady + local VDTotal + + while true; do + VDReady=$(kubectl -n $NAMESPACE get vd | grep "Ready" | wc -l) + + if [ -n "$expected_count" ]; then + VDTotal=$expected_count + else + VDTotal=$(kubectl -n $NAMESPACE get vd -o name | wc -l) + fi + + if [ $VDReady -eq $VDTotal ]; then + echo "All vds are ready" + echo "$(formatted_date $(get_timestamp))" + echo "" + break + fi + + echo "" + echo "Waiting for vds to be ready..." + echo "VD ready: $VDReady/$VDTotal" + echo "" + echo "Waiting for $sleep_time seconds..." + sleep $sleep_time + echo "" + + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." 
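The wait_vm_vd/wait_vm/wait_vd loops all poll the same two counters: VDs reporting Ready and VMs reporting Running. The equivalent one-off checks are roughly the following, assuming the phase is the second printed column (the same assumption the migration helpers further down make):

```bash
kubectl -n "$NAMESPACE" get vd --no-headers | awk '$2 == "Ready"'   | wc -l
kubectl -n "$NAMESPACE" get vm --no-headers | awk '$2 == "Running"' | wc -l
```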
+  kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s
+}
+
+wait_for_resources() {
+  local resource_type=$1
+  local expected_count=$2
+  local start_time=$(get_timestamp)
+  local check_interval=5 # seconds
+
+  case $resource_type in
+    "all")
+      log_info "Waiting for VMs and VDs to be ready"
+      wait_vm_vd $check_interval
+      ;;
+    "vm")
+      log_info "Waiting for VMs to be ready"
+      wait_vm $check_interval $expected_count
+      ;;
+    "vd")
+      log_info "Waiting for VDs to be ready"
+      wait_vd $check_interval $expected_count
+      ;;
+    *)
+      log_error "Unknown resource type: $resource_type"
+      return 1
+      ;;
+  esac
+
+}
+
+start_migration_old() {
+  # Supported duration formats: 0m - infinite, 30s - 30 seconds, 1h - 1 hour, 2h30m - 2 hours and 30 minutes
+  local duration=${1:-"0m"}
+  local target=${2:-"5"}
+  local session="test-perf"
+
+  echo "Create tmux session: $session"
+  tmux -2 new-session -d -s "${session}"
+
+  tmux new-window -t "$session:1" -n "$NAMESPACE"
+  tmux split-window -h -t 0 # Pane 0 (left), Pane 1 (right)
+  tmux split-window -v -t 1 # Pane 1 (top), Pane 2 (bottom)
+
+  tmux select-pane -t 0
+  tmux send-keys "k9s -n $NAMESPACE" C-m
+  tmux resize-pane -t 1 -x 50%
+
+  echo "Start migration in $session, pane 1"
+  tmux select-pane -t 1
+  tmux send-keys "NS=$NAMESPACE TARGET=${target} DURATION=${duration} task evicter:run:migration" C-m
+  tmux resize-pane -t 1 -x 50%
+
+  tmux select-pane -t 2
+  tmux resize-pane -t 2 -x 50%
+  echo "For watching migration in $session, connect to session with command:"
+  echo "tmux -2 attach -t ${session}"
+
+  echo ""
+
+}
+
+start_migration() {
+  local duration=${1:-"0m"}
+  local target=${2:-"5"}
+  local session="test-perf"
+  local ns="${NAMESPACE:-perf}"
+
+  echo "Create tmux session: $session"
+  tmux new-session -d -s "${session}" -n "${ns}" # window named after the namespace
+
+  # Split the window
+  tmux split-window -h -t "${session}:0"    # pane 0 (left), pane 1 (right)
+  tmux split-window -v -t "${session}:0.1"  # split the right pane into .1 (top) and .2 (bottom)
+
+  # Send the commands to the target panes explicitly
+  tmux select-pane -t "${session}:0.0"
+  tmux send-keys -t "${session}:0.0" "k9s -n ${ns}" C-m
+  tmux resize-pane -t "${session}:0.1" -x 50%
+
+  echo "Start migration in $session, pane 1"
+  tmux select-pane -t "${session}:0.1"
+  tmux send-keys -t "${session}:0.1" "NS=${ns} TARGET=${target} DURATION=${duration} task evicter:run:migration" C-m
+  tmux resize-pane -t "${session}:0.1" -x 50%
+
+  tmux select-pane -t "${session}:0.2"
+  tmux resize-pane -t "${session}:0.2" -x 50%
+
+  echo "For watching migration in $session, attach with:"
+  echo "tmux -2 attach -t ${session}"
+
+  # Optional:
+  # if [ -n "${TMUX:-}" ]; then
+  #   tmux switch-client -t "${session}"   # already inside tmux: switch the client to the new session
+  # else
+  #   tmux -2 attach -t "${session}"       # outside tmux: just attach to the new session
+  # fi
+}
+
+
+stop_migration() {
+  local SESSION="test-perf"
+  tmux send-keys -t "${SESSION}:1.1" C-c || true
+  sleep 1
+  tmux -2 kill-session -t "${SESSION}" || true
+}
+
+wait_migration() {
+  local timeout=${1:-"5m"}
+  local wait_migration=$( echo "$timeout" | sed 's/m//' )
+  local start_time=$(get_timestamp)
+
+  log_info "Waiting for migration to complete"
+  log_info "Timeout: $timeout"
+
+  while true; do
+    current_time=$(get_timestamp)
+    duration=$((current_time - start_time))
+    if [ $duration -ge $(( $wait_migration*60 )) ]; then
+      log_info "Migration timeout reached, stopping migrator"
+      stop_migration
+      log_success "Migration completed"
+      break
+    fi
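start_migration leaves the evicter running in the `test-perf` tmux session, so it can be watched or interrupted with plain tmux commands; the pane address below matches the layout created above.

```bash
tmux -2 attach -t test-perf                       # k9s on the left, evicter on the right
tmux capture-pane -t test-perf:0.1 -p | tail -20  # peek at the evicter output without attaching
```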
+ log_info "Waiting for migration to complete" + log_info "Duration: $duration seconds from $(( $WAIT_MIGRATION*60 ))" + sleep 1 + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." + kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s +} + +# NEW: Wait for migration completion before proceeding +wait_migration_completion() { + local start_time=$(get_timestamp) + + log_info "Waiting for migration to complete" + log_vmop_operation "Waiting for migration to complete" + + # Wait for all vmops to complete + wait_vmops_complete + + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + log_info "Migration completion wait finished in $(format_duration $duration)" + log_vmop_operation "Migration completion wait finished in $(format_duration $duration)" +} + +remove_vmops() { + local namespace=${1:-$NAMESPACE} + + while true; do + log_info "Check if all vmops are removed" + log_vmop_operation "Checking vmops for removal" + local vmop_total=$(( $(kubectl -n $namespace get vmop | wc -l )-1 )) + local vmop_list=$(kubectl -n $namespace get vmop | grep "Completed" | awk '{print $1}') + local vmop_failed_list=$(kubectl -n $namespace get vmop | grep "Failed" | awk '{print $1}') + log_warning "VMOP failed list: $vmop_failed_list" + log_vmop_operation "VMOP failed list: $vmop_failed_list" + + vmop_list+=" $vmop_failed_list" + + log_info "VMOP total: $( if [ $vmop_total -le 0 ]; then echo "0"; else echo $vmop_total; fi )" + log_vmop_operation "VMOP total: $( if [ $vmop_total -le 0 ]; then echo "0"; else echo $vmop_total; fi )" + if [ $vmop_total -le 0 ]; then + log_success "All vmops are removed" + log_vmop_operation "All vmops are removed" + break + fi + + for vmop in $vmop_list; do + kubectl -n $namespace delete vmop $vmop --wait=false || true + log_vmop_operation "Deleted vmop: $vmop" + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." + kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s + + log_info "Wait 2 sec" + sleep 2 + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." + kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s +} + +wait_vmops() { + local sleep_time=${1:-2} + + while true; do + local VMOPInProgress=$(kubectl -n $NAMESPACE get vmop | grep "InProgress" | wc -l) + + if [ $VMOPInProgress -eq 0 ]; then + echo "All vmops are ready" + echo "$(formatted_date $(get_timestamp))" + echo "" + break + fi + + echo "" + echo "Waiting for vmops to be ready..." + echo "VMOP InProgress: $VMOPInProgress" + echo "" + echo "Waiting for $sleep_time seconds..." + sleep $sleep_time + echo "" + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." 
+ kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s +} + +wait_ptc_vmops_complete() { + local sleep_time=${1:-2} + local target_count=${2:-$MIGRATION_10_COUNT} + local start_time=$(get_timestamp) + local migrated_vms=0 + + local vms_count=( $(kubectl -n $NAMESPACE get vm --no-headers | awk '{print $1}' | tail -n $target_count) ) + + while true; do + migrated_vms=0 + + for vm in "${vms_count[@]}"; do + completed=$(kubectl get vmop -n "$NAMESPACE" -o json | \ + jq -r --arg vm "$vm" ' + .items[] | + select( + .spec.virtualMachineName == $vm and + ( + (.status.phase == "Completed") or + (any(.status.conditions[]; .type == "Completed" and .status == "True")) + ) + ) | + .metadata.name + ' | head -n1 + ) + if [ -n "$completed" ]; then + ((migrated_vms++)) + fi + done + + if [ $migrated_vms -eq $target_count ]; then + break + fi + log_info "Migration progress: $migrated_vms/$target_count" + sleep "$sleep_time" + done +} + + +# Wait for vmops to complete (including Failed status) and check VMs are Running +wait_vmops_complete() { + local sleep_time=${1:-2} + local start_time=$(get_timestamp) + + while true; do + local vmop_total=$(( $(kubectl -n $NAMESPACE get vmop | wc -l)-1 )) + local VMOPCompleted=$(kubectl -n $NAMESPACE get vmop | grep "Completed" | wc -l) + local VMOPFailed=$(kubectl -n $NAMESPACE get vmop | grep "Failed" | wc -l) + local VMOPInProgress=$(kubectl -n $NAMESPACE get vmop | grep "InProgress" | wc -l) + + if [ $vmop_total -eq -1 ]; then + vmop_total=0 + fi + + # Consider vmops complete if they are either Completed or Failed (not InProgress) + local VMOPFinished=$((VMOPCompleted + VMOPFailed)) + + if [[ "$VMOPFinished" -eq "$vmop_total" ]] && [[ "$VMOPInProgress" -eq 0 ]]; then + # Additional check: ensure all VMs are Running + local VMRunning=$(kubectl -n $NAMESPACE get vm | grep "Running" | wc -l) + local VMTotal=$(kubectl -n $NAMESPACE get vm -o name | wc -l) + + if [ $VMRunning -eq $VMTotal ]; then + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + formatted_duration=$(format_duration "$duration") + + log_info "VMOPs completed - Duration: $duration seconds" + log_info "Execution time: $formatted_duration" + log_info "Completed: $VMOPCompleted, Failed: $VMOPFailed, Total: $vmop_total" + log_info "All VMs are Running: $VMRunning/$VMTotal" + log_vmop_operation "VMOPs completed - Duration: $duration seconds" + log_vmop_operation "Completed: $VMOPCompleted, Failed: $VMOPFailed, Total: $vmop_total" + log_vmop_operation "All VMs are Running: $VMRunning/$VMTotal" + break + else + log_info "VMOPs finished but VMs not all Running yet: $VMRunning/$VMTotal" + log_vmop_operation "VMOPs finished but VMs not all Running yet: $VMRunning/$VMTotal" + fi + fi + + log_info "Waiting for vmops to be ready... Completed: $VMOPCompleted, Failed: $VMOPFailed, InProgress: $VMOPInProgress, Total: $vmop_total" + log_vmop_operation "Waiting for vmops to be ready... Completed: $VMOPCompleted, Failed: $VMOPFailed, InProgress: $VMOPInProgress, Total: $vmop_total" + sleep $sleep_time + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." 
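The jq filter used by wait_ptc_vmops_complete can also be run on its own to check a single machine; the VM name below is only a placeholder.

```bash
kubectl -n "$NAMESPACE" get vmop -o json | jq -r --arg vm "performance-vm-0" '
  .items[]
  | select(.spec.virtualMachineName == $vm
           and ((.status.phase == "Completed")
                or any(.status.conditions[]?; .type == "Completed" and .status == "True")))
  | .metadata.name'
```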
+ kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s +} + +stop_vm() { + local count=$1 + local sleep_time=${2:-5} + local start_time=$(get_timestamp) + local stopped_vm + + if [ -z "$count" ]; then + local vms=($(kubectl -n $NAMESPACE get vm | grep "Running" | awk '{print $1}')) + else + # Stop vm from the end + local vms=($(kubectl -n $NAMESPACE get vm | grep "Running" | awk '{print $1}' | tail -n $count)) + fi + + if [ ${#vms[@]} -eq 0 ]; then + log_warning "No running VMs found to stop" + echo "0" + return 0 + fi + + log_info "Stopping ${#vms[@]} VMs" + log_vm_operation "Stopping ${#vms[@]} VMs: ${vms[*]}" + for vm in "${vms[@]}"; do + log_info "Stopping VM $vm" + log_vm_operation "Stopping VM $vm" + d8 v -n $NAMESPACE stop $vm --wait=false + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." + kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s + + local total=${#vms[@]} + + # Wait for vms to stop + while true; do + stopped_vm=0 + + for vm in "${vms[@]}"; do + local status=$(kubectl -n $NAMESPACE get vm $vm -o jsonpath='{.status.phase}') + if [ "$status" == "Stopped" ]; then + (( stopped_vm+=1 )) + fi + done + + # Additional wait using kubectl wait + # log_info "Additional wait for deployment to be fully available..." + # kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s + + if [ $stopped_vm -eq $total ]; then + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + formatted_duration=$(format_duration "$duration") + + log_success "All VMs stopped - Duration: $duration seconds" + log_info "Execution time: $formatted_duration" + log_vm_operation "All VMs stopped - Duration: $duration seconds" + break + fi + + log_info "Waiting for VMs to be stopped... VM stopped: $stopped_vm/$total" + log_vm_operation "Waiting for VMs to be stopped... VM stopped: $stopped_vm/$total" + sleep $sleep_time + done + + # Additional wait using kubectl wait + # log_info "Additional wait for deployment to be fully available..." + # kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s +} + +# Properly wait for VMs to be Running +start_vm() { + local count=$1 + local sleep_time=${2:-5} + local start_time=$(get_timestamp) + + if [ -z "$count" ]; then + local vms=($(kubectl -n $NAMESPACE get vm | grep "Stopped" | awk '{print $1}')) + else + # Start vm from the end + local vms=($(kubectl -n $NAMESPACE get vm | grep "Stopped" | awk '{print $1}' | tail -n $count)) + fi + + if [ ${#vms[@]} -eq 0 ]; then + log_warning "No stopped VMs found to start" + echo "0" + return + fi + + log_info "Starting ${#vms[@]} VMs" + log_vm_operation "Starting ${#vms[@]} VMs: ${vms[*]}" + for vm in "${vms[@]}"; do + log_info "Starting VM $vm" + log_vm_operation "Starting VM $vm" + d8 v -n $NAMESPACE start $vm + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." 
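stop_vm and start_vm drive the `d8 v` subcommands in bulk and then poll .status.phase. For a single machine the same flow reduces to the commands below (the VM name is illustrative):

```bash
d8 v -n "$NAMESPACE" stop performance-vm-0 --wait=false
kubectl -n "$NAMESPACE" get vm performance-vm-0 -o jsonpath='{.status.phase}'
d8 v -n "$NAMESPACE" start performance-vm-0
```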
+ kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s + + # Store the VMs we started for monitoring + local started_vms=("${vms[@]}") + local total=${#started_vms[@]} + + while true; do + local running_vm=0 + + for vm in "${started_vms[@]}"; do + local status=$(kubectl -n $NAMESPACE get vm $vm -o jsonpath='{.status.phase}' 2>/dev/null || echo "NotFound") + if [ "$status" == "Running" ]; then + (( running_vm+=1 )) + fi + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." + kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s + + if [ $running_vm -eq $total ]; then + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + formatted_duration=$(format_duration "$duration") + + log_success "All VMs started - Duration: $duration seconds" + log_info "Execution time: $formatted_duration" + log_vm_operation "All VMs started - Duration: $duration seconds" + break + fi + + log_info "Waiting for VMs to be running... VM running: $running_vm/$total" + log_vm_operation "Waiting for VMs to be running... VM running: $running_vm/$total" + sleep $sleep_time + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." + kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s +} + +migration_percent_vms() { + local target_count=${1:-$PERCENT_RESOURCES} + local namespace=${2:-$NAMESPACE} + local start_time=$(get_timestamp) + + log_info "Starting migration of $target_count VMs" + log_info "Start time: $(formatted_date $start_time)" + log_vm_operation "Starting migration of $target_count VMs" + + local vms=( $(kubectl -n $NAMESPACE get vm --no-headers | awk '$2 == "Running" {print $1}' | tail -n $target_count) ) + + for vm in "${vms[@]}"; do + log_info "Migrating VM [$vm] via evict" + log_vm_operation "Migrating VM [$vm] via evict" + d8 v -n $NAMESPACE evict $vm + done + + wait_vmops_complete + + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + local formatted_duration=$(format_duration "$duration") + + log_info "Migration completed - End time: $(formatted_date $end_time)" + log_success "Migrated $target_count VMs in $formatted_duration" + log_vm_operation "Migration completed - Migrated $target_count VMs in $formatted_duration" +} + +migration_percent_vms_waitptc_vmops() { + local target_count=${1:-$PERCENT_RESOURCES} + local namespace=${2:-$NAMESPACE} + local start_time=$(get_timestamp) + + log_info "Starting migration of $target_count VMs" + log_info "Start time: $(formatted_date $start_time)" + log_vm_operation "Starting migration of $target_count VMs" + + local vms=( $(kubectl -n $NAMESPACE get vm --no-headers | awk '$2 == "Running" {print $1}' | tail -n $target_count) ) + + for vm in "${vms[@]}"; do + log_info "Migrating VM [$vm] via evict" + log_vm_operation "Migrating VM [$vm] via evict" + d8 v -n $NAMESPACE evict $vm + done + + wait_ptc_vmops_complete + + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + local formatted_duration=$(format_duration "$duration") + + log_info "Migration completed - End time: $(formatted_date $end_time)" + log_success "Migrated $target_count VMs in $formatted_duration" + log_vm_operation "Migration completed - Migrated $target_count VMs in $formatted_duration" +} + +undeploy_resources() { + 
local sleep_time=${1:-5} + local start_time=$(get_timestamp) + local VDTotal + local VMTotal + local VMITotal + + log_info "Undeploying all VMs and disks" + log_info "Start time: $(formatted_date $start_time)" + + task destroy:all \ + NAMESPACE=$NAMESPACE + # Wait a bit for Helm to process the deletion + sleep 5 + + while true; do + local current_time=$(get_timestamp) + + VDTotal=$(kubectl -n $NAMESPACE get vd -o name | wc -l) + VMTotal=$(kubectl -n $NAMESPACE get vm -o name | wc -l) + VMITotal=$(kubectl -n $NAMESPACE get vi -o name | wc -l) + + if [ $VDTotal -eq 0 ] && [ $VMTotal -eq 0 ] && [ $VMITotal -eq 0 ]; then + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + local formatted_duration=$(format_duration "$duration") + + log_info "All VMs and VDs destroyed - End time: $(formatted_date $end_time)" + log_success "Undeploy completed in $formatted_duration" + break + fi + + log_info "Waiting for VMs and VDs to be destroyed... VM: $VMTotal, VD: $VDTotal, VI: $VMITotal" + sleep $sleep_time + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." + kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s +} + +deploy_vms_with_disks() { + local count=$1 + local vi_type=$2 + local start_time=$(get_timestamp) + + log_info "Deploying $count VMs with disks from $vi_type" + log_info "Start time: $(formatted_date $start_time)" + + local task_start=$(get_timestamp) + task apply:all \ + COUNT=$count \ + NAMESPACE=$NAMESPACE \ + STORAGE_CLASS=$(get_default_storage_class) \ + VIRTUALDISK_TYPE=virtualDisk \ + VIRTUALIMAGE_TYPE=$vi_type + + local task_end=$(get_timestamp) + local task_duration=$((task_end - task_start)) + log_info "Task apply:all completed in $(format_duration $task_duration)" + log_duration "Task apply:all" "$task_duration" + + local wait_start=$(get_timestamp) + wait_vm_vd $SLEEP_TIME + local wait_end=$(get_timestamp) + local wait_duration=$((wait_end - wait_start)) + log_info "Wait for VMs and VDs completed in $(format_duration $wait_duration)" + log_duration "Wait for VMs and VDs" "$wait_duration" + + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + local formatted_duration=$(format_duration "$duration") + + log_info "Deployment completed - End time: $(formatted_date $end_time)" + log_info "Task execution: $(format_duration $task_duration), Wait time: $(format_duration $wait_duration)" + log_success "Deployed $count VMs with disks in $formatted_duration" +} + +# New function for batch deployment of large numbers of resources +deploy_vms_with_disks_batch() { + local total_count=$1 + local vi_type=$2 + local batch_size=${3:-$MAX_BATCH_SIZE} + local start_time=$(get_timestamp) + + log_info "Starting batch deployment of $total_count VMs with disks from $vi_type" + log_info "Batch size: $batch_size resources per batch" + log_info "Start time: $(formatted_date $start_time)" + + local deployed_count=0 + local batch_number=1 + local total_batches=$(( (total_count + batch_size - 1) / batch_size )) + + log_info "Total batches to deploy: $total_batches" + + while [ $deployed_count -lt $total_count ]; do + local remaining_count=$((total_count - deployed_count)) + local current_batch_size=$batch_size + + # Adjust batch size for the last batch if needed + if [ $remaining_count -lt $batch_size ]; then + current_batch_size=$remaining_count + fi + + log_info "=== Batch $batch_number/$total_batches ===" + 
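The batch loop below passes a cumulative COUNT to `task apply:all`, presumably because the Helm release always describes the total desired number of resources rather than a delta. A dry run of that arithmetic with illustrative numbers shows the value each batch submits:

```bash
total=250; batch=100; deployed=0; n=1
while [ "$deployed" -lt "$total" ]; do
  step=$(( total - deployed < batch ? total - deployed : batch ))
  deployed=$(( deployed + step ))
  echo "batch $n -> task apply:all COUNT=$deployed"   # 100, then 200, then 250
  n=$(( n + 1 ))
done
```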
show_deployment_progress "$deployed_count" "$total_count" "$batch_number" "$total_batches" "$start_time" + + local batch_start=$(get_timestamp) + + # Deploy current batch (COUNT should be cumulative, not absolute) + local cumulative_count=$((deployed_count + current_batch_size)) + log_info "Deploying batch $batch_number: $current_batch_size new resources (total will be: $cumulative_count)" + task apply:all \ + COUNT=$cumulative_count \ + NAMESPACE=$NAMESPACE \ + STORAGE_CLASS=$(get_default_storage_class) \ + VIRTUALDISK_TYPE=virtualDisk \ + VIRTUALIMAGE_TYPE=$vi_type + + # Wait for current batch to be ready + wait_vm_vd $SLEEP_TIME + + local batch_end=$(get_timestamp) + local batch_duration=$((batch_end - batch_start)) + deployed_count=$((deployed_count + current_batch_size)) + + log_success "Batch $batch_number completed in $(format_duration $batch_duration)" + log_info "Total deployed so far: $deployed_count/$total_count" + + # Add delay between batches to avoid overwhelming the system + if [ $batch_number -lt $total_batches ]; then + log_info "Waiting 30 seconds before next batch..." + sleep 30 + fi + + ((batch_number++)) + done + + local end_time=$(get_timestamp) + local total_duration=$((end_time - start_time)) + local formatted_duration=$(format_duration "$total_duration") + + log_success "Batch deployment completed: $deployed_count VMs with disks in $formatted_duration" + log_info "Average time per resource: $(( total_duration / deployed_count )) seconds" + + echo "$total_duration" +} + +# Function to check if batch deployment should be used +should_use_batch_deployment() { + local count=$1 + # Don't use batch deployment if batch size is too small (less than 10% of total) + local min_batch_size=$((count / 10)) + if [ $min_batch_size -lt 1 ]; then + min_batch_size=1 + fi + + # Warn if batch size is too small + if [ $MAX_BATCH_SIZE -lt $min_batch_size ]; then + log_warning "Batch size ($MAX_BATCH_SIZE) is too small for $count resources" + log_warning "Minimum recommended batch size: $min_batch_size" + log_warning "Using regular deployment instead of batch deployment" + return 1 # false + fi + + if [ "$BATCH_DEPLOYMENT_ENABLED" = "true" ] || [ $count -gt $MAX_BATCH_SIZE ]; then + return 0 # true + else + return 1 # false + fi +} + +# Function to show deployment progress +show_deployment_progress() { + local current_count=$1 + local total_count=$2 + local batch_number=$3 + local total_batches=$4 + local start_time=$5 + + local current_time=$(get_timestamp) + local elapsed_time=$((current_time - start_time)) + local progress_percent=$(( (current_count * 100) / total_count )) + + # Calculate estimated time remaining + local estimated_total_time=0 + local estimated_remaining_time=0 + if [ $current_count -gt 0 ]; then + estimated_total_time=$(( (elapsed_time * total_count) / current_count )) + estimated_remaining_time=$((estimated_total_time - elapsed_time)) + fi + + log_info "Progress: $current_count/$total_count ($progress_percent%)" + log_info "Batch: $batch_number/$total_batches" + log_info "Elapsed: $(format_duration $elapsed_time)" + if [ $estimated_remaining_time -gt 0 ]; then + log_info "Estimated remaining: $(format_duration $estimated_remaining_time)" + fi +} + +# Function to check cluster resources before large deployment +check_cluster_resources() { + local target_count=$1 + local batch_size=${2:-$MAX_BATCH_SIZE} + + log_info "Checking cluster resources for deployment of $target_count resources" + + # Check available nodes + local node_count=$(kubectl get nodes --no-headers | wc 
-l) + log_info "Available nodes: $node_count" + + # Check available storage + local storage_class=$(get_default_storage_class) + log_info "Default storage class: $storage_class" + + # Check namespace resources + local existing_vms=$(kubectl -n $NAMESPACE get vm --no-headers 2>/dev/null | wc -l || echo "0") + local existing_vds=$(kubectl -n $NAMESPACE get vd --no-headers 2>/dev/null | wc -l || echo "0") + + log_info "Existing VMs in namespace: $existing_vms" + log_info "Existing VDs in namespace: $existing_vds" + + # Calculate total resources needed + local total_resources_needed=$((target_count * 2)) # VMs + VDs + local total_existing=$((existing_vms + existing_vds)) + local total_after_deployment=$((total_existing + total_resources_needed)) + + log_info "Total resources after deployment: $total_after_deployment" + + # Estimate time for deployment + local estimated_batches=$(( (target_count + batch_size - 1) / batch_size )) + local estimated_time_per_batch=300 # 5 minutes per batch (conservative estimate) + local estimated_total_time=$((estimated_batches * estimated_time_per_batch)) + + log_info "Estimated batches: $estimated_batches" + log_info "Estimated total time: $(format_duration $estimated_total_time)" + + # Warning for very large deployments + if [ $target_count -gt 10000 ]; then + log_warning "Large deployment detected: $target_count resources" + log_warning "This may take several hours to complete" + log_warning "Consider running in background or with screen/tmux" + fi + + return 0 +} + +# Universal deployment function that automatically chooses between regular and batch deployment +deploy_vms_with_disks_smart() { + local count=$1 + local vi_type=$2 + local batch_size=${3:-$MAX_BATCH_SIZE} + + log_info "Deployment decision for $count resources:" + log_info " - Batch size: $batch_size" + log_info " - Batch deployment enabled: $BATCH_DEPLOYMENT_ENABLED" + + if should_use_batch_deployment "$count"; then + log_info "Using batch deployment for $count resources (batch size: $batch_size)" + deploy_vms_with_disks_batch "$count" "$vi_type" "$batch_size" + else + log_info "Using regular deployment for $count resources" + deploy_vms_with_disks "$count" "$vi_type" + fi +} + +# Universal deployment function for disks only +deploy_disks_only_smart() { + local count=$1 + local vi_type=$2 + local batch_size=${3:-$MAX_BATCH_SIZE} + + log_info "Disk deployment decision for $count resources:" + log_info " - Batch size: $batch_size" + log_info " - Batch deployment enabled: $BATCH_DEPLOYMENT_ENABLED" + + if should_use_batch_deployment "$count"; then + log_info "Using batch deployment for $count disks (batch size: $batch_size)" + deploy_disks_only_batch "$count" "$vi_type" "$batch_size" + else + log_info "Using regular deployment for $count disks" + deploy_disks_only "$count" "$vi_type" + fi +} + +# Universal deployment function for VMs only +deploy_vms_only_smart() { + local count=$1 + local namespace=${2:-$NAMESPACE} + local batch_size=${3:-$MAX_BATCH_SIZE} + + log_info "VM deployment decision for $count resources:" + log_info " - Batch size: $batch_size" + log_info " - Batch deployment enabled: $BATCH_DEPLOYMENT_ENABLED" + + if should_use_batch_deployment "$count"; then + log_info "Using batch deployment for $count VMs (batch size: $batch_size)" + deploy_vms_only_batch "$count" "$namespace" "$batch_size" + else + log_info "Using regular deployment for $count VMs" + deploy_vms_only "$count" "$namespace" + fi +} + +deploy_disks_only() { + local count=$1 + local vi_type=$2 + local 
start_time=$(get_timestamp) + + log_info "Deploying $count disks from $vi_type" + log_info "Start time: $(formatted_date $start_time)" + + task apply:disks \ + COUNT=$count \ + NAMESPACE=$NAMESPACE \ + STORAGE_CLASS=$(get_default_storage_class) \ + VIRTUALDISK_TYPE=virtualDisk \ + VIRTUALIMAGE_TYPE=$vi_type + + wait_vd $SLEEP_TIME + + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + local formatted_duration=$(format_duration "$duration") + + log_info "Disk deployment completed - End time: $(formatted_date $end_time)" + log_success "Deployed $count disks in $formatted_duration" + echo "$duration" +} + +# New function for batch deployment of disks only +deploy_disks_only_batch() { + local total_count=$1 + local vi_type=$2 + local batch_size=${3:-$MAX_BATCH_SIZE} + local start_time=$(get_timestamp) + + log_info "Starting batch deployment of $total_count disks from $vi_type" + log_info "Batch size: $batch_size resources per batch" + log_info "Start time: $(formatted_date $start_time)" + + local deployed_count=0 + local batch_number=1 + local total_batches=$(( (total_count + batch_size - 1) / batch_size )) + + log_info "Total batches to deploy: $total_batches" + + while [ $deployed_count -lt $total_count ]; do + local remaining_count=$((total_count - deployed_count)) + local current_batch_size=$batch_size + + # Adjust batch size for the last batch if needed + if [ $remaining_count -lt $batch_size ]; then + current_batch_size=$remaining_count + fi + + log_info "=== Batch $batch_number/$total_batches ===" + show_deployment_progress "$deployed_count" "$total_count" "$batch_number" "$total_batches" "$start_time" + + local batch_start=$(get_timestamp) + + # Deploy current batch of disks (COUNT should be cumulative, not absolute) + local cumulative_count=$((deployed_count + current_batch_size)) + log_info "Deploying disk batch $batch_number: $current_batch_size new disks (total will be: $cumulative_count)" + task apply:disks \ + COUNT=$cumulative_count \ + NAMESPACE=$NAMESPACE \ + STORAGE_CLASS=$(get_default_storage_class) \ + VIRTUALDISK_TYPE=virtualDisk \ + VIRTUALIMAGE_TYPE=$vi_type + + # Wait for current batch to be ready + wait_vd $SLEEP_TIME + + local batch_end=$(get_timestamp) + local batch_duration=$((batch_end - batch_start)) + deployed_count=$((deployed_count + current_batch_size)) + + log_success "Batch $batch_number completed in $(format_duration $batch_duration)" + log_info "Total deployed so far: $deployed_count/$total_count" + + # Add delay between batches to avoid overwhelming the system + if [ $batch_number -lt $total_batches ]; then + log_info "Waiting 30 seconds before next batch..." 
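The *_smart wrappers above choose between the plain and batched paths based on MAX_BATCH_SIZE and BATCH_DEPLOYMENT_ENABLED, both of which are set elsewhere in this script. A usage sketch with illustrative values ($VI_TYPE is a placeholder for the image type):

```bash
# Assuming MAX_BATCH_SIZE=100 and BATCH_DEPLOYMENT_ENABLED=false:
deploy_disks_only_smart 50  "$VI_TYPE"   # 50  <= 100 -> deploy_disks_only
deploy_disks_only_smart 500 "$VI_TYPE"   # 500 >  100 -> deploy_disks_only_batch, 5 batches of 100
```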
+ sleep 30 + fi + + ((batch_number++)) + done + + local end_time=$(get_timestamp) + local total_duration=$((end_time - start_time)) + local formatted_duration=$(format_duration "$total_duration") + + log_success "Batch disk deployment completed: $deployed_count disks in $formatted_duration" + log_info "Average time per disk: $(( total_duration / deployed_count )) seconds" + + echo "$total_duration" +} + +deploy_vms_only() { + local count=$1 + local namespace=${2:-$NAMESPACE} + local start_time=$(get_timestamp) + + log_info "Deploying $count VMs (disks already exist)" + log_info "Start time: $(formatted_date $start_time)" + + local task_start=$(get_timestamp) + task apply:vms \ + COUNT=$count \ + NAMESPACE=$NAMESPACE + local task_end=$(get_timestamp) + local task_duration=$((task_end - task_start)) + log_info "Task apply:vms completed in $(format_duration $task_duration)" + log_duration "Task apply:vms" "$task_duration" + + local wait_start=$(get_timestamp) + wait_vm $SLEEP_TIME + local wait_end=$(get_timestamp) + local wait_duration=$((wait_end - wait_start)) + log_info "Wait for VMs completed in $(format_duration $wait_duration)" + log_duration "Wait for VMs" "$wait_duration" + + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + local formatted_duration=$(format_duration "$duration") + + log_info "VM deployment completed - End time: $(formatted_date $end_time)" + log_info "Task execution: $(format_duration $task_duration), Wait time: $(format_duration $wait_duration)" + log_success "Deployed $count VMs in $formatted_duration" + echo "$duration" +} + +# New function for batch deployment of VMs only +deploy_vms_only_batch() { + local total_count=$1 + local namespace=${2:-$NAMESPACE} + local batch_size=${3:-$MAX_BATCH_SIZE} + local start_time=$(get_timestamp) + + log_info "Starting batch deployment of $total_count VMs (disks already exist)" + log_info "Batch size: $batch_size resources per batch" + log_info "Start time: $(formatted_date $start_time)" + + local deployed_count=0 + local batch_number=1 + local total_batches=$(( (total_count + batch_size - 1) / batch_size )) + + log_info "Total batches to deploy: $total_batches" + + while [ $deployed_count -lt $total_count ]; do + local remaining_count=$((total_count - deployed_count)) + local current_batch_size=$batch_size + + # Adjust batch size for the last batch if needed + if [ $remaining_count -lt $batch_size ]; then + current_batch_size=$remaining_count + fi + + log_info "=== Batch $batch_number/$total_batches ===" + show_deployment_progress "$deployed_count" "$total_count" "$batch_number" "$total_batches" "$start_time" + + local batch_start=$(get_timestamp) + + # Deploy current batch of VMs (COUNT should be cumulative, not absolute) + local cumulative_count=$((deployed_count + current_batch_size)) + log_info "Deploying VM batch $batch_number: $current_batch_size new VMs (total will be: $cumulative_count)" + task apply:vms \ + COUNT=$cumulative_count \ + NAMESPACE=$NAMESPACE + + # Wait for current batch to be ready + wait_vm $SLEEP_TIME + + local batch_end=$(get_timestamp) + local batch_duration=$((batch_end - batch_start)) + deployed_count=$((deployed_count + current_batch_size)) + + log_success "Batch $batch_number completed in $(format_duration $batch_duration)" + log_info "Total deployed so far: $deployed_count/$total_count" + + # Add delay between batches to avoid overwhelming the system + if [ $batch_number -lt $total_batches ]; then + log_info "Waiting 30 seconds before next batch..." 
+ sleep 30 + fi + + ((batch_number++)) + done + + local end_time=$(get_timestamp) + local total_duration=$((end_time - start_time)) + local formatted_duration=$(format_duration "$total_duration") + + log_success "Batch VM deployment completed: $deployed_count VMs in $formatted_duration" + log_info "Average time per VM: $(( total_duration / deployed_count )) seconds" + + echo "$total_duration" +} + +# FIXED: Properly undeploy VMs from the end +undeploy_vms_only() { + local count=${1:-0} + local namespace=${2:-$NAMESPACE} + local start_time=$(get_timestamp) + + log_info "Undeploying $count VMs from the end (disks will remain)" + log_info "Start time: $(formatted_date $start_time)" + + # Get list of VMs and select the last 'count' ones + local vms=($(kubectl -n $NAMESPACE get vm -o name | tail -n $count)) + + if [ ${#vms[@]} -eq 0 ]; then + log_warning "No VMs found to undeploy" + echo "0" + return 0 + fi + + log_info "Undeploying ${#vms[@]} VMs: ${vms[*]}" + log_vm_operation "Undeploying ${#vms[@]} VMs from the end: ${vms[*]}" + + local delete_start=$(get_timestamp) + for vm in "${vms[@]}"; do + log_info "Deleting VM $vm" + log_vm_operation "Deleting VM $vm" + kubectl -n $NAMESPACE delete $vm --wait=false || true + done + + local delete_end=$(get_timestamp) + local delete_duration=$((delete_end - delete_start)) + log_info "VM deletion commands completed in $(format_duration $delete_duration)" + log_vm_operation "VM deletion commands completed in $(format_duration $delete_duration)" + + local wait_start=$(get_timestamp) + while true; do + local remaining_vms=0 + local current_time=$(get_timestamp) + + # Check if any VMs still exist + for vm in "${vms[@]}"; do + if kubectl -n $NAMESPACE get $vm >/dev/null 2>&1; then + log_info "VM $vm still exists, attempting deletion" + kubectl -n $NAMESPACE delete $vm --wait=false || true + fi + done + + for vm in "${vms[@]}"; do + # Check if VM exists and is not in Terminating state + local vm_status=$(kubectl -n $NAMESPACE get $vm -o jsonpath='{.status.phase}' 2>/dev/null || echo "NotFound") + if [ "$vm_status" != "NotFound" ] && [ "$vm_status" != "Terminating" ]; then + ((remaining_vms++)) + log_info "VM $vm still exists with status: $vm_status" + fi + done + + if [ $remaining_vms -eq 0 ]; then + local wait_end=$(get_timestamp) + local wait_duration=$((wait_end - wait_start)) + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + local formatted_duration=$(format_duration "$duration") + + log_info "Wait for VMs undeploy completed in $(format_duration $wait_duration)" + log_info "All $count VMs undeployed - End time: $(formatted_date $end_time)" + log_info "Delete commands: $(format_duration $delete_duration), Wait time: $(format_duration $wait_duration)" + log_success "Undeployed $count VMs in $formatted_duration" + log_vm_operation "Undeployed $count VMs in $formatted_duration" + break + fi + + log_info "Waiting for VMs to be undeployed... Remaining: $remaining_vms/$count" + log_vm_operation "Waiting for VMs to be undeployed... Remaining: $remaining_vms/$count" + sleep $SLEEP_TIME + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." 
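undeploy_vms_only removes the newest VMs while leaving their disks in place. Stripped of logging and the wait loop, its core is roughly the following (a count of 2 is illustrative):

```bash
kubectl -n "$NAMESPACE" get vm -o name | tail -n 2 \
  | xargs -r -I{} kubectl -n "$NAMESPACE" delete {} --wait=false
```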
+ kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s + + # echo "$duration" +} + +stop_virtualization_controller() { + local start_time=$(get_timestamp) + + log_info "Stopping virtualization controller" + # Get original replicas count before stopping + ORIGINAL_CONTROLLER_REPLICAS=$(kubectl -n d8-virtualization get deployment virtualization-controller -o jsonpath="{.spec.replicas}" 2>/dev/null || echo "1") + log_info "Original controller replicas: $ORIGINAL_CONTROLLER_REPLICAS" + log_info "Start time: $(formatted_date $start_time)" + + local scale_start=$(get_timestamp) + kubectl -n d8-virtualization scale --replicas 0 deployment virtualization-controller + local scale_end=$(get_timestamp) + local scale_duration=$((scale_end - scale_start)) + log_info "Scale down command completed in $(format_duration $scale_duration)" + + local wait_start=$(get_timestamp) + while true; do + local count_pods=$(kubectl -n d8-virtualization get pods | grep virtualization-controller | wc -l) + + if [ $count_pods -eq 0 ]; then + local wait_end=$(get_timestamp) + local wait_duration=$((wait_end - wait_start)) + local end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + local formatted_duration=$(format_duration "$duration") + + log_info "Wait for controller stop completed in $(format_duration $wait_duration)" + log_info "Controller stopped - End time: $(formatted_date $end_time)" + log_info "Scale command: $(format_duration $scale_duration), Wait time: $(format_duration $wait_duration)" + log_success "Controller stopped in $formatted_duration" + break + fi + + log_info "Waiting for virtualization-controller to be stopped... Pods: $count_pods" + sleep 2 + done + + # Additional wait using kubectl wait + log_info "Additional wait for deployment to be fully available..." + kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s +} + +start_virtualization_controller() { + local start_time=$(get_timestamp) + + log_info "Starting Virtualization-controller" + log_info "Restoring controller to original replicas: ${ORIGINAL_CONTROLLER_REPLICAS:-1}" + log_info "Start time: $(formatted_date $start_time)" + + local scale_start=$(get_timestamp) + kubectl -n d8-virtualization scale --replicas ${ORIGINAL_CONTROLLER_REPLICAS:-1} deployment virtualization-controller + local scale_end=$(get_timestamp) + local scale_duration=$((scale_end - scale_start)) + log_info "Scale up command completed in $(format_duration $scale_duration)" + + log_info "Wait for deployment for Virtualization-controller to be fully available..." 
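stop_virtualization_controller and start_virtualization_controller are essentially a scale-to-zero and scale-back wrapper with timing around it. Without the bookkeeping, the sequence comes down to:

```bash
kubectl -n d8-virtualization scale deployment virtualization-controller --replicas=0
# ... work that must happen while the controller is down ...
kubectl -n d8-virtualization scale deployment virtualization-controller --replicas="${ORIGINAL_CONTROLLER_REPLICAS:-1}"
kubectl -n d8-virtualization wait --for=condition=Available=True deployment/virtualization-controller --timeout=300s
```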
+  kubectl wait --for=condition=Available=True deployment/virtualization-controller -n d8-virtualization --timeout=300s
+  local end_time=$(get_timestamp)
+  local duration=$((end_time - start_time))
+  local formatted_duration=$(format_duration "$duration")
+
+  log_info "Virtualization-controller started - End time: $(formatted_date $end_time)"
+  log_success "Virtualization-controller started in $formatted_duration"
+
+}
+
+create_vm_while_controller_stopped() {
+  local vi_type=$1
+  local start_time=$(get_timestamp)
+
+  log_info "Creating 1 VM and disk while controller is stopped using task apply:all"
+  log_info "Start time: $(formatted_date $start_time)"
+  log_vm_operation "Creating 1 VM and disk while controller is stopped using task apply:all"
+
+  # Deploy MAIN_COUNT_RESOURCES + 1 VMs using task apply:all
+  log_info "Deploying 1 new VM"
+
+  local task_start=$(get_timestamp)
+  task apply:all \
+    COUNT=$((MAIN_COUNT_RESOURCES + 1)) \
+    NAMESPACE=$NAMESPACE \
+    STORAGE_CLASS=$(get_default_storage_class) \
+    VIRTUALDISK_TYPE=virtualDisk \
+    VIRTUALIMAGE_TYPE=$vi_type || true
+  local task_end=$(get_timestamp)
+  local task_duration=$((task_end - task_start))
+  log_info "Task apply:all completed in $(format_duration $task_duration)"
+
+}
+
+wait_for_new_vm_after_controller_start() {
+  # Wait for the last VM and VD to be ready
+  log_info "Waiting for the last VM and VD to be ready"
+  local wait_start=$(get_timestamp)
+
+  # Get the name of the last VM and VD
+  local last_vm=$(kubectl -n $NAMESPACE get vm --no-headers | tail -n 1 | awk '{print $1}')
+  local last_vd=$(kubectl -n $NAMESPACE get vd --no-headers | tail -n 1 | awk '{print $1}')
+
+  log_info "Waiting for last VM: $last_vm and last VD: $last_vd"
+
+  # Wait for the last VM to be Running and the last VD to be Ready
+  while true; do
+    local vm_status=$(kubectl -n $NAMESPACE get vm $last_vm -o jsonpath="{.status.phase}" 2>/dev/null || echo "NotFound")
+    local vd_status=$(kubectl -n $NAMESPACE get vd $last_vd -o jsonpath="{.status.phase}" 2>/dev/null || echo "NotFound")
+
+    if [ "$vm_status" == "Running" ] && [ "$vd_status" == "Ready" ]; then
+      local wait_end=$(get_timestamp)
+      local wait_duration=$((wait_end - wait_start))
+      log_info "Last VM and VD are ready in $(format_duration $wait_duration)"
+      break
+    fi
+
+    log_info "Waiting for last VM ($last_vm): $vm_status, last VD ($last_vd): $vd_status"
+    sleep 5
+  done
+}
+
+drain_node() {
+  local start_time=$(get_timestamp)
+
+  log_info "Start draining node"
+  log_info "Start time: $(formatted_date $start_time)"
+
+  local task_start=$(get_timestamp)
+
+  local KUBECONFIG_MERGE=$(kubectl config view --merge --flatten | base64 -w 0)
+  KUBECONFIG_BASE64=$KUBECONFIG_MERGE task shatal:run
+
+  local task_end=$(get_timestamp)
+  local task_duration=$((task_end - task_start))
+  local end_time=$(get_timestamp)
+  local formatted_duration=$(format_duration "$task_duration")
+
+  log_info "Drain node completed - End time: $(formatted_date $end_time)"
+  log_info "Drain node task execution: $(format_duration $task_duration)"
+  log_success "Drain node completed in $formatted_duration"
+  echo "$task_duration"
+}
+
+scale_deckhouse() {
+  local replicas=${1}
+  ORIGINAL_DECKHOUSE_CONTROLLER_REPLICAS=$(kubectl -n d8-system get deployment deckhouse -o jsonpath="{.spec.replicas}" 2>/dev/null || echo "1")
+  log_info "Deckhouse controller replicas: $ORIGINAL_DECKHOUSE_CONTROLLER_REPLICAS"
+  log_info "Scaling Deckhouse controller to $replicas"
+  kubectl -n d8-system scale --replicas $replicas deployment deckhouse
+  log_success "Deckhouse
controller scaled to $replicas" +} + +migration_config() { + # default values + # { + # "bandwidthPerMigration": "640Mi", + # "completionTimeoutPerGiB": 800, + # "parallelMigrationsPerCluster": 8, # count all nodes + # "parallelOutboundMigrationsPerNode": 1, + # "progressTimeout": 150 + # } + local amountNodes=$(kubectl get nodes --no-headers -o name | wc -l) + + local bandwidthPerMigration=${1:-"640Mi"} + local completionTimeoutPerGiB=${2:-"800"} + local parallelMigrationsPerCluster=${3:-$amountNodes} + local parallelOutboundMigrationsPerNode=${4:-"1"} + local progressTimeout=${5:-"150"} + + echo "====== configure patch ======" + echo "bandwidthPerMigration: $bandwidthPerMigration" + echo "completionTimeoutPerGiB: $completionTimeoutPerGiB" + echo "parallelMigrationsPerCluster: $parallelMigrationsPerCluster" + echo "parallelOutboundMigrationsPerNode: $parallelOutboundMigrationsPerNode" + echo "progressTimeout: $progressTimeout" + + patch_json=$( + jq -n \ + --arg bpm "$bandwidthPerMigration" \ + --argjson ct $completionTimeoutPerGiB \ + --argjson pmc $parallelMigrationsPerCluster \ + --argjson pmon $parallelOutboundMigrationsPerNode \ + --argjson pt $progressTimeout \ + '{ + spec: { + configuration: { + migrations: { + bandwidthPerMigration: $bpm, + completionTimeoutPerGiB: $ct, + parallelMigrationsPerCluster: $pmc, + parallelOutboundMigrationsPerNode: $pmon, + progressTimeout: $pt + } + } + } + }' + ) + log_info "Checking restricted access policy" + + if kubectl get validatingadmissionpolicies.admissionregistration.k8s.io virtualization-restricted-access-policy >/dev/null 2>&1; then + log_info "Deleting restricted access policy" + kubectl delete validatingadmissionpolicies.admissionregistration.k8s.io virtualization-restricted-access-policy + else + log_info "No restricted access policy" + fi + + sleep 1 + + log_info "Patching kubevirt config" + + kubectl -n d8-virtualization patch \ + --as=system:sudouser \ + internalvirtualizationkubevirts.internal.virtualization.deckhouse.io config \ + --type=merge -p "$patch_json" + + log_success "Migration settings applyed" +} + + +# === Test configuration === +# Default values (can be overridden by command line arguments) +SCENARIO_NUMBER=${SCENARIO_NUMBER:-1} +MAIN_COUNT_RESOURCES=${MAIN_COUNT_RESOURCES:-2} # vms and vds (reduced for testing) +PERCENT_VMS=10 # 10% of total resources +MIGRATION_DURATION="1m" +MIGRATION_PERCENTAGE_10=10 # 10% for migration +MIGRATION_PERCENTAGE_5=10 # 5% for migration +WAIT_MIGRATION=$( echo "$MIGRATION_DURATION" | sed 's/m//' ) + + +# Calculate resources for migration percentages +MIGRATION_5_COUNT=$(( $MAIN_COUNT_RESOURCES * $MIGRATION_PERCENTAGE_5 / 100 )) +MIGRATION_10_COUNT=$(( $MAIN_COUNT_RESOURCES * $MIGRATION_PERCENTAGE_10 / 100 )) +if [ $MIGRATION_5_COUNT -eq 0 ]; then + MIGRATION_5_COUNT=1 +fi +if [ $MIGRATION_10_COUNT -eq 0 ]; then + MIGRATION_10_COUNT=1 +fi + +# Function to run a single scenario +GLOBAL_WAIT_TIME_STEP=5 +run_scenario() { + local scenario_name=$1 + local vi_type=$2 + + log_info "=== Starting scenario: $scenario_name with $vi_type ===" + + # Initialize logging and create report directory + init_logging "$scenario_name" "$vi_type" "$MAIN_COUNT_RESOURCES" + local scenario_dir=$(create_report_dir "$scenario_name" "$vi_type" "$MAIN_COUNT_RESOURCES") + + # Handle bootstrap-only mode + if [ "$BOOTSTRAP_ONLY" = "true" ]; then + log_info "=== BOOTSTRAP ONLY MODE ===" + log_info "Deploying $MAIN_COUNT_RESOURCES resources without running tests" + log_info "DEBUG: Starting bootstrap-only mode" + + # 
Skip cleanup if continuing after bootstrap or in bootstrap-only mode + if [ "$CONTINUE_AFTER_BOOTSTRAP" = "false" ] && [ "$BOOTSTRAP_ONLY" = "false" ]; then + # Step 1: Clean up any existing resources + if [ "$SKIP_CLEANUP" = "false" ]; then + log_info "Step 1: Cleaning up existing resources" + log_step_start "Step 1: Cleanup up existing resources" + local cleanup_start=$(get_timestamp) + stop_migration + remove_vmops + undeploy_resources + local cleanup_end=$(get_timestamp) + local cleanup_duration=$((cleanup_end - cleanup_start)) + log_info "Cleanup completed in $(format_duration $cleanup_duration)" + log_step_end "Step 1: Cleanup up existing resources" "$cleanup_duration" + fi + else + log_info "Step 1: Skipping cleanup (--continue or --bootstrap-only mode, preserving existing resources)" + fi + + # Step 2: Check cluster resources before deployment + log_step_start "Step 2: Check cluster resources" + check_cluster_resources $MAIN_COUNT_RESOURCES + log_step_end "Step 2: Check cluster resources" "0" + + # Step 3: Deploy resources only + log_step_start "Step 3: Deploy VMs [$MAIN_COUNT_RESOURCES]" + local deploy_start=$(get_timestamp) + deploy_vms_only_smart $MAIN_COUNT_RESOURCES $vi_type + local deploy_end=$(get_timestamp) + local deploy_duration=$((deploy_end - deploy_start)) + log_info "VM [$MAIN_COUNT_RESOURCES] deploy completed in $(format_duration $deploy_duration)" + log_step_end "Step 3: End VM Deployment [$MAIN_COUNT_RESOURCES]" "$deploy_duration" + + log_success "Bootstrap completed: $MAIN_COUNT_RESOURCES resources deployed" + log_info "Use --continue to run tests on deployed resources" + log_info "Resources are preserved and ready for testing" + log_info "DEBUG: Exiting run_scenario with return 0 (bootstrap-only mode)" + return 0 + fi + + # Handle continue mode (skip deployment, assume resources already exist) + if [ "$CONTINUE_AFTER_BOOTSTRAP" = "true" ]; then + log_info "=== CONTINUE MODE ===" + log_info "Continuing tests on existing resources (--continue enabled)" + log_info "Skipping deployment, assuming $MAIN_COUNT_RESOURCES resources already exist" + + # Check if resources exist + local existing_vms=$(kubectl -n $NAMESPACE get vm -o name | wc -l) + local existing_vds=$(kubectl -n $NAMESPACE get vd -o name | wc -l) + + if [ $existing_vms -eq 0 ] && [ $existing_vds -eq 0 ]; then + log_warning "No existing resources found. Please run bootstrap first:" + log_warning "./tests.sh --bootstrap-only -c $MAIN_COUNT_RESOURCES" + exit 1 + fi + + log_info "Found existing resources: $existing_vms VMs, $existing_vds VDs" + log_info "Continuing with tests..." 
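The two modes handled here are meant to be chained: a bootstrap-only run deploys the fleet, and a later continue run executes the test steps against it. Going by the hint the warning above prints, the invocations would look roughly like this (flag spellings taken from those log messages; the actual argument parser lives elsewhere in this script):

```bash
./tests.sh --bootstrap-only -c 100   # deploy 100 VMs/VDs and stop
./tests.sh --continue -c 100         # run the scenario steps against the existing resources
```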
+ else + # Step 1: Clean up any existing resources (skip in bootstrap-only mode) + if [ "$BOOTSTRAP_ONLY" = "false" ]; then + if [ "$SKIP_CLEANUP" = "false" ]; then + log_info "Step 1: Cleaning up existing resources" + log_step_start "Step 1: Cleanup up existing resources" + local cleanup_start=$(get_timestamp) + stop_migration + remove_vmops + undeploy_resources + local cleanup_end=$(get_timestamp) + local cleanup_duration=$((cleanup_end - cleanup_start)) + log_info "Cleanup completed in $(format_duration $cleanup_duration)" + log_step_end "Step 1: Cleanup up existing resources" "$cleanup_duration" + fi + else + log_info "Step 1: Skipping cleanup (--bootstrap-only mode, preserving existing resources)" + fi + fi + + local start_time=$(get_timestamp) + log_info "== Scenario started at $(formatted_date $start_time) ==" + + # Step 2: Check cluster resources before deployment + log_step_start "Step 2: Check cluster resources" + check_cluster_resources $MAIN_COUNT_RESOURCES + log_step_end "Step 2: Check cluster resources" "0" + + # Step 3: Main test sequence (skip deployment in continue mode) + if [ "$CONTINUE_AFTER_BOOTSTRAP" = "true" ]; then + log_info "Step 3: Skipping deployment (--continue mode, resources already exist)" + log_step_start "Step 3: Deploy VMs [$MAIN_COUNT_RESOURCES]" + log_info "VM [$MAIN_COUNT_RESOURCES] deployment skipped (continue mode)" + log_step_end "Step 3: End VM Deployment [$MAIN_COUNT_RESOURCES]" "0" + else + log_step_start "Step 3: Deploy VMs [$MAIN_COUNT_RESOURCES]" + local deploy_start=$(get_timestamp) + deploy_vms_only_smart $MAIN_COUNT_RESOURCES $vi_type + local deploy_end=$(get_timestamp) + local deploy_duration=$((deploy_end - deploy_start)) + log_info "VM [$MAIN_COUNT_RESOURCES] deploy completed in $(format_duration $deploy_duration)" + log_step_end "Step 3: End VM Deployment [$MAIN_COUNT_RESOURCES]" "$deploy_duration" + fi + + # Step 4: Statistics Collection + log_step_start "Step 4: Start Statistics Collection" + local stats_start=$(get_timestamp) + gather_all_statistics "$scenario_dir/statistics" + collect_vpa "$scenario_dir" + local stats_end=$(get_timestamp) + local stats_duration=$((stats_end - stats_start)) + log_info "Statistics collection completed in $(format_duration $stats_duration)" + log_step_end "Step 4: End Statistics Collection" "$stats_duration" + + log_info "Waiting $GLOBAL_WAIT_TIME_STEP seconds before stopping VMs" + sleep $GLOBAL_WAIT_TIME_STEP + + # Step 5: VM Stop + log_info "Step 5: Stopping all VMs [$MAIN_COUNT_RESOURCES]" + log_step_start "Step 5: VM Stop" + local stop_start=$(get_timestamp) + stop_vm + local stop_end=$(get_timestamp) + local stop_duration=$((stop_end - stop_start)) + log_info "VM stop completed in $(format_duration $stop_duration)" + log_step_end "Step 5: End Stopping all VMs [$MAIN_COUNT_RESOURCES]" "$stop_duration" + + log_info "Waiting $GLOBAL_WAIT_TIME_STEP seconds before starting VMs" + sleep $GLOBAL_WAIT_TIME_STEP + + # Step 6: VM Start + log_info "Step 6: Starting all VMs [$MAIN_COUNT_RESOURCES]" + log_step_start "Step 6: VM Start [$MAIN_COUNT_RESOURCES]" + local start_vm_start=$(get_timestamp) + start_vm + local start_vm_end=$(get_timestamp) + local start_vm_duration=$((start_vm_end - start_vm_start)) + log_info "VM start completed in $(format_duration $start_vm_duration)" + log_step_end "Step 6: End Starting all VMs [$MAIN_COUNT_RESOURCES]" "$start_vm_duration" + + log_info "Waiting $GLOBAL_WAIT_TIME_STEP seconds before Starting migration test ${MIGRATION_PERCENTAGE_5}% (${MIGRATION_5_COUNT} VMs)" + 
sleep $GLOBAL_WAIT_TIME_STEP + + # Step 7: Start 5% migration in background + local migration_duration_time="0m" + log_info "Step 7: Starting migration test ${MIGRATION_PERCENTAGE_5}% (${MIGRATION_5_COUNT} VMs)" + log_step_start "Step 7: Migration Setup" + local migration_start=$(get_timestamp) + start_migration $migration_duration_time $MIGRATION_PERCENTAGE_5 + local migration_end=$(get_timestamp) + local migration_duration=$((migration_end - migration_start)) + log_info "Migration test ${MIGRATION_PERCENTAGE_5}% VMs setup completed in $(format_duration $migration_duration)" + log_step_end "Step 7: Migration Setup ${MIGRATION_PERCENTAGE_5}% (${MIGRATION_5_COUNT} VMs) Started" "$migration_duration" + + log_info "Waiting 10 seconds before Undeploying 10% VMs [$PERCENT_RESOURCES] (keeping disks)" + sleep 10 + + # Step 8: Start deploy undeploy vms + log_step_start "Step 8: Undeploy VMs 10% [$PERCENT_RESOURCES]" + local undeploy_pct_start=$(get_timestamp) + deploy_vms_only $((MAIN_COUNT_RESOURCES-PERCENT_RESOURCES)) $vi_type + local undeploy_pct_end=$(get_timestamp) + local undeploy_pct_duration=$((undeploy_pct_end - undeploy_pct_start)) + log_info "Undeploy VMs 10% [$((MAIN_COUNT_RESOURCES-PERCENT_RESOURCES))] completed in $(format_duration $undeploy_pct_duration)" + log_step_end "Step 8: Undeploy VMs 10% [$PERCENT_RESOURCES]" "$undeploy_pct_duration" + + log_info "Waiting 10 seconds before Deploying 10% VMs [$PERCENT_RESOURCES]" + sleep 10 + + # Step 9: Deploy VMs 10% + log_step_start "Step 9: Deploy VMs 10% [$PERCENT_RESOURCES]" + local deploy_pct_start=$(get_timestamp) + deploy_vms_only $MAIN_COUNT_RESOURCES $vi_type + local deploy_pct_end=$(get_timestamp) + local deploy_pct_duration=$((deploy_pct_end - deploy_pct_start)) + log_info "Deploy VMs 10% [$PERCENT_RESOURCES] completed in $(format_duration $deploy_pct_duration)" + log_step_end "Step 9: Deploy VMs 10% [$PERCENT_RESOURCES]" "$deploy_pct_duration" + + # Step 10: Statistics Collection Deploy 10% + log_step_start "Step 10: Start Statistics Collection Deploy 10% [$PERCENT_RESOURCES]" + local stats_start=$(get_timestamp) + gather_all_statistics "$scenario_dir/statistics" + collect_vpa "$scenario_dir" + local stats_end=$(get_timestamp) + local stats_duration=$((stats_end - stats_start)) + log_info "Statistics collection completed in $(format_duration $stats_duration)" + log_step_end "Step 10: End Statistics Collection Deploy 10% [$PERCENT_RESOURCES]" "$stats_duration" + # ==== + + # Step 11: VM Undeploy 10% VMs + log_info "Step 11: Undeploying 10% VMs [$PERCENT_RESOURCES] (keeping disks)" + log_step_start "Step 11: VM Undeploy 10% VMs [$PERCENT_RESOURCES] (keeping disks)" + local undeploy_start=$(get_timestamp) + undeploy_vms_only $PERCENT_RESOURCES + local undeploy_end=$(get_timestamp) + local undeploy_duration=$((undeploy_end - undeploy_start)) + log_info "VM Undeploy 10% VMs [$PERCENT_RESOURCES] completed in $(format_duration $undeploy_duration)" + log_step_end "Step 11: VM Undeploy 10% VMs [$PERCENT_RESOURCES] (keeping disks)" "$undeploy_duration" + + log_info "Waiting $GLOBAL_WAIT_TIME_STEP seconds before Deploying 10% VMs [$PERCENT_RESOURCES] (keeping disks)" + sleep $GLOBAL_WAIT_TIME_STEP + + # Step 12: Deploy 10% VMs and gather statistics + log_info "Step 12: Deploying 10% VMs ([$PERCENT_RESOURCES] VMs) (keeping disks)" + log_step_start "Step 12: Deploying 10% VMs [$PERCENT_RESOURCES] (keeping disks)" + local deploy_remaining_start=$(get_timestamp) + deploy_vms_only $MAIN_COUNT_RESOURCES + local 
deploy_remaining_end=$(get_timestamp) + local deploy_remaining_duration=$((deploy_remaining_end - deploy_remaining_start)) + log_info "10% VMs deployment completed in $(format_duration $deploy_remaining_duration)" + log_step_end "Step 12: End Deploying 10% VMs [$PERCENT_RESOURCES] (keeping disks)" "$deploy_remaining_duration" + + log_info "Waiting $GLOBAL_WAIT_TIME_STEP seconds before gather VM Statistics: Deploying 10% VMs ([$PERCENT_RESOURCES] VMs)" + sleep $GLOBAL_WAIT_TIME_STEP + + # Step 13: Gather statistics for 10% VMs + log_step_start "Step 13: VM Statistics: Deploying 10% VMs ([$PERCENT_RESOURCES] VMs)" + local vm_stats_start=$(get_timestamp) + gather_specific_vm_statistics "$scenario_dir/statistics" "$NAMESPACE" "$PERCENT_RESOURCES" + local vm_stats_end=$(get_timestamp) + local vm_stats_duration=$((vm_stats_end - vm_stats_start)) + log_info "VM statistics collection completed in $(format_duration $vm_stats_duration)" + log_step_end "Step 13: End VM Statistics: Deploying 10% VMs ([$PERCENT_RESOURCES] VMs)" "$vm_stats_duration" + + # Step 14: VM operations test - stop/start 10% VMs + log_info "Step 14: Testing VM stop/start operations for 10% VMs" + log_step_start "Step 14: VM Operations" + local vm_ops_start=$(get_timestamp) + + log_step_start "VM Operations: Stopping VMs [$PERCENT_RESOURCES]" + local vm_ops_stop_start=$(get_timestamp) + stop_vm $PERCENT_RESOURCES + local vm_ops_stop_end=$(get_timestamp) + local vm_ops_stop_duration=$((vm_ops_stop_end - vm_ops_stop_start)) + log_step_end "VM Operations: Stopping VMs [$PERCENT_RESOURCES]" "$vm_ops_stop_duration" + + sleep $GLOBAL_WAIT_TIME_STEP + + log_step_start "VM Operations: Start VMs [$PERCENT_RESOURCES]" + local vm_ops_start_vm_start=$(get_timestamp) + start_vm $PERCENT_RESOURCES + local vm_ops_start_vm_end=$(get_timestamp) + local vm_ops_start_vm_duration=$((vm_ops_start_vm_end - vm_ops_start_vm_start)) + log_step_end "VM Operations: Start VMs [$PERCENT_RESOURCES]" "$vm_ops_start_vm_duration" + + local vm_ops_end=$(get_timestamp) + local vm_ops_duration=$((vm_ops_end - vm_ops_start)) + log_info "VM operations test completed in $(format_duration $vm_ops_duration)" + log_step_end "Step 14: VM Operations: Stop/Start VMs [$PERCENT_RESOURCES]" "$vm_ops_duration" + + # Step 15: Stop migration and wait for completion + log_step_start "Step 15: Stop Migration ${MIGRATION_PERCENTAGE_5}% (${MIGRATION_5_COUNT} VMs)" + local cleanup_ops_start=$(get_timestamp) + stop_migration + wait_migration_completion + remove_vmops + local cleanup_ops_end=$(get_timestamp) + local cleanup_ops_duration=$((cleanup_ops_end - cleanup_ops_start)) + log_info "Migration stop and cleanup completed in $(format_duration $cleanup_ops_duration)" + log_step_end "Step 15: Stop Migration ${MIGRATION_PERCENTAGE_5}% (${MIGRATION_5_COUNT} VMs)" "$cleanup_ops_duration" + + log_info "Waiting $GLOBAL_WAIT_TIME_STEP seconds before Migration Percentage ${MIGRATION_10_COUNT} VMs (10%)" + sleep $GLOBAL_WAIT_TIME_STEP + + # Step 16: Migration percentage test - Migrate 10% VMs + log_info "Step 16: Testing migration of ${MIGRATION_10_COUNT} VMs (10%)" + log_step_start "Step 16: Migration Percentage ${MIGRATION_10_COUNT} VMs (10%)" + local migration_percent_start=$(get_timestamp) + migration_percent_vms $MIGRATION_10_COUNT + local migration_percent_end=$(get_timestamp) + local migration_percent_duration=$((migration_percent_end - migration_percent_start)) + log_info "Migration percentage test completed in $(format_duration $migration_percent_duration)" + log_step_end "Step 16: 
End Migration Percentage ${MIGRATION_10_COUNT} VMs (10%)" "$migration_percent_duration" + + remove_vmops + + log_info "Waiting $GLOBAL_WAIT_TIME_STEP seconds" + sleep $GLOBAL_WAIT_TIME_STEP + + #======== + # Step 17: Migration config + # bandwidthPerMigration=${1:-"640Mi"} + # completionTimeoutPerGiB=${2:-"800"} + # parallelMigrationsPerCluster=${3:-$amountNodes} + # parallelOutboundMigrationsPerNode=${4:-"1"} + # progressTimeout=${5:-"150"} + + log_info "Step 17: Set deckhouse controller replicas to [0]" + scale_deckhouse 0 + local amountNodes=$(kubectl get nodes --no-headers -o name | wc -l) + sleep 5 + + local migration_parallel_2x=$(( $amountNodes*2 )) + local migration_parallel_2x_start=$(get_timestamp) + log_info "Step 17: Testing migration with parallelMigrationsPerCluster [$migration_parallel_2x (2x)]" + log_step_start "Step 17: Testing migration with parallelMigrationsPerCluster [$migration_parallel_2x (2x)]" + migration_config "640Mi" "800" "$migration_parallel_2x" "1" "150" + migration_percent_vms $MIGRATION_10_COUNT + local migration_parallel_2x_end=$(get_timestamp) + local migration_parallel_2x_duration=$((migration_parallel_2x_end - migration_parallel_2x_start)) + log_step_end "Step 17: Testing migration with parallelMigrationsPerCluster [$migration_parallel_2x (2x)]" "$migration_parallel_2x_duration" + + log_info "Waiting 2 seconds before cleaning up vmops" + sleep 2 + remove_vmops + + log_info "Waiting $GLOBAL_WAIT_TIME_STEP seconds" + sleep $GLOBAL_WAIT_TIME_STEP + + # Step 18: Migration parallel 4x + local migration_parallel_4x=$(( $amountNodes*4 )) + local migration_parallel_4x_start=$(get_timestamp) + log_info "Step 18: Testing migration with parallelMigrationsPerCluster [$migration_parallel_4x] (4x)" + log_step_start "Step 18: Testing migration with parallelMigrationsPerCluster [$migration_parallel_4x] (4x)" + migration_config "640Mi" "800" "$migration_parallel_4x" "1" "150" + migration_percent_vms $MIGRATION_10_COUNT + local migration_parallel_4x_end=$(get_timestamp) + local migration_parallel_4x_duration=$((migration_parallel_4x_end - migration_parallel_4x_start)) + log_step_end "Step 18: Testing migration with parallelMigrationsPerCluster [$migration_parallel_4x] (4x)" "$migration_parallel_4x_duration" + + log_info "Waiting 2 seconds before cleaning up vmops" + sleep 2 + remove_vmops + + log_info "Waiting $GLOBAL_WAIT_TIME_STEP seconds" + sleep $GLOBAL_WAIT_TIME_STEP + + # Step 19: Migration parallel 8x + local migration_parallel_8x=$(( $amountNodes*8 )) + local migration_parallel_8x_start=$(get_timestamp) + log_info "Step 19: Testing migration with parallelMigrationsPerCluster [$migration_parallel_8x] (8x)" + log_step_start "Step 19: Testing migration with parallelMigrationsPerCluster [$migration_parallel_8x] (8x)" + migration_config "640Mi" "800" "$migration_parallel_8x" "1" "150" + migration_percent_vms $MIGRATION_10_COUNT + local migration_parallel_8x_end=$(get_timestamp) + local migration_parallel_8x_duration=$((migration_parallel_8x_end - migration_parallel_8x_start)) + log_step_end "Step 19: Testing migration with parallelMigrationsPerCluster [$migration_parallel_8x] (8x)" "$migration_parallel_8x_duration" + + log_info "Waiting 2 seconds before cleaning up vmops" + sleep 2 + remove_vmops + + log_info "Reverting migration configuration back to original" + migration_config + log_info "Restoring original deckhouse controller replicas to [$ORIGINAL_DECHOUSE_CONTROLLER_REPLICAS]" + scale_deckhouse $ORIGINAL_DECHOUSE_CONTROLLER_REPLICAS + + log_info "Waiting $GLOBAL_WAIT_TIME_STEP
seconds" + sleep $GLOBAL_WAIT_TIME_STEP + #======== + + # Step 20: Controller restart test + log_info "Step 20: Testing controller restart with 1 VM creation" + log_step_start "Step 20: Controller Restart" + local controller_start=$(get_timestamp) + + # Stop controller first + stop_virtualization_controller + + # Create 1 VM and disk while controller is stopped + log_info "Creating 1 VM and disk while controller is stopped [$((MAIN_COUNT_RESOURCES + 1)) VMs total]" + local vm_creation_start=$(get_timestamp) + create_vm_while_controller_stopped $vi_type + local vm_creation_end=$(get_timestamp) + local vm_creation_duration=$((vm_creation_end - vm_creation_start)) + log_info "VM creation while controller stopped completed in $(format_duration $vm_creation_duration)" + + # Start controller and measure time for VM to become ready + log_info "Starting controller and waiting for VM to become ready" + local controller_start_time=$(get_timestamp) + start_virtualization_controller + create_vm_while_controller_stopped $vi_type + wait_for_new_vm_after_controller_start + local controller_end_time=$(get_timestamp) + local controller_duration=$((controller_end_time - controller_start)) + local vm_ready_duration=$((controller_end_time - controller_start_time)) + + log_info "Controller restart test completed in $(format_duration $controller_duration)" + log_info "VM became ready after controller start in $(format_duration $vm_ready_duration)" + log_step_end "Step 20: Controller Restart" "$controller_duration" + + log_info "Waiting $GLOBAL_WAIT_TIME_STEP seconds" + sleep $GLOBAL_WAIT_TIME_STEP + + # Step 21: Final Statistics + log_step_start "Step 21: Final Statistics" + local final_stats_start=$(get_timestamp) + gather_all_statistics "$scenario_dir/statistics" + collect_vpa "$scenario_dir" + local final_stats_end=$(get_timestamp) + local final_stats_duration=$((final_stats_end - final_stats_start)) + log_info "Final statistics collection completed in $(format_duration $final_stats_duration)" + log_step_end "Step 21: Final Statistics" "$final_stats_duration" + + log_info "Waiting 30 seconds before draining the node" + sleep 30 + + # Step 22: Drain node + log_step_start "Step 22: Drain node with parallelOutboundMigrationsPerNode 50" + migration_config "640Mi" "800" "50" "50" "150" + local drain_node_start=$(get_timestamp) + drain_node + local drain_stats_end=$(get_timestamp) + local drain_stats_duration=$((drain_stats_end - drain_node_start)) + log_info "Drain node completed in $(format_duration $drain_stats_duration)" + log_step_end "Step 22: Drain node with parallelOutboundMigrationsPerNode 50" "$drain_stats_duration" + + # Skip final cleanup in bootstrap-only mode or when keep-resources is enabled + if [ "$BOOTSTRAP_ONLY" = "false" ] && [ "$KEEP_RESOURCES" = "false" ]; then + log_info "Waiting 30 seconds before cleanup" + sleep 30 + + # Step 23: Final Cleanup + log_step_start "Step 23: Final Cleanup" + local final_cleanup_start=$(get_timestamp) + undeploy_resources + local final_cleanup_end=$(get_timestamp) + local final_cleanup_duration=$((final_cleanup_end - final_cleanup_start)) + log_info "Final cleanup completed in $(format_duration $final_cleanup_duration)" + log_step_end "Step 23: Final Cleanup" "$final_cleanup_duration" + else + if [ "$BOOTSTRAP_ONLY" = "true" ]; then + log_info "Skipping final cleanup (--bootstrap-only mode, resources preserved)" + elif [ "$KEEP_RESOURCES" = "true" ]; then + log_info "Skipping final cleanup (--keep-resources mode, resources preserved)" + fi + fi + + local
end_time=$(get_timestamp) + local duration=$((end_time - start_time)) + local formatted_duration=$(format_duration "$duration") + + log_success "Scenario $scenario_name completed in $formatted_duration" + log_info "Scenario ended at $(formatted_date $end_time)" + + # Create summary report + create_summary_report "$scenario_name" "$vi_type" "$scenario_dir" \ + "$start_time" "$end_time" "$duration" \ + "$cleanup_duration" "$deploy_duration" "$stats_duration" \ + "$stop_duration" "$start_vm_duration" "$undeploy_duration" \ + "$deploy_remaining_duration" "$vm_stats_duration" "$vm_ops_duration" \ + "$vm_ops_stop_duration" "$vm_ops_start_vm_duration" "$migration_duration" \ + "$cleanup_ops_duration" "$migration_percent_duration" "$controller_duration" \ + "$final_stats_duration" "$drain_stats_duration" "$final_cleanup_duration" \ + "$migration_parallel_2x_duration" "$migration_parallel_4x_duration" "$migration_parallel_8x_duration" + + # Summary of all step durations + log_info "=== Scenario $scenario_name Duration Summary ===" + log_duration "Step 1: Cleanup" "$cleanup_duration" + log_duration "Step 2: Check cluster resources" "0" + log_duration "Step 3: VM Deployment" "$deploy_duration" + log_duration "Step 4: Statistics Collection" "$stats_duration" + log_duration "Step 5: VM Stop" "$stop_duration" + log_duration "Step 6: VM Start" "$start_vm_duration" + log_duration "Step 7: Migration Setup" "$migration_duration" + log_duration "Step 8: Undeploy VMs 10%" "$undeploy_pct_duration" + log_duration "Step 9: Deploy VMs 10%" "$deploy_pct_duration" + log_duration "Step 10: Statistics Collection Deploy 10%" "$vm_stats_duration" + log_duration "Step 11: VM Undeploy 10% VMs" "$undeploy_duration" + log_duration "Step 12: Deploying 10% VMs" "$deploy_remaining_duration" + log_duration "Step 13: VM Statistics" "$vm_stats_duration" + log_duration "Step 14: VM Operations" "$vm_ops_duration" + log_duration "Step 14: VM Operations: Stopping VMs" "$vm_ops_stop_duration" + log_duration "Step 14: VM Operations: Start VMs" "$vm_ops_start_vm_duration" + log_duration "Step 15: Migration Cleanup" "$cleanup_ops_duration" + log_duration "Step 16: Migration Percentage" "$migration_percent_duration" + log_duration "Step 17: Migration parallelMigrationsPerCluster 2x nodes" "$migration_parallel_2x_duration" + log_duration "Step 18: Migration parallelMigrationsPerCluster 4x nodes" "$migration_parallel_4x_duration" + log_duration "Step 19: Migration parallelMigrationsPerCluster 8x nodes" "$migration_parallel_8x_duration" + log_duration "Step 20: Controller Restart" "$controller_duration" + log_duration "Step 21: Final Statistics" "$final_stats_duration" + log_duration "Step 22: Drain node with parallelOutboundMigrationsPerNode 50" "$drain_stats_duration" + log_duration "Step 23: Final Cleanup" "$final_cleanup_duration" + log_duration "Total Scenario Duration" "$duration" + log_info "=== End Duration Summary ===" +} + +# Function to prepare for tests +prepare_for_tests() { + log_info "Preparing for tests" + log_info "Operating System: $OS_TYPE" + + # Clean reports if requested + if [ "${CLEAN_REPORTS:-false}" = "true" ]; then + clean_all_reports + fi + + # remove_report_dir + # stop_migration + # remove_vmops + # undeploy_resources +} + +# Parse command line arguments +parse_arguments "$@" + +# Recalculate resources after parsing command line arguments +PERCENT_RESOURCES=$(( $MAIN_COUNT_RESOURCES * $PERCENT_VMS / 100 )) +if [ $PERCENT_RESOURCES -eq 0 ]; then + PERCENT_RESOURCES=1 +fi + +# Calculate resources for migration
percentages +MIGRATION_5_COUNT=$(( $MAIN_COUNT_RESOURCES * $MIGRATION_PERCENTAGE_5 / 100 )) +MIGRATION_10_COUNT=$(( $MAIN_COUNT_RESOURCES * $MIGRATION_PERCENTAGE_10 / 100 )) +if [ $MIGRATION_5_COUNT -eq 0 ]; then + MIGRATION_5_COUNT=1 +fi +if [ $MIGRATION_10_COUNT -eq 0 ]; then + MIGRATION_10_COUNT=1 +fi +# Display configuration +log_info "=== Performance Test Configuration ===" +log_info "Scenario Number: $SCENARIO_NUMBER" +log_info "Resource Count: $MAIN_COUNT_RESOURCES" +log_info "Percent Resources (10%): $PERCENT_RESOURCES" +log_info "Migration 5% Count: $MIGRATION_5_COUNT" +log_info "Migration 10% Count: $MIGRATION_10_COUNT" +log_info "========================================" + +# Main execution +prepare_for_tests + +# Run selected scenario +case $SCENARIO_NUMBER in + 1) + VI_TYPE="persistentVolumeClaim" + run_scenario "scenario_1" "$VI_TYPE" + scenario_exit_code=$? + if [ $scenario_exit_code -eq 0 ]; then + if [ "$BOOTSTRAP_ONLY" = "true" ]; then + log_success "Bootstrap completed successfully" + exit 0 + else + log_success "Scenario 1 (persistentVolumeClaim) completed successfully" + fi + else + log_error "Scenario 1 failed with exit code: $scenario_exit_code" + exit $scenario_exit_code + fi + ;; + 2) + VI_TYPE="containerRegistry" + run_scenario "scenario_2" "$VI_TYPE" + scenario_exit_code=$? + if [ $scenario_exit_code -eq 0 ]; then + if [ "$BOOTSTRAP_ONLY" = "true" ]; then + log_success "Bootstrap completed successfully" + exit 0 + else + log_success "Scenario 2 (containerRegistry) completed successfully" + fi + else + log_error "Scenario 2 failed with exit code: $scenario_exit_code" + exit $scenario_exit_code + fi + ;; + *) + log_error "Invalid scenario number: $SCENARIO_NUMBER. Use 1 or 2." + exit 1 + ;; +esac + +# Handle resource cleanup based on --keep-resources option +if [ "$KEEP_RESOURCES" = "true" ]; then + log_info "=== KEEPING RESOURCES ===" + log_info "Resources will be kept after tests (--keep-resources enabled)" + log_success "All scenarios completed successfully - resources preserved" +else + log_info "=== CLEANING UP RESOURCES ===" + undeploy_resources + log_success "All scenarios completed successfully - resources cleaned up" +fi diff --git a/tests/performance/tests_refactored.sh b/tests/performance/tests_refactored.sh new file mode 100755 index 0000000000..efa1ea7ee9 --- /dev/null +++ b/tests/performance/tests_refactored.sh @@ -0,0 +1,820 @@ +#!/usr/bin/env bash + +set -eEo pipefail +# set -x + +# Performance testing script for Kubernetes Virtual Machines - Refactored Version +# This script provides both full scenario execution and individual step execution capabilities + +# Source all library modules +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "$SCRIPT_DIR/lib/common.sh" +source "$SCRIPT_DIR/lib/vm_operations.sh" +source "$SCRIPT_DIR/lib/vm_operations_batch.sh" +source "$SCRIPT_DIR/lib/migration.sh" +source "$SCRIPT_DIR/lib/statistics.sh" +source "$SCRIPT_DIR/lib/controller.sh" +source "$SCRIPT_DIR/lib/reporting.sh" +source "$SCRIPT_DIR/lib/scenarios.sh" + +# Parse command line arguments +parse_arguments() { + while [[ $# -gt 0 ]]; do + case $1 in + -s|--scenario) + SCENARIO_NUMBER="$2" + shift 2 + ;; + -c|--count) + MAIN_COUNT_RESOURCES="$2" + shift 2 + ;; + --batch-size) + MAX_BATCH_SIZE="$2" + shift 2 + ;; + --enable-batch) + BATCH_DEPLOYMENT_ENABLED=true + shift + ;; + --bootstrap-only) + BOOTSTRAP_ONLY=true + shift + ;; + --continue) + CONTINUE_AFTER_BOOTSTRAP=true + shift + ;; + --keep-resources) + KEEP_RESOURCES=true + shift + ;; + 
--clean-reports) + CLEAN_REPORTS=true + shift + ;; + --step) + INDIVIDUAL_STEP="$2" + shift 2 + ;; + --from-step) + FROM_STEP="$2" + shift 2 + ;; + --list-steps) + list_available_steps + exit 0 + ;; + --scenario-dir) + SCENARIO_DIR="$2" + shift 2 + ;; + --vi-type) + VI_TYPE="$2" + shift 2 + ;; + --no-pre-cleanup) + NO_PRE_CLEANUP=true + shift + ;; + --no-post-cleanup) + NO_POST_CLEANUP=true + shift + ;; + -h|--help) + show_help + exit 0 + ;; + *) + echo "Unknown option: $1" + show_help + exit 1 + ;; + esac + done +} + +show_help() { + cat << EOF +Usage: $0 [OPTIONS] + +Performance testing script for Kubernetes Virtual Machines + +OPTIONS: + -s, --scenario NUMBER Scenario number to run (1 or 2, default: 1) + -c, --count NUMBER Number of resources to create (default: 2) + --batch-size NUMBER Maximum resources per batch (default: 1200) + --enable-batch Force batch deployment mode + --bootstrap-only Only deploy resources, skip tests + --continue Continue tests after bootstrap (use with --bootstrap-only) + --keep-resources Keep resources after tests (don't cleanup) + --step STEP_NAME Run a specific step only + --from-step STEP_NAME Run all steps starting from STEP_NAME + --list-steps List all available steps + --scenario-dir DIR Directory for scenario data (required for individual steps) + --vi-type TYPE Virtual image type (required for some steps) + --clean-reports Clean all report directories before running + --no-pre-cleanup Do not cleanup resources before running + --no-post-cleanup Do not cleanup resources after running + -h, --help Show this help message + +EXAMPLES: + # Full scenario execution (original behavior) + $0 # Run scenario 1 with 2 resources (default) + $0 -s 1 -c 4 # Run scenario 1 with 4 resources + $0 -s 2 -c 10 # Run scenario 2 with 10 resources + $0 -c 15000 --batch-size 1200 # Deploy 15000 resources in batches of 1200 + + # Individual step execution (new feature) + $0 --list-steps # List available steps + $0 --step cleanup --scenario-dir /path/to/scenario --vi-type persistentVolumeClaim + $0 --step vm-deployment --scenario-dir /path/to/scenario --vi-type persistentVolumeClaim + $0 --step statistics-collection --scenario-dir /path/to/scenario + $0 --step vm-operations --scenario-dir /path/to/scenario + $0 --step vm-undeploy-deploy --scenario-dir /path/to/scenario + $0 --step vm-operations-test --scenario-dir /path/to/scenario + $0 --step migration-tests --scenario-dir /path/to/scenario + $0 --step migration-parallel-2x --scenario-dir /path/to/scenario + $0 --step migration-parallel-4x --scenario-dir /path/to/scenario + $0 --step migration-parallel-8x --scenario-dir /path/to/scenario + $0 --step controller-restart --scenario-dir /path/to/scenario --vi-type persistentVolumeClaim + $0 --step drain-node --scenario-dir /path/to/scenario + $0 --step final-operations --scenario-dir /path/to/scenario + + # Run from a step + $0 --from-step vm-operations --scenario-dir /path/to/scenario --vi-type persistentVolumeClaim + + # Skip cleanup before/after + $0 --no-pre-cleanup --no-post-cleanup + + # Deployment control options + $0 --bootstrap-only -c 1000 # Only deploy 1000 resources, skip tests + $0 --continue -c 1000 # Continue tests after bootstrap + $0 --keep-resources -c 50 # Keep resources after tests (don't cleanup) + +SCENARIOS: + 1 - persistentVolumeClaim (default) + 2 - containerRegistry (currently disabled) + +BATCH DEPLOYMENT: + For large deployments (>1200 resources), the script automatically uses batch deployment. 
+ Each batch deploys up to 1200 resources with 30-second delays between batches. + Use --batch-size to customize batch size and --enable-batch to force batch mode. + +AVAILABLE STEPS: + 1. cleanup - Clean up existing resources + 2. vm-deployment - Deploy VMs with disks + 3. statistics-collection - Gather initial statistics + 4. vm-operations - Stop and start all VMs + 5. vm-undeploy-deploy - Undeploy and redeploy 10% VMs + 6. vm-operations-test - Test stop/start operations on 10% VMs + 7. migration-tests - Run migration tests (5% and 10%) + 8. migration-parallel-2x - Migrate with parallelMigrationsPerCluster at 2x nodes + 9. migration-parallel-4x - Migrate with parallelMigrationsPerCluster at 4x nodes + 10. migration-parallel-8x - Migrate with parallelMigrationsPerCluster at 8x nodes + 11. controller-restart - Test controller restart with VM creation + 12. drain-node - Run drain node workload + 13. final-operations - Final statistics and optional cleanup + +EOF +} + +list_available_steps() { + cat << EOF +Available test steps: + +1. cleanup - Clean up existing resources +2. vm-deployment - Deploy VMs with disks +3. statistics-collection - Gather initial statistics +4. vm-operations - Stop and start all VMs +5. vm-undeploy-deploy - Undeploy and redeploy 10% VMs +6. vm-operations-test - Test stop/start operations on 10% VMs +7. migration-tests - Run migration tests (5% and 10%) +8. migration-parallel-2x - Migrate with parallelMigrationsPerCluster at 2x nodes +9. migration-parallel-4x - Migrate with parallelMigrationsPerCluster at 4x nodes +10. migration-parallel-8x - Migrate with parallelMigrationsPerCluster at 8x nodes +11. controller-restart - Test controller restart with VM creation +12. drain-node - Run drain node workload +13. final-operations - Final statistics and optional cleanup + +Usage: $0 --step STEP_NAME --scenario-dir DIR [--vi-type TYPE] +EOF +} + +# Helper function to get step number +get_step_number() { + local step_name="$1" + local step_number=1 + for step in "${ALL_STEPS[@]}"; do + if [ "$step" = "$step_name" ]; then + echo "$step_number" + return + fi + step_number=$((step_number + 1)) + done + echo "0" +} + +# Individual step execution functions +run_step_cleanup() { + local scenario_dir="$1" + local vi_type="$2" + local step_number=$(get_step_number "cleanup") + + log_info "=== Running Step $step_number: cleanup ===" + init_logging "step_cleanup" "$vi_type" "$MAIN_COUNT_RESOURCES" + + log_step_start "Cleanup existing resources" + local cleanup_start=$(get_timestamp) + stop_migration + remove_vmops + undeploy_resources + local cleanup_end=$(get_timestamp) + local cleanup_duration=$((cleanup_end - cleanup_start)) + log_step_end "Cleanup existing resources" "$cleanup_duration" + + log_success "Cleanup step completed" +} + +run_step_vm_deployment() { + local scenario_dir="$1" + local vi_type="$2" + local step_number=$(get_step_number "vm-deployment") + + log_info "=== Running Step $step_number: vm-deployment ===" + init_logging "step_vm-deployment" "$vi_type" "$MAIN_COUNT_RESOURCES" + + # Check cluster resources before deployment + log_step_start "Check cluster resources" + check_cluster_resources $MAIN_COUNT_RESOURCES + log_step_end "Check cluster resources" "0" + + log_step_start "Deploy VMs [$MAIN_COUNT_RESOURCES]" + local deploy_start=$(get_timestamp) + deploy_vms_with_disks_smart $MAIN_COUNT_RESOURCES $vi_type + local deploy_end=$(get_timestamp) + local deploy_duration=$((deploy_end - deploy_start)) + log_step_end "Deploy VMs [$MAIN_COUNT_RESOURCES]" "$deploy_duration"
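run_step_vm_deployment above is what the new --step flag dispatches to for deployments; a usage sketch based on the EXAMPLES section of the help text (the scenario directory path and the count are placeholders):

```bash
# Inspect the available steps, then run only the deployment step of the refactored runner
./tests_refactored.sh --list-steps
./tests_refactored.sh --step vm-deployment \
    --scenario-dir ./report/scenario_1 \
    --vi-type persistentVolumeClaim \
    -c 100
```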
+ + log_success "VM deployment step completed" +} + +run_step_statistics_collection() { + local scenario_dir="$1" + local vi_type="$2" + local step_number=$(get_step_number "statistics-collection") + + log_info "=== Running Step $step_number: statistics-collection ===" + init_logging "step_statistics-collection" "$vi_type" "$MAIN_COUNT_RESOURCES" + + log_step_start "Statistics Collection" + local stats_start=$(get_timestamp) + gather_all_statistics "$scenario_dir/statistics" + collect_vpa "$scenario_dir" + local stats_end=$(get_timestamp) + local stats_duration=$((stats_end - stats_start)) + log_step_end "Statistics Collection" "$stats_duration" + + log_success "Statistics collection step completed" +} + +run_step_vm_operations() { + local scenario_dir="$1" + local vi_type="$2" + local step_number=$(get_step_number "vm-operations") + + log_info "=== Running Step $step_number: vm-operations ===" + init_logging "step_vm-operations" "$vi_type" "$MAIN_COUNT_RESOURCES" + + log_info "Stopping all VMs [$MAIN_COUNT_RESOURCES]" + log_step_start "VM Stop" + local stop_start=$(get_timestamp) + stop_vm + local stop_end=$(get_timestamp) + local stop_duration=$((stop_end - stop_start)) + log_step_end "VM Stop" "$stop_duration" + + log_info "Waiting 10 seconds before starting VMs" + sleep 10 + + log_info "Starting all VMs [$MAIN_COUNT_RESOURCES]" + log_step_start "VM Start" + local start_vm_start=$(get_timestamp) + start_vm + local start_vm_end=$(get_timestamp) + local start_vm_duration=$((start_vm_end - start_vm_start)) + log_step_end "VM Start" "$start_vm_duration" + + log_success "VM operations step completed" +} + +run_step_vm_undeploy_deploy() { + local scenario_dir="$1" + local vi_type="$2" + local step_number=$(get_step_number "vm-undeploy-deploy") + + log_info "=== Running Step $step_number: vm-undeploy-deploy ===" + init_logging "step_vm-undeploy-deploy" "$vi_type" "$MAIN_COUNT_RESOURCES" + + log_info "Undeploying 10% VMs [$PERCENT_RESOURCES] (keeping disks)" + log_step_start "VM Undeploy 10% VMs [$PERCENT_RESOURCES]" + local undeploy_start=$(get_timestamp) + undeploy_vms_only $PERCENT_RESOURCES + local undeploy_end=$(get_timestamp) + local undeploy_duration=$((undeploy_end - undeploy_start)) + log_step_end "VM Undeploy 10% VMs [$PERCENT_RESOURCES]" "$undeploy_duration" + + log_info "Deploying 10% VMs ([$PERCENT_RESOURCES] VMs)" + log_step_start "Deploying 10% VMs [$PERCENT_RESOURCES]" + local deploy_remaining_start=$(get_timestamp) + deploy_vms_only_smart $MAIN_COUNT_RESOURCES + local deploy_remaining_end=$(get_timestamp) + local deploy_remaining_duration=$((deploy_remaining_end - deploy_remaining_start)) + log_step_end "Deploying 10% VMs [$PERCENT_RESOURCES]" "$deploy_remaining_duration" + + log_success "VM undeploy/deploy step completed" +} + +run_step_vm_operations_test() { + local scenario_dir="$1" + local vi_type="$2" + local step_number=$(get_step_number "vm-operations-test") + + log_info "=== Running Step $step_number: vm-operations-test ===" + init_logging "step_vm-operations-test" "$vi_type" "$MAIN_COUNT_RESOURCES" + + log_info "Testing VM stop/start operations for 10% VMs" + log_step_start "VM Operations Test" + local vm_ops_start=$(get_timestamp) + + log_step_start "VM Operations: Stopping VMs [$PERCENT_RESOURCES]" + local vm_ops_stop_start=$(get_timestamp) + stop_vm $PERCENT_RESOURCES + local vm_ops_stop_end=$(get_timestamp) + local vm_ops_stop_duration=$((vm_ops_stop_end - vm_ops_stop_start)) + log_step_end "VM Operations: Stopping VMs [$PERCENT_RESOURCES]" "$vm_ops_stop_duration" 
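The [$PERCENT_RESOURCES] argument passed to the stop/start calls above comes from the integer math near the bottom of the script (10% of the total count, floored, with a minimum of 1); a small worked sketch of that calculation with an arbitrary count:

```bash
# 10% of 25 VMs with bash integer division; the guard keeps at least one VM in scope
MAIN_COUNT_RESOURCES=25
PERCENT_VMS=10
PERCENT_RESOURCES=$(( MAIN_COUNT_RESOURCES * PERCENT_VMS / 100 ))   # 250 / 100 -> 2
if [ $PERCENT_RESOURCES -eq 0 ]; then
    PERCENT_RESOURCES=1
fi
echo "$PERCENT_RESOURCES VMs will be stopped and started"
```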
+ + sleep 2 + + log_step_start "VM Operations: Start VMs [$PERCENT_RESOURCES]" + local vm_ops_start_vm_start=$(get_timestamp) + start_vm $PERCENT_RESOURCES + local vm_ops_start_vm_end=$(get_timestamp) + local vm_ops_start_vm_duration=$((vm_ops_start_vm_end - vm_ops_start_vm_start)) + log_step_end "VM Operations: Start VMs [$PERCENT_RESOURCES]" "$vm_ops_start_vm_duration" + + local vm_ops_end=$(get_timestamp) + local vm_ops_duration=$((vm_ops_end - vm_ops_start)) + log_step_end "VM Operations Test" "$vm_ops_duration" + + log_success "VM operations test step completed" +} + +run_step_migration_tests() { + local scenario_dir="$1" + local vi_type="$2" + local step_number=$(get_step_number "migration-tests") + + log_info "=== Running Step $step_number: migration-tests ===" + init_logging "step_migration-tests" "$vi_type" "$MAIN_COUNT_RESOURCES" + + # Start 5% migration in background + local migration_duration_time="0m" + log_info "Starting migration test ${MIGRATION_PERCENTAGE_5}% (${MIGRATION_5_COUNT} VMs)" + log_step_start "Migration Setup" + local migration_start=$(get_timestamp) + start_migration $migration_duration_time $MIGRATION_PERCENTAGE_5 + local migration_end=$(get_timestamp) + local migration_duration=$((migration_end - migration_start)) + log_info "Migration test ${MIGRATION_PERCENTAGE_5}% VMs setup completed in $(format_duration $migration_duration)" + log_step_end "Migration Setup ${MIGRATION_PERCENTAGE_5}% (${MIGRATION_5_COUNT} VMs) Started" "$migration_duration" + + # VM operations test - stop/start 10% VMs while migration is running in background + log_info "Testing VM stop/start operations for 10% VMs while migration is running" + log_step_start "VM Operations" + local vm_ops_start=$(get_timestamp) + + log_step_start "VM Operations: Stopping VMs [$PERCENT_RESOURCES]" + local vm_ops_stop_start=$(get_timestamp) + stop_vm $PERCENT_RESOURCES + local vm_ops_stop_end=$(get_timestamp) + local vm_ops_stop_duration=$((vm_ops_stop_end - vm_ops_stop_start)) + log_step_end "VM Operations: Stopping VMs [$PERCENT_RESOURCES]" "$vm_ops_stop_duration" + + sleep 2 + + log_step_start "VM Operations: Start VMs [$PERCENT_RESOURCES]" + local vm_ops_start_vm_start=$(get_timestamp) + start_vm $PERCENT_RESOURCES + local vm_ops_start_vm_end=$(get_timestamp) + local vm_ops_start_vm_duration=$((vm_ops_start_vm_end - vm_ops_start_vm_start)) + log_step_end "VM Operations: Start VMs [$PERCENT_RESOURCES]" "$vm_ops_start_vm_duration" + + local vm_ops_end=$(get_timestamp) + local vm_ops_duration=$((vm_ops_end - vm_ops_start)) + log_info "VM operations test completed in $(format_duration $vm_ops_duration)" + log_step_end "VM Operations: Stop/Start VMs [$PERCENT_RESOURCES]" "$vm_ops_duration" + + # Stop migration and wait for completion + log_step_start "Stop Migration ${MIGRATION_PERCENTAGE_5}% (${MIGRATION_5_COUNT} VMs)" + local cleanup_ops_start=$(get_timestamp) + stop_migration + wait_migration_completion + remove_vmops + local cleanup_ops_end=$(get_timestamp) + local cleanup_ops_duration=$((cleanup_ops_end - cleanup_ops_start)) + log_info "Migration stop and cleanup completed in $(format_duration $cleanup_ops_duration)" + log_step_end "Stop Migration ${MIGRATION_PERCENTAGE_5}% (${MIGRATION_5_COUNT} VMs)" "$cleanup_ops_duration" + + # Migration percentage test - Migrate 10% VMs + log_info "Testing migration of ${MIGRATION_10_COUNT} VMs (10%)" + log_step_start "Migration Percentage ${MIGRATION_10_COUNT} VMs (10%)" + local migration_percent_start=$(get_timestamp) + migration_percent_vms $MIGRATION_10_COUNT + 
local migration_percent_end=$(get_timestamp) + local migration_percent_duration=$((migration_percent_end - migration_percent_start)) + log_step_end "Migration Percentage ${MIGRATION_10_COUNT} VMs (10%)" "$migration_percent_duration" + + log_success "Migration tests step completed" +} + +run_step_controller_restart() { + local scenario_dir="$1" + local vi_type="$2" + local step_number=$(get_step_number "controller-restart") + + log_info "=== Running Step $step_number: controller-restart ===" + init_logging "step_controller-restart" "$vi_type" "$MAIN_COUNT_RESOURCES" + + log_info "Testing controller restart with 1 VM creation" + log_step_start "Controller Restart" + local controller_start=$(get_timestamp) + + # Stop controller first + stop_virtualization_controller + + # Create 1 VM and disk while controller is stopped + log_info "Creating 1 VM and disk while controller is stopped [$((MAIN_COUNT_RESOURCES + 1)) VMs total]" + create_vm_while_controller_stopped $vi_type + + # Start controller and measure time for VM to become ready + log_info "Starting controller and waiting for VM to become ready" + start_virtualization_controller + wait_for_new_vm_after_controller_start + local controller_end_time=$(get_timestamp) + local controller_duration=$((controller_end_time - controller_start)) + + log_info "Controller restart test completed in $(format_duration $controller_duration)" + log_step_end "Controller Restart" "$controller_duration" + + log_success "Controller restart step completed" +} + +run_step_final_operations() { + local scenario_dir="$1" + local vi_type="$2" + local step_number=$(get_step_number "final-operations") + + log_info "=== Running Step $step_number: final-operations ===" + init_logging "step_final-operations" "$vi_type" "$MAIN_COUNT_RESOURCES" + + log_step_start "Final Statistics" + local final_stats_start=$(get_timestamp) + gather_all_statistics "$scenario_dir/statistics" + collect_vpa "$scenario_dir" + local final_stats_end=$(get_timestamp) + local final_stats_duration=$((final_stats_end - final_stats_start)) + log_step_end "Final Statistics" "$final_stats_duration" + + log_info "Waiting 30 seconds before cleanup" + sleep 30 + + if [ "${NO_POST_CLEANUP:-false}" = "true" ]; then + log_warning "Skipping final cleanup as requested" + else + log_step_start "Final Cleanup" + local final_cleanup_start=$(get_timestamp) + undeploy_resources + local final_cleanup_end=$(get_timestamp) + local final_cleanup_duration=$((final_cleanup_end - final_cleanup_start)) + log_step_end "Final Cleanup" "$final_cleanup_duration" + fi + + log_success "Final operations step completed" +} + +# Main execution function for individual steps +run_individual_step() { + local step_name="$1" + local scenario_dir="$2" + local vi_type="$3" + + # Find step number + local step_number=1 + for step in "${ALL_STEPS[@]}"; do + if [ "$step" = "$step_name" ]; then + break + fi + step_number=$((step_number + 1)) + done + + log_info "=== Executing Step $step_number: $step_name ===" + + case "$step_name" in + "cleanup") + run_step_cleanup "$scenario_dir" "$vi_type" + ;; + "vm-deployment") + run_step_vm_deployment "$scenario_dir" "$vi_type" + ;; + "statistics-collection") + run_step_statistics_collection "$scenario_dir" "$vi_type" + ;; + "vm-operations") + run_step_vm_operations "$scenario_dir" "$vi_type" + ;; + "vm-undeploy-deploy") + run_step_vm_undeploy_deploy "$scenario_dir" "$vi_type" + ;; + "vm-operations-test") + run_step_vm_operations_test "$scenario_dir" "$vi_type" + ;; + "migration-tests") + 
run_step_migration_tests "$scenario_dir" "$vi_type" + ;; + "migration-parallel-2x") + run_step_migration_parallel_2x "$scenario_dir" "$vi_type" + ;; + "migration-parallel-4x") + run_step_migration_parallel_4x "$scenario_dir" "$vi_type" + ;; + "migration-parallel-8x") + run_step_migration_parallel_8x "$scenario_dir" "$vi_type" + ;; + "controller-restart") + run_step_controller_restart "$scenario_dir" "$vi_type" + ;; + "drain-node") + run_step_drain_node "$scenario_dir" "$vi_type" + ;; + "final-operations") + run_step_final_operations "$scenario_dir" "$vi_type" + ;; + *) + log_error "Unknown step: $step_name" + echo "Available steps:" + list_available_steps + exit 1 + ;; + esac +} + +# Additional steps aligned with original tests.sh +run_step_migration_parallel_2x() { + local scenario_dir="$1" + local vi_type="$2" + local step_number=$(get_step_number "migration-parallel-2x") + + log_info "=== Running Step $step_number: migration-parallel-2x ===" + init_logging "step_migration-parallel-2x" "$vi_type" "$MAIN_COUNT_RESOURCES" + local amountNodes=$(kubectl get nodes --no-headers -o name | wc -l) + local migration_parallel_2x=$(( amountNodes*2 )) + log_info "Testing migration with parallelMigrationsPerCluster [$migration_parallel_2x (2x)]" + log_step_start "Migration parallel 2x" + local start_ts=$(get_timestamp) + scale_deckhouse 0 + migration_config "640Mi" "800" "$migration_parallel_2x" "1" "150" + migration_percent_vms $MIGRATION_10_COUNT + local end_ts=$(get_timestamp) + log_step_end "Migration parallel 2x" "$((end_ts-start_ts))" +} + +run_step_migration_parallel_4x() { + local scenario_dir="$1" + local vi_type="$2" + local step_number=$(get_step_number "migration-parallel-4x") + + log_info "=== Running Step $step_number: migration-parallel-4x ===" + init_logging "step_migration-parallel-4x" "$vi_type" "$MAIN_COUNT_RESOURCES" + local amountNodes=$(kubectl get nodes --no-headers -o name | wc -l) + local migration_parallel_4x=$(( amountNodes*4 )) + log_info "Testing migration with parallelMigrationsPerCluster [$migration_parallel_4x (4x)]" + log_step_start "Migration parallel 4x" + local start_ts=$(get_timestamp) + migration_config "640Mi" "800" "$migration_parallel_4x" "1" "150" + migration_percent_vms $MIGRATION_10_COUNT + local end_ts=$(get_timestamp) + log_step_end "Migration parallel 4x" "$((end_ts-start_ts))" +} + +run_step_migration_parallel_8x() { + local scenario_dir="$1" + local vi_type="$2" + local step_number=$(get_step_number "migration-parallel-8x") + + log_info "=== Running Step $step_number: migration-parallel-8x ===" + init_logging "step_migration-parallel-8x" "$vi_type" "$MAIN_COUNT_RESOURCES" + local amountNodes=$(kubectl get nodes --no-headers -o name | wc -l) + local migration_parallel_8x=$(( amountNodes*8 )) + log_info "Testing migration with parallelMigrationsPerCluster [$migration_parallel_8x (8x)]" + log_step_start "Migration parallel 8x" + local start_ts=$(get_timestamp) + migration_config "640Mi" "800" "$migration_parallel_8x" "1" "150" + migration_percent_vms $MIGRATION_10_COUNT + migration_config + log_info "Restoring original deckhouse controller replicas to [$ORIGINAL_DECHOUSE_CONTROLLER_REPLICAS]" + scale_deckhouse $ORIGINAL_DECHOUSE_CONTROLLER_REPLICAS + local end_ts=$(get_timestamp) + log_step_end "Migration parallel 8x" "$((end_ts-start_ts))" +} + +run_step_drain_node() { + local scenario_dir="$1" + local vi_type="$2" + local step_number=$(get_step_number "drain-node") + + log_info "=== Running Step $step_number: drain-node ===" + init_logging 
"step_drain-node" "$vi_type" "$MAIN_COUNT_RESOURCES" + log_info "Draining node via workload" + log_step_start "Drain node" + local start_ts=$(get_timestamp) + drain_node + local end_ts=$(get_timestamp) + log_step_end "Drain node" "$((end_ts-start_ts))" +} + +# Ordered steps for full step-runner alignment with original tests.sh +ALL_STEPS=( + cleanup + vm-deployment + statistics-collection + vm-operations + vm-undeploy-deploy + vm-operations-test + migration-tests + migration-parallel-2x + migration-parallel-4x + migration-parallel-8x + controller-restart + drain-node + final-operations +) + +run_steps_from() { + local start_step="$1" + local scenario_dir="$2" + local vi_type="$3" + + local started=false + local step_number=1 + for step in "${ALL_STEPS[@]}"; do + if [ "$started" = false ]; then + if [ "$step" = "$start_step" ]; then + started=true + else + step_number=$((step_number + 1)) + continue + fi + fi + log_info "=== Executing Step $step_number: $step ===" + run_individual_step "$step" "$scenario_dir" "$vi_type" + step_number=$((step_number + 1)) + done +} + +# === Test configuration === +# Default values (can be overridden by command line arguments) +SCENARIO_NUMBER=${SCENARIO_NUMBER:-1} +MAIN_COUNT_RESOURCES=${MAIN_COUNT_RESOURCES:-2} # vms and vds (reduced for testing) +PERCENT_VMS=10 # 10% of total resources +MIGRATION_DURATION="1m" +MIGRATION_PERCENTAGE_10=10 # 10% for migration +MIGRATION_PERCENTAGE_5=10 # 5% for migration +WAIT_MIGRATION=$( echo "$MIGRATION_DURATION" | sed 's/m//' ) + +# Large scale deployment configuration +MAX_BATCH_SIZE=${MAX_BATCH_SIZE:-1200} # Maximum resources per batch +TOTAL_TARGET_RESOURCES=${TOTAL_TARGET_RESOURCES:-15000} # Total target resources +BATCH_DEPLOYMENT_ENABLED=${BATCH_DEPLOYMENT_ENABLED:-false} # Enable batch deployment for large numbers + +# New deployment control options +BOOTSTRAP_ONLY=${BOOTSTRAP_ONLY:-false} # Only deploy resources, skip tests +CONTINUE_AFTER_BOOTSTRAP=${CONTINUE_AFTER_BOOTSTRAP:-false} # Continue tests after bootstrap +KEEP_RESOURCES=${KEEP_RESOURCES:-false} # Keep resources after tests (don't cleanup) + +# Parse command line arguments +parse_arguments "$@" + +# Recalculate resources after parsing command line arguments +PERCENT_RESOURCES=$(( $MAIN_COUNT_RESOURCES * $PERCENT_VMS / 100 )) +if [ $PERCENT_RESOURCES -eq 0 ]; then + PERCENT_RESOURCES=1 +fi + +# Calculate resources for migration percentages +MIGRATION_5_COUNT=$(( $MAIN_COUNT_RESOURCES * $MIGRATION_PERCENTAGE_5 / 100 )) +MIGRATION_10_COUNT=$(( $MAIN_COUNT_RESOURCES * $MIGRATION_PERCENTAGE_10 / 100 )) +if [ $MIGRATION_5_COUNT -eq 0 ]; then + MIGRATION_5_COUNT=1 +fi +if [ $MIGRATION_10_COUNT -eq 0 ]; then + MIGRATION_10_COUNT=1 +fi + +# Display configuration +log_info "=== Performance Test Configuration ===" +log_info "Scenario Number: $SCENARIO_NUMBER" +log_info "Resource Count: $MAIN_COUNT_RESOURCES" +log_info "Percent Resources (10%): $PERCENT_RESOURCES" +log_info "Migration 5% Count: $MIGRATION_5_COUNT" +log_info "Migration 10% Count: $MIGRATION_10_COUNT" +log_info "Batch Size: $MAX_BATCH_SIZE" +log_info "Batch Deployment Enabled: $BATCH_DEPLOYMENT_ENABLED" +log_info "========================================" + +# Main execution +prepare_for_tests + +# Check for bootstrap-only mode +if [ "$BOOTSTRAP_ONLY" = "true" ]; then + log_info "=== BOOTSTRAP ONLY MODE ===" + log_info "Deploying $MAIN_COUNT_RESOURCES resources without running tests" + + # Deploy resources only + case $SCENARIO_NUMBER in + 1) + VI_TYPE="persistentVolumeClaim" + 
deploy_vms_with_disks_smart $MAIN_COUNT_RESOURCES $VI_TYPE + ;; + 2) + VI_TYPE="containerRegistry" + deploy_vms_with_disks_smart $MAIN_COUNT_RESOURCES $VI_TYPE + ;; + esac + + log_success "Bootstrap completed: $MAIN_COUNT_RESOURCES resources deployed" + log_info "Use --continue to run tests on deployed resources" + exit 0 +fi + +# Check for continue mode +if [ "$CONTINUE_AFTER_BOOTSTRAP" = "true" ]; then + log_info "=== CONTINUE MODE ===" + log_info "Continuing tests on existing resources" + # Skip initial deployment, continue with tests +fi + +# Check if running individual step or full scenario +if [ -n "$INDIVIDUAL_STEP" ] || [ -n "${FROM_STEP:-}" ]; then + # Individual step execution + if [ -z "$SCENARIO_DIR" ]; then + log_error "Scenario directory is required for individual step execution" + echo "Usage: $0 --step $INDIVIDUAL_STEP --scenario-dir DIR [--vi-type TYPE]" + exit 1 + fi + + # Set default VI_TYPE if not provided + if [ -z "$VI_TYPE" ]; then + VI_TYPE="persistentVolumeClaim" + fi + # Optionally skip pre-cleanup by not running cleanup unless requested explicitly + if [ -n "${FROM_STEP:-}" ]; then + log_info "Running from step: $FROM_STEP" + run_steps_from "$FROM_STEP" "$SCENARIO_DIR" "$VI_TYPE" + log_success "From-step execution completed successfully" + else + log_info "Running individual step: $INDIVIDUAL_STEP" + # Respect NO_PRE_CLEANUP/NO_POST_CLEANUP within steps; cleanup step is separate + run_individual_step "$INDIVIDUAL_STEP" "$SCENARIO_DIR" "$VI_TYPE" + log_success "Individual step completed successfully" + fi +else + # Full scenario execution (original behavior) + case $SCENARIO_NUMBER in + 1) + VI_TYPE="persistentVolumeClaim" + run_scenario "scenario_1" "$VI_TYPE" + log_success "Scenario 1 (persistentVolumeClaim) completed successfully" + ;; + 2) + VI_TYPE="containerRegistry" + run_scenario "scenario_2" "$VI_TYPE" + log_success "Scenario 2 (containerRegistry) completed successfully" + ;; + *) + log_error "Invalid scenario number: $SCENARIO_NUMBER. Use 1 or 2." 
+ exit 1 + ;; + esac + + # Handle resource cleanup based on --keep-resources option + if [ "$KEEP_RESOURCES" = "true" ]; then + log_info "=== KEEPING RESOURCES ===" + log_info "Resources will be kept after tests (--keep-resources enabled)" + log_success "All scenarios completed successfully - resources preserved" + else + log_info "=== CLEANING UP RESOURCES ===" + undeploy_resources + log_success "All scenarios completed successfully - resources cleaned up" + fi +fi diff --git a/tests/performance/tools/evicter/cmd/main.go b/tests/performance/tools/evicter/cmd/main.go index 1106dc4007..f8d094ed37 100644 --- a/tests/performance/tools/evicter/cmd/main.go +++ b/tests/performance/tools/evicter/cmd/main.go @@ -24,11 +24,6 @@ import ( ) func main() { - // opts := &slog.HandlerOptions{ - // AddSource: true, // This enables source location logging - // } - // logger := slog.New(slog.NewJSONHandler(os.Stdout, opts)) - // logger := slog.New(slog.NewTextHandler(os.Stdout, opts)) logger := slog.New(slog.NewTextHandler(os.Stdout, nil)) slog.SetDefault(logger) diff --git a/tests/performance/tools/evicter/internal/migration.go b/tests/performance/tools/evicter/internal/migration.go index ea74d1c149..4190353445 100644 --- a/tests/performance/tools/evicter/internal/migration.go +++ b/tests/performance/tools/evicter/internal/migration.go @@ -133,7 +133,17 @@ func (m *ContinuousMigrator) checkAndStartMigrations() { // Filter VMs that are running and not currently migrating var availableVMs []v1alpha2.VirtualMachine - for _, vm := range vmList.Items { + + // Calculate how many VMs should be migrating + // We need migrate only part of VMs, because we don't want migrate last n VMs + targetCount := (m.targetPercentage * len(vmList.Items)) / 100 + if targetCount == 0 { + targetCount = 1 + } + + endSlice := len(vmList.Items) - (16*len(vmList.Items))/100 + + for _, vm := range vmList.Items[:endSlice] { // Only consider VMs that are in Running state and not already migrating if vm.Status.Phase == v1alpha2.MachineRunning { m.mutex.RLock() @@ -146,8 +156,6 @@ func (m *ContinuousMigrator) checkAndStartMigrations() { } } - // Calculate how many VMs should be migrating - targetCount := (m.targetPercentage * len(vmList.Items)) / 100 currentMigrating := len(m.migratingVMs) // Start new migrations if needed @@ -186,8 +194,8 @@ func (m *ContinuousMigrator) startMigration(vm v1alpha2.VirtualMachine) { // Check if VM is still in Running state and not migrating if currentVM.Status.Phase != v1alpha2.MachineRunning { - slog.Info("VM is no longer in Running state, skipping migration", - "vm", vm.Name, + slog.Info("VM is no longer in Running state, skipping migration", + "vm", vm.Name, "currentPhase", currentVM.Status.Phase) return } @@ -196,7 +204,7 @@ func (m *ContinuousMigrator) startMigration(vm v1alpha2.VirtualMachine) { m.mutex.RLock() _, isMigrating := m.migratingVMs[vm.Name] m.mutex.RUnlock() - + if isMigrating { slog.Info("VM is already being migrated, skipping", "vm", vm.Name) return @@ -212,11 +220,11 @@ func (m *ContinuousMigrator) startMigration(vm v1alpha2.VirtualMachine) { // Check if there are any active VMOPs for this VM for _, vmop := range vmopList.Items { if vmop.Spec.VirtualMachine == vm.Name { - if vmop.Status.Phase == v1alpha2.VMOPPhaseInProgress || - vmop.Status.Phase == v1alpha2.VMOPPhasePending { - slog.Info("VM already has active VMOP, skipping migration", - "vm", vm.Name, - "vmop", vmop.Name, + if vmop.Status.Phase == v1alpha2.VMOPPhaseInProgress || + vmop.Status.Phase == v1alpha2.VMOPPhasePending { + 
slog.Info("VM already has active VMOP, skipping migration", + "vm", vm.Name, + "vmop", vmop.Name, "phase", vmop.Status.Phase) return } @@ -322,4 +330,3 @@ func (m *ContinuousMigrator) isMigrationComplete(vm *v1alpha2.VirtualMachine) bo // Migration is complete if we see Migrating -> Running transition return last.Phase == v1alpha2.MachineRunning && beforeLast.Phase == v1alpha2.MachineMigrating } - diff --git a/tests/performance/tools/netchecker/Taskfile.dist.yaml b/tests/performance/tools/netchecker/Taskfile.dist.yaml new file mode 100644 index 0000000000..43458b2b56 --- /dev/null +++ b/tests/performance/tools/netchecker/Taskfile.dist.yaml @@ -0,0 +1,18 @@ +version: "3" + +silent: true + +vars: + NC_HOST: '{{ .NC_HOST }}' + +tasks: + apply: + desc: "Apply netchecker" + cmds: + - | + if [ -z "$NC_HOST" ]; then + echo "NC_HOST is required" + exit 1 + fi + - sed -i "s/__NC_HOST__/{{ .NC_HOST }}/g" overlays/test/kustomization.yaml + - kubectl apply -k overlays/test \ No newline at end of file diff --git a/tests/performance/tools/netchecker/base/configmap.yaml b/tests/performance/tools/netchecker/base/configmap.yaml new file mode 100644 index 0000000000..544432f3b3 --- /dev/null +++ b/tests/performance/tools/netchecker/base/configmap.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: netchecker-config + namespace: netchecker +data: + SERVER_PORT: "8080" + METRICS_PORT: "9090" + LOG_INTERVAL: "10s" + CLIENT_TIMEOUT: "30s" + LOG_LEVEL: "info" + CLIENT_CLEANUP_TIMEOUT: "10m" diff --git a/tests/performance/tools/netchecker/base/deployment.yaml b/tests/performance/tools/netchecker/base/deployment.yaml new file mode 100644 index 0000000000..04d2ff5c62 --- /dev/null +++ b/tests/performance/tools/netchecker/base/deployment.yaml @@ -0,0 +1,48 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: netchecker + namespace: netchecker + labels: + app: netchecker +spec: + replicas: 1 + selector: + matchLabels: + app: netchecker + template: + metadata: + labels: + app: netchecker + spec: + containers: + - name: netchecker + image: registry-dvp.dev.flant.dev/tools/netchecker:latest + imagePullPolicy: Always + ports: + - containerPort: 8080 + name: http + - containerPort: 9090 + name: metrics + envFrom: + - configMapRef: + name: netchecker-config + # livenessProbe: + # httpGet: + # path: /health + # port: 8080 + # initialDelaySeconds: 30 + # periodSeconds: 10 + readinessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 5 + resources: + requests: + memory: "512Mi" + cpu: "500m" + limits: + memory: "512Mi" + cpu: "500m" diff --git a/tests/performance/tools/netchecker/base/gdd.yaml b/tests/performance/tools/netchecker/base/gdd.yaml new file mode 100644 index 0000000000..3e67e2655e --- /dev/null +++ b/tests/performance/tools/netchecker/base/gdd.yaml @@ -0,0 +1,344 @@ +apiVersion: deckhouse.io/v1 +kind: GrafanaDashboardDefinition +metadata: + name: netchecker +spec: + folder: "" + definition: | + { + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "P0D6E4079E36703EB" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "fieldMinMax": false, + "mappings": [], + 
"thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 2, + "x": 0, + "y": 0 + }, + "id": 3, + "options": { + "colorMode": "none", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showPercentChange": false, + "textMode": "auto", + "wideLayout": true + }, + "pluginVersion": "10.4.19", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P0D6E4079E36703EB" + }, + "editorMode": "code", + "exemplar": false, + "expr": "netchecker_active_clients", + "format": "time_series", + "instant": true, + "legendFormat": "Active", + "range": false, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P0D6E4079E36703EB" + }, + "editorMode": "code", + "exemplar": false, + "expr": "netchecker_total_clients", + "hide": false, + "instant": true, + "legendFormat": "Total", + "range": false, + "refId": "B" + } + ], + "title": "Panel Title", + "type": "stat" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P0D6E4079E36703EB" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 9, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "stepBefore", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 4, + "w": 22, + "x": 2, + "y": 0 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P0D6E4079E36703EB" + }, + "editorMode": "code", + "expr": "avg(netchecker_active_clients)", + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P0D6E4079E36703EB" + }, + "editorMode": "code", + "expr": "avg(netchecker_total_clients)", + "hide": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "B" + } + ], + "title": "Total / Active clients", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P0D6E4079E36703EB" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "axisSoftMin": 0, + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": 
"stepBefore", + "lineStyle": { + "fill": "solid" + }, + "lineWidth": 1, + "pointSize": 1, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 17, + "w": 24, + "x": 0, + "y": 4 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P0D6E4079E36703EB" + }, + "editorMode": "code", + "expr": "rate(netchecker_client_requests[$__rate_interval])", + "instant": false, + "legendFormat": "{{ client_name }}/{{ client_ip }}", + "range": true, + "refId": "A" + } + ], + "title": "Clients request rate", + "type": "timeseries" + } + ], + "refresh": "", + "schemaVersion": 39, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-24h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "Netchecker dashboard", + "uid": "deyy2oim2cl4wc", + "version": 3, + "weekStart": "" + } diff --git a/tests/performance/tools/netchecker/base/ingress.yaml b/tests/performance/tools/netchecker/base/ingress.yaml new file mode 100644 index 0000000000..c16190ae24 --- /dev/null +++ b/tests/performance/tools/netchecker/base/ingress.yaml @@ -0,0 +1,36 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: netchecker-ingress + namespace: netchecker + annotations: + nginx.ingress.kubernetes.io/rewrite-target: / +spec: + rules: + - host: nc.example.com + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: nc + port: + number: 8080 + tls: + - hosts: + - nc.example.com + secretName: nc +--- +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: tcpr +spec: + certificateOwnerRef: false + dnsNames: + - nc.example.com + issuerRef: + kind: ClusterIssuer + name: letsencrypt + secretName: nc diff --git a/tests/performance/tools/netchecker/base/kustomization.yaml b/tests/performance/tools/netchecker/base/kustomization.yaml new file mode 100644 index 0000000000..ba155157c0 --- /dev/null +++ b/tests/performance/tools/netchecker/base/kustomization.yaml @@ -0,0 +1,32 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: nc + +resources: +- namespace.yaml +- configmap.yaml +- deployment.yaml +- service.yaml +- ingress.yaml +- servicemonitor.yaml +- gdd.yaml + +patches: + - target: + kind: Ingress + name: netchecker-ingress + patch: |- + - op: replace + path: /spec/rules/0/host + value: nc.e2e.virtlab.flant.com + - op: replace + path: /spec/tls/0/hosts/0 + value: nc.e2e.virtlab.flant.com + - target: + kind: Certificate + name: tcpr + patch: |- + - op: replace + path: /spec/dnsNames/0 + value: nc.e2e.virtlab.flant.com diff --git a/tests/performance/tools/netchecker/base/namespace.yaml b/tests/performance/tools/netchecker/base/namespace.yaml new file mode 100644 index 0000000000..7302ca933a --- /dev/null +++ b/tests/performance/tools/netchecker/base/namespace.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: netchecker + labels: + prometheus.deckhouse.io/monitor-watcher-enabled: "true" + prometheus.deckhouse.io/probe-watcher-enabled: "true" 
+ prometheus.deckhouse.io/rules-watcher-enabled: "true" + prometheus.deckhouse.io/scrape-configs-watcher-enabled: "true" diff --git a/tests/performance/tools/netchecker/base/service.yaml b/tests/performance/tools/netchecker/base/service.yaml new file mode 100644 index 0000000000..b2178a06b9 --- /dev/null +++ b/tests/performance/tools/netchecker/base/service.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: Service +metadata: + name: nc + namespace: nc + labels: + app: netchecker +spec: + selector: + app: netchecker + ports: + - name: http + port: 8080 + targetPort: 8080 + protocol: TCP + - name: metrics + port: 9090 + targetPort: 9090 + protocol: TCP + type: ClusterIP diff --git a/tests/performance/tools/netchecker/base/servicemonitor.yaml b/tests/performance/tools/netchecker/base/servicemonitor.yaml new file mode 100644 index 0000000000..471c6727d9 --- /dev/null +++ b/tests/performance/tools/netchecker/base/servicemonitor.yaml @@ -0,0 +1,15 @@ +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: netchecker-monitor + namespace: netchecker + labels: + prometheus: main +spec: + selector: + matchLabels: + app: netchecker + endpoints: + - port: metrics + path: /metrics + interval: 30s diff --git a/tests/performance/tools/netchecker/overlays/test/kustomization.yaml b/tests/performance/tools/netchecker/overlays/test/kustomization.yaml new file mode 100644 index 0000000000..6ec2b69938 --- /dev/null +++ b/tests/performance/tools/netchecker/overlays/test/kustomization.yaml @@ -0,0 +1,24 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +resources: +- ../../base + +patches: + - target: + kind: Ingress + name: netchecker-ingress + patch: |- + - op: replace + path: /spec/rules/0/host + value: __NC_HOST__ + - op: replace + path: /spec/tls/0/hosts/0 + value: __NC_HOST__ + - target: + kind: Certificate + name: tcpr + patch: |- + - op: replace + path: /spec/dnsNames/0 + value: __NC_HOST__ diff --git a/tests/performance/tools/shatal/Taskfile.yaml b/tests/performance/tools/shatal/Taskfile.yaml index 5da8dbbf11..918eb4e303 100644 --- a/tests/performance/tools/shatal/Taskfile.yaml +++ b/tests/performance/tools/shatal/Taskfile.yaml @@ -12,5 +12,14 @@ tasks: if [[ -n "$KUBECONFIG_BASE64" ]] ; then KUBECONFIG_BASE64=$KUBECONFIG_BASE64 go run cmd/shatal/main.go else - go run cmd/shatal/main.go + KUBECONFIG_BASE64={{ .KUBECONFIG_BASE64 }} go run cmd/shatal/main.go fi + + run:local: + desc: "Run emulation of virtual machine movements" + vars: + KUBECONFIG: '{{ .KUBECONFIG | default "~/.kube/config" }}' + cmds: + - | + KUBECONFIG=$(cat ~/.kube/clusters/srv01/config | base64 -w 0) + KUBECONFIG_BASE64=$KUBECONFIG task run diff --git a/tests/performance/tools/shatal/cmd/shatal/main.go b/tests/performance/tools/shatal/cmd/shatal/main.go index ba0a12f811..c48ed4cb71 100644 --- a/tests/performance/tools/shatal/cmd/shatal/main.go +++ b/tests/performance/tools/shatal/cmd/shatal/main.go @@ -56,7 +56,12 @@ func main() { exit := make(chan os.Signal, 1) signal.Notify(exit, syscall.SIGINT, syscall.SIGTERM) - <-exit - - service.Stop() + // Wait for either a signal or for the service to complete naturally + select { + case <-exit: + service.Stop() + case <-service.Done(): + // Service completed naturally (e.g., when once: true) + service.Stop() + } } diff --git a/tests/performance/tools/shatal/config.yaml b/tests/performance/tools/shatal/config.yaml index 48b07359cb..7c45c664ad 100644 --- a/tests/performance/tools/shatal/config.yaml +++ b/tests/performance/tools/shatal/config.yaml @@ -1,23 
+1,24 @@ kubeconfigBase64: "XXX=" -resourcesPrefix: "performance" -namespace: "default" +resourcesPrefix: "perf" +namespace: "perf" interval: "5s" count: 100 debug: true forceInterruption: false drainer: + once: true enabled: true interval: "10s" labelSelector: "!node-role.kubernetes.io/master" creator: - enabled: true + enabled: false interval: "5s" deleter: - enabled: true + enabled: false weight: 1 modifier: - enabled: true + enabled: false weight: 1 nothing: - enabled: true + enabled: false weight: 8 diff --git a/tests/performance/tools/shatal/internal/api/client.go b/tests/performance/tools/shatal/internal/api/client.go index 51362401ce..42c350692a 100644 --- a/tests/performance/tools/shatal/internal/api/client.go +++ b/tests/performance/tools/shatal/internal/api/client.go @@ -119,9 +119,13 @@ func (c *Client) DrainNode(ctx context.Context, node string) error { Client: c.clientset, IgnoreAllDaemonSets: true, DeleteEmptyDirData: true, - PodSelector: "vm=" + c.resourcePrefix, + PodSelector: "vms=" + c.resourcePrefix, Out: logWriter, ErrOut: logWriter, + // Optimize drain performance + Force: true, + GracePeriodSeconds: 30, + Timeout: 5 * time.Minute, OnPodDeletionOrEvictionStarted: func(pod *corev1.Pod, usingEviction bool) { mx.Lock() defer mx.Unlock() diff --git a/tests/performance/tools/shatal/internal/shatal/shatal.go b/tests/performance/tools/shatal/internal/shatal/shatal.go index 73a08db2ea..7ff9751fc3 100644 --- a/tests/performance/tools/shatal/internal/shatal/shatal.go +++ b/tests/performance/tools/shatal/internal/shatal/shatal.go @@ -39,6 +39,7 @@ type Shatal struct { logger *slog.Logger exit chan struct{} + done chan struct{} wg sync.WaitGroup forceInterruption bool @@ -49,6 +50,7 @@ func New(api *api.Client, conf config.Config, log *slog.Logger) (*Shatal, error) api: api, logger: log, exit: make(chan struct{}), + done: make(chan struct{}), forceInterruption: conf.ForceInterruption, } @@ -62,8 +64,14 @@ func New(api *api.Client, conf config.Config, log *slog.Logger) (*Shatal, error) nodeLocks[node.Name] = &sync.Mutex{} } - watcher := NewWatcher(api, conf.Interval, nodeLocks, log) - shatal.runners = append(shatal.runners, watcher) + // Only add watcher if there are operations that need it + needWatcher := conf.Modifier.Enabled || conf.Deleter.Enabled || conf.Nothing.Enabled + + var watcher *Watcher + if needWatcher { + watcher = NewWatcher(api, conf.Interval, nodeLocks, log) + shatal.runners = append(shatal.runners, watcher) + } if conf.Drainer.Enabled { if len(nodes) < 1 { @@ -97,21 +105,27 @@ func New(api *api.Client, conf config.Config, log *slog.Logger) (*Shatal, error) shatal.logger.With("weight", conf.Modifier.Weight).Info("With modifier") modifier := NewModifier(api, conf.Namespace, log) - watcher.Subscribe(modifier, conf.Modifier.Weight) + if watcher != nil { + watcher.Subscribe(modifier, conf.Modifier.Weight) + } } if conf.Deleter.Enabled { shatal.logger.With("weight", conf.Deleter.Weight).Info("With deleter") deleter := NewDeleter(api, log) - watcher.Subscribe(deleter, conf.Deleter.Weight) + if watcher != nil { + watcher.Subscribe(deleter, conf.Deleter.Weight) + } } if conf.Nothing.Enabled { shatal.logger.With("weight", conf.Nothing.Weight).Info("With nothing") nothing := NewNothing(log) - watcher.Subscribe(nothing, conf.Nothing.Weight) + if watcher != nil { + watcher.Subscribe(nothing, conf.Nothing.Weight) + } } return &shatal, nil @@ -132,6 +146,13 @@ func (s *Shatal) Run() { }() } + // Monitor when all runners complete + go func() { + s.wg.Wait() + 
s.logger.Info("All runners completed") + close(s.done) + }() + go func() { select { case <-s.exit: @@ -147,6 +168,10 @@ func (s *Shatal) Run() { }() } +func (s *Shatal) Done() <-chan struct{} { + return s.done +} + func (s *Shatal) Stop() { s.logger.Info("Stopping...") close(s.exit) diff --git a/tests/performance/tools/statistic/README.md b/tests/performance/tools/statistic/README.md new file mode 100644 index 0000000000..2bfde60e04 --- /dev/null +++ b/tests/performance/tools/statistic/README.md @@ -0,0 +1,114 @@ +# Statistic Tool + +Утилита для сбора статистики по виртуальным машинам, дискам и операциям в Kubernetes кластере. + +## Возможности + +- Сбор статистики по VirtualMachine (VM) +- Сбор статистики по VirtualDisk (VD) +- Сбор статистики по VirtualMachineOperation (VMOP) +- Фильтрация по количеству ресурсов +- Указание папки для сохранения файлов +- Раздельные отчеты для разных типов ресурсов + +## Использование + +### Основные команды + +```bash +# Собрать статистику по всем ресурсам +go run cmd/statistic/main.go -n + +# Собрать статистику только по VM +go run cmd/statistic/main.go -v -n + +# Собрать статистику только по VD +go run cmd/statistic/main.go -d -n + +# Собрать статистику только по VMOP +go run cmd/statistic/main.go -o -n +``` + +### Параметры + +- `-n, --namespace` - namespace для поиска ресурсов (по умолчанию: "perf") +- `-v, --virtualmachine` - собрать статистику по VM +- `-d, --virtualdisk` - собрать статистику по VD +- `-o, --vmop` - собрать статистику по VMOP +- `-O, --output-dir` - папка для сохранения CSV файлов (по умолчанию: ".") +- `-c, --vm-count` - ограничить количество VM для обработки (0 = все) +- `-C, --vd-count` - ограничить количество VD для обработки (0 = все) + +### Примеры использования + +```bash +# Собрать статистику по 10 VM в namespace "perf" и сохранить в папку "reports" +go run cmd/statistic/main.go -v -n perf -O reports -c 10 + +# Собрать статистику по всем VMOP в namespace "test" +go run cmd/statistic/main.go -o -n test -O /tmp/statistics + +# Собрать статистику по 5 VD и 5 VM +go run cmd/statistic/main.go -v -d -n perf -c 5 -C 5 -O ./results +``` + +## Использование через Taskfile + +```bash +# Собрать статистику по VM с ограничением количества +task get-stat:vm NAMESPACE=perf OUTPUT_DIR=./reports VM_COUNT=10 + +# Собрать статистику по VMOP +task get-stat:vmop NAMESPACE=perf OUTPUT_DIR=./reports + +# Собрать статистику по всем ресурсам +task get-stat:all NAMESPACE=perf OUTPUT_DIR=./reports VM_COUNT=5 VD_COUNT=5 +``` + +## Выходные файлы + +Утилита создает следующие файлы: + +- `all-vm--.csv` - детальная статистика по VM +- `all-vd--.csv` - детальная статистика по VD +- `all-vmop--.csv` - детальная статистика по VMOP +- `avg-vm--.csv` - средние значения по VM +- `avg-vd--.csv` - средние значения по VD +- `avg-vmop--.csv` - средние значения по VMOP + +## Статистика по VM + +- **WaitingForDependencies** - время ожидания зависимостей +- **VirtualMachineStarting** - время запуска виртуальной машины +- **GuestOSAgentStarting** - время запуска гостевого агента + +## Статистика по VD + +- **WaitingForDependencies** - время ожидания зависимостей +- **DVCRProvisioning** - время подготовки DVCR +- **TotalProvisioning** - общее время подготовки + +## Статистика по VMOP + +- **Phase** - текущая фаза операции +- **Duration** - продолжительность операции +- **StartTime** - время начала операции +- **EndTime** - время окончания операции + +## Интеграция с тестами производительности + +Утилита интегрирована с тестами производительности и может использоваться 
для: + +1. Сбора статистики по определенному количеству VM (например, по 10% VM) +2. Раздельного сбора статистики по VM и VMOP +3. Сохранения отчетов в структурированные папки + +Пример использования в тестах: + +```bash +# Собрать статистику только по 10% VM (2 VM из 20) +task get-stat:vm NAMESPACE=perf OUTPUT_DIR=./scenario_1_persistentVolumeClaim/statistics VM_COUNT=2 + +# Собрать статистику по VMOP после миграции +task get-stat:vmop NAMESPACE=perf OUTPUT_DIR=./scenario_1_persistentVolumeClaim/statistics +``` diff --git a/tests/performance/tools/statistic/Taskfile.yaml b/tests/performance/tools/statistic/Taskfile.yaml index d93c113789..0588cd300d 100644 --- a/tests/performance/tools/statistic/Taskfile.yaml +++ b/tests/performance/tools/statistic/Taskfile.yaml @@ -3,18 +3,21 @@ version: "3" silent: true vars: - NS: '{{ .NS | default "perf" }}' + NAMESPACE: '{{ .NAMESPACE | default "perf" }}' + OUTPUT_DIR: '{{ .OUTPUT_DIR | default "." }}' + VM_COUNT: '{{ .VM_COUNT | default "0" }}' + VD_COUNT: '{{ .VD_COUNT | default "0" }}' tasks: get-stat:vd: desc: "Run collect stat from vds" cmds: - - go run cmd/statistic/main.go -d -n {{.NS}} + - go run cmd/statistic/main.go -d -n {{.NAMESPACE}} -O {{.OUTPUT_DIR}} -C {{.VD_COUNT}} get-stat:vm: desc: "Run collect stat from vms" cmds: - - go run cmd/statistic/main.go -v -n {{.NS}} + - go run cmd/statistic/main.go -v -n {{.NAMESPACE}} -O {{.OUTPUT_DIR}} -c {{.VM_COUNT}} get-stat:all: desc: "Run collect stat from vds and vms" cmds: - - go run cmd/statistic/main.go -n {{.NS}} + - go run cmd/statistic/main.go -n {{.NAMESPACE}} -O {{.OUTPUT_DIR}} -c {{.VM_COUNT}} -C {{.VD_COUNT}} diff --git a/tests/performance/tools/statistic/internal/helpers/helper.go b/tests/performance/tools/statistic/internal/helpers/helper.go index 6537e8d14c..95364b33e8 100644 --- a/tests/performance/tools/statistic/internal/helpers/helper.go +++ b/tests/performance/tools/statistic/internal/helpers/helper.go @@ -97,13 +97,10 @@ func DurationToString(d *metav1.Duration) string { return dur } -func SaveToFile(content string, resType string, ns string) { +func SaveToFile(content string, resType string, ns string, outputDir string) { filepath := fmt.Sprintf("/%s-%s-%s.csv", resType, ns, time.Now().Format("2006-01-02_15-04-05")) - execpath, err := os.Getwd() - if err != nil { - os.Exit(1) - } - file, err := os.Create(execpath + filepath) + + file, err := os.Create(outputDir + filepath) if err != nil { fmt.Printf("Error creating file: %v\n", err) return diff --git a/tests/performance/tools/statistic/internal/vd/get_vd_stat.go b/tests/performance/tools/statistic/internal/vd/get_vd_stat.go index a2f24b3b75..ac0e4207c3 100644 --- a/tests/performance/tools/statistic/internal/vd/get_vd_stat.go +++ b/tests/performance/tools/statistic/internal/vd/get_vd_stat.go @@ -39,14 +39,10 @@ type VDs struct { Items []VD `json:"items"` } -func (vds *VDs) SaveToCSV(ns string) { +func (vds *VDs) SaveToCSV(ns string, outputDir string) { filepath := fmt.Sprintf("/all-%s-%s-%s.csv", "vd", ns, time.Now().Format("2006-01-02_15-04-05")) - execpath, err := os.Getwd() - if err != nil { - os.Exit(1) - } - file, err := os.Create(execpath + filepath) + file, err := os.Create(outputDir + filepath) if err != nil { os.Exit(1) } @@ -77,7 +73,13 @@ func (vds *VDs) SaveToCSV(ns string) { fmt.Println("Data of VD saved successfully to csv", file.Name()) } -func GetStatistic(client kubeclient.Client, namespace string) { +func GetStatistic(client kubeclient.Client, namespace string, outputDir string, vdCount int) { + // Create 
output directory if it doesn't exist + if err := os.MkdirAll(outputDir, 0755); err != nil { + fmt.Printf("Failed to create output directory: %v\n", err) + os.Exit(1) + } + var ( vds VDs sumWaitingForDependencies float64 @@ -88,14 +90,19 @@ func GetStatistic(client kubeclient.Client, namespace string) { // Limit & Continue for separete call res vdList, err := client.VirtualDisks(namespace).List(context.TODO(), metav1.ListOptions{}) if err != nil { - fmt.Printf("Failed to get vm: %v\n", err) + fmt.Printf("Failed to get vd: %v\n", err) os.Exit(1) } totalItems := len(vdList.Items) + processedCount := 0 for _, vd := range vdList.Items { if string(vd.Status.Phase) == "Ready" { + // If vdCount is specified and we've reached the limit, break + if vdCount > 0 && processedCount >= vdCount { + break + } vds.Items = append(vds.Items, VD{ Name: vd.Name, @@ -109,24 +116,32 @@ func GetStatistic(client kubeclient.Client, namespace string) { sumWaitingForDependencies += helpers.ToSeconds(vd.Status.Stats.CreationDuration.WaitingForDependencies) sumDVCRProvisioning += helpers.ToSeconds(vd.Status.Stats.CreationDuration.DVCRProvisioning) sumTotalProvisioning += helpers.ToSeconds(vd.Status.Stats.CreationDuration.TotalProvisioning) + processedCount++ } } - avgWaitingForDependencies := sumWaitingForDependencies / float64(totalItems) - avgDVCRProvisioning := sumDVCRProvisioning / float64(totalItems) - avgTotalProvisioning := sumTotalProvisioning / float64(totalItems) + // Use processed count for averages + actualCount := processedCount + if actualCount == 0 { + actualCount = 1 // Avoid division by zero + } + + avgWaitingForDependencies := sumWaitingForDependencies / float64(actualCount) + avgDVCRProvisioning := sumDVCRProvisioning / float64(actualCount) + avgTotalProvisioning := sumTotalProvisioning / float64(actualCount) saveData := fmt.Sprintf( "Total VDs count: %d\n"+ + "Processed VDs count: %d\n"+ "Average WaitingForDependencies in seconds: %.2f\n"+ "Average DVCRProvisioning in seconds: %.2f\n"+ "Average TotalProvisioning in seconds: %.2f\n", - totalItems, avgWaitingForDependencies, avgDVCRProvisioning, avgTotalProvisioning, + totalItems, processedCount, avgWaitingForDependencies, avgDVCRProvisioning, avgTotalProvisioning, ) - helpers.SaveToFile(saveData, "vd", namespace) + helpers.SaveToFile(saveData, "avg-vd", namespace, outputDir) fmt.Println(saveData) - vds.SaveToCSV(namespace) + vds.SaveToCSV(namespace, outputDir) } diff --git a/tests/performance/tools/statistic/internal/vm/get_vm_stat.go b/tests/performance/tools/statistic/internal/vm/get_vm_stat.go index d955c74c85..63ddcc9b57 100644 --- a/tests/performance/tools/statistic/internal/vm/get_vm_stat.go +++ b/tests/performance/tools/statistic/internal/vm/get_vm_stat.go @@ -40,14 +40,10 @@ type VMs struct { Items []VM `json:"items"` } -func (vms *VMs) SaveToCSV(ns string) { +func (vms *VMs) SaveToCSV(ns string, outputDir string) { filepath := fmt.Sprintf("/all-%s-%s-%s.csv", "vm", ns, time.Now().Format("2006-01-02_15-04-05")) - execpath, err := os.Getwd() - if err != nil { - os.Exit(1) - } - file, err := os.Create(execpath + filepath) + file, err := os.Create(outputDir + filepath) if err != nil { os.Exit(1) } @@ -78,7 +74,13 @@ func (vms *VMs) SaveToCSV(ns string) { fmt.Println("Data of VD saved successfully to csv", file.Name()) } -func GetStatistic(client kubeclient.Client, namespace string) { +func GetStatistic(client kubeclient.Client, namespace string, outputDir string, vmCount int) { + // Create output directory if it doesn't exist + if err := 
os.MkdirAll(outputDir, 0755); err != nil { + fmt.Printf("Failed to create output directory: %v\n", err) + os.Exit(1) + } + vmList, err := client.VirtualMachines(namespace).List(context.TODO(), metav1.ListOptions{}) if err != nil { fmt.Printf("Failed to get vm: %v\n", err) @@ -93,9 +95,15 @@ func GetStatistic(client kubeclient.Client, namespace string) { ) totalItems := len(vmList.Items) + processedCount := 0 + // Filter VMs by count if specified for _, vm := range vmList.Items { if string(vm.Status.Phase) == "Running" { + // If vmCount is specified and we've reached the limit, break + if vmCount > 0 && processedCount >= vmCount { + break + } vms.Items = append(vms.Items, VM{ Name: vm.Name, @@ -109,26 +117,34 @@ func GetStatistic(client kubeclient.Client, namespace string) { sumWaitingForDependencies += helpers.ToSeconds(vm.Status.Stats.LaunchTimeDuration.WaitingForDependencies) sumVirtualMachineStarting += helpers.ToSeconds(vm.Status.Stats.LaunchTimeDuration.VirtualMachineStarting) sumGuestOSAgentStarting += helpers.ToSeconds(vm.Status.Stats.LaunchTimeDuration.GuestOSAgentStarting) + processedCount++ } } - avgWaitingForDependencies := sumWaitingForDependencies / float64(totalItems) - avgVirtualMachineStarting := sumVirtualMachineStarting / float64(totalItems) - avgGuestOSAgentStarting := sumGuestOSAgentStarting / float64(totalItems) + // Use processed count for averages + actualCount := processedCount + if actualCount == 0 { + actualCount = 1 // Avoid division by zero + } + + avgWaitingForDependencies := sumWaitingForDependencies / float64(actualCount) + avgVirtualMachineStarting := sumVirtualMachineStarting / float64(actualCount) + avgGuestOSAgentStarting := sumGuestOSAgentStarting / float64(actualCount) saveData := fmt.Sprintf( "Total VMs count: %d\n"+ + "Processed VMs count: %d\n"+ "Average WaitingForDependencies in seconds: %.2f\n"+ "Average VirtualMachineStarting in seconds: %.2f\n"+ "Average GuestOSAgentStarting in seconds: %.2f\n", - totalItems, avgWaitingForDependencies, avgVirtualMachineStarting, avgGuestOSAgentStarting, + totalItems, processedCount, avgWaitingForDependencies, avgVirtualMachineStarting, avgGuestOSAgentStarting, ) - helpers.SaveToFile(saveData, "vm", namespace) + helpers.SaveToFile(saveData, "avg-vm", namespace, outputDir) fmt.Println(saveData) - vms.SaveToCSV(namespace) + vms.SaveToCSV(namespace, outputDir) } func getStoppingAndStoppedDuration(vm v1alpha2.VirtualMachine) time.Duration { diff --git a/tests/performance/tools/statistic/internal/vmop/get_vmop_stat.go b/tests/performance/tools/statistic/internal/vmop/get_vmop_stat.go new file mode 100644 index 0000000000..06f059785a --- /dev/null +++ b/tests/performance/tools/statistic/internal/vmop/get_vmop_stat.go @@ -0,0 +1,169 @@ +/* +Copyright 2025 Flant JSC + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package vmop + +import ( + "context" + "encoding/csv" + "fmt" + "os" + "time" + + "statistic/internal/helpers" + + "github.com/deckhouse/virtualization/api/client/kubeclient" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +type VMOP struct { + Name string `json:"name"` + Phase string `json:"phase"` + Duration time.Duration `json:"duration"` + StartTime metav1.Time `json:"startTime"` + EndTime metav1.Time `json:"endTime"` +} + +type VMOPs struct { + Items []VMOP `json:"items"` +} + +func (vmops *VMOPs) SaveToCSV(ns string, outputDir string) { + filepath := fmt.Sprintf("/all-%s-%s-%s.csv", "vmop", ns, time.Now().Format("2006-01-02_15-04-05")) + + file, err := os.Create(outputDir + filepath) + if err != nil { + os.Exit(1) + } + defer file.Close() + + writer := csv.NewWriter(file) + defer writer.Flush() + + header := []string{"Name", "Phase", "Duration", "StartTime", "EndTime"} + if err := writer.Write(header); err != nil { + fmt.Printf("Error writing header to CSV file: %v\n", err) + os.Exit(1) + } + + for _, res := range vmops.Items { + data := []string{ + res.Name, + res.Phase, + helpers.DurationToString(&metav1.Duration{Duration: res.Duration}), + res.StartTime.Format(time.RFC3339), + res.EndTime.Format(time.RFC3339), + } + if err := writer.Write(data); err != nil { + fmt.Printf("Error writing data to CSV file: %v\n", err) + os.Exit(1) + } + } + fmt.Println("Data of VMOP saved successfully to csv", file.Name()) +} + +func GetStatistic(client kubeclient.Client, namespace string, outputDir string) { + vmopList, err := client.VirtualMachineOperations(namespace).List(context.TODO(), metav1.ListOptions{}) + if err != nil { + fmt.Printf("Failed to get vmop: %v\n", err) + os.Exit(1) + } + + var ( + vmops VMOPs + sumDuration float64 + ) + + totalItems := len(vmopList.Items) + processedCount := 0 + + for _, vmop := range vmopList.Items { + // Calculate duration from start to end + var duration time.Duration + var startTime, endTime metav1.Time + + // Find start and end times from conditions + if len(vmop.Status.Conditions) > 0 { + // Sort conditions by LastTransitionTime to find earliest and latest + conditions := vmop.Status.Conditions + + // Find the earliest (start) time + startTime = conditions[0].LastTransitionTime + for _, condition := range conditions { + if condition.LastTransitionTime.Time.Before(startTime.Time) { + startTime = condition.LastTransitionTime + } + } + + // Find the latest (end) time + endTime = conditions[0].LastTransitionTime + for _, condition := range conditions { + if condition.LastTransitionTime.Time.After(endTime.Time) { + endTime = condition.LastTransitionTime + } + } + } + + // Only calculate duration if we have valid start and end times + if !endTime.IsZero() && !startTime.IsZero() && endTime.Time.After(startTime.Time) { + duration = endTime.Time.Sub(startTime.Time) + } else { + // If we can't determine duration, set to 0 + duration = 0 + } + + vmops.Items = append(vmops.Items, VMOP{ + Name: vmop.Name, + Phase: string(vmop.Status.Phase), + Duration: duration, + StartTime: startTime, + EndTime: endTime, + }) + + // Only add to sum if duration is positive + if duration.Seconds() > 0 { + sumDuration += duration.Seconds() + } + processedCount++ + } + + // Calculate average duration (only for VMOPs with positive duration) + avgDuration := float64(0) + validDurationCount := 0 + for _, vmop := range vmops.Items { + if vmop.Duration.Seconds() > 0 { + validDurationCount++ + } + } + + if validDurationCount > 0 { + avgDuration = sumDuration / 
float64(validDurationCount) + } + + saveData := fmt.Sprintf( + "Total VMOPs count: %d\n"+ + "Processed VMOPs count: %d\n"+ + "Valid Duration VMOPs: %d\n"+ + "Average Duration in seconds: %.2f\n", + totalItems, processedCount, validDurationCount, avgDuration, + ) + + helpers.SaveToFile(saveData, "avg-vmop", namespace, outputDir) + + fmt.Println(saveData) + + vmops.SaveToCSV(namespace, outputDir) +} diff --git a/tests/performance/tools/statistic/pkg/command/statistic.go b/tests/performance/tools/statistic/pkg/command/statistic.go index b0f57dc025..9ba08ef433 100644 --- a/tests/performance/tools/statistic/pkg/command/statistic.go +++ b/tests/performance/tools/statistic/pkg/command/statistic.go @@ -31,6 +31,9 @@ var ( namespace string virtualmachine bool virtualdisk bool + outputDir string + vmCount int + vdCount int ) var rootCmd = &cobra.Command{ @@ -55,6 +58,9 @@ func init() { rootCmd.Flags().StringVarP(&namespace, "namespace", "n", "perf", "namespace to look for the VMs,VDs, default 'perf'") rootCmd.Flags().BoolVarP(&virtualmachine, "virtualmachine", "v", false, "get virtualmachine statistics") rootCmd.Flags().BoolVarP(&virtualdisk, "virtualdisk", "d", false, "get virtualdisk statistics") + rootCmd.Flags().StringVarP(&outputDir, "output-dir", "O", ".", "output directory for CSV files") + rootCmd.Flags().IntVarP(&vmCount, "vm-count", "c", 0, "limit number of VMs to process (0 = all)") + rootCmd.Flags().IntVarP(&vdCount, "vd-count", "C", 0, "limit number of VDs to process (0 = all)") } func getStatistic(cmd *cobra.Command, args []string) { @@ -64,11 +70,11 @@ func getStatistic(cmd *cobra.Command, args []string) { getAll := !virtualmachine && !virtualdisk if getAll || virtualmachine { - vm.GetStatistic(client, namespace) + vm.GetStatistic(client, namespace, outputDir, vmCount) } if getAll || virtualdisk { - vd.GetStatistic(client, namespace) + vd.GetStatistic(client, namespace, outputDir, vdCount) } } diff --git a/tests/performance/values.yaml b/tests/performance/values.yaml index 26cc5b1227..32e43379fc 100644 --- a/tests/performance/values.yaml +++ b/tests/performance/values.yaml @@ -1,9 +1,11 @@ -count: 1 +start_count: 16000 +count: 2 nginx: false +curlUrl: "http://nc.nc.svc.cluster.local:8080/ping" resources: storageClassName: "ceph-pool-r2-csi-rbd" default: all # all, vms, vds, vi - prefix: "performance" + prefix: "perf" virtualMachine: spec: template: @@ -19,19 +21,23 @@ resources: restartApprovalMode: Dynamic cpu: cores: 1 - coreFraction: 10% + coreFraction: 5% memory: size: 256Mi virtualDisk: + annotation: true spec: template: type: virtualDisk # virtualImage or virtualDisk size: 300Mi virtualImage: + annotation: true spec: + count: 0 template: - # Virtual image type virtualImage or persistentVolumeClaim - type: virtualImage + # Virtual image type containerRegistry or persistentVolumeClaim + type: persistentVolumeClaim + # type: persistentVolumeClaim image: name: alpine url: "https://0e773854-6b4e-4e76-a65b-d9d81675451a.selstorage.ru/alpine/alpine-v3-20.qcow2" diff --git a/tests/performance/vd-deploy.sh b/tests/performance/vd-deploy.sh new file mode 100755 index 0000000000..5785bfc5c8 --- /dev/null +++ b/tests/performance/vd-deploy.sh @@ -0,0 +1,279 @@ +#!/usr/bin/env bash + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +CYAN='\033[0;36m' +NC='\033[0m' # No Color + +MAX_BATCH_SIZE=1000 + +if [ -z "$NAMESPACE" ]; then + NAMESPACE="perf" +fi + +SLEEP_TIME=5 + +# === +LOG_FILE="vd-deploy_$(date +"%Y%m%d_%H%M%S").log" +# === + +# == 
date functions ==
+get_current_date() {
+  date +"%H:%M:%S %d-%m-%Y"
+}
+
+get_timestamp() {
+  date +%s
+}
+
+format_duration() {
+  local total_seconds=$1
+  local hours=$((total_seconds / 3600))
+  local minutes=$(( (total_seconds % 3600) / 60 ))
+  local seconds=$((total_seconds % 60))
+  printf "%02d:%02d:%02d\n" "$hours" "$minutes" "$seconds"
+}
+
+formatted_date() {
+  local timestamp="$1"
+
+  # Check if timestamp is valid (not empty and is a number)
+  if [ -z "$timestamp" ] || ! [[ "$timestamp" =~ ^[0-9]+$ ]]; then
+    # Use current time if timestamp is invalid
+    date +"%H:%M:%S %d-%m-%Y"
+    return
+  fi
+
+  # Use OS-specific date command
+  case "$OS_TYPE" in
+    "macOS")
+      date -r "$timestamp" +"%H:%M:%S %d-%m-%Y" 2>/dev/null || date +"%H:%M:%S %d-%m-%Y"
+      ;;
+    "Linux")
+      date -d "@$timestamp" +"%H:%M:%S %d-%m-%Y" 2>/dev/null || date +"%H:%M:%S %d-%m-%Y"
+      ;;
+    *)
+      # Fallback - try both methods
+      if date -r "$timestamp" +"%H:%M:%S %d-%m-%Y" 2>/dev/null; then
+        # macOS style worked
+        date -r "$timestamp" +"%H:%M:%S %d-%m-%Y"
+      elif date -d "@$timestamp" +"%H:%M:%S %d-%m-%Y" 2>/dev/null; then
+        # Linux style worked
+        date -d "@$timestamp" +"%H:%M:%S %d-%m-%Y"
+      else
+        # Last resort - use current time
+        date +"%H:%M:%S %d-%m-%Y"
+      fi
+      ;;
+  esac
+}
+
+# ===
+
+
+exit_trap() {
+  echo ""
+  echo "Cleanup"
+  echo "Exiting..."
+  exit 0
+}
+
+trap exit_trap SIGINT SIGTERM
+
+get_default_storage_class() {
+  if [ -n "${STORAGE_CLASS:-}" ]; then
+    echo "$STORAGE_CLASS"
+  else
+    kubectl get storageclass -o json \
+      | jq -r '.items[] | select(.metadata.annotations."storageclass.kubernetes.io/is-default-class" == "true") | .metadata.name'
+  fi
+}
+
+log_info() {
+  local message="$1"
+  local timestamp=$(get_current_date)
+  echo -e "${BLUE}[INFO]${NC} $message"
+  if [ -n "$LOG_FILE" ]; then
+    echo "[$timestamp] [INFO] $message" >> "$LOG_FILE"
+  fi
+}
+
+log_success() {
+  local message="$1"
+  local timestamp=$(get_current_date)
+  echo -e "${GREEN}[SUCCESS]${NC} $message"
+  if [ -n "$LOG_FILE" ]; then
+    echo "[$timestamp] [SUCCESS] $message" >> "$LOG_FILE"
+  fi
+}
+
+log_warning() {
+  local message="$1"
+  local timestamp=$(get_current_date)
+  echo -e "${YELLOW}[WARNING]${NC} $message"
+  if [ -n "$LOG_FILE" ]; then
+    echo "[$timestamp] [WARNING] $message" >> "$LOG_FILE"
+  fi
+}
+
+log_error() {
+  local message="$1"
+  local timestamp=$(get_current_date)
+  echo -e "${RED}[ERROR]${NC} $message"
+  if [ -n "$LOG_FILE" ]; then
+    echo "[$timestamp] [ERROR] $message" >> "$LOG_FILE"
+  fi
+}
+
+wait_vd() {
+  local sleep_time=${1:-10}
+  local expected_count=$2
+  local VDReady
+  local VDTotal
+
+  while true; do
+    VDReady=$(kubectl -n $NAMESPACE get vd | grep "Ready" | wc -l)
+
+    if [ -n "$expected_count" ]; then
+      VDTotal=$expected_count
+    else
+      VDTotal=$(kubectl -n $NAMESPACE get vd -o name | wc -l)
+    fi
+
+    if [ $VDReady -eq $VDTotal ]; then
+      echo "All vds are ready"
+      echo "$(formatted_date $(get_timestamp))"
+      echo ""
+      break
+    fi
+
+    echo ""
+    echo "Waiting for vds to be ready..."
+    echo "VD ready: $VDReady/$VDTotal"
+    echo ""
+    echo "Waiting for $sleep_time seconds..."
+ sleep $sleep_time + echo "" + + done +} + +show_deployment_progress() { + local current_count=$1 + local total_count=$2 + local batch_number=$3 + local total_batches=$4 + local start_time=$5 + + local current_time=$(get_timestamp) + local elapsed_time=$((current_time - start_time)) + local progress_percent=$(( (current_count * 100) / total_count )) + + # Calculate estimated time remaining + local estimated_total_time=0 + local estimated_remaining_time=0 + if [ $current_count -gt 0 ]; then + estimated_total_time=$(( (elapsed_time * total_count) / current_count )) + estimated_remaining_time=$((estimated_total_time - elapsed_time)) + fi + + log_info "Progress: $current_count/$total_count ($progress_percent%)" + log_info "Batch: $batch_number/$total_batches" + log_info "Elapsed: $(format_duration $elapsed_time)" + if [ $estimated_remaining_time -gt 0 ]; then + log_info "Estimated remaining: $(format_duration $estimated_remaining_time)" + fi +} + +deploy_disks_only_batch() { + local total_count=$1 + local vi_type=$2 + local batch_size=${3:-$MAX_BATCH_SIZE} + local start_time=$(get_timestamp) + + log_info "Starting batch deployment of $total_count disks from $vi_type" + log_info "Batch size: $batch_size resources per batch" + log_info "Start time: $(formatted_date $start_time)" + + local deployed_count=0 + local batch_number=1 + local total_batches=$(( (total_count + batch_size - 1) / batch_size )) + + log_info "Total batches to deploy: $total_batches" + + while [ $deployed_count -lt $total_count ]; do + local remaining_count=$((total_count - deployed_count)) + local current_batch_size=$batch_size + + # Adjust batch size for the last batch if needed + if [ $remaining_count -lt $batch_size ]; then + current_batch_size=$remaining_count + fi + + log_info "=== Batch $batch_number/$total_batches ===" + show_deployment_progress "$deployed_count" "$total_count" "$batch_number" "$total_batches" "$start_time" + + local batch_start=$(get_timestamp) + + # Deploy current batch of disks (COUNT should be cumulative, not absolute) + local cumulative_count=$((deployed_count + current_batch_size)) + log_info "Deploying disk batch $batch_number: $current_batch_size new disks (total will be: $cumulative_count)" + task apply:disks \ + COUNT=$cumulative_count \ + NAMESPACE=$NAMESPACE \ + STORAGE_CLASS=$(get_default_storage_class) \ + VIRTUALDISK_TYPE=virtualDisk \ + VIRTUALIMAGE_TYPE=$vi_type + + # Wait for current batch to be ready + wait_vd $SLEEP_TIME + + local batch_end=$(get_timestamp) + local batch_duration=$((batch_end - batch_start)) + deployed_count=$((deployed_count + current_batch_size)) + + log_success "Batch $batch_number completed in $(format_duration $batch_duration)" + log_info "Total deployed so far: $deployed_count/$total_count" + + # Add delay between batches to avoid overwhelming the system + if [ $batch_number -lt $total_batches ]; then + log_info "Waiting 30 seconds before next batch..." 
+      sleep 30
+    fi
+
+    ((batch_number++))
+  done
+
+  local end_time=$(get_timestamp)
+  local total_duration=$((end_time - start_time))
+  local formatted_duration=$(format_duration "$total_duration")
+
+  log_success "Batch disk deployment completed: $deployed_count disks in $formatted_duration"
+  log_info "Average time per disk: $(( total_duration / deployed_count )) seconds"
+  log_info "End time: $(formatted_date $end_time)"
+
+  echo "$total_duration"
+  log_info "$total_duration"
+}
+
+# =======
+main() {
+  local total_vd=${1:-15000}
+  # containerRegistry or persistentVolumeClaim
+  local disk_type=${2:-persistentVolumeClaim}
+  local batch_size=${3:-1000}
+  echo "VDs" > $LOG_FILE
+  log_info "Start deploying disks [$total_vd]"
+  deploy_disks_only_batch $total_vd $disk_type $batch_size
+
+  log_success "Disk deployment completed"
+}
+
+main "$@"
\ No newline at end of file
diff --git a/tests/performance/vd-pvc-snapshot.sh b/tests/performance/vd-pvc-snapshot.sh
new file mode 100755
index 0000000000..46e5ad1d80
--- /dev/null
+++ b/tests/performance/vd-pvc-snapshot.sh
@@ -0,0 +1,310 @@
+#!/usr/bin/env bash
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+CYAN='\033[0;36m'
+NC='\033[0m' # No Color
+
+SLEEP_TIME=5
+
+# ===
+LOG_FILE="vs-pvc-deploy_$(date +"%Y%m%d_%H%M%S").log"
+# ===
+
+# == date functions ==
+formatted_date() {
+  local timestamp="$1"
+
+  # Check if timestamp is valid (not empty and is a number)
+  if [ -z "$timestamp" ] || ! [[ "$timestamp" =~ ^[0-9]+$ ]]; then
+    # Use current time if timestamp is invalid
+    date +"%H:%M:%S %d-%m-%Y"
+    return
+  fi
+
+  # Use OS-specific date command
+  case "$OS_TYPE" in
+    "macOS")
+      date -r "$timestamp" +"%H:%M:%S %d-%m-%Y" 2>/dev/null || date +"%H:%M:%S %d-%m-%Y"
+      ;;
+    "Linux")
+      date -d "@$timestamp" +"%H:%M:%S %d-%m-%Y" 2>/dev/null || date +"%H:%M:%S %d-%m-%Y"
+      ;;
+    *)
+      # Fallback - try both methods
+      if date -r "$timestamp" +"%H:%M:%S %d-%m-%Y" 2>/dev/null; then
+        # macOS style worked
+        date -r "$timestamp" +"%H:%M:%S %d-%m-%Y"
+      elif date -d "@$timestamp" +"%H:%M:%S %d-%m-%Y" 2>/dev/null; then
+        # Linux style worked
+        date -d "@$timestamp" +"%H:%M:%S %d-%m-%Y"
+      else
+        # Last resort - use current time
+        date +"%H:%M:%S %d-%m-%Y"
+      fi
+      ;;
+  esac
+}
+
+get_current_date() {
+  date +"%H:%M:%S %d-%m-%Y"
+}
+
+get_timestamp() {
+  date +%s
+}
+
+format_duration() {
+  local total_seconds=$1
+  local hours=$((total_seconds / 3600))
+  local minutes=$(( (total_seconds % 3600) / 60 ))
+  local seconds=$((total_seconds % 60))
+  printf "%02d:%02d:%02d\n" "$hours" "$minutes" "$seconds"
+}
+
+# ===
+
+
+exit_trap() {
+  echo ""
+  echo "Cleanup"
+  echo "Exiting..."
+  exit 0
+}
+
+trap exit_trap SIGINT SIGTERM
+
+get_default_storage_class() {
+  if [ -n "${STORAGE_CLASS:-}" ]; then
+    echo "$STORAGE_CLASS"
+  else
+    kubectl get storageclass -o json \
+      | jq -r '.items[] | select(.metadata.annotations."storageclass.kubernetes.io/is-default-class" == "true") | .metadata.name'
+  fi
+}
+
+log_info() {
+  local message="$1"
+  local timestamp=$(get_current_date)
+  echo -e "${BLUE}[INFO]${NC} $message"
+  if [ -n "$LOG_FILE" ]; then
+    echo "[$timestamp] [INFO] $message" >> "$LOG_FILE"
+  fi
+}
+
+log_success() {
+  local message="$1"
+  local timestamp=$(get_current_date)
+  echo -e "${GREEN}[SUCCESS]${NC} $message"
+  if [ -n "$LOG_FILE" ]; then
+    echo "[$timestamp] [SUCCESS] $message" >> "$LOG_FILE"
+  fi
+}
+
+log_warning() {
+  local message="$1"
+  local timestamp=$(get_current_date)
+  echo -e "${YELLOW}[WARNING]${NC} $message"
+  if [ -n "$LOG_FILE" ]; then
+    echo "[$timestamp] [WARNING] $message" >> "$LOG_FILE"
+  fi
+}
+
+log_error() {
+  local message="$1"
+  local timestamp=$(get_current_date)
+  echo -e "${RED}[ERROR]${NC} $message"
+  if [ -n "$LOG_FILE" ]; then
+    echo "[$timestamp] [ERROR] $message" >> "$LOG_FILE"
+  fi
+}
+
+
+create_vi() {
+  local vi_name=${1:-"perf-persistentvolumeclaim"}
+  local ns=${2:-"perf-vs"}
+  local sc=${3:-"ceph-pool-r2-csi-rbd"}
+
+  kubectl apply -f - </dev/null;then
+    echo "NS exists"
+  else
+    kubectl create ns ${ns}
+    echo "NS ${ns} created"
+  fi
+}
+
+# GLOBAL Values
+NAMESPACE="perf-vs"
+SC="ceph-pool-r2-csi-rbd-immediate"
+VS_NAME="vs-perf"
+PVC_NAME="pvc-perf"
+VI_NAME="vi-perf"
+
+
+main(){
+  local pvc_count=${1:-1000}
+  local start_time=$(get_timestamp)
+
+  local pvc_name=""
+  local pvc_bound=0
+
+  create_ns ${NAMESPACE}
+
+  log_info "Starting volume snapshot deployment"
+  log_info "Start time: $(formatted_date $start_time)"
+
+  log_info "Create virtual image"
+  create_vi ${VI_NAME} ${NAMESPACE} ${SC}
+
+  log_info "Create snapshot"
+  create_vs ${VS_NAME} ${VI_NAME} ${NAMESPACE} ${SC}
+
+  log_info "Create $pvc_count persistent volume claims"
+  for i in $(seq 0 $(($pvc_count-1)) ); do
+    pvc_name=$(printf "%s-%05d" ${PVC_NAME} ${i})
+    create_pvc ${pvc_name} ${VS_NAME} ${NAMESPACE} ${SC}
+  done
+
+  log_info "Wait for bound"
+  local bound_start=$(get_timestamp)
+  while true; do
+    log_info "Waiting for bound"
+    sleep ${SLEEP_TIME}
+    pvc_bound=$(kubectl -n ${NAMESPACE} get pvc -l test=blockdevices -o jsonpath='{.items[?(@.status.phase=="Bound")].metadata.name}' 2>/dev/null | wc -w)
+
+    if [ $pvc_bound -eq $pvc_count ]; then
+      local end_time=$(get_timestamp)
+      local total_duration=$((end_time - start_time))
+      local bound_duration=$((end_time - bound_start))
+      log_info "Bound duration: $(format_duration $bound_duration)"
+      log_info "Total duration: $(format_duration $total_duration)"
+
+      local formatted_duration=$(format_duration "$total_duration")
+      log_success "Bound all volumes in $formatted_duration"
+      break
+    fi
+
+    log_info "Waiting for bound... $pvc_bound/$pvc_count"
+
+  done
+
+}
+
+
+main "$@"
+
+
+
+# new sc
+# k delete validatingwebhookconfigurations.admissionregistration.k8s.io d8-csi-ceph-sc-validation
+# create a new sc with only imageFeatures: layering
\ No newline at end of file
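The closing notes of `vd-pvc-snapshot.sh` call for deleting the `d8-csi-ceph-sc-validation` webhook and then creating a StorageClass that sets only `imageFeatures: layering`. A minimal sketch of what that could look like, assuming the standard ceph-csi RBD provisioner; the `clusterID`, `pool`, and secret parameters are placeholders that must be copied from the existing `ceph-pool-r2-csi-rbd` class, and the name matches the `SC` variable used by the script:

```bash
# Sketch only: copy clusterID, pool, and the csi.storage.k8s.io/* secret
# parameters from the existing class before applying.
kubectl get storageclass ceph-pool-r2-csi-rbd -o yaml   # reference for the real values

cat <<'EOF' | kubectl apply -f -
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: ceph-pool-r2-csi-rbd-immediate
provisioner: rbd.csi.ceph.com      # assumed ceph-csi RBD provisioner
parameters:
  imageFeatures: layering          # the only image feature, per the note above
  clusterID: <cluster-id>          # placeholder
  pool: <pool-name>                # placeholder
reclaimPolicy: Delete
volumeBindingMode: Immediate       # matches the "-immediate" suffix in the name
allowVolumeExpansion: true
EOF
```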