update fix_store_after_hazard (#15309) #3
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow header: display name, workflow-level environment variables, and triggers.
| name: Unit Tests | |
| env: | |
| # increment this when downloads substantially change to avoid the internet | |
| CACHE_VERSION: '18' | |
| CAPTURE_PROCESS_REPLAY: 1 | |
| GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| PYTHONPATH: ${{ github.workspace }} | |
# CHECK_OOB is 1 here; some jobs below set it to 0 (e.g. docs, testllm).
| CHECK_OOB: 1 | |
# Triggers: pushes to master, pull requests, and manual dispatch.
| on: | |
| push: | |
| branches: | |
| - master | |
| pull_request: | |
| workflow_dispatch: | |
| jobs: | |
# Job: runs test/speed/external_test_speed_v_torch.py with CPU=1 CPU_LLVM=1 THREADS=0,
# once as-is and once more with BEAM=2.
| llvmspeed: | |
| name: LLVM Speed | |
| runs-on: ubuntu-24.04 | |
| timeout-minutes: 20 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: llvm-speed | |
| deps: testing_unit | |
| llvm: 'true' | |
| - name: Speed Test | |
| run: CPU=1 CPU_LLVM=1 THREADS=0 python3 test/speed/external_test_speed_v_torch.py | |
| - name: Speed Test (BEAM=2) | |
| run: BEAM=2 CPU=1 CPU_LLVM=1 THREADS=0 python3 test/speed/external_test_speed_v_torch.py | |
# Job: builds the wheel, installs the repo into fresh venvs, builds the docs, and runs the
# python snippets extracted from README.md and docs/quickstart.md.
| docs: | |
| name: Docs | |
| runs-on: ubuntu-22.04 | |
| timeout-minutes: 10 | |
# This job sets CHECK_OOB to 0 (the workflow-level value is 1).
| env: | |
| CHECK_OOB: 0 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| deps: docs | |
| pydeps: "capstone torch" | |
| - name: Build wheel and show size | |
| run: | | |
| pip install build | |
| python -m build --wheel --outdir dist | |
| ls -lh dist/*.whl | |
# Installs the checkout into a new venv under $HOME, imports Tensor, and type-checks the same snippet with mypy.
| - name: Use as an external package | |
| run: | | |
| mkdir $HOME/test_external_dir | |
| cd $HOME/test_external_dir | |
| python -m venv venv | |
| source venv/bin/activate | |
| pip install $GITHUB_WORKSPACE | |
| python -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))" | |
| pip install mypy | |
| mypy -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))" | |
# Same venv-install pattern, but the only extra file copied in is examples/beautiful_mnist.py.
| - name: Run beautiful_mnist with tinygrad only | |
| run: | | |
| mkdir $GITHUB_WORKSPACE/test_dir | |
| cd $GITHUB_WORKSPACE/test_dir | |
| python -m venv venv | |
| source venv/bin/activate | |
| pip install $GITHUB_WORKSPACE | |
| cp $GITHUB_WORKSPACE/examples/beautiful_mnist.py . | |
| BS=2 STEPS=10 python beautiful_mnist.py | |
| - name: Test Docs Build | |
| run: python -m mkdocs build --strict | |
| - name: Test Docs | |
| run: python docs/abstractions3.py | |
# The awk program keeps only the lines between a ```python fence and the next ``` fence, then the result is executed.
| - name: Test README | |
| run: awk '/```python/{flag=1;next}/```/{flag=0}flag' README.md > README.py && python README.py | |
| - name: Test Quickstart | |
| run: awk '/```python/{flag=1;next}/```/{flag=0}flag' docs/quickstart.md > quickstart.py && python quickstart.py | |
| - name: Test DEBUG | |
| run: DEBUG=100 python3 -c "from tinygrad import Tensor; N = 1024; a, b = Tensor.rand(N, N), Tensor.rand(N, N); c = (a.reshape(N, 1, N) * b.T.reshape(1, N, N)).sum(axis=2); print((c.numpy() - (a.numpy() @ b.numpy())).mean())" | |
# Compiles the generated C with clang, pipes Chicken.jpg into the binary, and passes only if grep finds "cock" in its output.
| - name: Compile EfficientNet to C and test it | |
| run: | | |
| CPU=1 CPU_LLVM=0 python examples/compile_efficientnet.py > recognize.c | |
| clang -O2 recognize.c -lm -o recognize | |
| cat test/models/efficientnet/Chicken.jpg | ./recognize | grep cock | |
# Job: runs the scripts and tests under extra/torch_backend plus test/backend/test_ops.py with TINY_BACKEND=1.
| torchbackend: | |
| name: Torch Backend Tests | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: torch-backend-pillow-torchvision-et-pt | |
| deps: testing_unit | |
| pydeps: "pillow torchvision expecttest" | |
| llvm: 'true' | |
# `apt update` failures are tolerated (|| true); the install itself must succeed.
| - name: Install ninja | |
| run: | | |
| sudo apt update || true | |
| sudo apt install -y --no-install-recommends ninja-build | |
| - name: Test one op | |
| run: FORWARD_ONLY=1 TINY_BACKEND=1 python3 test/test_tiny.py TestTiny.test_plus | |
| - name: Test ResNet-18 | |
| run: DEBUG=2 python3 extra/torch_backend/example.py | |
| - name: custom tests | |
| run: python3 -m pytest -n auto extra/torch_backend/test.py --durations=20 | |
| - name: Test one op in torch tests | |
| run: DEBUG=2 python3 extra/torch_backend/torch_tests.py TestTinyBackendPRIVATEUSE1.test_unary_log_tiny_float32 | |
| - name: Test Ops with TINY_BACKEND | |
| run: CPU=1 CPU_LLVM=1 LLVMOPT=0 TINY_BACKEND=1 python3 -m pytest -n auto test/backend/test_ops.py --durations=20 | |
| - name: Test in-place operations on views | |
| run: TORCH_DEBUG=1 python3 extra/torch_backend/test_inplace.py | |
| - name: Test multi-gpu | |
| run: CPU=1 CPU_LLVM=1 GPUS=4 TORCH_DEBUG=1 python3 extra/torch_backend/test_multigpu.py | |
| - name: Test kernel fusion | |
| run: python3 extra/torch_backend/test_kernel_fusion.py | |
# Job: runs beautiful_mnist_torch.py with TINY_BACKEND=1, then the full torch_tests.py suite as a non-blocking step.
| torchbackendmore: | |
| name: Torch Backend Tests More | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: torch-backend-pillow-torchvision-et-pt | |
| deps: testing_unit | |
| llvm: 'true' | |
# `apt update` failures are tolerated (|| true); the install itself must succeed.
| - name: Install ninja | |
| run: | | |
| sudo apt update || true | |
| sudo apt install -y --no-install-recommends ninja-build | |
| - name: Test beautiful_mnist in torch with TINY_BACKEND | |
| run: STEPS=20 CPU=1 TARGET_EVAL_ACC_PCT=90.0 TINY_BACKEND=1 python3 examples/other_mnist/beautiful_mnist_torch.py | |
# The trailing `|| true` means pytest failures in this step never fail the job.
| - name: Test some torch tests (expect failure) | |
| run: python3 -m pytest extra/torch_backend/torch_tests.py -v --tb=no || true | |
# Job: every command here sets PYTHON=1; the EMULATE=<target> steps cover the
# METAL, AMX, AMD, AMD_MFMA, AMD_RDNA4, CUDA and INTEL values.
| bepython: | |
| name: Python Backend | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: be-minimal | |
| deps: testing_unit | |
| - name: Test dtype with Python emulator | |
| run: DEBUG=1 PYTHON=1 python3 -m pytest -n=auto test/backend/test_dtype.py test/backend/test_dtype_alu.py | |
| - name: Test ops with Python emulator | |
| run: DEBUG=2 SKIP_SLOW_TEST=1 PYTHON=1 python3 -m pytest -n=auto test/backend/test_ops.py --durations=20 | |
| - name: Test uops with Python emulator | |
| run: PYTHON=1 python3 -m pytest test/backend/test_uops.py --durations=20 | |
| - name: Test symbolic with Python emulator | |
| run: PYTHON=1 python3 test/backend/test_symbolic_ops.py | |
| - name: test_renderer_failures with Python emulator | |
| run: PYTHON=1 python3 -m pytest -rA test/backend/test_renderer_failures.py::TestRendererFailures | |
| - name: Test IMAGE=2 support | |
| run: | | |
| IMAGE=2 PYTHON=1 python3 test/backend/test_ops.py TestOps.test_gemm | |
| IMAGE=2 PYTHON=1 python3 test/backend/test_ops.py TestOps.test_simple_conv2d | |
| - name: Test emulated METAL tensor cores | |
| run: | | |
| DEBUG=2 EMULATE=METAL FORWARD_ONLY=1 PYTHON=1 python3 test/backend/test_ops.py TestOps.test_big_gemm | |
| DEBUG=2 EMULATE=METAL FORWARD_ONLY=1 PYTHON=1 python3 test/opt/test_tensor_cores.py | |
# Two AMX steps exist in this job; the suffix says which test each one runs so they are distinguishable in the run log.
| - name: Test emulated AMX tensor cores (gemm) | |
| run: DEBUG=2 AMX=1 EMULATE=AMX FORWARD_ONLY=1 PYTHON=1 python3 test/backend/test_ops.py TestOps.test_gemm | |
# simple_matmul.py is run for N=16 and N=64, with ACC_HALF=0 and ACC_HALF=1 (the latter with ATOL=1e-3).
| - name: Test emulated AMD tensor cores | |
| run: | | |
| DEBUG=2 EMULATE=AMD FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py | |
| DEBUG=2 EMULATE=AMD FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py | |
| DEBUG=2 EMULATE=AMD FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=1 ATOL=1e-3 python3 ./extra/gemm/simple_matmul.py | |
| DEBUG=2 EMULATE=AMD FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=1 ATOL=1e-3 python3 ./extra/gemm/simple_matmul.py | |
| DEBUG=2 EMULATE=AMD FORWARD_ONLY=1 PYTHON=1 python3 test/opt/test_tensor_cores.py | |
| - name: Test emulated AMD MFMA tensor cores | |
| run: | | |
| DEBUG=2 EMULATE=AMD_MFMA FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py | |
| DEBUG=2 EMULATE=AMD_MFMA FORWARD_ONLY=1 PYTHON=1 python3 test/opt/test_tensor_cores.py | |
| - name: Test emulated AMD RDNA4 tensor cores | |
| run: | | |
| DEBUG=2 EMULATE=AMD_RDNA4 FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py | |
| DEBUG=2 EMULATE=AMD_RDNA4 FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py | |
| DEBUG=2 EMULATE=AMD_RDNA4 FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=1 ATOL=1e-3 python3 ./extra/gemm/simple_matmul.py | |
| DEBUG=2 EMULATE=AMD_RDNA4 FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=1 ATOL=1e-3 python3 ./extra/gemm/simple_matmul.py | |
| DEBUG=2 EMULATE=AMD_RDNA4 FORWARD_ONLY=1 PYTHON=1 python3 test/opt/test_tensor_cores.py | |
| - name: Test emulated CUDA tensor cores | |
| run: | | |
| DEBUG=2 EMULATE=CUDA FORWARD_ONLY=1 PYTHON=1 python3 test/backend/test_ops.py TestOps.test_gemm_fp16 | |
| DEBUG=2 EMULATE=CUDA ALLOW_TF32=1 FORWARD_ONLY=1 PYTHON=1 python3 test/backend/test_ops.py TestOps.test_gemm | |
| DEBUG=2 EMULATE=CUDA_SM75 FORWARD_ONLY=1 PYTHON=1 python3 test/backend/test_ops.py TestOps.test_gemm_fp16 | |
| DEBUG=2 EMULATE=CUDA_SM89 ALLOW_TF32=1 FORWARD_ONLY=1 PYTHON=1 python3 test/opt/test_tensor_cores.py | |
| - name: Test emulated INTEL OpenCL tensor cores | |
| run: DEBUG=2 EMULATE=INTEL FORWARD_ONLY=1 PYTHON=1 HALF=1 N=64 python3 ./extra/gemm/simple_matmul.py | |
| - name: Test emulated AMX tensor cores (test_tensor_cores) | |
| run: DEBUG=2 AMX=1 EMULATE=AMX FORWARD_ONLY=1 PYTHON=1 python3 test/opt/test_tensor_cores.py | |
| - name: Test device flop counts | |
| run: | | |
| DEBUG=2 EMULATE=METAL PYTHON=1 python3 ./test/null/test_uops_stats.py TestUOpsStatsMatmulHalf | |
| DEBUG=2 EMULATE=AMD PYTHON=1 python3 ./test/null/test_uops_stats.py TestUOpsStatsMatmulHalf | |
| DEBUG=2 EMULATE=CUDA PYTHON=1 python3 ./test/null/test_uops_stats.py TestUOpsStatsMatmulHalf | |
| DEBUG=2 EMULATE=INTEL PYTHON=1 python3 ./test/null/test_uops_stats.py TestUOpsStatsMatmulHalf | |
| DEBUG=2 AMX=1 EMULATE=AMX PYTHON=1 python3 ./test/null/test_uops_stats.py TestUOpsStats.test_simple_matmul | |
# Job: pylint (two checks only), pre-commit lint hooks, ruff on extra paths, a mypy line-precision report, and a TYPED=1 run.
| linter: | |
| name: Linters | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 10 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: linting-only | |
| python-version: '3.11' | |
| deps: linting | |
# All pylint checks are disabled except W0311 and C0303 (the two named in the step title).
# NOTE(review): the --indent-string value may have had its whitespace collapsed by the extraction that produced this text; confirm against the repository.
| - name: Lint bad-indentation and trailing-whitespace with pylint | |
| run: python -m pylint --disable=all -e W0311 -e C0303 --jobs=0 --indent-string=' ' --recursive=y . | |
# SKIP lists the pre-commit hook ids not run in this step.
| - name: Run pre-commit linting hooks | |
| run: SKIP=tiny,tests,example pre-commit run --all-files | |
| - name: Lint additional files with ruff | |
| run: | | |
| python3 -m ruff check examples/mlperf/ --ignore E501 | |
| python3 -m ruff check extra/thunder/tiny/ --ignore E501 --ignore F841 --ignore E722 | |
| python3 -m ruff check extra/torch_backend/backend.py | |
# The awk program drops lines containing "autogen", skips the first two report lines, sums columns 2-6,
# and prints totals with percentages relative to (lines - empty).
| - name: Run mypy with lineprecision report | |
| run: | | |
| python -m mypy --lineprecision-report . | |
| grep -v autogen lineprecision.txt | awk 'NR>2 {lines+=$2; precise+=$3; imprecise+=$4; any+=$5; empty+=$6} END {t=lines-empty; printf "TOTAL: %d lines, %d precise (%.1f%%), %d imprecise (%.1f%%), %d any (%.1f%%)\n", t, precise, 100*precise/t, imprecise, 100*imprecise/t, any, 100*any/t}' | |
| cat lineprecision.txt | |
| - name: Run TYPED=1 | |
| run: CHECK_OOB=0 DEV=CPU TYPED=1 python test/test_tiny.py | |
# Job: every active command in this job runs with NULL=1.
| nulltest: | |
| name: Null Tests | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: unittest-13 | |
| pydeps: "pillow ftfy regex pre-commit" | |
| deps: testing_unit | |
| llvm: 'true' | |
| amd: 'true' | |
| - name: Run NULL backend tests | |
| run: NULL=1 python -m pytest -n=auto test/null/ --durations=20 | |
| - name: Run targeted tests on NULL backend | |
| run: NULL=1 python3 -m unittest test.backend.test_multitensor.TestMultiTensor.test_data_parallel_resnet_train_step | |
| # TODO: too slow | |
| # - name: Run SDXL on NULL backend | |
| # run: NULL=1 DEBUG=1 python3 examples/sdxl.py --seed 0 --noshow --timing --fakeweights | |
| - name: Run Clip tests for SD MLPerf on NULL backend | |
| run: NULL=1 python -m pytest -n=auto test/external/mlperf_stable_diffusion/external_test_models.py::TestOpenClip --durations=20 | |
# This command sets CAPTURE_PROCESS_REPLAY=0 inline (the workflow-level value is 1).
| - name: Run AMD emulated BERT training on NULL backend | |
| run: EMULATE=AMD_RDNA4 NULL=1 NULL_ALLOW_COPYOUT=1 CAPTURE_PROCESS_REPLAY=0 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=66 GPUS=1 BERT_LAYERS=2 MODEL=bert python3 examples/mlperf/model_train.py | |
| # TODO: support fake weights | |
| #- name: Run LLaMA 7B on 4 fake devices | |
| # run: NULL=1 python3 examples/llama.py --gen 1 --size 7B --shard 4 --prompt "Hello." --count 3 --temperature 0 --timing | |
# Job: pre-commit test hooks, test/unit with CPU=1, GC and schedule benchmark scripts, process replay,
# dataset regeneration, and a repo line-count check.
| unittest: | |
| name: Unit Tests | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: unittest-13 | |
| pydeps: "pillow ftfy regex pre-commit" | |
| deps: testing_unit | |
| llvm: 'true' | |
| amd: 'true' | |
# SKIP lists the pre-commit hook ids not run in this step.
| - name: Run pre-commit test hooks | |
| run: SKIP=ruff,mypy pre-commit run --all-files | |
# With no device variable set, the default device is asserted to be 'CPU'.
| - name: Check Device.DEFAULT | |
| run: python -c "from tinygrad import Device; assert Device.DEFAULT == 'CPU', Device.DEFAULT" | |
| - name: Run unit tests | |
| run: | | |
| CPU=1 python test/null/test_device.py TestRunAsModule.test_module_runs | |
| CPU=1 python -m pytest -n=auto test/unit/ --durations=20 | |
| - name: Run GC tests | |
| run: python test/external/external_uop_gc.py | |
| - name: External Benchmark Schedule | |
| run: python3 test/external/external_benchmark_schedule.py | |
| - name: Run process replay tests | |
| uses: ./.github/actions/process-replay | |
# reset.py is executed directly (no interpreter prefix); /tmp/sops is gzipped into extra/datasets/sops.gz.
# NOTE(review): /tmp/sops is presumably written by extract_dataset.py; confirm.
| - name: Regen dataset on test_tiny | |
| run: | | |
| test/external/process_replay/reset.py | |
| CAPTURE_PROCESS_REPLAY=1 python test/test_tiny.py TestTiny.test_plus | |
| python extra/optimization/extract_dataset.py | |
| gzip -c /tmp/sops > extra/datasets/sops.gz | |
| #DEBUG=1 MIN_ASTS=1 python extra/optimization/get_action_space.py | |
| - name: Repo line count < 24000 lines | |
| run: MAX_LINE_COUNT=24000 python sz.py | |
# Job: runs pytest over test/unit, test/backend and test/opt with SPEC=2, split into two matrix groups.
| spec: | |
# fail-fast is off, so one failing group does not cancel the other.
| strategy: | |
| fail-fast: false | |
| matrix: | |
| group: [1, 2] | |
| name: SPEC=2 (${{ matrix.group }}) | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: spec-unit | |
| deps: testing_unit | |
| python-version: '3.14' | |
# `--splits 2` matches the two matrix entries; each job passes its own matrix.group to `--group`.
| - name: Test SPEC=2 | |
| run: SPEC=2 pytest --maxfail=10 -n auto --durations=30 test/unit test/backend test/opt --ignore test/backend/test_custom_kernel.py --ignore test/unit/test_hashing.py --timeout 60 -k "not test_setitem_big" --splits 2 --group ${{ matrix.group }} | |
# Job: runs the four fuzz_*.py scripts under test/external, one per step.
| fuzzing: | |
| name: Fuzzing | |
| runs-on: ubuntu-latest | |
| timeout-minutes: 10 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: fuzzing-unit | |
| deps: testing_unit | |
| - name: Fuzz Test symbolic | |
| run: python test/external/fuzz_symbolic.py | |
| - name: Fuzz Test symbolic (symbolic divisors) | |
| run: python test/external/fuzz_symbolic_symbolic_div.py | |
| - name: Fuzz Test fast idiv | |
| run: python test/external/fuzz_fast_idiv.py | |
| - name: Fuzz Test shape ops | |
| run: python test/external/fuzz_shape_ops.py | |
# Job: runs test/backend/test_ops.py with CL=1 IMAGE=2, then the process-replay action.
| testopenclimage: | |
| name: CL IMAGE Tests | |
| runs-on: ubuntu-22.04 | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: gpu-image | |
| deps: testing_unit | |
| opencl: 'true' | |
| - name: Test CL IMAGE=2 ops | |
| run: | | |
| CL=1 IMAGE=2 python -m pytest -n=auto test/backend/test_ops.py --durations=20 | |
| # TODO: training is broken | |
| # CL=1 IMAGE=2 python test/models/test_end2end.py TestEnd2End.test_linear_mnist | |
| - name: Run process replay tests | |
| uses: ./.github/actions/process-replay | |
# Job: generates a dataset with CL=1, runs the kernel-count and fused-optimizer tests, and uploads /tmp/sops.gz.
| testgpumisc: | |
| name: CL Misc tests | |
| runs-on: ubuntu-22.04 | |
| timeout-minutes: 10 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: gen-dataset | |
| deps: testing | |
| opencl: 'true' | |
| - name: Generate Dataset | |
| run: CL=1 extra/optimization/generate_dataset.sh | |
| - name: Run Kernel Count Test | |
| run: CL=1 python -m pytest -n=auto test/external/external_test_opt.py | |
# `-k "not muon"` deselects tests whose name matches "muon".
| - name: Run fused optimizer tests | |
| run: CL=1 FUSE_OPTIM=1 python -m pytest -n=auto test/models/test_mnist.py test/backend/test_optim.py -k "not muon" | |
# NOTE(review): /tmp/sops.gz is presumably produced by generate_dataset.sh above; confirm.
| - name: Upload artifact | |
| uses: actions/upload-artifact@v4 | |
| with: | |
| name: sops.gz | |
| path: /tmp/sops.gz | |
# Job: runs examples/openpilot/compile3.py against downloaded models under several device/precision settings.
| testopenpilot: | |
| name: openpilot Compile Tests | |
| runs-on: ubuntu-22.04 | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: openpilot-compile | |
| deps: testing | |
| opencl: 'true' | |
| llvm: 'true' | |
# The ALLOWED_* variables carry the numeric limits for kernel count and (gated) image reads checked by this step.
| - name: Test openpilot model kernel count and gate usage | |
| run: | | |
| ALLOWED_KERNEL_COUNT=123 ALLOWED_READ_IMAGE=1397 ALLOWED_GATED_READ_IMAGE=94 FLOAT16=1 CL=1 IMAGE=2 python examples/openpilot/compile3.py https://gitlab.com/commaai/openpilot-lfs.git/gitlab-lfs/objects/cf6376aa9a090f0da26c280ef69eabf9bbdd51d1faac9ed392919c3db69be916 | |
| - name: Test openpilot CL compile fp16 | |
| run: FLOAT16=1 DEBUGCL=1 CL=1 IMAGE=2 python examples/openpilot/compile3.py https://gitlab.com/commaai/openpilot-lfs.git/gitlab-lfs/objects/cf6376aa9a090f0da26c280ef69eabf9bbdd51d1faac9ed392919c3db69be916 | |
# This is the only compile3.py invocation that sets SELFTEST=1, and it uses a different (fp32) model URL.
| - name: Test openpilot CL compile fp32 (test correctness) | |
| run: DEBUGCL=1 CL=1 IMAGE=2 SELFTEST=1 python examples/openpilot/compile3.py https://github.com/haraschax/filedump/raw/refs/heads/master/driving_vision_fp32.onnx | |
| - name: Test openpilot LLVM compile fp16 | |
| run: FLOAT16=1 CPU=1 CPU_LLVM=1 python examples/openpilot/compile3.py https://gitlab.com/commaai/openpilot-lfs.git/gitlab-lfs/objects/cf6376aa9a090f0da26c280ef69eabf9bbdd51d1faac9ed392919c3db69be916 | |
| - name: Run process replay tests | |
| uses: ./.github/actions/process-replay | |
| # ****** ONNX Tests ****** | |
# Job: runs the ONNX backend tests with CPU=1 under both CPU_LLVM=0 and CPU_LLVM=1, plus runner/ops/quantize scripts with CPU_LLVM=0.
| testonnxcpu: | |
| name: ONNX (CPU) Tests | |
| runs-on: ubuntu-22.04 | |
| timeout-minutes: 20 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: onnxoptc | |
| deps: testing | |
| python-version: '3.12' | |
| llvm: 'true' | |
| - name: Test ONNX (CPU) | |
| run: CPU=1 CPU_LLVM=0 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20 | |
| - name: Test ONNX (LLVM) | |
| run: CPU=1 CPU_LLVM=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20 | |
| - name: Test ONNX Runner (CPU) | |
| run: CPU=1 CPU_LLVM=0 python3 test/external/external_test_onnx_runner.py | |
| - name: Test Additional ONNX Ops (CPU) | |
| run: CPU=1 CPU_LLVM=0 python3 test/external/external_test_onnx_ops.py | |
| - name: Test Quantize ONNX | |
| run: CPU=1 CPU_LLVM=0 python3 test/backend/test_quantize_onnx.py | |
| - name: Run process replay tests | |
| uses: ./.github/actions/process-replay | |
# Job: ONNX backend, beam-search and MLPerf helper tests with CL=1, followed by training scripts run with NULL=1.
| testopencl: | |
| name: ONNX (CL)+Optimization Tests | |
| runs-on: ubuntu-22.04 | |
| timeout-minutes: 20 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: onnxoptl | |
| deps: testing | |
| pydeps: "tensorflow==2.19" | |
| python-version: '3.12' | |
| opencl: 'true' | |
| - name: Test ONNX (CL) | |
| run: CL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20 | |
| #- name: Test Optimization Helpers | |
| # run: DEBUG=1 python3 extra/optimization/test_helpers.py | |
| #- name: Test Action Space | |
| # run: DEBUG=1 CL=1 python3 extra/optimization/get_action_space.py | |
| - name: Test Beam Search | |
| run: CL=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py | |
| - name: Test MLPerf stuff | |
| run: CL=1 python -m pytest -n=auto test/external/external_test_optim.py test/external/external_test_losses.py test/external/external_test_metrics.py test/external/external_test_datasets.py --durations=20 | |
# The remaining script steps use NULL=1 NULL_ALLOW_COPYOUT=1 rather than CL=1.
| - name: NULL=1 beautiful_mnist_multigpu | |
| run: NULL=1 NULL_ALLOW_COPYOUT=1 python examples/beautiful_mnist_multigpu.py | |
| - name: Test Bert training | |
| run: NULL=1 NULL_ALLOW_COPYOUT=1 DEFAULT_FLOAT=HALF BENCHMARK=10 BS=24 GPUS=4 BERT_LAYERS=2 MODEL=bert python3 examples/mlperf/model_train.py | |
| - name: Test llama 3 training | |
| run: NULL=1 NULL_ALLOW_COPYOUT=1 SAMPLES=300 BS=8 SEQLEN=512 GRADIENT_ACC_STEPS=1 FAKEDATA=1 DEFAULT_FLOAT=bfloat16 OPTIM_DTYPE=bfloat16 LLAMA3_SIZE=1B MODEL=llama3 python3 examples/mlperf/model_train.py | |
| - name: Run process replay tests | |
| uses: ./.github/actions/process-replay | |
# Job: pipes a one-line question into `python3 -m tinygrad.apps.llm` and checks the reply.
| testllm: | |
| name: Test LLM | |
| runs-on: ubuntu-24.04 | |
| timeout-minutes: 15 | |
# This job sets CHECK_OOB to 0 (the workflow-level value is 1).
| env: | |
| CHECK_OOB: 0 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: apps_llm | |
# Passes only if the output contains "rooster" (case-insensitive, via grep -i).
| - name: Test 1B LLM | |
| run: echo "What's a male chicken called? Answer with only one word." | MAX_BUFFER_SIZE=0 python3 -m tinygrad.apps.llm | grep -i rooster | |
| # ****** Models Tests ****** | |
# Job: runs test/models three times: CPU=1 CPU_LLVM=1, CL=1, and CPU=1 CPU_LLVM=0.
| testmodels: | |
| name: Models (llvm+cpu+gpu) | |
| runs-on: ubuntu-22.04 | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: models | |
| deps: testing | |
| opencl: 'true' | |
| llvm: 'true' | |
| - name: Test models (llvm) | |
| run: CPU=1 CPU_LLVM=1 python -m pytest -n=auto test/models --durations=20 | |
| - name: Test models (opencl) | |
| run: CL=1 python -m pytest -n=auto test/models --durations=20 | |
| - name: Test models (cpu) | |
| run: CPU=1 CPU_LLVM=0 python -m pytest -n=auto test/models --durations=20 | |
| - name: Run process replay tests | |
| uses: ./.github/actions/process-replay | |
# Job: runs test/models and the LLaMA speed script with METAL=1 on a macos-14 runner.
| testmetalmodels: | |
| name: Models (metal) | |
| runs-on: macos-14 | |
| timeout-minutes: 20 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: metal | |
| deps: testing | |
| python-version: '3.12' | |
| - name: Test models (Metal) | |
| run: METAL=1 python -m pytest -n=auto test/models --durations=20 | |
| - name: Test LLaMA compile speed | |
| run: METAL=1 python test/external/external_test_speed_llama.py | |
| # ****** Feature Tests ****** | |
# Job: runs test_tiny, test_ops and the efficientnet model test with DEVECTORIZE=0, under CPU_LLVM=1 and CPU_LLVM=0.
| testdevectorize: | |
| name: Linux (devectorize) | |
| runs-on: ubuntu-24.04 | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: devectorize-minimal | |
| deps: testing_unit | |
| pydeps: "pillow" | |
| llvm: "true" | |
# The step titles say LLVM=1; the commands actually set CPU=1 CPU_LLVM=1.
| - name: Test LLVM=1 DEVECTORIZE=0 | |
| run: CPU=1 CPU_LLVM=1 DEVECTORIZE=0 python3 -m pytest -n auto test/test_tiny.py test/backend/test_ops.py | |
| - name: Test LLVM=1 DEVECTORIZE=0 for model | |
| run: CPU=1 CPU_LLVM=1 DEVECTORIZE=0 python3 test/models/test_efficientnet.py | |
| - name: Test CPU=1 DEVECTORIZE=0 | |
| run: CPU=1 CPU_LLVM=0 DEVECTORIZE=0 python3 -m pytest -n auto test/test_tiny.py test/backend/test_ops.py | |
# Job: builds the qemu-hexagon image from extra/dsp/Dockerfile, then runs tests with DSP=1 and MOCKDSP=1.
| testdsp: | |
| name: Linux (DSP) | |
| runs-on: ubuntu-24.04 | |
| timeout-minutes: 15 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: dsp-minimal | |
| deps: testing_unit | |
| pydeps: "onnx==1.18.0 onnxruntime ml_dtypes" | |
| llvm: "true" | |
| - name: Set up Docker Buildx | |
| uses: docker/setup-buildx-action@v3 | |
# The image is loaded locally (load: true) and not pushed. cache-to is the empty string on pull_request
# events, so only other event types write the gha cache; cache-from is always set.
| - name: Build QEMU Docker with cache | |
| uses: docker/build-push-action@v4 | |
| with: | |
| file: extra/dsp/Dockerfile | |
| push: false | |
| load: true | |
| tags: qemu-hexagon:latest | |
| cache-from: type=gha | |
| cache-to: ${{ github.event_name != 'pull_request' && 'type=gha,mode=min' || '' }} | |
# Appending to $GITHUB_ENV exports MOCKDSP=1 to the steps that follow.
| - name: Set MOCKDSP env | |
| run: printf "MOCKDSP=1" >> $GITHUB_ENV | |
| - name: Run test_tiny on DSP | |
| run: DEBUG=2 DSP=1 python test/test_tiny.py | |
| - name: Test transcendentals | |
| run: CC=clang-20 DEBUG=2 DSP=1 python test/backend/test_transcendental.py TestTranscendentalVectorized | |
| - name: Test quantize onnx | |
| run: DEBUG=2 DSP=1 python3 test/backend/test_quantize_onnx.py | |
# Job: checks that WEBGPU=1 selects the WEBGPU device, then runs test/backend with the Vulkan WebGPU backend type.
| testwebgpu: | |
| name: Linux (WebGPU) | |
| runs-on: ubuntu-22.04 | |
| timeout-minutes: 20 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: webgpu-minimal | |
| deps: testing_unit | |
| python-version: '3.12' | |
| webgpu: 'true' | |
| - name: Check Device.DEFAULT (WEBGPU) and print some source | |
| run: | | |
| WEBGPU=1 python -c "from tinygrad import Device; assert Device.DEFAULT == 'WEBGPU', Device.DEFAULT" | |
| WEBGPU=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_tiny.py TestTiny.test_plus | |
| - name: Run selected webgpu tests | |
| run: | | |
| WEBGPU=1 WEBGPU_BACKEND="WGPUBackendType_Vulkan" python3 -m pytest -n=auto test/backend --durations=20 | |
| - name: Run process replay tests | |
| uses: ./.github/actions/process-replay | |
# Job: checks the AMD autogen output is current, installs LLVM 21, and runs test/amd and related scripts
# with AMD=1 PYTHON_REMU=1 MOCKGPU=1 set for the job.
| testamdasm: | |
| name: AMD ASM IDE | |
| runs-on: ubuntu-24.04 | |
| timeout-minutes: 20 | |
| env: | |
| AMD: 1 | |
| PYTHON_REMU: 1 | |
| MOCKGPU: 1 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: rdna3-emu | |
| deps: testing_unit | |
| amd: 'true' | |
| python-version: '3.14' | |
# Regenerates the autogen files; `git diff --exit-code` fails the step if anything under tinygrad/runtime/autogen/amd/ changed.
| - name: Verify AMD autogen is up to date | |
| run: | | |
| python -m tinygrad.renderer.amd.generate | |
| git diff --exit-code tinygrad/runtime/autogen/amd/ | |
# -y keeps the install non-interactive without relying on the runner image's apt defaults,
# matching the `apt install -y` calls in the torch backend jobs.
| - name: Install LLVM 21 | |
| run: | | |
| wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key | sudo tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc | |
| echo "deb http://apt.llvm.org/$(lsb_release -cs)/ llvm-toolchain-$(lsb_release -cs)-21 main" | sudo tee /etc/apt/sources.list.d/llvm.list | |
| sudo apt-get update | |
| sudo apt-get install -y llvm-21 llvm-21-tools cloc | |
| - name: Install rocprof-trace-decoder | |
| run: sudo PYTHONPATH="." ./extra/sqtt/install_rocprof_decoder.py | |
| - name: Run AMD renderer tests | |
| run: AMD_LLVM=0 python -m pytest -n=auto test/amd/ --durations 20 | |
| - name: Run AMD renderer tests (AMD_LLVM=1) | |
| run: AMD_LLVM=1 python -m pytest -n=auto test/amd/ --durations 20 | |
| - name: Run SQTT profiling tests | |
| run: PROFILE=1 SQTT=1 python3 -m pytest -n=auto test/amd/test_sqtt_profiler.py | |
# NOTE(review): this env appears to be step-level, replacing the job's AMD=1 with AMD=0 for this step only; confirm nesting.
| - name: Run AMD emulated tests on NULL backend | |
| env: | |
| AMD: 0 | |
| run: | | |
| PYTHONPATH=. NULL=1 EMULATE=AMD python extra/mmapeak/mmapeak.py | |
| PYTHONPATH=. NULL=1 EMULATE=AMD_CDNA4 python3 -m pytest -n=auto test/testextra/test_tk.py test/backend/test_asm_gemm.py | |
| - name: Run ASM matmul on MOCKGPU | |
| run: PYTHONPATH="." AMD=1 MOCKGPU=1 N=256 python3 extra/gemm/amd_asm_matmul.py | |
| - name: Run LLVM test | |
| run: AMD_LLVM=1 python test/device/test_amd_llvm.py | |
# Job: runs test_tiny and test_hcq with AMD=1 MOCKGPU=1; AMD_IFACE is PCI for the job and USB for one step.
| testmockam: | |
| name: Linux (am) | |
| runs-on: ubuntu-24.04 | |
| timeout-minutes: 15 | |
| env: | |
| AMD: 1 | |
| MOCKGPU: 1 | |
| AMD_IFACE: PCI | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: mockam | |
| deps: testing_unit | |
| amd: 'true' | |
| - name: Run test_tiny on MOCKAM | |
| run: python test/test_tiny.py | |
# The inline AMD_IFACE=USB replaces the job-level PCI value for this command.
| - name: Run test_tiny on MOCKAM USB | |
| run: AMD_IFACE=USB python test/test_tiny.py | |
| - name: Run test_hcq on MOCKAM | |
| run: python -m pytest test/device/test_hcq.py | |
# Job: matrix over backend (amd, amdllvm) x arch (rdna3, rdna4); runs a fixed list of test files with AMD=1 MOCKGPU=1.
| testamd: | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| backend: [amd, amdllvm] | |
| arch: [rdna3, rdna4] | |
| #arch: [rdna3, rdna4, cdna4] | |
| name: Linux (${{ matrix.backend }} ${{ matrix.arch }}) | |
| runs-on: ubuntu-22.04 | |
| timeout-minutes: 15 | |
| env: | |
| AMD: 1 | |
| MOCKGPU: 1 | |
| MOCKGPU_ARCH: ${{ matrix.arch }} | |
| SKIP_SLOW_TEST: 1 | |
# AMD_LLVM is '1' for the amdllvm backend and '0' for every other backend value.
| AMD_LLVM: ${{ matrix.backend == 'amdllvm' && '1' || '0' }} | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: ${{ matrix.backend }}-minimal | |
| deps: testing_unit | |
| amd: 'true' | |
# Evaluates to 'true' only for the amdllvm backend; otherwise the expression yields false.
| llvm: ${{ matrix.backend == 'amdllvm' && 'true' }} | |
| - name: Check Device.DEFAULT and print some source | |
| run: | | |
| python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['AMD'], Device.DEFAULT" | |
| DEBUG=5 FORWARD_ONLY=1 python3 test/test_tiny.py TestTiny.test_plus | |
| - name: Run pytest (amd) | |
| run: python -m pytest -n=auto test/backend/test_ops.py test/backend/test_dtype.py test/backend/test_dtype_alu.py test/backend/test_linearizer.py test/backend/test_randomness.py test/backend/test_jit.py test/backend/test_graph.py test/backend/test_multitensor.py test/device/test_hcq.py test/testextra/test_cfg_viz.py test/external/external_test_am.py --durations=20 | |
# Re-runs the sin/cos/tan/exp/log op tests with TRANSCENDENTAL=2.
| - name: Run TRANSCENDENTAL math | |
| run: TRANSCENDENTAL=2 python -m pytest -n=auto test/backend/test_ops.py::TestOps::test_sin test/backend/test_ops.py::TestOps::test_cos test/backend/test_ops.py::TestOps::test_tan test/backend/test_ops.py::TestOps::test_exp test/backend/test_ops.py::TestOps::test_log --durations=20 | |
| - name: Run process replay tests | |
| uses: ./.github/actions/process-replay | |
# Job: matrix over backend (ptx, nv); runs test/backend with MOCKGPU=1 and FORWARD_ONLY=1 set for the job.
| testnvidia: | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| backend: [ptx, nv] | |
| name: Linux (${{ matrix.backend }}) | |
| runs-on: ubuntu-22.04 | |
| timeout-minutes: 20 | |
| env: | |
| MOCKGPU: 1 | |
| FORWARD_ONLY: 1 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: ${{ matrix.backend }}-minimal | |
| deps: testing_unit | |
| cuda: 'true' | |
| ocelot: 'true' | |
# Writes per-backend variables to $GITHUB_ENV: ptx -> CUDA=1 and CUDA_PTX=1; nv -> NV=1 and SKIP_SLOW_TEST=1.
# The comparison literal is spelled exactly like the matrix value rather than relying on case-insensitive matching.
| - name: Set env | |
| run: printf "${{ matrix.backend == 'ptx' && 'CUDA=1\nCUDA_PTX=1' || matrix.backend == 'nv' && 'NV=1\nSKIP_SLOW_TEST=1' }}" >> $GITHUB_ENV | |
| - name: Check Device.DEFAULT and print some source | |
| run: | | |
| python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['CUDA','NV'], Device.DEFAULT" | |
| DEBUG=5 FORWARD_ONLY=1 python3 test/test_tiny.py TestTiny.test_plus | |
| - name: Run pytest (cuda) | |
| # skip multitensor because it's slow | |
| run: python -m pytest -n=auto test/backend --ignore test/backend/test_multitensor.py --durations=20 | |
| - name: Run TestOps.test_add with PMA | |
| run: VIZ=-1 PMA=1 DEBUG=5 python3 test/backend/test_ops.py TestOps.test_add | |
| - name: Run process replay tests | |
| uses: ./.github/actions/process-replay | |
| # Linux job run once per matrix backend (llvm, cpu, opencl, lvp) against test/backend. | |
| testcpuopencl: | |
| name: Linux (${{ matrix.backend }}) | |
| runs-on: ubuntu-22.04 | |
| timeout-minutes: 20 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| backend: [llvm, cpu, opencl, lvp] | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: ${{ matrix.backend }}-minimal | |
| deps: testing_unit | |
| # llvm is requested for both the llvm and lvp entries; opencl and mesa only for their own entry. | |
| llvm: ${{ matrix.backend == 'llvm' || matrix.backend == 'lvp' }} | |
| opencl: ${{ matrix.backend == 'opencl' && 'true' }} | |
| mesa: ${{ matrix.backend == 'lvp' && 'true' }} | |
| - name: Set env | |
| # Appends the variables for the selected backend to $GITHUB_ENV for the following steps. | |
| run: printf "${{ matrix.backend == 'llvm' && 'CPU=1\nCPU_LLVM=1' || matrix.backend == 'cpu' && 'CPU=1\nCPU_LLVM=0\nCPU_COUNT=2' || matrix.backend == 'opencl' && 'CL=1' || matrix.backend == 'lvp' && 'CPU=1\nCPU_LVP=1' }}" >> $GITHUB_ENV | |
| - name: Check Device.DEFAULT and print some source | |
| run: | | |
| python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['CPU','CL'], Device.DEFAULT" | |
| DEBUG=5 FORWARD_ONLY=1 python3 test/test_tiny.py TestTiny.test_plus | |
| - name: Run pytest (${{ matrix.backend }}) | |
| run: python -m pytest -n=auto test/backend --durations=20 | |
| - name: Run TRANSCENDENTAL math | |
| # Folded scalar: the lines below are joined with single spaces into one command. | |
| run: >- | |
| TRANSCENDENTAL=2 python -m pytest -n=auto | |
| test/backend/test_ops.py::TestOps::test_sin | |
| test/backend/test_ops.py::TestOps::test_cos | |
| test/backend/test_ops.py::TestOps::test_tan | |
| test/backend/test_ops.py::TestOps::test_exp | |
| test/backend/test_ops.py::TestOps::test_log | |
| --durations=20 | |
| - name: Run process replay tests | |
| uses: ./.github/actions/process-replay | |
| # ****** OSX Tests ****** | |
| # macOS job: unit, NULL-backend, ONNX and METAL=1 tests, then MOCKGPU=1 runs with AMD=1 and with NV=1/NV_PTX=1. | |
| testmetal: | |
| name: MacOS (unit) | |
| runs-on: macos-14 | |
| timeout-minutes: 20 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: metal | |
| deps: testing | |
| python-version: '3.12' | |
| llvm: 'true' | |
| amd: 'true' | |
| cuda: 'true' | |
| ocelot: 'true' | |
| - name: Run unit tests | |
| run: METAL=1 python -m pytest -n=auto test/unit/ --durations=20 | |
| - name: Run NULL backend tests | |
| run: NULL=1 python -m pytest -n=auto test/null/ --durations=20 | |
| - name: Run ONNX | |
| run: METAL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20 | |
| - name: Test tensor core ops (fake) | |
| run: METAL=1 DEBUG=3 TC=2 python test/backend/test_ops.py TestOps.test_gemm | |
| - name: Test tensor core ops (real) | |
| run: METAL=1 DEBUG=3 python test/backend/test_ops.py TestOps.test_big_gemm | |
| - name: Test Beam Search | |
| run: METAL=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py | |
| - name: Test Device Specific | |
| run: METAL=1 python3 -m pytest test/device/test_metal.py | |
| #- name: Fuzz Test linearizer | |
| # run: METAL=1 DEPTH=4 FUZZ_N=50 FUZZ_MAX_SIZE=1000000 python test/external/fuzz_linearizer.py | |
| - name: Run TRANSCENDENTAL math | |
| # Folded scalar: the lines below are joined with single spaces into one command. | |
| run: >- | |
| METAL=1 TRANSCENDENTAL=2 python -m pytest -n=auto | |
| test/backend/test_ops.py::TestOps::test_sin | |
| test/backend/test_ops.py::TestOps::test_cos | |
| test/backend/test_ops.py::TestOps::test_tan | |
| test/backend/test_ops.py::TestOps::test_exp | |
| test/backend/test_ops.py::TestOps::test_log | |
| --durations=20 | |
| - name: Run pytest (amd) | |
| env: | |
| MOCKGPU: 1 | |
| AMD: 1 | |
| AMD_LLVM: 0 | |
| FORWARD_ONLY: 1 | |
| run: python3 -m pytest -n=auto test/device/test_hcq.py test/test_tiny.py --durations=20 | |
| - name: Run pytest (amd with llvm backend) | |
| env: | |
| MOCKGPU: 1 | |
| AMD: 1 | |
| AMD_LLVM: 1 | |
| FORWARD_ONLY: 1 | |
| run: python -m pytest -n=auto test/device/test_hcq.py test/test_tiny.py test/device/test_amd_llvm.py --durations=20 | |
| - name: Run pytest (ptx) | |
| env: | |
| MOCKGPU: 1 | |
| NV: 1 | |
| NV_PTX: 1 | |
| FORWARD_ONLY: 1 | |
| # TODO: failing due to library loading error | |
| CAPTURE_PROCESS_REPLAY: 0 | |
| run: python3 -m pytest -n=auto test/device/test_hcq.py test/test_tiny.py --durations=20 | |
| - name: Run process replay tests | |
| uses: ./.github/actions/process-replay | |
| # macOS job for WEBGPU=1: runs examples.compile_efficientnet, the test/backend suite and the ONNX runner test. | |
| osxwebgpu: | |
| name: MacOS (WebGPU) | |
| runs-on: macos-14 | |
| timeout-minutes: 10 | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: osx-webgpu | |
| webgpu: 'true' | |
| deps: testing | |
| - name: Build WEBGPU Efficientnet | |
| run: WEBGPU=1 WEBGPU_BACKEND="WGPUBackendType_Metal" python3 -m examples.compile_efficientnet | |
| - name: Run selected webgpu tests | |
| run: WEBGPU=1 WEBGPU_BACKEND="WGPUBackendType_Metal" python3 -m pytest -n=auto test/backend --durations=20 | |
| # The steps below are kept commented out; the notes beside them mark them as flaky. | |
| #- name: Clean npm cache | |
| # run: npm cache clean --force | |
| #- name: Install Puppeteer | |
| # run: npm install puppeteer | |
| # this is also flaky | |
| #- name: Run WEBGPU Efficientnet | |
| # run: node test/web/test_webgpu.js | |
| # this is flaky | |
| #- name: Run VIZ tests as external package | |
| # run: | | |
| # mkdir $GITHUB_WORKSPACE/test_dir | |
| # cd $GITHUB_WORKSPACE/test_dir | |
| # python -m venv venv | |
| # source venv/bin/activate | |
| # pip install $GITHUB_WORKSPACE | |
| # cp $GITHUB_WORKSPACE/test/web/test_viz.js . | |
| # node test_viz.js | |
| - name: Test ONNX Runner (WEBGPU) | |
| run: WEBGPU=1 python3 test/external/external_test_onnx_runner.py | |
| # macOS job run once per matrix backend (metal, llvm, cpu, lvp) against test/backend. | |
| osxtests: | |
| name: MacOS (${{ matrix.backend }}) | |
| runs-on: macos-15 | |
| timeout-minutes: 20 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| backend: [metal, llvm, cpu, lvp] | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| deps: testing_unit | |
| key: macos-${{ matrix.backend }}-minimal | |
| mesa: ${{ matrix.backend == 'lvp' && 'true' }} | |
| llvm: ${{ matrix.backend == 'llvm' || matrix.backend == 'lvp' }} | |
| - name: Set env | |
| # Appends the variables for the selected backend to $GITHUB_ENV for the following steps. | |
| run: printf "${{ matrix.backend == 'llvm' && 'CPU=1\nCPU_LLVM=1' || matrix.backend == 'cpu' && 'CPU=1\nCPU_LLVM=0\nCPU_COUNT=2' || matrix.backend == 'metal' && 'METAL=1' || matrix.backend == 'lvp' && 'CPU=1\nCPU_LVP=1' }}" >> $GITHUB_ENV | |
| - name: Check Device.DEFAULT and print some source | |
| # The llvm and lvp entries are expected to report CPU; any other entry must report its own upper-cased name. | |
| run: | | |
| python -c "from tinygrad import Device; assert Device.DEFAULT == {'LLVM':'CPU','LVP':'CPU'}.get(x:='${{ matrix.backend }}'.upper(), x), Device.DEFAULT" | |
| DEBUG=4 python3 test/test_tiny.py TestTiny.test_plus | |
| - name: Run pytest (${{ matrix.backend }}) | |
| run: python3 -m pytest -n=auto test/backend --durations=20 | |
| - name: Run process replay tests | |
| uses: ./.github/actions/process-replay | |
| - name: Run macOS-specific unit test | |
| # Only executed for the cpu matrix entry. | |
| run: python3 -m pytest test/unit/test_disk_tensor.py::TestDiskTensor::test_copy_to_cpu_not_truncated | |
| if: matrix.backend == 'cpu' | |
| # ****** Windows Tests ****** | |
| # Windows job run once per matrix backend (llvm, cpu, webgpu); the unit and NULL suites only run for llvm. | |
| wintests: | |
| name: Windows (${{ matrix.backend }}) | |
| runs-on: windows-latest | |
| timeout-minutes: 15 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| backend: [llvm, cpu, webgpu] | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| deps: testing_unit | |
| key: windows-${{ matrix.backend }}-minimal | |
| # dawn-python is only requested for the webgpu entry. | |
| pydeps: ${{ matrix.backend == 'webgpu' && 'dawn-python' || '' }} | |
| - name: Set env | |
| shell: bash | |
| run: printf "${{ matrix.backend == 'llvm' && 'CPU=1\nCPU_LLVM=1' || matrix.backend == 'cpu' && 'CPU=1\nCPU_LLVM=0\nCPU_COUNT=2' || matrix.backend == 'webgpu' && 'WEBGPU=1'}}" >> $GITHUB_ENV | |
| - name: Run unit tests | |
| if: matrix.backend=='llvm' | |
| # test_newton_schulz hits RecursionError | |
| # Folded scalar: the lines below are joined with single spaces into one command. | |
| run: >- | |
| python -m pytest -n=auto test/unit/ | |
| --ignore=test/unit/test_disk_tensor.py | |
| --ignore=test/unit/test_tar.py | |
| --ignore=test/unit/test_linalg.py | |
| --durations=20 | |
| - name: Run NULL backend tests | |
| if: matrix.backend=='llvm' | |
| shell: bash | |
| # CPU and CPU_LLVM are reset to 0 inline so NULL=1 is the only backend flag left on for this command. | |
| run: CPU=0 CPU_LLVM=0 NULL=1 python -m pytest -n=auto test/null/ --ignore=test/null/test_elf.py --durations=20 | |
| - name: Run pytest (${{ matrix.backend }}) | |
| shell: bash | |
| run: | | |
| python -c "from tinygrad import Device; assert Device.DEFAULT == {'LLVM':'CPU'}.get(x:='${{ matrix.backend }}'.upper(), x), Device.DEFAULT" | |
| python -m pytest -n=auto test/test_tiny.py test/backend/test_ops.py --durations=20 | |
| # ****** Compile-only Tests ****** | |
| # Compile-only job run once per matrix backend (ir3, nak) with NULL=1 as the device. | |
| compiletests: | |
| name: Compile-only (${{ matrix.backend }}) | |
| runs-on: ubuntu-24.04 | |
| timeout-minutes: 15 | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| backend: [ir3, nak] | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: compile-${{ matrix.backend }} | |
| python-version: '3.12' | |
| deps: testing_unit | |
| mesa: ${{ (matrix.backend == 'ir3' || matrix.backend == 'nak') && 'true' }} | |
| - name: Set env | |
| # Always writes NULL=1 and NULL_ALLOW_COPYOUT=1, then NULL_IR3=1 or NULL_NAK=1 depending on the matrix entry. | |
| run: printf "NULL=1\nNULL_ALLOW_COPYOUT=1\n${{ matrix.backend == 'ir3' && 'NULL_IR3=1' || matrix.backend == 'nak' && 'NULL_NAK=1' }}" >> $GITHUB_ENV | |
| shell: bash | |
| - name: Run test_ops | |
| shell: bash | |
| run: | | |
| python -c "from tinygrad import Device; assert Device.DEFAULT == 'NULL'" | |
| DEBUG=4 python3 test/backend/test_ops.py TestOps.test_add | |
| python -m pytest -n=auto test/backend/test_ops.py --durations=20 | |
| # Compile-only job on ubuntu-24.04-arm with NULL=1 and NULL_QCOMCL=1 set. | |
| qcomclcompiletests: | |
| name: Compile-only (QCOM CL) | |
| timeout-minutes: 15 | |
| runs-on: ubuntu-24.04-arm | |
| steps: | |
| - name: Checkout Code | |
| uses: actions/checkout@v4 | |
| - name: Setup Environment | |
| uses: ./.github/actions/setup-tinygrad | |
| with: | |
| key: compile-qcomcl | |
| python-version: '3.12' | |
| deps: testing_unit | |
| tinydreno: 'true' | |
| - name: Set env | |
| # Persists the three NULL_* related variables through $GITHUB_ENV for the test step. | |
| run: printf "NULL=1\nNULL_ALLOW_COPYOUT=1\nNULL_QCOMCL=1" >> $GITHUB_ENV | |
| shell: bash | |
| - name: Run test_ops | |
| shell: bash | |
| run: | | |
| python -c "from tinygrad import Device; assert Device.DEFAULT == 'NULL'" | |
| DEBUG=4 python3 test/backend/test_ops.py TestOps.test_add | |
| python -m pytest -n=auto test/backend/test_ops.py --durations=20 | |