From b31f01c6009b149bd2ddbf02a6c2670355657152 Mon Sep 17 00:00:00 2001 From: TR-3B <144127816+MagellaX@users.noreply.github.com> Date: Mon, 25 Aug 2025 01:23:19 +0530 Subject: [PATCH 1/4] Add build configuration and CI wheel support --- .env.example | 8 +++++++- .github/workflows/build.yml | 28 ++++++++++++++++++++++++++++ pyproject.toml | 25 +++++++++++++++++++++++++ 3 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/build.yml create mode 100644 pyproject.toml diff --git a/.env.example b/.env.example index ae80d08..18c10f8 100644 --- a/.env.example +++ b/.env.example @@ -1,3 +1,9 @@ +# Build configuration +TORCH_CUDA_ARCH_LIST=80;90 +CUDA_HOME=/usr/local/cuda +CUDA_CACHE_PATH={HOME}/.cache/stream_attn/cuda +TRITON_CACHE_DIR={HOME}/.cache/stream_attn/triton + # StreamAttention environment configuration example # Copy to .env and edit as needed @@ -18,4 +24,4 @@ STREAM_ATTENTION_RING_OVERLAP_SIZE=256 # Star Attention STREAM_ATTENTION_STAR_BLOCK_SIZE=2048 STREAM_ATTENTION_STAR_ANCHOR_SIZE=256 -STREAM_ATTENTION_STAR_NUM_HOSTS=1 \ No newline at end of file +STREAM_ATTENTION_STAR_NUM_HOSTS=1 diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..99d7073 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,28 @@ +name: Build Wheels + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build: + runs-on: ubuntu-latest + strategy: + matrix: + sm: [80, 90] + env: + TORCH_CUDA_ARCH_LIST: ${{ matrix.sm }} + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + - name: Install build tooling + run: | + python -m pip install --upgrade pip + pip install build + - name: Build wheel + run: | + python -m build --wheel diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..584b569 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,25 @@ +[build-system] +requires = [ + "setuptools>=61", + "wheel", + "torch==2.1.0", + "triton==2.1.0", +] +build-backend = "setuptools.build_meta" + +[project] +name = "stream-attention" +version = "1.0.0" +description = "Production-ready multi-GPU FlashAttention implementation with support for extremely long contexts" +authors = [{name = "StreamAttention Team", email = "streamattention@example.com"}] +readme = "README.md" +license = {file = "LICENSE"} +requires-python = ">=3.8" +dependencies = [ + "torch==2.1.0", + "triton==2.1.0", +] + +[tool.stream_attention] +cuda_version = "12.1" +triton_version = "2.1.0" From 05270d9e97d0a2b227e3411ce2cb63b9ab973e86 Mon Sep 17 00:00:00 2001 From: TR-3B <144127816+MagellaX@users.noreply.github.com> Date: Mon, 25 Aug 2025 01:27:49 +0530 Subject: [PATCH 2/4] Update .env.example Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com> --- .env.example | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.env.example b/.env.example index 18c10f8..e897909 100644 --- a/.env.example +++ b/.env.example @@ -2,7 +2,7 @@ TORCH_CUDA_ARCH_LIST=80;90 CUDA_HOME=/usr/local/cuda CUDA_CACHE_PATH={HOME}/.cache/stream_attn/cuda -TRITON_CACHE_DIR={HOME}/.cache/stream_attn/triton +TRITON_CACHE_DIR=${HOME}/.cache/stream_attn/triton # StreamAttention environment configuration example # Copy to .env and edit as needed From 68cfaa07a0ded276b87d16b79158a106df33c67c Mon Sep 17 00:00:00 2001 From: TR-3B <144127816+MagellaX@users.noreply.github.com> Date: Mon, 25 Aug 2025 01:27:58 +0530 Subject: [PATCH 3/4] Update .env.example Co-authored-by: cubic-dev-ai[bot] <191113872+cubic-dev-ai[bot]@users.noreply.github.com> --- .env.example | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.env.example b/.env.example index e897909..13ea402 100644 --- a/.env.example +++ b/.env.example @@ -1,7 +1,7 @@ # Build configuration TORCH_CUDA_ARCH_LIST=80;90 CUDA_HOME=/usr/local/cuda -CUDA_CACHE_PATH={HOME}/.cache/stream_attn/cuda +CUDA_CACHE_PATH=${HOME}/.cache/stream_attn/cuda TRITON_CACHE_DIR=${HOME}/.cache/stream_attn/triton # StreamAttention environment configuration example From 7e38eedd428180077bbc2b41d4f101d752a8742a Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Sun, 24 Aug 2025 20:03:55 +0000 Subject: [PATCH 4/4] Update environment config and dependencies for StreamAttention Co-authored-by: alphacr792 --- .env.example | 7 +++++++ pyproject.toml | 3 +-- stream_attention/benchmarks/accuracy_test.py | 1 - 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.env.example b/.env.example index 13ea402..7197a89 100644 --- a/.env.example +++ b/.env.example @@ -4,6 +4,13 @@ CUDA_HOME=/usr/local/cuda CUDA_CACHE_PATH=${HOME}/.cache/stream_attn/cuda TRITON_CACHE_DIR=${HOME}/.cache/stream_attn/triton +# StreamAttention environment configuration example +# Build configuration +TORCH_CUDA_ARCH_LIST=80;90 +CUDA_HOME=/usr/local/cuda +CUDA_CACHE_PATH=${HOME}/.cache/stream_attn/cuda +TRITON_CACHE_DIR=${HOME}/.cache/stream_attn/triton + # StreamAttention environment configuration example # Copy to .env and edit as needed diff --git a/pyproject.toml b/pyproject.toml index 584b569..afee971 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,8 +2,6 @@ requires = [ "setuptools>=61", "wheel", - "torch==2.1.0", - "triton==2.1.0", ] build-backend = "setuptools.build_meta" @@ -18,6 +16,7 @@ requires-python = ">=3.8" dependencies = [ "torch==2.1.0", "triton==2.1.0", + "pyyaml>=6.0", ] [tool.stream_attention] diff --git a/stream_attention/benchmarks/accuracy_test.py b/stream_attention/benchmarks/accuracy_test.py index c83047d..70808e9 100644 --- a/stream_attention/benchmarks/accuracy_test.py +++ b/stream_attention/benchmarks/accuracy_test.py @@ -60,5 +60,4 @@ def main(): main() - main()