Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
70 commits
Select commit Hold shift + click to select a range
ebb7def
Optional TLS for sglang/vllm if supported (or any passthrough endpoint).
jondurbin Feb 9, 2026
365e6d7
Fix pem env.
jondurbin Feb 9, 2026
b597bd2
Aegis.
jondurbin Feb 11, 2026
9943896
Manifest script.
jondurbin Feb 12, 2026
a0a12b0
cllmv session init
jondurbin Feb 12, 2026
78a767f
Fix verification method for NN.
jondurbin Feb 13, 2026
f358cdf
Aegis updates/secure env.
jondurbin Feb 13, 2026
09ce74b
Aegis updates, certs/mtls, etc.
jondurbin Feb 13, 2026
4dbedb5
Fixes.
jondurbin Feb 13, 2026
52f9d41
More aegis updates.
jondurbin Feb 13, 2026
80cfa82
Fixes.
jondurbin Feb 13, 2026
a7cafc9
Better handling of dead engines.
jondurbin Feb 13, 2026
43d19bb
Lib updates.
jondurbin Feb 13, 2026
4a5d150
Better subprocess failure exiting.
jondurbin Feb 14, 2026
ba4f6f3
mTLS updates, pass protected cert, etc.
jondurbin Feb 14, 2026
864a894
gzip
jondurbin Feb 14, 2026
342cab7
aegis lib updates
jondurbin Feb 14, 2026
7ec9b7c
tls before logging server
jondurbin Feb 14, 2026
e6c4b84
Better disconnect handling.
jondurbin Feb 14, 2026
4f5179e
aegis updates
jondurbin Feb 15, 2026
77ddd30
aegis updates, runtime cache invalidation for DG/TI/etc.
jondurbin Feb 15, 2026
f3f9b1e
Aegis updates
jondurbin Feb 16, 2026
7136289
Updates/fixes.
jondurbin Feb 18, 2026
017b23d
cllmv version
jondurbin Feb 19, 2026
615ab9b
foo.
jondurbin Feb 19, 2026
cf44390
Fixes.
jondurbin Feb 20, 2026
cb029d1
Fix ref to old chutes-cfsv.so.
jondurbin Feb 20, 2026
612f0fe
foo
jondurbin Feb 20, 2026
1d36709
foo
jondurbin Feb 20, 2026
df06152
Fix aegis nonce re-init.
jondurbin Feb 20, 2026
8d77630
mTLS no fallback
jondurbin Feb 20, 2026
ea7a2fa
Keep logging server as http for now.
jondurbin Feb 20, 2026
41b920c
foo
jondurbin Feb 20, 2026
d2d425a
Foo.
jondurbin Feb 20, 2026
ff95f25
Separate threadpool for actual chute work.
jondurbin Feb 20, 2026
1352b2d
foo
jondurbin Feb 20, 2026
4461f81
Fix cacert
jondurbin Feb 20, 2026
fe1def5
fix cacert
jondurbin Feb 20, 2026
c175950
aegis update.
jondurbin Feb 20, 2026
573e1b7
foo
jondurbin Feb 21, 2026
aa48b09
f
jondurbin Feb 21, 2026
7e45d49
foo
jondurbin Feb 21, 2026
c6df080
f
jondurbin Feb 21, 2026
d3a3312
f
jondurbin Feb 21, 2026
924ccf0
f
jondurbin Feb 21, 2026
2760a4d
f
jondurbin Feb 21, 2026
1eb482d
f
jondurbin Feb 21, 2026
024cb83
f
jondurbin Feb 21, 2026
c1cbaa3
f
jondurbin Feb 21, 2026
fdf8deb
Merge branch 'aegis' of github.com:chutesai/chutes into aegis
jondurbin Feb 21, 2026
1563641
aegis updates
jondurbin Feb 21, 2026
fcdccf8
aegis
jondurbin Feb 21, 2026
d573846
f
jondurbin Feb 21, 2026
0e1d381
f
jondurbin Feb 21, 2026
8a4ddd7
aegis
jondurbin Feb 21, 2026
0df1a83
Version
jondurbin Feb 21, 2026
30bd750
Merge branch 'main' into aegis
jondurbin Feb 21, 2026
8311958
e2e fixes
jondurbin Feb 21, 2026
34f550a
E2E public path routing fixes.
jondurbin Feb 21, 2026
4acb0b4
e2e fixes, cert timing fixes
jondurbin Feb 21, 2026
5be6086
e2e stream fixes
jondurbin Feb 22, 2026
23b9236
aegis updates
jondurbin Feb 22, 2026
4e98ead
dev mode no mtls for sub processes
jondurbin Feb 22, 2026
3e687a5
fix e2e sse
jondurbin Feb 22, 2026
c2ea8cf
aegis lib update
jondurbin Feb 22, 2026
98bb6b0
aegis lib updates
jondurbin Feb 22, 2026
5440c3f
Fixes.
jondurbin Feb 22, 2026
275351a
fixes
jondurbin Feb 23, 2026
e0940f3
more aegis edge cases
jondurbin Feb 23, 2026
e5bf108
aegis fixes
jondurbin Feb 23, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion chutes/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
version = "0.5.4.rc15"
version = "0.5.5.rc42"
Binary file added chutes/cfsv_v4
Binary file not shown.
10 changes: 7 additions & 3 deletions chutes/cfsv_wrapper.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
import ctypes
import asyncio
from functools import lru_cache
Expand All @@ -7,9 +6,14 @@

class CFSVWrapper:
def __init__(
self, lib_path=os.path.join(os.path.dirname(os.path.abspath(__file__)), "chutes-cfsv.so")
self,
lib_path=None,
):
self.lib = ctypes.CDLL(lib_path)
if lib_path:
self.lib = ctypes.CDLL(lib_path)
else:
# CFSV is compiled into chutes-aegis.so (loaded via LD_PRELOAD).
self.lib = ctypes.CDLL(None)

# cfsv_challenge(base_path, salt, sparse, index_file, exclude_path, result_buf, result_buf_size)
self.lib.cfsv_challenge.argtypes = [
Expand Down
2 changes: 2 additions & 0 deletions chutes/chute/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ def __init__(
self.allow_external_egress = allow_external_egress
self.encrypted_fs = encrypted_fs
self.passthrough_headers = passthrough_headers
self.passthrough_ssl_context = None
self._wrong_ssl_context = None
self.docs_url = None
self.redoc_url = None
self.tee = tee
Expand Down
322 changes: 209 additions & 113 deletions chutes/chute/cord.py

Large diffs are not rendered by default.

85 changes: 78 additions & 7 deletions chutes/chute/template/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,18 @@
from chutes.image import Image
from chutes.image.standard.vllm import VLLM
from chutes.chute import Chute, ChutePack, NodeSelector
from chutes.chute.template.helpers import set_default_cache_dirs, set_nccl_flags, monitor_engine
from chutes.chute.template.helpers import (
set_default_cache_dirs,
set_nccl_flags,
monitor_engine,
generate_mtls_certs,
build_client_ssl_context,
build_wrong_client_ssl_context,
validate_mtls,
force_exit,
mtls_enabled,
set_encrypted_env_var,
)

os.environ["VLLM_WORKER_MULTIPROC_METHOD"] = "spawn"

Expand Down Expand Up @@ -172,7 +183,10 @@ async def initialize_vllm_embedding(self):
# Verify the cache contents.
await verify_cache(repo_id=model_name, revision=revision)

set_default_cache_dirs(download_path)
set_default_cache_dirs(
download_path,
cache_version=getattr(self, "_source_hash", None),
)

torch.cuda.empty_cache()
torch.cuda.init()
Expand Down Expand Up @@ -212,6 +226,31 @@ async def initialize_vllm_embedding(self):
if "--api-key" in engine_args:
raise ValueError("You may not override api key!")

use_mtls = mtls_enabled()
ssl_ctx = None
wrong_ssl_ctx = None

if use_mtls:
# Generate ephemeral mTLS certificates.
certs = generate_mtls_certs()
ssl_ctx = build_client_ssl_context(
certs["ca_cert_file"],
certs["client_cert_file"],
certs["client_key_file"],
certs["password"],
)
wrong_ssl_ctx = build_wrong_client_ssl_context(
certs["ca_cert_file"],
certs["wrong_client_cert_file"],
certs["wrong_client_key_file"],
certs["password"],
)
self.passthrough_ssl_context = ssl_ctx
self._wrong_ssl_context = wrong_ssl_ctx
logger.info("mTLS enabled for vLLM embedding engine communication")
else:
logger.warning("mTLS disabled (LLM_ENGINE_MTLS_ENABLE not set)")

env = os.environ.copy()
env["PYTHONDONTWRITEBYTECODE"] = "1"
if enable_chunked_processing:
Expand All @@ -220,13 +259,19 @@ async def initialize_vllm_embedding(self):
env["HF_HUB_OFFLINE"] = "1"
env["SGL_MODEL_NAME"] = self.name
env["SGL_REVISION"] = revision
if use_mtls:
env["VLLM_SSL_KEYFILE_PEM"] = certs["server_key_pem"].decode()
env["VLLM_SSL_CERTFILE_PEM"] = certs["server_cert_pem"].decode()
env["VLLM_SSL_CA_CERTS_PEM"] = certs["ca_cert_pem"].decode()
set_encrypted_env_var(env, "VLLM_SSL_KEYFILE_PASSWORD", certs["password"])

ssl_args = " --ssl-cert-reqs 2" if use_mtls else ""
pooler_config_arg = shlex.quote(json.dumps(pooler_config))
startup_command = (
f"{sys.executable} -m vllm.entrypoints.openai.api_server "
f"--model {model_name} --served-model-name {self.name} "
f"--revision {revision} --pooler-config {pooler_config_arg} "
f"--port 10101 --host 127.0.0.1 --api-key {api_key} {engine_args}"
f"--port 10101 --host 127.0.0.1 --api-key {api_key}{ssl_args} {engine_args}"
)
display_cmd = startup_command.replace(api_key, "*" * len(api_key))
parts = shlex.split(startup_command)
Expand All @@ -237,14 +282,38 @@ async def initialize_vllm_embedding(self):

server_ready = asyncio.Event()
self._monitor_task = asyncio.create_task(
monitor_engine(self._vllm_process, api_key, server_ready, model_name=self.name)
monitor_engine(
self._vllm_process,
api_key,
server_ready,
model_name=self.name,
ssl_context=ssl_ctx,
wrong_ssl_context=wrong_ssl_ctx,
)
)

def _on_monitor_done(t):
if t.cancelled():
return
exc = t.exception()
if exc:
logger.error("Embedding vLLM monitor task failed, terminating: {}", exc)
force_exit(1)

self._monitor_task.add_done_callback(_on_monitor_done)

base_url = "https://127.0.0.1:10101" if use_mtls else "http://127.0.0.1:10101"
while True:
if self._vllm_process.poll() is not None:
raise RuntimeError(
"Embedding vLLM subprocess exited before readiness check "
f"(exit={self._vllm_process.returncode})"
)
try:
async with aiohttp.ClientSession() as session:
connector = aiohttp.TCPConnector(ssl=ssl_ctx) if ssl_ctx else None
async with aiohttp.ClientSession(connector=connector) as session:
async with session.get(
"http://127.0.0.1:10101/v1/models",
f"{base_url}/v1/models",
headers={"Authorization": f"Bearer {api_key}"},
) as resp:
if resp.status == 200:
Expand All @@ -255,8 +324,10 @@ async def initialize_vllm_embedding(self):
await asyncio.sleep(1)

self.passthrough_headers["Authorization"] = f"Bearer {api_key}"
if use_mtls:
await validate_mtls(self.name, api_key, ssl_ctx, wrong_ssl_ctx)
server_ready.set()
logger.info("Embedding server initialized successfully!")
logger.info("Embedding server initialized successfully!")

@chute.cord(
passthrough_path="/v1/embeddings",
Expand Down
Loading