From fe766665ea2bff5948e84da6c4a15c0b0b5aee08 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Wed, 11 Feb 2026 17:24:36 -0500 Subject: [PATCH 01/58] Aegis. --- Dockerfile | 3 +- api/graval_worker.py | 78 ++++++++- api/image/forge.py | 118 +++++++++++++- api/instance/router.py | 147 ++++++++++++++--- api/instance/schemas.py | 6 + api/miner/router.py | 4 +- api/util.py | 104 +++++++++++- data/chutes-aegis-verify.so | Bin 0 -> 170512 bytes watchtower.py | 306 ++++++++++++++++++++++++++++++++---- 9 files changed, 705 insertions(+), 61 deletions(-) create mode 100755 data/chutes-aegis-verify.so diff --git a/Dockerfile b/Dockerfile index fbbb4b27..00596828 100644 --- a/Dockerfile +++ b/Dockerfile @@ -41,7 +41,8 @@ RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 # NN verification lib. ADD data/chutes-nnverify.so /usr/local/lib/chutes-nnverify.so -RUN chmod 755 /usr/local/lib/chutes-nnverify.so +ADD data/chutes-aegis-verify.so /usr/local/lib/chutes-aegis-verify.so +RUN chmod 755 /usr/local/lib/chutes-*.so ### # FORGE diff --git a/api/graval_worker.py b/api/graval_worker.py index 8d7542f6..deb1961e 100644 --- a/api/graval_worker.py +++ b/api/graval_worker.py @@ -3,6 +3,7 @@ """ import api.database.orms # noqa +import ctypes import os import subprocess import tempfile @@ -609,7 +610,9 @@ async def generate_fs_hash( ) bin_name = "cfsv" - if semcomp(chutes_version or "0.0.0", "0.5.2") >= 0: + if semcomp(chutes_version or "0.0.0", "0.5.5") >= 0: + bin_name = "cfsv_v4" + elif semcomp(chutes_version or "0.0.0", "0.5.2") >= 0: bin_name = "cfsv_v3" elif semcomp(chutes_version or "0.0.0", "0.4.7") >= 0: bin_name = "cfsv_v2" @@ -670,3 +673,76 @@ async def generate_fs_hash( ) raise Exception("No RESULT line found in cfsv output: {stdout.decode()}") return fsv_hash + + +ENVVERIFY_LIB_PATH = "/usr/local/lib/libenvverify.so" + + +@broker.task +async def verify_bytecode_integrity( + image_id: str, + patch_version: str, + challenge: str, + modules_csv: str 
= "", +) -> dict: + """ + Download bytecode manifest from S3, parse it via libenvverify.so, + and return expected hashes for the given modules so the caller can + compare against the miner's response. + """ + # Download manifest from S3 (cached locally like CFSV data). + cache_path = f"/tmp/{image_id}.{patch_version}.manifest" + if not os.path.exists(cache_path): + logger.info(f"Downloading bytecode manifest for {image_id=}, {patch_version=}") + s3_key = f"image_hash_blobs/{image_id}/{patch_version}.manifest" + try: + temp_fd, temp_path = tempfile.mkstemp( + dir="/tmp", prefix=f"{image_id}.{patch_version}.manifest." + ) + os.close(temp_fd) + try: + async with settings.s3_client() as s3: + await s3.download_file(settings.storage_bucket, s3_key, temp_path) + os.rename(temp_path, cache_path) + logger.info( + f"Cached bytecode manifest to {cache_path} for {image_id=} {patch_version=}" + ) + except Exception: + if os.path.exists(temp_path): + os.unlink(temp_path) + raise + except Exception as e: + logger.error(f"Failed to download bytecode manifest from S3: {e}") + raise Exception(f"Failed to download bytecode manifest from S3: {e}") + else: + logger.info(f"Using cached bytecode manifest at {cache_path}") + + # Parse manifest via libenvverify.so on the validator side. 
+ if not os.path.exists(ENVVERIFY_LIB_PATH): + raise Exception(f"libenvverify.so not found at {ENVVERIFY_LIB_PATH}") + + verify_lib = ctypes.CDLL(ENVVERIFY_LIB_PATH) + verify_lib.integrity_query_manifest_entries.argtypes = [ + ctypes.c_char_p, + ctypes.c_char_p, + ctypes.c_char_p, + ctypes.c_size_t, + ] + verify_lib.integrity_query_manifest_entries.restype = ctypes.c_int + + result_buf = ctypes.create_string_buffer(65536) + rc = verify_lib.integrity_query_manifest_entries( + cache_path.encode(), + (modules_csv or "").encode(), + result_buf, + 65536, + ) + if rc != 0: + logger.error(f"integrity_query_manifest_entries returned {rc}") + raise Exception(f"Failed to parse bytecode manifest (rc={rc})") + + try: + return json.loads(result_buf.value) + except Exception as e: + logger.error(f"Failed to parse manifest entries JSON: {e}") + raise Exception(f"Failed to parse manifest entries JSON: {e}") diff --git a/api/image/forge.py b/api/image/forge.py index 0bf834cf..64bba52d 100644 --- a/api/image/forge.py +++ b/api/image/forge.py @@ -38,6 +38,9 @@ CFSV_PATH = os.path.join(os.path.dirname(chutes.__file__), "cfsv") CFSV_V2_PATH = f"{CFSV_PATH}_v2" CFSV_V3_PATH = f"{CFSV_PATH}_v3" +CFSV_V4_PATH = f"{CFSV_PATH}_v4" +BCM_SO_PATH = os.path.join(os.path.dirname(chutes.__file__), "chutes-bcm.so") +MANIFEST_DRIVER_PATH = os.path.join(os.path.dirname(chutes.__file__), "generate_manifest_driver.py") async def initialize(): @@ -97,7 +100,9 @@ async def build_and_push_image(image, build_dir): # Copy cfsv binary to build directory build_cfsv_path = os.path.join(build_dir, "cfsv") - if semcomp(image.chutes_version or "0.0.0", "0.5.2") >= 0: + if semcomp(image.chutes_version or "0.0.0", "0.5.5") >= 0: + shutil.copy2(CFSV_V4_PATH, build_cfsv_path) + elif semcomp(image.chutes_version or "0.0.0", "0.5.2") >= 0: shutil.copy2(CFSV_V3_PATH, build_cfsv_path) elif semcomp(image.chutes_version or "0.0.0", "0.4.6") >= 0: shutil.copy2(CFSV_V2_PATH, build_cfsv_path) @@ -190,11 +195,19 @@ async def 
_capture_logs(stream, name, capture=True): RUN chmod g+rwx /usr/local/lib/python3.12/dist-packages || true USER chutes RUN pip install chutes=={image.chutes_version} -RUN cp -f $(python -c 'import chutes; import os; print(os.path.join(os.path.dirname(chutes.__file__), "chutes-netnanny.so"))') /usr/local/lib/chutes-netnanny.so +""" + # v4 (aegis) vs v3 (netnanny+logintercept) .so injection + if semcomp(image.chutes_version or "0.0.0", "0.5.5") >= 0: + chutes_dockerfile_content += """RUN cp -f $(python -c 'import chutes; import os; print(os.path.join(os.path.dirname(chutes.__file__), "chutes-aegis.so"))') /usr/local/lib/chutes-aegis.so +ENV LD_PRELOAD=/usr/local/lib/chutes-aegis.so +""" + else: + chutes_dockerfile_content += """RUN cp -f $(python -c 'import chutes; import os; print(os.path.join(os.path.dirname(chutes.__file__), "chutes-netnanny.so"))') /usr/local/lib/chutes-netnanny.so RUN cp -f $(python -c 'import chutes; import os; print(os.path.join(os.path.dirname(chutes.__file__), "chutes-logintercept.so"))') /usr/local/lib/chutes-logintercept.so RUN cp -f $(python -c 'import chutes; import os; print(os.path.join(os.path.dirname(chutes.__file__), "chutes-cfsv.so"))') /usr/local/lib/chutes-cfsv.so ENV LD_PRELOAD=/usr/local/lib/chutes-netnanny.so:/usr/local/lib/chutes-logintercept.so -WORKDIR /app +""" + chutes_dockerfile_content += """WORKDIR /app """ chutes_dockerfile_path = os.path.join(build_dir, "Dockerfile.chutes") with open(chutes_dockerfile_path, "w") as f: @@ -259,6 +272,25 @@ async def _capture_logs(stream, name, capture=True): fsv_dockerfile_content += """ RUN python -m cllmv.pkg_hash > /tmp/package_hashes.json """ + + # Generate bytecode manifest (V2) for chutes >= 0.5.5. 
+ build_bcm_path = os.path.join(build_dir, "chutes-bcm.so") + build_driver_path = os.path.join(build_dir, "generate_manifest_driver.py") + if ( + semcomp(image.chutes_version or "0.0.0", "0.5.5") >= 0 + and os.path.exists(BCM_SO_PATH) + and os.path.exists(MANIFEST_DRIVER_PATH) + ): + shutil.copy2(BCM_SO_PATH, build_bcm_path) + shutil.copy2(MANIFEST_DRIVER_PATH, build_driver_path) + fsv_dockerfile_content += """COPY chutes-bcm.so /tmp/chutes-bcm.so +COPY generate_manifest_driver.py /tmp/generate_manifest_driver.py +RUN CFSV_OP="${CFSV_OP}" python3 /tmp/generate_manifest_driver.py \ + --output /tmp/bytecode.manifest \ + --lib /tmp/chutes-bcm.so \ + --extra-dirs /usr/local/lib/python3.12/site-packages +""" + fsv_dockerfile_path = os.path.join(build_dir, "Dockerfile.fsv") with open(fsv_dockerfile_path, "w") as f: f.write(fsv_dockerfile_content) @@ -301,11 +333,16 @@ async def _capture_logs(stream, name, capture=True): data_file_path, package_hashes, inspecto_hash, + bytecode_manifest_path, ) = await extract_cfsv_data_from_verification_image(verification_tag, build_dir) image.inspecto = inspecto_hash image.package_hashes = package_hashes await upload_filesystem_verification_data(image, data_file_path) + # Upload bytecode manifest to S3 if generated. + if bytecode_manifest_path and os.path.exists(bytecode_manifest_path): + await upload_bytecode_manifest(image, bytecode_manifest_path) + # Build final image that combines original + index file logger.info(f"Building final image as {short_tag}") @@ -315,6 +352,12 @@ async def _capture_logs(stream, name, capture=True): ENV PYTHONDONTWRITEBYTECODE=1 ENTRYPOINT [] """ + # Include bytecode manifest in final image if it was generated. 
+ if bytecode_manifest_path and os.path.exists(bytecode_manifest_path): + final_dockerfile_content = final_dockerfile_content.rstrip() + "\n" + final_dockerfile_content += ( + "COPY --from=fsv /tmp/bytecode.manifest /etc/bytecode.manifest\n" + ) final_dockerfile_path = os.path.join(build_dir, "Dockerfile.final") with open(final_dockerfile_path, "w") as f: f.write(final_dockerfile_content) @@ -705,7 +748,15 @@ async def extract_cfsv_data_from_verification_image(verification_tag: str, build shutil.copy2(source_path, data_file_path) logger.info(f"Successfully copied data file from {source_path} to {data_file_path}") - return data_file_path, package_hashes, inspecto_hash + # Extract bytecode manifest if it exists (V2, chutes >= 0.5.5). + bytecode_manifest_path = None + manifest_src = os.path.join(mount_path, "tmp", "bytecode.manifest") + if os.path.exists(manifest_src): + bytecode_manifest_path = os.path.join(build_dir, "bytecode.manifest") + shutil.copy2(manifest_src, bytecode_manifest_path) + logger.info(f"Extracted bytecode manifest from {manifest_src}") + + return data_file_path, package_hashes, inspecto_hash, bytecode_manifest_path finally: # Unmount if we mounted if mount_path and container_id: @@ -744,6 +795,17 @@ async def upload_filesystem_verification_data(image, data_file_path: str): logger.success(f"Uploaded filesystem verification data to {s3_key}") +async def upload_bytecode_manifest(image, manifest_path: str): + """ + Upload the bytecode manifest to S3. + """ + patch_version = image.patch_version if image.patch_version is not None else "initial" + s3_key = f"image_hash_blobs/{image.image_id}/{patch_version}.manifest" + async with settings.s3_client() as s3: + await s3.upload_file(manifest_path, settings.storage_bucket, s3_key) + logger.success(f"Uploaded bytecode manifest to {s3_key}") + + async def get_target_image_id() -> str | None: """ Get the image_id to build, ensuring no other processes can lock this same image_id. 
@@ -918,7 +980,9 @@ async def _capture_logs(stream, name, capture=True): with tempfile.TemporaryDirectory() as build_dir: try: build_cfsv_path = os.path.join(build_dir, "cfsv") - if semcomp(chutes_version or "0.0.0", "0.5.2") >= 0: + if semcomp(chutes_version or "0.0.0", "0.5.5") >= 0: + shutil.copy2(CFSV_V4_PATH, build_cfsv_path) + elif semcomp(chutes_version or "0.0.0", "0.5.2") >= 0: shutil.copy2(CFSV_V3_PATH, build_cfsv_path) elif semcomp(chutes_version or "0.0.0", "0.4.6") >= 0: shutil.copy2(CFSV_V2_PATH, build_cfsv_path) @@ -944,7 +1008,14 @@ async def _capture_logs(stream, name, capture=True): RUN chmod g+rwx /usr/local/lib/python3.12/dist-packages || true USER chutes RUN pip install chutes=={chutes_version} -RUN cp -f $(python -c 'import chutes; import os; print(os.path.join(os.path.dirname(chutes.__file__), "chutes-netnanny.so"))') /usr/local/lib/chutes-netnanny.so +""" + # v4 (aegis) vs v3 (netnanny+logintercept) .so injection + if semcomp(chutes_version or "0.0.0", "0.5.5") >= 0: + dockerfile_content += """RUN cp -f $(python -c 'import chutes; import os; print(os.path.join(os.path.dirname(chutes.__file__), "chutes-aegis.so"))') /usr/local/lib/chutes-aegis.so +ENV LD_PRELOAD=/usr/local/lib/chutes-aegis.so +""" + else: + dockerfile_content += """RUN cp -f $(python -c 'import chutes; import os; print(os.path.join(os.path.dirname(chutes.__file__), "chutes-netnanny.so"))') /usr/local/lib/chutes-netnanny.so RUN cp -f $(python -c 'import chutes; import os; print(os.path.join(os.path.dirname(chutes.__file__), "chutes-logintercept.so"))') /usr/local/lib/chutes-logintercept.so RUN cp -f $(python -c 'import chutes; import os; print(os.path.join(os.path.dirname(chutes.__file__), "chutes-cfsv.so"))') /usr/local/lib/chutes-cfsv.so ENV LD_PRELOAD=/usr/local/lib/chutes-netnanny.so:/usr/local/lib/chutes-logintercept.so @@ -1013,6 +1084,25 @@ async def _capture_logs(stream, name, capture=True): fsv_dockerfile_content += """ RUN python -m cllmv.pkg_hash > 
/tmp/package_hashes.json """ + + # Generate bytecode manifest (V2) for chutes >= 0.5.5. + build_bcm_path = os.path.join(build_dir, "chutes-bcm.so") + build_driver_path = os.path.join(build_dir, "generate_manifest_driver.py") + if ( + semcomp(chutes_version or "0.0.0", "0.5.5") >= 0 + and os.path.exists(BCM_SO_PATH) + and os.path.exists(MANIFEST_DRIVER_PATH) + ): + shutil.copy2(BCM_SO_PATH, build_bcm_path) + shutil.copy2(MANIFEST_DRIVER_PATH, build_driver_path) + fsv_dockerfile_content += """COPY chutes-bcm.so /tmp/chutes-bcm.so +COPY generate_manifest_driver.py /tmp/generate_manifest_driver.py +RUN CFSV_OP="${CFSV_OP}" python3 /tmp/generate_manifest_driver.py \ + --output /tmp/bytecode.manifest \ + --lib /tmp/chutes-bcm.so \ + --extra-dirs /usr/local/lib/python3.12/site-packages +""" + fsv_dockerfile_path = os.path.join(build_dir, "Dockerfile.fsv") with open(fsv_dockerfile_path, "w") as f: f.write(fsv_dockerfile_content) @@ -1062,12 +1152,22 @@ async def _capture_logs(stream, name, capture=True): data_file_path, package_hashes, inspecto_hash, + bytecode_manifest_path, ) = await extract_cfsv_data_from_verification_image(verification_tag, build_dir) s3_key = f"image_hash_blobs/{image_id}/{patch_version}.data" async with settings.s3_client() as s3: await s3.upload_file(data_file_path, settings.storage_bucket, s3_key) logger.success(f"Uploaded filesystem verification data to {s3_key}") + # Upload bytecode manifest if generated. 
+ if bytecode_manifest_path and os.path.exists(bytecode_manifest_path): + manifest_s3_key = f"image_hash_blobs/{image_id}/{patch_version}.manifest" + async with settings.s3_client() as s3: + await s3.upload_file( + bytecode_manifest_path, settings.storage_bucket, manifest_s3_key + ) + logger.success(f"Uploaded bytecode manifest to {manifest_s3_key}") + # Stage 3: Build final image that combines updated + index file logger.info(f"Stage 3: Building final image as {target_tag}") @@ -1077,6 +1177,12 @@ async def _capture_logs(stream, name, capture=True): ENV PYTHONDONTWRITEBYTECODE=1 ENTRYPOINT [] """ + # Include bytecode manifest in final image if it was generated. + if bytecode_manifest_path and os.path.exists(bytecode_manifest_path): + final_dockerfile_content = final_dockerfile_content.rstrip() + "\n" + final_dockerfile_content += ( + "COPY --from=fsv /tmp/bytecode.manifest /etc/bytecode.manifest\n" + ) final_dockerfile_path = os.path.join(build_dir, "Dockerfile.final") with open(final_dockerfile_path, "w") as f: f.write(final_dockerfile_content) diff --git a/api/instance/router.py b/api/instance/router.py index 5a81c6fc..3862d70c 100644 --- a/api/instance/router.py +++ b/api/instance/router.py @@ -78,6 +78,7 @@ generate_ip_token, aes_decrypt, derive_ecdh_session_key, + derive_x25519_session_key, decrypt_instance_response, notify_created, notify_deleted, @@ -101,9 +102,74 @@ NETNANNY.verify.argtypes = [ctypes.c_char_p, ctypes.c_char_p, ctypes.c_uint8] NETNANNY.verify.restype = ctypes.c_int +# Aegis v4 verification library (optional — may not be deployed yet) +AEGIS_VERIFY = None +try: + AEGIS_VERIFY = ctypes.CDLL("/usr/local/lib/chutes-aegis-verify.so") + AEGIS_VERIFY.verify.argtypes = [ctypes.c_char_p, ctypes.c_char_p, ctypes.c_uint8] + AEGIS_VERIFY.verify.restype = ctypes.c_int + logger.info("Loaded chutes-aegis-verify.so") +except OSError: + logger.warning( + "chutes-aegis-verify.so not found, v4 netnanny verification will fall back to commitment-only" + ) + + 
+def _verify_rint_commitment_v4(commitment_hex: str) -> bool: + """Verify a v4 runtime integrity commitment (aegis/Ed25519).""" + try: + from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PublicKey + + if len(commitment_hex) != 292: + logger.error(f"RUNINT v4: commitment length mismatch: {len(commitment_hex)} != 292") + return False + + commitment_bytes = bytes.fromhex(commitment_hex) + if len(commitment_bytes) != 146: + logger.error( + f"RUNINT v4: decoded commitment length mismatch: {len(commitment_bytes)} != 146" + ) + return False + + prefix = commitment_bytes[0] + if prefix != 0x04: + logger.error(f"RUNINT v4: invalid prefix: {prefix} != 0x04") + return False + + version = commitment_bytes[1] + if version != 0x04: + logger.error(f"RUNINT v4: invalid version: {version} != 0x04") + return False + + pubkey_bytes = commitment_bytes[2:34] # Ed25519 pubkey (32 bytes) + nonce_bytes = commitment_bytes[34:50] # nonce (16 bytes) + lib_proof_bytes = commitment_bytes[50:82] # lib_proof HMAC-SHA256 (32 bytes) + sig_bytes = commitment_bytes[82:146] # Ed25519 signature (64 bytes) + + # Verify: Ed25519_verify(pubkey, version||pubkey||nonce||lib_proof, signature) + msg_to_verify = bytes([version]) + pubkey_bytes + nonce_bytes + lib_proof_bytes + pk = Ed25519PublicKey.from_public_bytes(pubkey_bytes) + + try: + pk.verify(sig_bytes, msg_to_verify) + logger.info("RUNINT v4: commitment verification successful") + return True + except Exception: + logger.error("RUNINT v4: signature verification failed") + return False + + except Exception as e: + logger.error(f"RUNINT v4: commitment verification error: {e}") + return False + def _verify_rint_commitment(commitment_hex: str, expected_nonce: str) -> bool: - """Verify the runtime integrity commitment (mini-cert).""" + """Verify the runtime integrity commitment (mini-cert). 
Auto-detects v3/v4.""" + # v4 commitments start with "04" prefix + if commitment_hex[:2] == "04": + return _verify_rint_commitment_v4(commitment_hex) + + # v3 (SECP256k1) path try: from ecdsa import VerifyingKey, SECP256k1, BadSignatureError import hashlib @@ -288,8 +354,8 @@ async def _check_scalable_private(db, chute, miner): AND ia.activated_at <= NOW() - INTERVAL '7 days' """) public_result = ( - await db.execute(public_history_query, {"hotkey": miner.hotkey}) - ).mappings().first() + (await db.execute(public_history_query, {"hotkey": miner.hotkey})).mappings().first() + ) if not public_result or public_result["public_count"] == 0: logger.warning( f"PRIVATE_GATE: miner {miner.hotkey} denied private chute {chute_id}: " @@ -770,9 +836,32 @@ async def _validate_launch_config_instance( detail=launch_config.verification_error, ) - # NetNanny (match egress config and hash). + # NetNanny / Aegis verification (match egress config and hash). nn_valid = True - if chute.allow_external_egress != args.egress or not args.netnanny_hash: + if semcomp(chute.chutes_version or "0.0.0", "0.5.5") >= 0: + # v4 (aegis): netnanny_hash comes from aegis-verify + if not args.netnanny_hash: + nn_valid = False + elif AEGIS_VERIFY is not None: + if not AEGIS_VERIFY.verify( + launch_config.config_id.encode(), + args.netnanny_hash.encode(), + 1, + ): + logger.error( + f"{log_prefix} aegis-verify hash mismatch for {launch_config.config_id=}" + ) + nn_valid = False + else: + logger.success( + f"{log_prefix} aegis-verify hash challenge success: {launch_config.config_id=} {args.netnanny_hash=}" + ) + else: + # aegis-verify .so not available, allow through (commitment already verified) + logger.warning( + f"{log_prefix} aegis-verify not available, skipping hash verification" + ) + elif chute.allow_external_egress != args.egress or not args.netnanny_hash: nn_valid = False else: if not NETNANNY.verify( @@ -888,6 +977,16 @@ async def _validate_launch_config_instance( rint_commitment=getattr(args, 
"rint_commitment", None), rint_nonce=getattr(args, "rint_nonce", None), rint_pubkey=getattr(args, "rint_pubkey", None), + extra={ + k: v + for k, v in { + "tls_cert": getattr(args, "tls_cert", None), + "tls_cert_sig": getattr(args, "tls_cert_sig", None), + "e2e_pubkey": getattr(args, "e2e_pubkey", None), + }.items() + if v is not None + } + or None, ) if launch_config.job_id or ( not chute.public @@ -1278,23 +1377,28 @@ async def validate_tee_launch_config_instance( detail="rint_pubkey and rint_nonce required for chutes >= 0.5.1", ) - # Generate ECDH session key if miner provided rint_pubkey + # Generate session key if miner provided rint_pubkey validator_pubkey = None if instance.rint_pubkey and instance.rint_nonce: try: - validator_pubkey, session_key = derive_ecdh_session_key( - instance.rint_pubkey, instance.rint_nonce - ) + if semcomp(instance.chutes_version or "0.0.0", "0.5.5") >= 0: + validator_pubkey, session_key = derive_x25519_session_key( + instance.rint_pubkey, instance.rint_nonce + ) + else: + validator_pubkey, session_key = derive_ecdh_session_key( + instance.rint_pubkey, instance.rint_nonce + ) instance.rint_session_key = session_key logger.info( - f"Derived ECDH session key for TEE instance {instance.instance_id} " + f"Derived session key for TEE instance {instance.instance_id} " f"validator_pubkey={validator_pubkey[:16]}..." 
) except Exception as exc: - logger.error(f"ECDH session key derivation failed for TEE: {exc}") + logger.error(f"Session key derivation failed for TEE: {exc}") raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, - detail=f"ECDH session key derivation failed: {exc}", + detail=f"Session key derivation failed: {exc}", ) # Store the launch config @@ -1368,23 +1472,28 @@ async def claim_launch_config( detail="rint_pubkey and rint_nonce required for chutes >= 0.5.1", ) - # Generate ECDH session key if miner provided rint_pubkey + # Generate session key if miner provided rint_pubkey validator_pubkey = None if instance.rint_pubkey and instance.rint_nonce: try: - validator_pubkey, session_key = derive_ecdh_session_key( - instance.rint_pubkey, instance.rint_nonce - ) + if semcomp(instance.chutes_version or "0.0.0", "0.5.5") >= 0: + validator_pubkey, session_key = derive_x25519_session_key( + instance.rint_pubkey, instance.rint_nonce + ) + else: + validator_pubkey, session_key = derive_ecdh_session_key( + instance.rint_pubkey, instance.rint_nonce + ) instance.rint_session_key = session_key logger.info( - f"Derived ECDH session key for {instance.instance_id} " + f"Derived session key for {instance.instance_id} " f"validator_pubkey={validator_pubkey[:16]}..." ) except Exception as exc: - logger.error(f"ECDH session key derivation failed: {exc}") + logger.error(f"Session key derivation failed: {exc}") raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, - detail=f"ECDH session key derivation failed: {exc}", + detail=f"Session key derivation failed: {exc}", ) # Generate a ciphertext for this instance to decrypt. 
diff --git a/api/instance/schemas.py b/api/instance/schemas.py index 191dee18..718ee639 100644 --- a/api/instance/schemas.py +++ b/api/instance/schemas.py @@ -63,6 +63,9 @@ class LaunchConfigArgs(BaseModel): rint_commitment: Optional[str] = None rint_nonce: Optional[str] = None rint_pubkey: Optional[str] = None + tls_cert: Optional[str] = None + tls_cert_sig: Optional[str] = None + e2e_pubkey: Optional[str] = None class GravalLaunchConfigArgs(LaunchConfigArgs): @@ -120,6 +123,9 @@ class Instance(Base): rint_pubkey = Column(String, nullable=True) rint_session_key = Column(String, nullable=True) + # Flexible extra data (aegis v4 fields, future extensions) + extra = Column(JSONB, nullable=True) + # Hourly rate charged to customer, which may differ from the hourly rate of the actual # GPUs used for this instance due to node selector. For example, if a chute supports # both H100 and A100, the user is only charged the A100 rate since the miners *could* diff --git a/api/miner/router.py b/api/miner/router.py index 59cf6de4..4f97ffb5 100644 --- a/api/miner/router.py +++ b/api/miner/router.py @@ -617,7 +617,9 @@ async def get_thrash_cooldowns( "chute_id": row.chute_id, "chute_name": row.chute_name, "deleted_at": row.deleted_at.isoformat() if row.deleted_at else None, - "cooldown_expires_at": row.cooldown_expires_at.isoformat() if row.cooldown_expires_at else None, + "cooldown_expires_at": row.cooldown_expires_at.isoformat() + if row.cooldown_expires_at + else None, } for row in result.fetchall() ] diff --git a/api/util.py b/api/util.py index f3fc2dcd..21c3988a 100644 --- a/api/util.py +++ b/api/util.py @@ -43,8 +43,10 @@ from scalecodec.utils.ss58 import is_valid_ss58_address, ss58_decode from async_substrate_interface.async_substrate import AsyncSubstrateInterface from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes -from cryptography.hazmat.primitives.asymmetric import ec +from cryptography.hazmat.primitives.asymmetric import ec, x25519 from 
cryptography.hazmat.primitives.serialization import Encoding, PublicFormat +from cryptography.hazmat.primitives.kdf.hkdf import HKDF +from cryptography.hazmat.primitives.ciphers.aead import ChaCha20Poly1305 ALLOWED_HOST_RE = re.compile(r"(?!-)[a-z\d-]{1,63}(? tuple return validator_pubkey_hex, session_key.hex() +def derive_x25519_session_key(miner_x25519_pubkey_hex: str, rint_nonce_hex: str) -> tuple[str, str]: + """ + Generate an ephemeral X25519 keypair and derive session key with miner's X25519 pubkey. + + Args: + miner_x25519_pubkey_hex: Miner's X25519 public key as 64 hex chars (32 bytes) + rint_nonce_hex: The rint_nonce from instance (32 hex chars = 16 bytes) + + Returns: + (validator_pubkey_hex, session_key_hex): Validator's X25519 pubkey to send back, + and the derived 32-byte session key for ChaCha20-Poly1305. + + Session key derivation (v4): + HKDF-SHA256( + ikm=X25519_DH(validator_priv, miner_pub), + salt=miner_pub || validator_pub || nonce, + info=b"runint-session-v4", + length=32 + ) + """ + miner_pubkey_bytes = bytes.fromhex(miner_x25519_pubkey_hex) + if len(miner_pubkey_bytes) != 32: + raise ValueError(f"Invalid miner X25519 pubkey length: {len(miner_pubkey_bytes)}") + + rint_nonce_bytes = bytes.fromhex(rint_nonce_hex) + if len(rint_nonce_bytes) != 16: + raise ValueError(f"Invalid rint_nonce length: {len(rint_nonce_bytes)}") + + miner_public_key = x25519.X25519PublicKey.from_public_bytes(miner_pubkey_bytes) + + validator_private_key = x25519.X25519PrivateKey.generate() + validator_public_key = validator_private_key.public_key() + validator_pubkey_bytes = validator_public_key.public_bytes_raw() + validator_pubkey_hex = validator_pubkey_bytes.hex() + + shared_secret = validator_private_key.exchange(miner_public_key) + + salt = miner_pubkey_bytes + validator_pubkey_bytes + rint_nonce_bytes + session_key = HKDF( + algorithm=hashes.SHA256(), + length=32, + salt=salt, + info=b"runint-session-v4", + backend=default_backend(), + ).derive(shared_secret) + + 
return validator_pubkey_hex, session_key.hex() + + +def chacha20_poly1305_encrypt(plaintext: bytes, key_hex: str) -> bytes: + """ + Encrypt with ChaCha20-Poly1305. + Format: nonce (12 bytes) || ciphertext || tag (16 bytes) + """ + if isinstance(plaintext, str): + plaintext = plaintext.encode() + key = bytes.fromhex(key_hex) + nonce = secrets.token_bytes(12) + aead = ChaCha20Poly1305(key) + ct = aead.encrypt(nonce, plaintext, None) + return nonce + ct + + +def chacha20_poly1305_decrypt(ciphertext: bytes, key_hex: str) -> bytes: + """ + Decrypt ChaCha20-Poly1305 ciphertext. + Format: nonce (12 bytes) || ciphertext || tag (16 bytes) + Ciphertext may be base64-encoded. + """ + key = bytes.fromhex(key_hex) + if isinstance(ciphertext, str): + ciphertext = ciphertext.encode() + # Try base64 decode if it looks like base64 + try: + raw = base64.b64decode(ciphertext) + except Exception: + raw = ciphertext + if len(raw) < 28: # 12 nonce + 16 tag minimum + raise ValueError("Ciphertext too short for ChaCha20-Poly1305") + nonce = raw[:12] + ct_and_tag = raw[12:] + aead = ChaCha20Poly1305(key) + return aead.decrypt(nonce, ct_and_tag, None) + + def aes_gcm_encrypt(plaintext: bytes, key: bytes) -> bytes: """ Encrypt with AES-256-GCM. 
@@ -503,6 +590,12 @@ def decrypt_instance_response( raise ValueError("iv required for legacy AES-CBC decryption") return aes_decrypt(ciphertext, instance.symmetric_key, iv) + # chutes >= 0.5.5 uses ChaCha20-Poly1305 with X25519-derived session key + if semcomp(instance.chutes_version or "0.0.0", "0.5.5") >= 0: + if not instance.rint_session_key: + raise ValueError("chutes >= 0.5.5 requires rint_session_key") + return chacha20_poly1305_decrypt(ciphertext, instance.rint_session_key) + # chutes >= 0.5.1 uses AES-256-GCM with ECDH-derived session key if semcomp(instance.chutes_version or "0.0.0", "0.5.1") >= 0: if not instance.rint_session_key: @@ -537,6 +630,15 @@ def encrypt_instance_request( if isinstance(plaintext, str): plaintext = plaintext.encode() + # chutes >= 0.5.5 uses ChaCha20-Poly1305 with X25519-derived session key + if semcomp(instance.chutes_version or "0.0.0", "0.5.5") >= 0: + if not instance.rint_session_key: + raise ValueError("chutes >= 0.5.5 requires rint_session_key") + encrypted = chacha20_poly1305_encrypt(plaintext, instance.rint_session_key) + if hex_encode: + return encrypted.hex(), None + return base64.b64encode(encrypted).decode(), None + # chutes >= 0.5.1 uses AES-256-GCM with ECDH-derived session key if semcomp(instance.chutes_version or "0.0.0", "0.5.1") >= 0: if not instance.rint_session_key: diff --git a/data/chutes-aegis-verify.so b/data/chutes-aegis-verify.so new file mode 100755 index 0000000000000000000000000000000000000000..6ab5db4299b3f579dd08fb40ae919c37864cba70 GIT binary patch literal 170512 zcmeFadw3K@_CGwGncN`I6ATw2LZZz|6m=$o5(SwiJxPyFAW8&R(GVaA8c0kgD4NBD z1nu5&lvVb-th&3ni!Qn|^i#F;D`QiCkeGX2~MSjp}lo#^D^F<-4-e=^i z|4Vm&DsMzgUrrCFldvdXBhS~!^M(9Y&a99h!?ROy^o-(UUhGR8FXR(0{jv{wW>9$a zL?h$H{yHKCzmOlwcPH}kBm?39#pTU2-NozsHNIR-cSts<-UK{mJelL~HmzK9+wey7 zq_pdA`D}cb~5z5-RZw!bA1-z>^SlWzhg(iTaOnQ(CswsMC6y{QA^8hn)>*Zg|Cl(J|@X><6R(u z(Q^g-AruMwCG+r=@Q32Ln#YI1Pr*Z?d>x+a@!WvNj^{=^^c#U^B%Yh`jKUNCOACwK 
zg76qTx8WI!$AM=Y9{MSGTzGEBa|a$bo-91$@l3#TCmw1*4xYR5+=J)0c&6Z?-~D(V zz>|k(I-YzyGx0o#XEvVD?_mVy;F*i32u}$f51xyE^Elu!_>bdx0?+R_Yytdo9$pBa z`k8)<4A1lw{HO6~JpOz5UOYZL&*J$5o)_@^84vwl!t)mvGMWC$ee#y^@ISau-f}#z z^7w1qC-0wlR^WL9&%f}{?@c^!u~3(@62Vn?796hLv8M2&n;q{AJm*|I>)mNBe=6&^ zC-7oj(xhA7eCp4Qdp91L^})onyBsUMDMIGYLszc(=&kz~|M|)fwH~oQ-twn0kEOr% z&#}|)?78O$=e1Ydd@v_!+-7NH?MuL&&a3#xlDfD-s}C@ zl>tF6Sp3PNbE{u_`()W4)(`3T%nffRKG^c(h6kc={9EG_Q%9UWW4f(q(bzM?GnP)@ z^OD~_FZ+Y%i(mZh-+yY|8L@fS4JYpX+ovb%ZuQ)Feb4%_?ftiC#ra>hz3};{NfF}w zMGvid{Ok)?4c%O}@|L`W+xite^v6GMo%>w!+{H^?`eVvXA8uLlieub;n+pHBcgHK= z{(aP`^=WnaoZ z+IsWB)<6B`>r-iWtr}E(|K4>czWLp(aesbM+g6|QySPC;7m26)CI0x^fsbT=)MsQ$ z)}q!m&-VN2f^*fNf2G{@(ZDbIA9-n8Nzt7X?yh)tcD-M>T#>k;&Zh5w=z*cGo0>DS z$0dxcocwfhUH+O^5|-Wl$_wkFU(34i{QU*jzPxVmvfeAcK9!KTBN{wwg`A;YE0 z&xj#-oGJVA2v)^nUQs2(k25vVVuN42;h6>jjqsCbg2(S0!!tE;cs5P4_Jq0jKqm zejk{5yf!2ieqn?6RA;H`WS8_)SLo5^RKY1It|KAOXRZAd5lWkp8(W$=Z5ZJeIg zi0<_CD%}6#i@%AQ7Hvn$bN59wbP&xj^MuposA8NRq zG@LTvq4u8Ua&YaLIsYaQJB6k5h*j@KE{Jb3T|p zF$#VF{H5P=P0}#>&zu^72ipv$MW51&uzx<;9$_tgBYv4I~ z`ET&{IuZ>4>3j6s!|@xY8S#+(f5Yit_N@V5VR)u`4$tP}WfkYcQeM85mmiXoF(LU8 zje1Y;a^6Oi_|QZO8-5le*;LBgRW#H95Wna*f%9Q8A9ptxU{eJz$MGiv05tZ?;dCzE zV#L>QxX9(9C};ps9s3pYa;B0(iQh2fr(Z>=T{NNL7oxv5RPWyy*whnv^c&5~&*t+| zNQeK7x66NxQO?u6T?0A)SKVslw{kgoob#vyzwJG|ng>BoBY*_}0t1<1glP zW^?}V?lev2dF@N}CV-xcHhjbN-a`aeAeZY(qJ9&&3z<=Opqme)0)mi$SjGO5TqRc}Br!dHLHp zA546JzRcmZyj|t@8i02Zr{8>z->}Vye{OiDL%hA&ynjP-Zs&4k8ek9-lB@f8|C+8h z;BWKtD?;+j^L5|O;V?2$l6D?ibUhiVwUp8L8iH{>c@0S%E{&!w}(hQ^gfAI-!a%g;U zedcwJ@8#uK`xq6T;PB5le$tf&oRe=#MX^MuKf^%0oF|;+d|q|Xfd7-zpTg-`e8hlH z;}hHfE}yF|8ehwK|27OW3fOHl!*okX9yp%|b2?uS@t^CfWIv+cVBW5x-bOigUQRQo z$8^zpHApMs2g_&i0sQddi}F%O-b6G^pHtL{XPT2gXL=Dx zhw~oH&z+T5GRtISHQL3)rA1UrD24IHG^2o#Fr%Pk{v1T67ZjG{ zo3iJ-z>eI4+0&*M6wH~2cvj(z(t`ZlbnwlS4=Rn8fj_*0u=t#!!s5=h6UOv{{JdgQ zUVe#zOM(2k2BW7>n@)`>0ka4k;y-~YZscT~VEw~FycH~Lw1EW;TuvBq1U=E&)VvZ< z#CVxIv$UX~t0P#ko!!ES>Pj?I*h5^i}E zA_!1c4dxe6#ikidD@Kl)#rdJk*?Gk!`IBdRW=$yaK;l^II$`eoxexLb6f>9RDfphk 
z8R3ZGVJgVT6oS%(7aqQQ?##mCIe8vWerF+sGOl#yOe_Q4VA;IQn3Y0s@Leu0&YM4x z=6!?M5Xj}ppVI}JFzuoI=^h5l&7JO_G`~2PWT`W27hZZ^N&dwAxh1nb z5F(DC=9P3IaF{$}1{IrA$OyR@XrNyjnm(~R{H}az^4#h9mn;^R^ zZ%XK@Bd53&MPCAq61vbb@(M~WjmJ{D5XTi3Uby<=g1qUB@L#kx zEbUh_x#o5wr(2d^r^b+6qq{q0o_DE{X5_te8l(TxNv^rZT+ywpjDo_vOMsQaQmCBa z%m^nd9IljN8JJG0mNAEP#&e7FL-R@&vHtn}~ z8_lkga2i~ER_ZLVYY?P!<;bXejOXsSFS8);g}(>wl@7)D94fwl(PrK=Gcw-oT$OcW>tYP@!@TM^QzG3Ajh2e+8gJJmn!|+?f;CyS( zQrpAeA=@-13|MB+kL|W8jbR#2QW*TUF!-=A_>eHTEey_06D)O97~H7I$m9rv|23=} zH4MHW44xeZ=O#LqdQTW!3&Wop2InRPmO3j8{)aI9qA)m2RGq)a!r&wmpsgHH^@|8MZW9QZE>{>y>?zd7)uIQWcOc_c>lM(!_+FsY0Do`{YHwQ^(3 z21Z)P$nOB|71tlZ;^QbzOIGtT7Qcn!q{=ogX7N;t(^Ax2 z&f?cloK)K8A{M`r;-tzpPi66e6ekt7Ih)1%P@I;aW(SM+p!gt)+gLn;;)xVbV(~L` z5x;`sRu(@=aZ(|gO)P$x;-uO(x1Oi=e@F2oiZ`+NUWzAEyn)4cQ2a`Y`&oPo#jm3H zDi*JyxJ2<~EdC+Ihf;hoi@!_p-%z}q#owU#)f6vc@xN1i7{#Zu_=^-zp?EfnKTq*% zDDGhK-&6crirZLxA;pJNJc-30r}%Xgx3YLK#jmHhiN*h35Mi3RxMOH{AL;C;`o`=> z2Gtiyd{EQcJ$*5ho<$xSO0gYH;*fGe+`xU{zr*lHQoGcY3##6#29AwY1MOy2s8zRL z@Fbvs`*{H|9ZfUEAxc+%8n5MJN|D!8dhOk6<(LRynpFLuCrjbOvdpsCwgl8;pAImM6=T z7+ROhmuYcz)Vb1*yY#w}HDJf5IN7zIvipNF#BGuPu`#l~@k5#)^g7k&ku1U2(G4zr zlcN6+91oPW$Jf90jjQs!rF4K-kt{8Jx|MIF`X-!L)0#`3*8_oOOXYrXoLC)aAE&vW zIHL!`qi0>d2^WAVRuaDmJ4UYw9FD8}jEw0|LQB6l}4L(=1OIXco|=JRvlHkEWrYAV^!#qv$e&UtRoHD~`}zgO5>E zhgx}B78hor=UsY-x~M~}l3Dnay6BWxHIhlhPIb`^u_^^&x84!lbq2Y{Pyym1Izo2q zZ9zZ5D%&06A~WUijm+u+LGa~BR^&j3l%MVBsB9NXb5(tc6yvI_i*WmfrU1w7E0C=A zeY75k)rw@*l*A&n;w;^z@Qg-Yw|>a2A4Uncp#;jOc5FaFo*in(rw087a>>AEJ=CP; zCqp4|p$9>?Z=LiR$)c}JDsuZ~NY?eV&vonH1shL;ZeOoFTow2*iNUDGVEnm`4p#ai zzqp9*1CaP+bBwkiUA*f|?!%JHB%7OXsLe%g{Z#OJ&@1b8Y$Oneo&|?k9w&Ibh^|h$ zeaj`%GO{C^apjpfWMjq1AJAIi){onF*f&3vBu@^0*-mWV zXyvsW4-RAfa3n_lAc>(;YY00_Z@oKsD@)vH#i-yJI)ZIr04M7}>Fstun4y3fs<%vv z5v!M2h%kM3;Fv|M9?{#OjmU&}LY#YOIhlgm;YXh*mF}s{beK9D`YKAaAJv0u#YU2m zutCj+Z`b%`LghvIVS-tCWPqygbm^x^7=0t7(SG7+90Xtt%6&&5A)3nM0D@#yeGhj) zkeYDSjo1`t-xoGNkUYi7nq{z>_O<7la9KJDwuQ;kZuM4_B|aZ$iL!5QvD!C-r31R? 
z7OMkojT>ez4kNST2;Jpy+A&Bri&aY@@>%*ex4uc$KXvK5Wc_DVFO$%cZNY!Gp@$Y7 z6{|{UT!35RqS+MEj|ZQjBsOLy8)N2jG=s`SgOh-OHoHJ5S}ZQ2ap=||xo#93qgHOh z_!-$8Ma?dglAQJf81K~HB+LL(wyW~o5YG@40YP-@XHbxd3CgAjSI0&gJ-b{TpP?qg z6BmBT1i0_tP#TSpB-WFP-i&OUDH}v#044@c&*0uuM6q9NiCjxsjH+8Cw|55Up=`l* zlnNdI-%9vBf^PtV>9)n}v>#Qy>!b$C#8MWslv+wTMJbIurGIc9OR39RbimWcjd^+| z;w?R#_6|Twk3m4q!=)_Y=Pcp8OE~HhHeomf-z73JRIARR7?)nFdfi0wX3T&uI&)DhXNj^JP;MS)IGv5};;OQ3(T9%DTi1|hKTW8!=lWGzeoJWH>0>7N;*3~|gs zH$$HH{6r!^Vtg&3(R$I2tXU-D$4QVX*GW_1o5ZRnc(h(Dr!0DaM#|>^El85}%`V@a z9eRCblUc1iV^oWtS%3kXpQ8><3GgH~fpo6_PON_YWqYeUyA`k2 z2#6~Eg^S`57sbt8aka9>;!4|Ix|X!7Yuv&ar+ss`^*;mU_vpfRbp+gL^`*^DyWh}f zX4lRcUn@tZ*>FXwm4bnw)xM+c#5iq?Tglwh*(d^ zdiy?%%q1K2M#v#aS(qGNvPyZj|JJ|VZ;4zJD7({M4=uzqOswu*wb_%b`X&is^LxM_ zEw0~euNSL(5IvhqkD(!1CSazxe0K{UQzOx#o$?ac1o}7IKkveCiDXC>KT}=E+@or< zTVF0EflAL+F5g|Kty_LpUs!(oCZ}&YxVq1CuYEI?S$|9aFnkcDRt6%~w4I(Y;y=D9 zNmA09em6+pT6t)!SiO1#i3sW(akDzTy~8hSBW|PgEdY7?nmS=EAK3i_@1ahw2Ue}6 z_bsY-3VIin-bZgPnJ{AFnO<+y*OTg#eZ?^jvHFzwkDvd&YBvxieY$FW@1i>0-W+KF z3Cn|+G(FR(wJn2)JHKeJ*_F0~>bL&i*B}4u^|x&@QKt+dvigbD&qyZ>?!}4f^akQy zT)&GbR;M?*e4_`uQue97z8g9OOhw-)dH{>sPbWG$gbgSW6Bw2FJ(XA`br>aDqSfi2 zc5Hy&(3=fLH->?gkkA7g0I#(~-GaQ>RfOg33F5%Pu)G)LprL+m7KY#5YiU;mHKW&J zwIhJVG}@I`?-}d6bAVXAgGq;4p|OVQ%!hVt_- zP-ZSVI=`#?B7RW|=BD}Q1(O(GC27d)o__XrpXEG4CcR3c6;Tf;`kBC)E7Y_=|T%%C;l3mD|o%AcQf zr=2gk$&Eb^b&mb$%*8JKE7dn5QLU`S+HpMi7Q@>R;zDs#hy_?Ik9Mob$m2@+mKaid zq@#m3TCWcT&6Q0UTn=YhN+QwnmYd{H97qC7QK+Sp`qff7pj4s< zC5AU30ff;;sT28j--yv_MJ?ffh|ytkr<@6nB{Ga22JfE%R9B=jS^ruN9JT6S2aXQF zE=&#_u?B(zkYWYWVgqRyc8zl2I8PXWge68o5*Zb&SlTT{o)}UpS5p61@5FB4{22jE zr+uhSCXjCq6V+u>s#v|pfPBUwBQT`DFhC!0&`pT4UG@qN7=<;ajLKzYJA*Me2QKbg#GD#ym`UwPk|P)KZ)8P!1YZexMbF*?ab3uE0p5r zn2#Y0t6fbnkCC@pY5>-Ubnyf7e;&<+VGBCjf?GOEz;GcPw%)TD;225hmB`MsVTTgj z(3$heeJE%Kh#^H?^I^g3DrucWODZemLePdRL|!n6l9C{A9A1yOy7q6^l#PF<@{2-J zQDNaAQwWoDWcVi#Tr?F?Q-aMP#O`4e)4eS~%M=9bKtrX3zJYpCS ztfgPr{cQglX9>eyqR@coM1My41EQfEJ8P@hY)f2^#Rgw?FrA(Zqwl8`Be;J#jFcy3>>dCXenk&5g)U@3V>mM61=3oH5#ZJdt0*xxM5 
zGO~V;_d!@M??ZLe%NI~L_PvIfl$pOdKE>y_YaYNc-^z3Hs#Kd8PXtdAk=CM1yQ zWRhC9iL8EPtAYt(%xl0Tk)VDeC5U{jYWRLCx$^vAv5H8;JcG`KxeNuZp~iJ=a#hww zkRjny!g_QXnI0TkxP!w`o}k+4qQhi77v7=MRVj5oJnU%tTg>4Xs`v(tUfAj?z$qadI3#J1?3~oZ!PMh3d1`M~-#c?!0 z&}gc3gLDWQq>>CeLOph9 z4YZf57wRJ)TmH}&F+#4`(73dG zwj9_H?G!dBdShEdax0dwpTLzR!gc{_VxyAm7XlT5m2HX_b%vrs&`q$gM@6Z1s+o zK-W@PI4cW3>b1$}if<}vuZ&;(aG*9CMN=p2VOw(If4L$AgwuLka-DFxBJjc^OV1O} zPRROJxox+s2b{?#2scI%Y7}7$>@6|?!j_7fXVd4u#t4&Prfl1#_=ex8Bp(9*;)Hh8 z|1!i1#3;VeQF29Ha?EO%9H{FjC+~I&$7ErTqPHj4kMoVNRn$%!I{1_``JB+K2>S!I zkqY{#HMs$D>UhuL5BOsh{WQe*gre_Mz@BA_utV5FGOHw?COYoC_krK&Cx8)9%w=b?=R)ayc`U#q3!0PsY96uKbfIkMePm!#y zKz|XJm_!k)!KE)F3LhknK%9spXoH;umnMd#ick-Z3bk@Y?eE>!_~bxsf{fWjZ?!|b z?cnl-irR5$XF3A47I4hoil)>@R-Ag|>43i{q~e5PKVc6Ddual=GVd>Q0s$-Ls~Y=B z`xd8Q)E+nL;Q2sJf2w_}Y_FB=yUiX6Y=sOchu1Gh!2%NmJ5teGjnwn6Z~NvBRsnUc zeK)cx0@{q8oO;Jc5G7PXt+pc(&6q;MvzIlsXx%64yH^I*vbt&)yfmzPT`Ib!UTCNK z&RqS<;qLX3@n9{+Dwx$918R}&_%3v~RTk=Ci9oAQQ@<@&gu{w&L_C-md4*;H^ZGl5 z3+PyT0DM88g5b1-))NyUD~zD3bLC9XqE4*q%Hp+HSfOg8n39UP|~dD$s! 
z!$2KXQQKbj_aT8=E9TAyd%eBYDI6kF^C!KzQ8*p&_osothEFUpf-y(AGUX@qe;^P= z{5dJN?T}-)$ac&mJ1T1a)NxHZIKMbh(*qUOwe5-BW8X&I`_HSE{3}o^VhpZAUE7e* zX5S;UN2V=%;hgYgz~2}Bya&)admBmJhTqBRg27SsVY#{S7@W@Hn!c~iiIf#^Z$eeD++1;3;3h3G_au* z>$kTP)0z$qzko{E1bIwvN45sV?gwWQ(uIEn{G!l+d8r%*qqgm_0sD3$`IY#X@qs{J zEC--M(|5P+j@?QkRa;TJaKXg3Ky8$;6@z#g<4PO2g651odCSMtmXnIm#F(-N&2NwV zXvdywSS3O;YkVD8g&O{_^Qjgv_f6D7VwI%dOcv4sLM>L}M=hvzn4TGfDs1}XIGj<9 z3e%LJKZofbYd<)6AaPFSJ)fu_w#3^n`^eR>MD_NXJ|31@vK2%I9w3`-8Jv{J)QG1^VUxKKcNV z{(pg(5PyV!PLVBQ-^@5!b{VxFQcup{j|97Q^eUR z?N78TuB?kwaNfNYKx~E3OOpCYNCsVNLlUK~4@sjpa_b!tCZ8(ky*42r>(Nd~ zxUA>GsDWb$R0TR=7IAw=zJ+w!Q@Y8rful$qIt(d?0&mI^YTb0>B?xh9g*esWkVy?R z4^r&`#I~w>J<^7#cAWju2_|O6B$uvigHfuUbZ*SNRy8dsR&7LC*pyIxNuD=kulwmZ zHLyPlr*ki&YE=lZ;}m%vZFzB!X+O&Acc*=iJ&`Jo+vye#|J%e+H8wh{p#z#>ILSqe zx4LxLu09R+*h{;_FWvgtFrIbtv5R{!4B`&OrgoUn#D%5Up|Ja8Zhd6M?Kna9dRE4{ z0*9h-F#aXt6MVx*z@DahM@VFjd+kfyPHCi*H~mYsa{swIdE5Ey7Kh-iIoF2x%TCwWI z?)*b5vb;*04U7~@d*aZsxy!@;`^R(IN#((R`<2x8S^;J`Rv+~**{*;KB6pY#F69PJx2~wQIfU&cUwLM_jpG)f;F) z+8c1e!q@jUd`^dTVCVTU(VlXL zKkY=>Zdiaf+Yu7~=*P{BkbG4>X)%dabm51rv%S8~1jW9-yK%X~;IG$11N%@E&d^M? 
z3JxydM37@9$3jjdIZ@<9lM_QuEIB>M=}Ar;IlahXrHB-=k`qr(A95}uCxM*4kPP8vD4kaH_JqsbXV&TZt3H4z(YE&a$8 z=Y78ST%y2Du66zv&e-&|`EQAt3>q5DseSV~E$S1T{Xl=N&#XLuK4if^%R4)s6b*DClL z5ge*tr26r(H#k(kNcD?Ue;|g+Y%r?tI)A}=(Vq{{c~K_JSCI=?Fq-tG_)J)@GGWEa zbl}NW>oTX(__#QK*ZPV3jiEbC-ohl+XZeEgRUv4cx2Sq%l5q&+N;%?6+cgj85C>Ya zvwVfaaAbJ{R@7lIQ`7ad(>s~9n2fp)Pzg9T!DXhBt7r|v9gTx@WJK0v8HrdFa8|PE zNmts}#Ruq&Bve+GaLAp4rRo%$r^2sUHvbvrcg`8J6(<{eO>Wh?aj1!C&YLkjq^^nD&9vEyoCe2xrsQufu;GW zg97H@8B#gc)hgTxdl^?JmMTypmcZ_?82&RbT4{8@P}b{ZeWw#U8ogN$=nVn?z(7r3 z>}&K!*rl*U8qn9aqlcIkYs2Qy22Lqcfvwy*2 zJ?w||&lUS7CHbJNf96cyDBF+7!Y=)!uvZqo!VNPE?b7t0opkhE>#FErX4XX|;F4kn z$%=k8l=>0>!myn39i5{e4(4qqxr9~tP_Xtq)>wX=mx^P&uJM$mAAt^aYQJE@0U0E^E%+jE zA)=i}dS7!f4HdryxNhNCRthezoe7?60Je90Qs@{D1J~iw$0v23++Ejsa>vPxVr0^d z(KEsI$jwQ-3f0>EXa-BC)4I-Mx~0Ivp;LxgGm{sgr3>T>T6zThC@r0VqqTGkff0`+ zKT=D_ad35pMN5w+zo(WSLw*k}J(m1fExiZ%Fd&Kx0%I!AF8HR(5U>+{hvNLp7#f5SdS9}APRsBGhykRoDfes8t z&`n{VlTC(MX+PrvV(|ew2_5DV>Rc(OT2_`)P^j zT;JAW*ms<&5k10&uA$1WV!OzYf0o@%#-MWPUjzr!CJStGP?O$41jqTLXGo-HNTg>- zq-RK^XGo-HNTg>-q-RK^XGo-HNTg>-q-RK^XGo-HNTg>-q-RK^XGo-HNTg>-q-RK^ zXGo-HNTg>-q-RK^XGo-HNTg>-q-RK^XGo-HNTg>-H{kdvy$F|2d%=GMesB22ida3p zM6ABUqq(J6_@(eg088eH)ek?4z~nj?@#y_YkXwk*2o~Mn+`xuwo0S)*Pvqq3_AIgdt8_!4Lr#E2SQ?%{?T- zX`3;`Owm}1TAVh|kVx6442hC~7wxo784}~PM5ug^rL3N=?Pas-C}jw5SleSLva7v0}?1GQZP=3m9WmK*E{v?PJOSex6rE%Fz9WC zP+I9y^C)rQDd=V`376s$ouk{diA9R`@FR>x+8xD;M*Ri;Xm@xNt)x^-pXbofQyMx- zLqBQgCN2F5hxSC7L!({-Pqg#}k~D@+d9PJ04v2EtNnILXN%CzBkN z92YsalXC|tiE-+fRt)vhwd`PU6gsEjUEtt-|^DLxE z%HHuf=En|T*R6PWByJ||Tlq1{(%MV17j6CkC7V1)Wv}NcwkJ@9-7argtqnU8x(3R| zKSnurS1$dGYvx^x3XKE>=ftQDSwb zpk)Z~BD9PMcxEjFvY2VnGA!^SwTwu3QCbFJMr#?-@M5$K+``X{)iN-8GJ9wlJ>d1! 
zGJ3*`(=y`V_0lpRnwh<|49I?_sAb^FaHdtuu)>SiGUDO&(K7nLyG+Zt3|@kkkpQo+ zmeChpKP>~d-!l7a8U5i6&@yljE%S0M<8pWdwTyxA25A|C;3aAqiSVw_GOmC(Sj!j; zZ-|yL#P0tf3ETvy&wNFP>SKQVS}-+l2B^D!55Z0B51~%%2PYu`j=L|;%^m*p(GLI7 zD2Hj2tRIp6=OP^C2hWjVe%LXmxcEuBghI>c_=PRTz|kl9k#;*7MJi- zdg_#+CRiR2ZcWz`qsI9XKZdm64&}a#)WoY~-vdI1mT1lZm~IJVP2ZQ9nsAlV_drCZ zmH=?V$CzCK-jSIaccomp)tsrtMRoWq1D4A65n`1eiHgq=qxe)9skX@eGtsjDaFo-u z#i{R<{bwUU%|5yOyK~NA+d=PU6zD@1lT-g1Wx4_fLD$deX!bRjr_NF;5l7V#3|#n; zUH*bMs&vNi)2mjYO`qHn4^7>(DJ#Hv*o)pRA{WB5o)ByQ&PHNlPRujm^B zScs|u~ z%4eu^1FMYi7As`g5UanG%TEkqcmdJ10SUW+h9(nYyrN%l*e=ktUeO&Lw^yhTQV@Bz zjmY*nNkx3RFA)<%OZkr=Z!@YCU7LXF2Tl7>=?FeS zRH3GyVU`=-4`)$~icrzcs-t0et3mKr)X^H1ZzVd~jiOI=qoWoHZxS5~QRs+%>An#+ zp_B`RZuw-Xdc=~cW%4sBvZ10M0SO2+a=pRqj!RqQi-^^KSOGlZ4Joq86nb8LsUiFnwk@RO4g5~OezX> z|18^@W!q+?ZIo>|JkEj4z^C!&wCw=`t~ubUhZ96lW#tMIZJQmb1<~z_?F)zP3n~_h zMl)qyhg6<>q1ziLQ3YqPPgZO{E4I%SWIFA%9dsfFWH)oDB8L~~&>u$^!K^8)TsI&ELewx7|m^R~T;Z7
(HR?AEZ~8q ztu*UYIc#kXupEMf*$e%*T(+Hc*v>*NVba;8jyaAoB`#Wuy#|ROjMC@75KIuuI82Xa zOsYmi(`a5wQ6|^XD3}$X^V7!ANU z0W}^)pdgZFpHq;(5tM%;Y9DQa=)+HlK*~|17&!ZwjA8~ru0*_VqezWPs@8-iI8s;b zCl8dCE2*o#LpXKm_bg%U0p_hZ2v12}dI%oTTZJriy*yP5!bSgK?q1PMIAZk-n%I{e zW|%qB!qmkr1W#pObVf2wnW^PRSu#W|RXfI#*Ag#@sGy?70F6XKQ?XhUMQHb!9ol0S zhc+ux(e8Q^I=%uN>740z*`dCuflK0E=t%@Tm z7GamZo9)TuQ?;VF=s{@kUE8oEvTeLb@66D3N7{p3hZm}_(fME9God`P%}QjOmB=k6NfsZvKy%avQ$@F>fdxGid#&!(bE!vGud>orFz8OpZ9Z({$VPhLI$X@!B z09fb>V$~?Lf>GI zPa*UUHu@BTZiU@Gg`n}HFDS(JqXLDX_oF)~B*M4`cF&M7c4kNzHP{fN2YTVb`-E)o zj2!@20e1Ak)eh<>C?4NX4_RgXEOj0fwlZmutTzJ2wvE6+{W;P=eLKn^UC%8gD*6_h zx??GaF=VeGQzt;t3}LVn_Y@H$h0!eyaq6E@EYf{z0Zcm!Pr@MAqB62UiNNS&4MVIK z6&XjAnf0v%MCI7N69r%mYNLLjN<6Ia87LI zvX(i|sbxOu)N&t_wMXYWwaE}|umA~=Zx|GS1&af?ga9_QZmBl{*wMNr0Re1jJI@ys zOE!j?F+A|u-h zvHIyI%#SuBNRu^!uLDYRHG*#-NE0@K|3Z)^Z3LkvJxvogf^Q*6lQ)8IBS>>Mf-4cE z2^>M_K~K{Jj^H~8()5ksy9mc@MR7mkWkmZF z={*Ed6Wkfn3T!@o+XPw;F%?1K>;q>6*+_@ecrGJ$;0_^EEM0nyEAX9#tv4B|Fk(WI zR?~K+i3ZJzjMM@vy-PZ}zoHdLuV#%*}ux)eLwo+fQ&~e4KZJbs%`cL%ZseVXrTh{7#uj3xOqfi{f8$y_r!p4G4h!4dt~sBxqY184?xb<)vSQT?lFIIK`o zAH#BJjE2{h)FL`V8J@rfPU<~0Y#{4Xq<=Y5XIT(Q=tG&_R8k#e4y9a}tCZC2D8!V$ zP%Cnzx0Td;Fkp<~vl5t?{b^ug&T^y{k+sxf3>8P}W3)2yQ36B^5;kr|VBBJmD5)yO z2- z(qs??=4&$K64Rz8vxWOTO=ipXYE5Q~_WPR5mh5UxW()QjO%}T?0H%pJ0SI1Q4J|Xh z$W)vD2%cg*C3rk|O7YCY^C+Ij@XW{aIG!i)l;Qauo&|WGG~E|EzX*MAho9%t$0_!> zbllpyfQw!D>dST-T*D&&vB@~k$wv5T9(Iu4J1g|UBniq?9B^19I8f=};{6V|WT3}6 z&n!Bwd5m6#)Tqe#m)YBPhGqQK{henw9py+ZzR@_lQSreJNBH>&RKwx)T?`L0be-Rv zXXiIAVIzA%uS}W+#hlLfzFAGWIaYoZiuxn(!|wY@5=w7@H3+XFF_e#ha=UNz=nox8 zz)N}dqc9O?P(J;T0n^*`FN3R*xz@tYJ?QQL{<hwg+(bSFfjJ0TL?36ba;!hiNz4e0w{-FLJ#UT%Ex<)R~L(f~To1G?;uFxwmV zBhIPbmGqX^C9X(3P4LhG?{Qb!9y*ckXqlS@u_77%0y_4OVI1}9P zE*<@{8*jm@zeRWRYZJIXC`oj6q6)Y$!RomDHUh6J9uEq@!bHmsrd?@Y6(4YWC5azS zhYHUUF1S<9Uh*2nU8r#8;_&m*Bz7=?n(K;4KDPrQ%eSTxwYt2|wZrQ?)UF87r2v=C zp!l!`y_9;O<;Kwckd0Zsl?=f*IvTB{APS_rAz#1454ev7$9=;OxI-v7T%g$?<^$~FO~Hy)bU5p| 
z45zU&%}JJeUk|hCb^%HDg1tpAzfXsH~+|9Z=Ocq7!w^~|8wZHe3A#?7upG$^^WX^#;{)$cxN#6dzFhcJ(|I{UKVTHSdZ(S+h z@YCwnm8b$UGA@g4vGb< zgJOBoR9j^!2bttoaDO59E4j}G2#j65a0O>p>1(tH!ckt8rHQBS=RR#g2u}MC@(*yI zb{-U`Z3p?Z=OCXp9OTn(gM8X*kWc#z@@bPndnKdZXK}~nqJ2{Md@>PY;3s^%&IZ<( zBNz;RkHrRkYl6K2dWq#p`i2#y=v!eWz6tBRWo-G0DVp2Tp>HogaT5ZL@{?1v1Pf}7 zKx^gFde!$t5mrLqdb$f%NtqEVWU=%jwXTvTCKId$s_#9Tbi`o_Y~I7%cFQn7^yM*^V_g~;Xz1~jnKXRXa+Z*ve@%U2i|C#%ApO)}n z;{MCr{|op3%Kg7_e;M~*;r`#b{}1jj=l-kQe~tV9^7u#GU&noRN{aOLJpKvuA)FZY z5*@-zbOjXD&H~la* zXAC*Fku%nW8Qc#sq#cQ|zsY=^7A(GAnW?MH)KzBcDl>JJnYzkMU1g@OGE-NXsjJM? zRc7icGj)}@tZSiopGw3KHPgIF{+Ha}%l&=a|BCxxbN?Ief6M(Q?(gURciiXeo0+X| zUF#FRY#OHDcGLgJD0vS%UBI=^CLBzQPgA9M%Y&}#>nHHC1fAGryYz!9DKL0zyT+~K zJB`8MMutbe~;&H}MS+$Ei(%+qay3570AD z#m84@4ZhtM7c@R4qvB&Y1o_wjM@`w}PCGg;8W&}+gZ%L?!q9q5n`h8`I5^5EoAeWc z2bb<3@$ZI-O>bqb6}N8bVr>lY*+=!L!$Z}F?(nL(9ej_9kM|G--7f3W@k`5m+HHl& zBiAJvJ!tJHDu2bRtgW5&#DJdA?cJN{04Li`sBC=VwKD9Ft|!*)$9-SAxQo}7=(|0| z2kb|)^q&Ydy9LcBNgfqe9qr_?M|Yx+0ilv^!Xj zl(j`DfaVLuLQ9H;mJ|yuDHd8%EVQJ+?g)pL6bmgW7MiClG*4M*p0dz9W$_sE&zMG( zR{Gvr^1AaMIR6{vJJ8c`7d_t^a# z@b=X?7V9SeFiXcpk!?Ck$ViQg&cIy(L5|rB$dsrESm&c7oH09LJ$6T#VK9$2J7Ydo zV!kxqJ*z1V;N3G%k{jy#&i&!Ms5A9Sa`(98(tF*fP?ygdeg}V1HnDsP8qf$g*&p zf|cQSs24Z?3pvB)qhICERqu51Cz^%-Ni{0yL!r6@*!Q(6X3_C^Chn;{wR+aO5$9t2|$q%96!krk_Gk3*TK z(mOy9zFGMAEPeQveO8;lK+@9Nut&IrT36aPCBHMiYZROVG49yJE5m2u-6?p}tKH!e z(>KvjqqlwVJ|sr;jr_g_eFJd;O=mDekFeuCuj;b6B!{i6H$He)7MJX>jqZ(CLLyYW z?08^>;8~239*04J3szX#Z3(@bpTK?l|8S)VlW`Kw$jRhfNzPT|Fk|#k3jKzhtH~Kg zP6|2KkaI0L!^ydhoa@QCfgBq-c5+h5xsjaz&-y#^I;epE)a@g?8%|>(Y|RMScf-aD z6q|OuZ9*@b(7Pt2W8!=r@0+CHmK#mHE5sBW0g5TOZKptd*5Z2FI=YUwf-b7Pi9jQS z{ww-ONIh*66feRwpB6rR4^m1 zpf3isI?c23{vEncHqVbikFS!=kHkR|Ry)m)SQP};%I4V#Kv?HA&mW)w!|ycb;c5$d z(P{o|5^=a5-MI|;B%puhG~YRlsAvFiIe^0f+$Eb`HdNUto8=Lh5WkYmca8#1lhZ7Z z#;c6d0f;ClZlAf>>#ka%$1M!KJ=xk#WEJd+u6?oFdT{qdaSe_dCq)Flv&Poo_O)4D zlR;Op$4AM5{Z<4fMazNj1~_B4Ae<2c=Yd!`@O_Lkw$UjrZ@_KrHIsS(!|IIPi!{WS z@9d+*(h2Z*x}`m-HxgMkQUAR1pgC}GKNYFEfdou>;nKo}mqMkVdZ_BC} 
z1?&;NzLX%6+P@$6G{Z=d=*3v;P-@H*DQf4T}_RYcHQZaQIi~wlR*5WJ32w!=i zm6_40Fd&Tp63Re}*r(}Sz6Def8TKEiiu4Dt@ig9%!=EO(QjP|seL`tV`Fcs)XDE4)HOLJ~LW2&|vF&br`>f&+)?;&)a41V?#yKyIie>n+!Woy25rU6M z;Zr67m(WgKgGqO-OI&##<`P=X{XqLh70$Ya4fG-E`YdtfPM5Hk3K3W0#sW$}<{CQP zX88d(?*z^e&I8guE3FjS;EGl zjD7k+eEDS{sxiL$GSDr2jW)TE=MG&n{UkIId_We1U=}vOcwH+& z_D&igV{VeY3zD4P@shqh&i6w zyIo0cS7Mvk>-J3p<=FPXu_%38;D?0Rwm@?nJ_LyI+&d8LgJ~X3ABNqmexuWyF&x7L zgwyU;FR_3THhf>fjZZ89rs7L}B{8x&V+6&7211D=F<^g}DSSU}=vR=3$|KqA51;JD zG%l{et(eK7C2InvolJ{Z{b*F+kTsTG)Xc=3(=b_v%+6HDCELfNh@ewe#V$o8LHS0U9S+p8% zae|kaD6QU1D<;rk-pmAe!@ZgGDQ25DGZ7vZE;#$pxU zdg9l1&a=ztL3173=B;~$ws}>OA2iaQ44qJAe3BenJ-kI-W&BkYq|Bm}AkgZnq*i$J zlfC#8TUF9eEC`+<+|I)>2%ke(^{q1s#|?&+KN0;fN*`kOH#b?Z8-rIB`vx)FbeNRf)xCMsZD9 z7MGvFeZ(?}Ivsav7%JT|!v`fkh2~+VWlN65mnCu3dU5A_R6W?)|Bulm3XO$2yG*X! zkN)3}{tx^g_Ra>r>gvA#ToNFO!5b7bC`zo?HmO>33+<9hYp&kF@5=8=T4|*n)^t)O z9c>u~p&OROBsF&-m9_j=x4AWKZAbrqH`(S|RK#2!LeL6{_<+w56>kUv;uD~d|NC>! zxi>e6|E*pBo85X}$?tofAHL`5d%ov;z9*7@IO=qv>-e@v$kJBW13}tN=x3U}*_@8z zY7QxJ)gvs6uGXZi#y2}(LzF;A?A3!XiIjQ(_ZXXAq7%GS|0y9+Q>3F^KC;pT(lZmv z2wf+^#UTO)Th!wHJ-Cs$-n!|Ma_V%L`e!2di>!$}pf93OiqgCWbG+TrVg5c)OlMi3&>^a_<8T>PPQ(N&KrD7P|Em-cs3G_+FXkoyo? 
zjHp%-MfDTwOns5dv6ZE2C^LZo9LTDgk@4A+dj7j7tXn*=kL+C4>PwaH4RMN8|Mj5^ z!OfA>4~d`7aA3{}Q80;0Wuw4U;$J9TR?|yMi|Wrs5eS=f{lD+G*{Zfej?9MeXIOvNP%@uq092f~D7L#pVpMyH zeq<;m*BME0WWsZ9p-W{b#(>VA6l-e<#;7%xVyw2ta*Wf_-c~{M8Le|eUhA?DGmK5* zav7AwOd-@J;aj7Uh+CtQxWY>gR1keu2D|qO-Dd3gWk)r7Z8P}6OJCOG0~#$^Xx9W= zB?jp76(_{J!FS2fNYR5(TT=)13{MuSC4L2oRYmuQ=4m<_T=FrafoH!q73V4C#O(%i z1E-k`htealpDHoiHgBc!2n_qBMm(6$>hn2$uGD9?J|Bo9_un6p(-OZii_NgGH2G)I z+J;}0#lN!eHTo=Bt!BJ+8G6gHeLd#QKAckn>kVCS$6r~x-~etOtGs3n9Rg-Jz{)Jc z0WQWu*DV3)XSu*3Y>o?Dicu3~%6J-X@C7(&Ixrb9U`kSdu~&2eyi5@SA9DN(KXk6# zAP(w7AI=a0AIT5`rDk<6JB|?e=nx@rL52{x&;uctYUz&`QcToEET)wxb2^ zHtTza97wCMws=`XN~# zw|3aCo$M)PeRKJh*{|yE5+AlnOyIojRVw1?8)M$$AJA|;sO%pu(Gf7nLr>aoGnW{Re z--o}GT~{{iwyv|z7-8q?(B-5u$ES;oHorNbUg5`t7ZLJLrp4jC>5n3 z=^6}hFWWUG%^(z5dY})Jt(s|0Bniw5dg@vbFFf=L)pL|}DZ4q3r2f3+gjIy3TJtw( z&ic1%IrG)wwWJP)oc&O2cyneddIk{)WV^0t0@qKW3C{ZrIc8N{eR-ye=b8>Jv}*i? zOcgB0wXEh}Nw4OAlwQrR9a_!j4Xx&19a_z=bF2CF5*QHjAP(k3ThvK#K(xizWDsG1 z-azf!MO!SeMiB;Iw?+{L->}9CjNdeK!kRcB&O%^ZPCj!YfOFVe#u`K*+nXVPx zczyE;n;_l2?C3Y+Cq1a>Fpuy8OLq5?3Nq+}S`nrfl4mfU*uCUpq|4I?6`gRjN!YyI z&J1Up0u1C3@c2QxSc$u7N1+y`b`Uf8i|W9q5Q7W0{Fo-tHfmu-H5b-bIoqO&>XE^g z?>O5H71cSxmZi>CUs0VKZ26kAEv~2@6>OR7Y&TX^j}Eq6;cQDPs>cLdMEEI(r4`kA z!IqCZ+qWvJ^Mfr~N#ysZ71ag7meZW=&nl` zeY>LiIcAtTFo24neEA`zlOa0x=zZ-{tyfh}k(%*%1N^>9$ zsq_28%HNFeM!cnh0gy%K4U<>NqW7BiE=@}cZWmurudp6XJ^gKsbgnNL83EQyE5G(e zgWelAjnlGq@yE5^uhGazK>D>W(6g`H92uxKS{jI)^AYr|wy9#L=2IEzK&_ z3C>SXT%MUHF3&s?H+u6>j~5&M>{~9h59WS2V?YSzoJh&9J^Y zl>+GL1M24pT}gf5y3V)}LZn}9hV`}1P}G*K#TvJdvj$3W0`8ZlYsdX6yU7F+_p1-r zEbf=yk?emQw`}fLI?Rc=Uy_r55BE!%S?<@>%gBqdphO(^%NX0wYx>?0_sfukWD{XITsCa@Qu~+wZjg5$Y;x&?%~$+wU*&P@KUbt0>1evrb+dy_ z4jMuO_ZDaWtzLyQj6TsVS+v%B&r?4i;Y77@uZEMytFa>B1NlJ`M#ph@2|A|SL>yw5q_&8 zvFzeBII{-viS)~f$Br8BnBH+%5$Z5Q$eysnx@SNSHV$o5#K0dxcq5+5$o>ttD}|5! 
z*B-{-ZVuCq-<@a^k>SCw!IHH#5exmjTa&f8X0|4V2!RYgfTMnHYoa{~uD>#F+DIVV zm(7FM_Qvcq#e=_+S?fJK_&ys#e9P}I6!~$d$zwPVUb$u{SHpuh=kNA-I?aQ>pFpv) z{qbRpX(cbp^+@WmZ<0E*uhQ}*L}ra}zu#d}AI6$iBoR3(eR?1AmroEq3jwg#-@~0& z_zVyJt3=4+!Iy~*Yw`wm5Kt8PAw@8ZcpX4fl!X*UIrqsbX2G7+O_W<~Y!eqoypze? zD=u&c!7^tK>n>2Nbh2)zcge6?_oZtM-97BoK{1c7@g0v3uW`D%2p~x0B3qn-#=Viy z+H~3@2g{5%<&jv%p42bDF;ol7j5lpcdvqvkb2zWywt6RE#`8eSUYPMOJ7%!L+8xQ! zuV=SwriScD&TSSmKI?2nkeTC1JlA}V881F9%y=mhL$g)-blx4Ub2^_sT8~odfR?;vNG@gB_h zSehAsd72r|DSet5&ndl(?v!3edrEIy#f;~mo@1`hlIvHg*O4{* zUEqg9S$u$5;Wb>6?T>-t-=k85_-`zqLEM@VhP#gG11p2`-)yf8k$s#<%ylcFnt#3+ zs(GAylntrPp@-SHW>Ula!lP{eFMVSWV&5$CPVgQZQejoZjverg7c2Hi=u7IbEA3#?W#gOc-KH#N3`XBHxXU2R!`(f z-2FhrXg-MCI|{$*@Y7!VS^xXbAVk_f@e^iXl!~Dv05RCR6THif!cl4PO`x=t&HQQ< zxer8FBA#9y-X1xJcQvuAM1zP6XAEr(u5d+u6Yg1Vwe zL|0Bz-zD6esNxPpQRa2Di_*&Tw(vTdIjO z6=mK?t0yFiJV)Cn(s&xpJdwszX;SaW%W3wFG~PlpZ=~@ynt3CQx6-7PemZZb>4l@| zyoF}oNaJlZ^F|tPrI|ObCfg zKslAJ%~V?YbD000yg@bNDaz7O>#eZ_<4>%y2xF@;x(Sl<&Y<8kYWZIn9Cu3tB?X8k z1$0=j#)3}3`@UbqX(-oow~_mGGcIV zS`*_SJ=uAoH2EZxIdR^=>^k{2b)&AC8~1IUas97I;hw83RvoNsS4O6Mu9bai{KiUvFT(+n{Yp z%QYa`TY6yA8I^ z3yo%ndvan!a!*m~?9iyhgVr!sF2%Uy%0-4kk6T^IJtY`(yu^dfc#?H3#dV~0m9fCO zmSYT9R~hrIYenn$kaeuUF)v>B%H{3jkJ95AXAgpjsvE}8IdyOD{stW3GoX+M^^~s; zT*unPNM&)G**@*V8WTK^`t~=}X^g1pomXeprlQn-O-hY#XehDZ-0%^`M=Y^dAp>xK z-cD`D-)r2gEMkqDT(CvY&=b>%vm*U9i4&d`Q!9#3`m9J*Zbe$|HT5^JB2_J7Vwle1 zz(T1u9US1*PLnEkSlX}C#GrMFa+@>oJX=OqQ*&Q0vnthSRWhs-^!TQB&DDa%s^WKz zX)WAn45M59jar!eu2Gn@H2Gb*nDq3X-<5+&%ah+V5|b7vzbk-AOH^SO1|gL)<}LpD zaf{6mJvp@AH16#IFPTY^dAABoNRNPK=8!XMYR+?J&CPqit}vY8*j33skg3%K?a5JT zPqrsC>AOgRVl4=YB`TgpH&DOq6|Bm$Z(`mshW(y&6KWl+VGU!>FSED9WPzz{<$`3X zk)?yMvQ){UMFGou*1odaLk)OIGXlw>Y{aUmVOjf!BP@RMWAW2Yeo;7LdL%k)?D$Ks zf6WApwYS)yX7YBgaz3Z^u4dROnD>WtI}|>#29BkX8QQ{hW@r!7w`TBoi(9`s2sX5~ zDKER|nZ-|wU={zY-Dby7#2&)ia!V(n^us8`wpyby)UWH7oqR`YGO_YUUdtoF=7YGI zj*z~%Zdb)y6l-AOu=6duA_beps{-~F!?mrr^(?? 
zh~F+K4yJ<15U2fOQVoq4kBjbXS$r;8`fan!q83X$7<>DX_&FP5T7gcF8*A*GN8;fP zvA28T=XA%y-Awx|8CMl)(R{lnc1|bDrdbN!?(xp)X2D{mC}9pSCGm0+uOM;e^sBt# ziCb0akpVSu~mwL_nr9Ow$YmZBPes<~$Nqwo; zc54He_SsJZ&9jV+btai+`1Y- z#%m0?)l4mIKj_3ZJmVj9;PifZn=}$XP#VYOZPFV;rWs`P6?@;Lee6|>SdXSmO8SY( zcE&A+`%Aa6w76Q81?-R?-U;4mgR7Q`ut}jA(F^x_P+4s=%FPnz>&?%$Yl=h$_4#y) zwyWkSDBaB=CO71VH;B?v7a`FDvG7i!*D5-O9|~`Z-Q$s&lyqFf#%?aO1thM*(-$ek zH|LAwT1|Mclh^(5MnC)xjuvIn zPy@xnDm*Dnt4YhE3F~cy*X@elEwS(uUN04RDi+@1wcXXB@UGPoll4f&Av_F4ggIr+ zOTBP^Oo-a!B~1INV!a!^@J6q<(}VofuoL3A`wVIzC(32cv*tNo8?=Xm0w2~;dIgIMM&nO9pgLkuMs z#%z1b$HVj}+zt&ukBqz{j(y>Gyd4{2*A_POs@--k zF&@v(kpn0f81}kYW_=iXkCumUcA9$M+5;%hMd6`YVH)Zy1{+d$vmUb4LV6Tk!Adlf ze-tukt~D>nX3CdMZ}cg-#11WaZW&Ln;OWI%uz^ropINERDKdLqnX2?D*QZ3EA`u~? zCOQeOUz)NoBLa67T0aC~RE^Qn;o1BGCc6iOC`-IFFm`1;u%9av!RCAM(V0-U3Gpz; zqO9-S(b>n*PW_+2$hj>d+7Db_?8?K(<@f3b#Fia!$L#jq!49 zG0O{c(^>0`RV8<_o-&$6?(=4mdnNU0Zs)6}>DsZ%eF`s{Irrl$>@ADje;OU`Y*mt$ z)PfSRO%@>TkoIlYWR08p$YY2$@!F?9K=(V!ftRLySpa(QRTI418}0kLmn} z(Hov-X!hsXoAZ5s+eesvnevwu`I^4lmM}HUDm9D5eZ4pD{)gqCJNdKTlxwPD_e@^! zne$i7AN4o>lv&`xyZ4XpkG%7Lo@|;NyLZ*4>vLy+cv+txj6Nd0I%`=))6rj}GsFi~s(?=9YiF{ytf6^B&0C zy6$y@eyl4a_XnwOx;|P+T6wRSt^BwJ5KV1_;02on7rrJRI0~=EP{D!fn>BvUyuDPH z;HaluaN+BI&a%Coe)vTsvW8sW^pYFd7T0HoWY_(?3bHaDJ`C@n#0wvjE}h%rfrpg` zi%Y+k>?-w=$ZYp3s}*>V;> zB}ye$E79H!vB0lP8f=dLoHPuE4;viB!+qL}P?jMqzhkQE3bDnuIk*ixE*uU8J8J5l zPTHh-M9aQV8wXD1vZc=Vi)WZI!L+%YS$4KOJEhsqfZ#3oD>Q|K=^;n)|`M?T?wqFQK#rY(^0M;Y_mH3p3SS;bxJ zPx&eo;5?-$!%cbq6h@ApkYRC=PuV;CgbY^~`%nR^{lwM9-jv1k<>$NvU`o8yM;~b- zUg}M$rts_ss>{78pRW+Pm-u`IOR1NcOyC6#t*4py;#rRSD3lkNVYrslJ*MsD!CQ3d zo!B64MM-HZqN{gh(+ghv73eGau41CH=##Z9$lIi{2)T{UCQ`^*G$SC7*EmMukQ?jzT&^u$ek;ZUq?>dEHidT+yzW&(&Z2mkPb;$uy}kT;I$g{2(wW zz_2we=_n4fh(Td=66`mjK(z4o5m!H!s~5OVXjmboqjnpksFKoAdz@J-+CFDKen~5Y zJ7$&H7_#I?BxuPJG2Ls!=xcjl*gPRX`t37d|5?0rMW@j}tO_jp6Y8%%yV4^$ zX~kAq&?SvODzLH~)PFpfjhPVYKVEK@gZfXm*Lb*r6c(=$U#J$6#`d7UJEWSfGM6V= z5$o&G1!f5~)M-5KHSIq3KpxS|<1&{2AyrsSeTS}SE6i=5b*SNm^4JdEZ!XNaqAfQ! 
zak$}yir5Yw@QvnJmITA!tL$3*;9DC4^~LJObWMot!uG<=Bs;>}v8Q!>4*VT;ysY8s zB@U_j!KPyziS@@I&@%M(JtYL~KMfYVR-6Z>YV>FAFHrkNuUOz+e7ng}a&roojvQ?I5jzsm$e`y5ofs|@ z1*Q6=gV4zq1iLRyG+!V>(dvH|x!Zt^n|!GP@FPjdD*OBl>^Vc*Hys_h=+7eeAHUx* z__K*{pUrP)JCN!ox3%B_B8An|&poT$&pj%`*%*$=Fc||xvY%TbTDz%}|3l|4-l1$6 zraBKJ+ZydFsv?7%Bk05b0df*m61n&Q3uu?9d>>Tdo%H({&ADoJwqd#M>6~22hsiFn zFhNO!2}%e|P_kcw=II>G`rfD2ix=Om&Zll4E8|G!Xvx|V!Xed|GSYWfa{j9cZ7E6> z=!-^)!SZT`->2~SfJ+ZCgJ`H{%^;ciV%U7Jvxv2QS!lKqJf16fT+2{H;74Z2cXn3{5fH@|+0hTpb$VoRc)=Zx*1yn|9`W#np@ zfej8Wbg!o*ttTR0XP|6Y$sltqLXYL4vlTqc6QLCTwhJIryab`NloTk2awb;#XWwT7Q);mK_ z0(96@)Y=?+%9)EXKaJUIM{@x%vX0M)f%(iY9-_OZ49G<)H&*6}2kCiPmCat6UuI?A z>$TgGc>!o|=)XOw-IuDfx^meS+7vf1z@c+}d3uHZrS|YerXE_PtvPt}Qt`D1n?uF; z^#8Fesv6!Z>v3hSKW;uhLHWP<+OJ*y5C3+{Pw`c|6BPfeCn)dF5B%EYy)hyr{nqg9 zIvr!`cyynryoa3v8nv)~|Bn9A#nHuYvAp1S_P-y$v0uNur~P)z8|4n_2|C03jK9Bf zdT)LW{9bp-Zxz2N$_zG(DW$=u*+El9v)UTk^?8X-^Oi&mfFnKFOXzR$sXESd6t<~`+2pL-9+D_?;< z3e}lCx9J4EAgjQBfP+vw?EJtx@v?X8GX?&|iRQLN4VePppn1)qQ`a(zSh)uNi2Y$U zKK~AtUBjo|$odAb9xFQ*uiQuZ-pneWxzuqm^|enAA=H=pTU)+(;O%(Xo4;Q9+&o~~ zdDv2s!Rc4n>}KI}pVOf;)c*fAKJT5DUEco?K7Z}0mtH1ZT` zkk~zAJdw29#_TO)?yxQ67Ht_PWwwl~Z0xLUV{Y5r(0rZME%cHv7Cd0f{ak7p?0nS6 z2b+UzTyhwzwqxGIF44}k@ zdq#*PdoUGgU>qnoP*Ch*yrF24(@XBMF(GU@MQ;gJNSx& zDF^!<+~eSYgL@r()xmuZ?sxEjgRePw(81Roe8a&v9em5dw;ep>;9&=kIQWi(+R_VO zM;$!opv0QwZU|psKrICv>0pk7xektUaI}MC9F(ZK;^jM7;NVFP7CJcA!IK>fI%s+u z@lB@zp5ok3br>^K6;df@au8 zu!DOU@hgltR(oNU*h-oJ=6K0IWcjQl73WhpdE(>DANY{y0ZEN|lFo+*M-lGBUU-3C}JP z{9nwogCm3A+aOx%M$#_nE$NPwC!?)?Xa~c-PzUK$muqxLJ9%`x=ZTrOSq$81-V#QYnC5jYR@~ zUvTnFyw~?OOfKoIj2;{0FzJ;7-d6LHy8v+Rktsb0#+Ie(x4N0p;4(J21-_EUt%Jrc z&8@M)UvU@>bI@XAG17yuyPj-V#v`_@8ZECT`h(GJgIjrSgLIRL{rwuHYLCWu8Yoe4 zg=WKh%_Qzvvv^7G0hrJGD20Ba!h{qdB88g* zE|lefvDo8y*$+9pP%Zli5?AC_XU~(}5@#72vrtY?ab58`+99(Pvjv9rxHFH!ZB6<3 zgr7ERh{2Q=dXpqccak)sm1B_dZ7oG3DmQR5D@FQx;{xHU&^A&H9Jv!XmLnsgYES#hM zcitTwY@g1FS6_ioAJ!KB_4Nkv5D+C|r#QAsDoGQ097)wSy~9Gf!mQkjE;&vN83W@2wig0%J*n^0dYtk{N&ExO#bN?0q4XbEj-=(9HEt3Y 
zEt}OgjNd(s-{z@BC0=0o>ub|e2~CHqg120bU(42etLSLK>fZl9CrryP&73eb8)MX* zFts`}l;Efp&Ri~;LbgeL>iCB1l?zYBo{aeI3wU1-u~*H23Sjh#KS%{)iQNP9hSi__ z3i8u%wJv*}Swj;A;q}YrS{#L|wI9(4B%H;~DlXRqbAt1xy$DqYo4+C~x^3!xnqR7h z&YV`-GpEG=AN*Ys$>1+)j23^MGYfydGnX^ssxp;GBar&1sCyOv28mif4`F!^GOsc{ zg+n{5rx5(*Ja;_)vbLtj*qXj=5y*m(dg5b42y|Q0K5~f%-iVjI9c&gQm`32e$06`m zi@?_75AA}th|Qij{PvSfXH75U)KB&jM-2b+(ya5k@wN2t4c_2Zo&Rp({C6;C(bdEH z1BY?H-Sa#19;ZA1eMZ&JoZbbS?_$~430|;8L@~48RH2oXKsb-KFC5ISWhA+8l(vIG zBP*9x)AOilhJWt0|JT+x(*6y$?0SRL)_L#K$JJT=%bZ&N3H7~KmSG2%zwvpc8+Y$> zW@*-w;W^p!$jp8>w{ZAgcSWu?7(+*-&A9IlF%vXrr}%OVW*_4OzdE(Q{rB_ z!t6Jl+@O6DSQOBbNK@A_AJRp6Nd|xA`YKRj4oQt9*RIslALS+`3x6qh5IDI*?$FQS zlgl-j%?#+YYt}~De@hyTM!xCw)n5pdOxbm_KQrhx$ic4>y$BZI5p2^ z%atOC93v-oBvo=gZOOeIQhDF?p$a4R9Ebtk2+~D9_sS$ov`fk<@bE+5B|;zY*<@G4g)a;d`7;+TboXf zx3?t2xJG$MNQ#{UMkT?J?J%wz7@FGF4|WHe-!0I260*FOtpbH8ABl4$`_I-&WTJ$7 zV{7ku`K3u;pE2hFUN&P-A$1yON~(ALz_{?K_r-cv;f{!$2(`(`|Fsr|S{H;Sr!OrP zr|&AAEkcgHL6Hqwit#LKEXQ~nsY)})B+!rXZ0Rpj@qIE1-qmRXl5G;(Hd_cXqc{%| zDX(>2=*$F<4i}Nqco~W@oIyHL&CMk#=txAlR@se>RSYuGOB_%}Y952K@Bw8MgHx33 zKQk6aR$OnQosFo^@B;82Gmi_*riPY)mhK5Nv#Ssn51@*lr2l4%CS_4&FALWF1_a%4Na8e!lA;6phjnkikF9L z@E4taK22b62GKwuq{|{|GKgZQXd5k0L5hfgd?9au^(pb4yqd(@ej9BFQ{xeYz^9W3 zBdsf^zjnn?p(BZnCQ9&^Psh{IN)(`vx+_{df2S>9say}>niVZVG(Vpq&%PAq|Z zsTKNWw0{J&&ZmK)L*X0krT`kbv# zkv?bXbDBPr^vTobOnt`dbA~>J`b^a4%lcfePnAB`=`&ZKuj(^fpLzOxNuO)=@$@l@ z_U6d+6@83m%UYSfpwBhP`^-gVj@#aYi)UyXtQHy!HqS7Q)_QF5h2A|EcX)U0e&!$V z^(t4GrVHK@BFN%Xy?d%6pX^!j)!Txz!sjW2Ew|ZwsTNj=cZLOUoC(SLb-kCDi8s9TyoNtU1t6Jw53zr=P@`5nVRyeW(-i6h! 
zB-ccQ@;eTZv68K0&={*pqQNDRh?Hn|veZT_wX7r)j6Q|Xg>|o>Cx4*%1mf6a=pp2g zIzbqW>fEG*8ywu|;3fw*JJ{vm76-c>-0Gk<-b(KwK?vwVxJ|W-{}NVsD-L3bCmj59 z2iqK6;b6Oi-*xa-2Uj}y7Y=F*sPw+?;3@~5q*cgCT7`b#+;4O6uN?eq2mi*w+a3H{ z2X*AF^nd7Jhl4+I@W&4R#KAip{HcRKbMWsCq=#shtJCLe`uwpz3-tNAKHt#ioBAx& z=TG!mq|Xid)a$cYpBwdAqR&!&zNOEf>hov%EYs&EeZH;Fa+SZqm7~$Yn;mR&(2(MU zYjO7PICzU7+cwpQv`X~(pvvQ=xnJ5tWbIZ^IaI7#pc=*8Y=Z>kmE?ez4;?nE82&TX z=_9>-rULH0S>K%DzPZDFM-BHKJ=}Lp+IP#0BAf5@Jd!SQN#|Q{tR~|E8#(QJ(s19x z;l5*s`<^`9H<5@7BZ@3l2+ugV6U?G#?fmv z<80*6rm}Ytk6K*8wtZGn+IbPn$Q3Z1XXRyFSnp0YalFdlje(89?{`8l8wD$w`7w)M z%KI8-kE_e%kO=?G3v1j3D+(5k{edwC?dA1ZC1Up3yX-Yy+w|glawwG!m-0BcSzO7Z z=tcL?Nr#+1j8mT3CTBvO8f)W5AIId=sgf>I5c}R~L~6f0C-{r4UhpT~i!N%vd}P{O z)_!>)Z9W&@o<$`*9nH&8N8Vlht9ubO85{`WkTa0acjQ10_YEY9U#v?l^iOV@4xC}| zi4L4WV9i|2e^Ct%EU`RuGar zU2rM&81;*fidq!6wP@5|8#9~SB8uoC+s*z!e&yc%7jm}3>C51KRYD^8F4x6}bF%pn zrj7{n?n-^`Ji0{htAh{KpH`(ilrNjB5$O5YmHGp`1|IXaY*s@KPb?&EX4ZN>l-|hq zPw$nGO>d*pyMI{W-aoy6CX&}XF!*r>s;Z>M1k)aZ|k8l-%RqmJ=j5VW>Qd-Z%an z6Mu-e1qN${!5TPZVoTSUM96AS>Ov)g6Ps;u=jC(;PKI~J-Ns$1LTs_7E(o<{BJST3 z|NhXql+F(AB&COQetfkx^oZbEzQ+-fn~3~}7gbJF5-3{9FN^0dYn zFN5CI5wghE%OG}jT4Bu0q9~=eW<%zdTeBf^ORd?ExtupDZp8=uv@R{5uRA$h%+Y=` zSKH5A?M8F8InC8xHCJ2LT zuk-}7jsV_DeOE@#tBVJNEq4iF^r<4q|6}#z_~Iyv+YFs3w--FQ8Bl0x442i9-B=!PF%SmgyC=L*@{bU?on!Gx*2<@+QO14@75Bi{PW z*Zi_(;<=x#j;&2Mkza`f##8WmVfbhq!~SjBbIhAa*`;WKdk3fqoEIuFlZ|;UrUwCN zMqeysNA3YT7(eQ)eb#!hvyNc=6L^}l7Fp{}&Z=!czTb9MaTz2-slB!aveGgqle<%G zdea-mK(X<<+w9jhqOa8L_9Dw02o_o1z1i6OSI3dsXvOZvfXi_io_T*tL)D0B#4C0M`T01nL=+ z`M^7Y(}52IF9M2t)DILt>Uqf8rIp?T>Wci9dzFWREzNjvZD(-tx4j3>s=4E;8@_SX z)%2Jp;}(D2d!VD_%=yzV>VMv=Ja*%kxy$c(e}4On!B}Rljg@gh-!hlDGPNaE*5hs2 zpsuSiFMhpgoHF>Jt8B|`Nh*6y8eSQDLCb-PCi;TiiRxTG(Faqyq_sN7no3)%M_N;P zYjptbbw#xGt0P*k4z}KOvQ^SvJ=Q*$TFvpvP8%1!axxJY`7@b_n{OUVy_!YX_R7}y z>8)Qs&EC(co*-Di1e`|bl>(|eAvCnd`ITA|Q z0-R}0#6H8Ch&|E9Hhe9S6J{~5E>SNv9ad|5HEX^@n6a|#+XDw1SPZTzR>%+T)j`YT z4ycThK?+F*DI^)BkYtcTl0gbdOvy62^JOZ~=Old$lY6X8C+ic`$1upp$#jZ7r|M(a 
z;uB;tO!L!aGAwn&AUFJS!#+3Mbi-UXymrHS|3Gt6sE0@<%c3_Nd&9st{CvaSH{5>1 z{Qs|FXlc3@W9xsaEcPK+|Noh?)XiquR{MXE8BFj?4*sEos3kD&S3CG+2d{Ck*1@kh z_(u++pvT0U=ipZzyw1Vv9h~nV8a+(-uQ{kk6I32^1s(WxXGg)LasQ@+3mrrUvT;Y> znt`b4GqB#l#SY%+Aj)OMkj>if!)F%}Nv65qa_~q%f8bz+gOv_`%E5~r zoaW%C9sG=gmpEAEV8p>o9lXrJsDsrGiutGd&T!CkFy`Rp4$gGYcQEeY6%NjFu*Sj9 zI`}yUuXJ#>gP(Ws3l2*9PvsEZCn&m4P;{T5=srQweS)I<1V#4=itZB>-6<%#Q_yVJ zfTBBP7u_i+x>Hber=aLg!Bz)F_sTB1S5S1Xpy*yf(Y=DAdj&=J3N|<>x?6V9-GZXK z1v?x}Iw-nZ?wx}00InE?_4mXNDEMzn{8(}JVdta&N%L#}08D}3_b#E}ziscLZnYm| z(mLGvpGQxmxsJ!Hg&%&N;c*UjJ_?l(g5e;wY3GRG_mb6|@ST4t4^@1xvmuFwtsb~8 z9CUK77T)TYb$S!G;thw3t-VfObFL`%&cB>P$yvo4hwIbQy2AU9UgAwJ@eacBli3j) zjaM8#6a}x^>dtbU>Kt<>v5@7UfoO2^MqAaO_MzI~xD_FZB?OB`28%A|aG^Q$BUy3& zC!paz`Qn(aFTr}xSLCKQY7WK|hf=SMKv7uYCF>=BD+?S?^re1<*RYe3g*q9jb)^Ud zi>k>oZt`m+V!9ZpEv-gHp!3Nt$%*p6o=i@Z`7GRZ08`@KpXTEYd~taDynoq7*c8u6 zSI5-!DCc1(%IW3zaeo;1VOel77(oOhqr5LQxepFb8>bsHVRG7InJbITsmu-bQ0B{q z#e*~%ufQDPXyyuIKXPpG*;)L~%HH5DpTJ{rzE_zFHX#W8LD#4AL0=_UMAP!!9Hi8; zi8xeSUxxADI)!1kkvH%ws#l^}@*I6Wq|dqfg!DO2pAYNv5q(PaDbwep`kb%N1^Qg5 z&&Tw+NS|_j!uot%pDFrG)#nrXd{Uo3(5FJ5N_{@1&&B#o)92Ird`6#3xT~^BGe?$` z$q>C@&r5h6x@m8fCY7R5e_p6bMH5QVsBajv&Wi2i!AYm>(6M!wxI?4+t2AK5r{>A$ z0nSgkuf)mb!IU;DvEIF$zn*5a$a6l-{9z6%PRDEDvDmsaYjk)T$DKE))7wQp?~|VB zYtq}YNetBCiM{uHIGvt^vEC=WCF%4??%$o>rSG5KM@)J{^Fl^FbCuczRrpt*_5*;Kqw83;k1d0arIMdywz& zVS;}zsf(QrMDoYI7JkI9e2vS(UJmk}IO*n=c27EP&@Rjy$8m~|tvFU`dYnp`BCZQ{ z$gk-MQ%g!}w^UjoO@Vm^CYIPT_)xG#`j*9T54M1V5d$!jVV_H>A1=2ljeFCAv{~me z52O#^d@dcu?n$5A416q$AK_Q>^4PP~8zrVxDOUOBto8+CkqX-CP2?(b>evmXI(n*q zLdQ^xOYmRvE6?)c7mxBPA6v5C+tR6SH*gY%N<#S)P9avb!>RR z6qHImmzt!qH=$#g=sm`n^UUW`qg<8LXUrsVBy~^)L>yTv*3)-<4yK;LCc1=9s)(9s zwwizwZXTmTjlGyM*qJ6(&h}Ra7wXuSdgHRZdS|d@vEty>fh}`|zU3O{YI^L&T*wc$ z_|8cM^Ke=iY^lUa#lKBnOnkJ-pwYjs(k?ZQ+&NKr-Ap=?DnqJ9SeWC&*O)o-NNODJ z%&~tx+8)=6t%dc^oK-A0tSg*#rM0#@YlpRNbJjJ+I(ECUBJ*slP7z5_S~1D(?w-{u zd)KST9)Sk&@1wB6GJF3(_pquIA5oAR1Ff{O+IuH<9sjCqetWY?$27ugsped~7{5kd 
z{3MQjRas!EgE~PDHjDr3wa;Z903Q_wAKW}ws)(4MqpNuBpZJp65$hv6O}`pXznWp1 zc9~ASkEQNxXw>SuDA~ByK~*@Tz-WVXG6*bAhDnE3&aA`Iu5rRsoL=LWsX169HAp<dLE$s8$bpmo0HWIoqVC+R^aEH z>?eXg`&a+eOY_XjIc;Aq(%WvV%iz+l+$mgoIa=4)ueRM-92a)KU?fqvUNVF472ocXN{l6 zy(ytXG{hTH8qNe}NE7Ig5~CS}^G`pqA@wchc4pe|1*D{YT(tybZ?xvPage-cTtc$) ztdDo|rsnMi?eCz_1<6?q3`PeSW$MTk{YkzxRKJ_M79VEcI z)KoVu%356|6XLMG#9?QnrWBc23CmtB9DTloPh9ef)jUl#{+U=??sMzaBctf3-m%*c z{^ZCFAU^4x^P031OBz#)iP~hKBEA3eS`OBab)L*^B?D)BEeGnqG~`k|e7;QG$EAz^ zP)QNXK>q@CX7E-Gr~U`|(3VKYwv@|ATMid?4z^6SLGlPP)&xP;5>e^ASkq@l)Sryv zbSnVeUx+7UpSoezNK*we^4hF!KWEqujCrqoZ{uEjzj$4!R*PqP3uc#HiBgq-+=e^M zDtt&!u@)c(K>?J80Vww|-;S#Bl$YQqG7=DOA%tZE5 z7=bDXbcnp7mCr@)SI&`O^w~(FD-zy@69Eu;X*M^os)Nxti9D6K2NaHXg2P)BhBp}1 z>?l1|{fivT}2Z zpf4RCv{QaVDxc=a>WAhNt4~3bwyY1Ua3gStvY*zNCCogWo{>}dwX8t%K>!u93!RlO zBkNkSZXm3AGkZJ1ZI50N5?LID>X{o&yci8Xgikc_q{d5>LNypqv|2+uR);rMZX?#S z41;Crb}LDal#L!+jET+R$11la9;8rjD5GHXRm!+jqxT1On%#CL{xhf&m2dZYZ9_hNoli&qg|eJ->{Fiy(&CFi73ic<+rS{pHr#JJb_ z=HY8o?{F?fHhejO?nnnRc%Xv=ZQ#|;M;%CLt~EXZ;@pXk8hoLNV>?r%JvwRxert^1 z9cEHf^ED9Wvi97mcwnm6P*hp^GI9z0h%m)nd>CoXo|Y)fCJ(rdb7MF^Gkp1;nlTZ|`B z3(Uzi=ig~MRrGu~1}f-+(CjFyz#Q!AM^R#^_u$0z0y{E#{tC+vpb+XPlw18qOVB)O zIbVhAwC*Li3t4MrU17Khgjhhxg>qIW$LuSHhD=#EzV%{|apUQ3+vVG5hnA*p`aJ3I z^lLk>*NWKepy;XjI5LfuLJ(ijmdhGpvC4dvs}Uz$26{-arad$ zAsU*ab$%-DhMYJ=!t~=jJmyBB#4GTCD6yEtL+f2_lpLqw0ZC(<0grM#=4lG9l8(dT z%i5#osmbSKQ96^_bI-@RfV@1BASHCS0vr~Sj4$#dRNX!e6fO(ZpKm3F6XQeY-)%cY zeJGbUP?7P^6Z%z5;k!_crY@=mD~b}q^a>XH@zrH}welC2Jxha5ik9^fyd+xINv0vk zlRTFCJWN-Vm050oY6i9p^2^CiY`KxM1<|&Ad#Hnq#O0f;a4^Mar=?X>J?VD_gc%Ji{ z6W(km?JOg2%@Ogk%~70W^sk*K?Kf_w!Cw4ETX=6NJ=EkYk9O*KIJlBo9xFwU5t)$Rtv> z!K>?XAxL9tCd5!0U0&Ump){stLJXy`#jD#ql*R>_5JPEfj%7kjjJFSk$fOY|Bgm$7 zBJuXndlQM2ZHm`DkuJ&1_S|vtKyR!uDV)H*iZs3;tVGMYqd9g~h%!{WapPb^69YZ% zSF+hYM#t)r&2}l;?FSwrbys9 z7-MHt2ac(&_u^vavAaVZGBTB^P^rJIG31^$OsqYkyqc1O!JB`^6Bw*qtps7>sC5G> z+}rY2+d8vIBn~hY%|$_LByk`TKGf0~Wmatxk1*3h{i8mwRXN@@(4G%D(EUgZ|8D;jUiH2VWEUON`t0`r6b+}g} 
zS@~3lpNg`s%!!6IzO-&d!UwbmnHzo(tg7VDbY`)m0@O{b7j>DJcrbjBi^kEO@Z;b` zu0%jR4MDV=5oG@GAaXT9<`ZO<4YFW(5V@Kl3kY(X4YF`}5V@Kl3qh0?K<485h_-{> zrskc|a556^X&WB`Po;^s!%wJ?hr&mgj>B&={gx6T7rzU#{ctj#7vPzPXZdi?JmXo8 zXCa;5i=8X~R9o8_#KY7UNkp+_TttR^d4b&uGSzc2l*f z5I!c1*1r}r{e z@~~@Us?|qAK~^)^f7(5x{}QnwYFVYKM)bKQk-HbbV&%f`GZ z*XIg-W@$KxW)Vya%%=tE(*pJ)t>m8;$WIIEZ+Gq@w&edm^&VA+tN)K2{IP>SaqtcY zf9l}R9Q=C+S3CG~2X%HXeEuH??{x4V9Q;QI?{aXBgAx%{y!3^He{ptggyjCO4yG?W z{L)=3Qdm#^fAJ$1`&Br$$5Gf;3+!OW2bGjj@N<`&ExRWNgO!OSrQGxG{&<`>K? zD42DdXSyV7{T*1s!3TB>KFmrst%n1cEPb-*tdcn-%f|(NwW}Z_o=Aa@bAf~`R57Z=H`pUe$1 zU{4DPm)Tu-u*|WJ*vD5%=SgCB>h4F64K{>AoI|0l(HuenC5u~SU(8LtNIK%ph#dFZ z?+7(uM|DB!%qqFtO_Dw3lUkI`m&p}1gFarCqriFLuGBx^ZivKvsSUn@taCyr`%?dm zU8i70B;46ICQ?3qG6Kb8_@9Ni@l5`6FpuW{3e1<4;8y+t*9pS3S)#e~11wmoE}J!? zhRds`)aIr$BiHk}eYU;S@lu;VL1UQ{bs7ZaHnAYLdZE1BQ6r)}~Tm`<1mdSX+;5uy65r)Ebv!+-Z&V7@x7mg&6y+aRJ7c ztZ_cZlr`#vb+0wnVmx4tb1}YQjdL&_vc}mM-?hdXjE4KiYK$=#EK(dXO*P6f>XZi& zUm2?~YUp5`hH<<#R$$bg3D>C@Cs|`T#u9710At7+OEIeP2pht9fi;$3)BwhH62__4 zSd6j48pmUtW{pJ{tE{o`{z2-%OE1y3JIz6}<_r(N{=eljG=C;i$rMM&xjBjcrWZId z?hZC>BmfDi2SgI1F?ATz^jwT;`bc7W9!9l0#zKs0bd1VgEsjxzQgdTeQPtKMRRJ|L zMpdi98bfelm*G?jTnfAZ2y0-vc*YBX;t?+ZiX}WBC=T#EU=^?yC@$_?pp?(d0ZNE# zHn0X*1Dp-?fpdT!a4s+ktOZs9=K-ey=L0K%3xHFBViA`E>wy;lmjX+H%YdOs;yWrn z#uALC_tHQZO}9q61*7TjaH}zzfdEGvqZuJ^y)g=f)ro&JNPPgTk7(LFc!OWvg1G2B zL?7&KJ{9{mt$%zo-S8-rzt)4*L`!|=7B{UMomkzJVc;M2%FkVro*?0@8Rj_ofnn?C zT9)^W{48tZ;B(9}Wp=z^?Q_;Xvc#HnR*h1uKX%qmS)~!r_GUqi06~o!LG@fg^+&-u z4$gOQDNyTYD^Nd~H4gP3EDGq5;WlH8p?+P1WVO{QR^D0FDRuS=2mig*%6_ifmOfpZ%i9azB)<`tx97g8 zN_)(g7Fr#^yo)YjxJ89&ii0qQI?xOhYdThY!`QVYIH8iK|3^R;Z8Y;yI7#y|cO=wc1(xC_aII?yOloDv!j}qtZR=H||kIIl-ctMmkui42~12h7xIJYgvn` zN>Euy10UC*uw+-_@3l9M5ZV&1v(d?zg@$)BI#(5)tBR^rhhObCvw50I9NvZ0)zpO+ z8fdxy!cmRM$&S+0(d$hY>Lj^EQzG|kpQ476-Rhg9H(aFtBT~0Vfty~X|GcUGvswM; z{wjhF>pyX1R?CndFgIo5-9tS{98#$1w^H8LW13r2YjUY<)5a1nvCC^;5o#bM)97!d zz7S*^lxr>^1w=spUaQQneQAnaq!6mzWTS&OJJ{r)I*BJ37+%lB{jlOiQ9TnB&L}uWrYw7Zi-}>R!M8 
zoqX;*ynbcZfx%c{n^*Tx?}U*fyg-s;7nboC*>9o48-bsZHx`kwk4B!Qj4m>oSIBhuh|6N@&fz(y82Lu^5Iqj;Z7l3jtkc| z>!M`;NQ*76uHOcFGurme36aK^m5^Wen$2}z+0vyEpj%;14r9t zoWUce&i$|o77J{N2c8ED23vtGZ8Ih%Q)66+Cso2&U}G$>qiseonHp`}y}GRi`3Mep zRhtURY!7_buj{s*t=F&X5&C`V zkA(CdukmGD)IC0Zq6c4tXna?D$s-eDfk)bAprr69Q-SS!ijqdJqOA7n9#__Cm%1)H z3U>N+&ne>=qq}aKGNvLl*BF|M3C1wxd766k@tlV1r+`LviDQ zr*wJBj0w`~GR3EV(*X~(%@~y)H?g{Hwj9ZL-8Q8a54;eo>rz_uiCEopwnH)*>{ANy zz+D<*hIo*r_E{y zQa1+Ibv@Q=Lp<;xZYHa`19rT2#RAW@RgW-;jMa53T^~81%wY9+kKK@-61uAh5lyb^lYbeHz2 zD5k6rt+~8?>KW+)$3Ra>`Dt>HAv5VyKJz_u&7{*~jT@DYxD&=}{AWymiUp48tzdlO ziGwlvU_5ZdV`M4sKA%T%^%AWsCl`}7<^l{NS47D5WfVGjbOJ%kyg+|u#629tF&=n0 z3kAt{%uwr~_9wBgJSmgx;Y@mmbUoUPVBgN9K1kW==6l`OUgIHoXUDr&_NG_&r0Kkx z7u`7V%D4+SIKksWTit7>4>IbRdY+Fpz9_W%WzTtTEz+2bHNIqBciC7+VlMHzt?4#d z?U%h4=kYMA9t))0f*nt6j8Pvh3%VscoaEGCy&;8C}N`H7c>C@=7^ zTgcfKrzaU2kqO`!^JKmH0CVOM0b?RJ19eK$Y?%nnZo5Anc3uWq{?%`dnmvu$2zwJVHWjqOOZl#81=-16$x4H|?5 z*v)S?4X!nApX$sT*KwHP6g1A_pLj~WC>D4yp6EsN4%1&11<|Pi=ZR=hF7*zm8~~J}R0xz>3{EeKf?x$&ser%emK>>>Cr!KhQdT4CdZc z>-0RirSe;+=SPwQ<04Hj2BV3dWM6JHe`o9T0+~~}tK=FHaV+&IBd&^mn_gx)EUCQfqN za|v*YB9CgFermFRZ0i-g%)|g_oj#%Uy3=f3SY1tl`SxOMs&D9trBJriP)n!dE$RkW{l`XpP=vs^XLrk1T=o}5L)4nN=MW`CnOt|TUn zJeDO8ZTn=~^pUPFr>QF5&}*AMv?e4rwtc+qQp6UYu-$1W*oH{<+L73VbxfMTGFJrj zS|N|~9pn-1w|NFqouU*?zj8$mO9KH9KkoAuaso(6-O8j|rb9U%9hcA!SliaKH zU?75x4LF1LCkghDb_mEywzge1MEROYnv$hVIMwbDK|8tgY|-|JZGs?8z^=tKfhKnD z+CoRLt{4Q9=xxjv@!(xOJOK*n!KF==evWcuVwDr_z+n+dBx$AR?S=rNOW3byn>k^qxT{IY2Rn>q^XQ~V4x%;?mA2ZY zgAg@A4`}y6!vS#cjvqKes@f<7@9Oshk5PaLL%DMop5{|SsLT&Z08wqnPyrQC&X1ch zV}(3b(bYbkOIV@W^B}2Bm#}Rar7f#6`ywxq%fZX0=A$HU6NR4NpAr5EXnHv*JF%-qIRhCD+x$nzxlLl9!Mlz^GWh|Ba9W2U>TYKf0laEgNqV~>9wjs1;LxfL!`YM&+l}`T zM!wfJ4GC1$jjnC%YPgz7$vqIKvqP;6PZdIapFxFmFa}CaJljfOnaxu?v6ou(DLdR5 z1~gv1X*6b~O~M}Y$7@9OrG3Y2(t6ZluE*xJeKwsT*LTzdgLgg5KhOQrnKt@?7oklw zwNiC@+FUc}$eC)8?J-lF*uu)woky0hf ziME-$?kcU9L0%7mHmfq!h#N6v$6VYM4}jWj@c>L_e2A!Z;~g*X1o8Y(tFH!G7NS*| zDv3DHpaMMj;D)qRcQdz;Bv>`2E>fvYhX%2VmDsNW*+G!5q`~6>6-gaoEdf9g_#tb< 
za$!>5sdRj;J6ge%i{EwuBE%XYo8qV@lEXZ;k(nPgnNlo|iXC>%x>M;Bnx&6%ag1>x zl-EjwfiSw#t(#`l88=qCUc9LD5ra+@!H$%<(3|LYa8-tWdnJ?4GNymi@2E;Wo1*Oe zG!7a0as&_k0D~Dl++NOqc;d20~sdxWtDgL+gx9$46I%na*9w zA9i?G&ma)!`@ihH4}8?sneU$=0|X3C&;S8ZM~yZkR1?KY+O%fO1iqs)iYrxGP}xE$ zsI3$xfE6{G&}KT!Ubbs@-Cg&V?z+A1*4=t{U8>eQsbNEUT`KJE#93(^DSGjNj`cTFxR4SjyM6 zczD;I;g0_BC3u%lEgnGKpe7(HC7SksMI{$=rTt3O-TnZrQI@epdnmDyqJgeG!yI9R zHmOc-icV1~H(~#2Mc&n34Z&0uU+aobrT7>Jh~B}7j-_RDXg;VR&)8B;T!Y)~`Qww^ zKx*-Xu02DF1v^YZKLr`cK?QfJU@OTU1wl~#f}vCQSx`Xjpsb)Bp;GeQCm$E;hBAZN zozjqHz};ghjMDkcF}YgFRij*ME=W)2*1yTzuh}ze#c3+j?%Ae%@ZB0{t~-B2_nFPN zw{VR$MzO&57o{hUgLU@RB8#iQ+4@i`tRtF6zEGg_7cR^P#klC;TM}IME02l|V0k@) z)fjGxbC~|*GqT$P8Fn6HeA3ov2#Yxo60wuEL_?5VP@PzLF?aJ>Fi7e>9~W}NIFm6) zrIZ@GRNRftwLcbuLIuMYuJ&651h;nhe1O?VcSno0oUdbRo&(5W%>=pRIdGsQjyOu} z75qAVr&1L5m$V%Rlm{s&7t9Bn3x!9UYwF-!vy_YPh1guT=W{WwMOXwLxjOT??#t(z z0Rv-nVW~qM3g~#cyz-)S<;hXIdqg4<3k!E_tgxlbef1r#xi1zrB;U*7YT?fwA8p;ZmRREK9-*aksa5=duQTux-j5wBE$WGdFpC3EGHzMh@kK9L=EH{QqTRO6595oTvS4Cl0{T9> z=uunamiVGa1!+`doJKp(<=h@!^r2y9Eb6rEzrEg~ZMHJ_k{Oi|>tjaL_b4=+Taoe5 z$M7~(OpMqXZ_!@kU-XKq@ON3eLl zm&DJiuq!*C<Zvcq4h17jyD~0TB!njDb_M>7f3BWM=`Ar{jyBv;ze% z5Pq)CsYcNNdA8jlH1Jr)4QC4Ed!p%yXXe+Ys!LA>0ULEn;hLWOE5Rl{EI$Ut!nzC7 zm38hs!Mt9fRfr|7i3aP?^ZUp(KJI&@u#gS^Dh4LGy}*fs*YkqdMi-^a#|wqr&cWvL zduhzy6gc-6r_0ZRf91Ae+;WJbzIWU@jEA^@n@yb?11dg~Eqi7muVX@W71r!i;^YZ0 z-^FHQ-FLb-lw`MM;60sZ3U7)#zFMij8r{nLI=)APy~a!5YH_h1AXFj84`+w)8}vpV zUx-e(e0{gqqoL=ci=MGF$n`s#=c>Wlz&;raKrRjW#b8KPvRFntD`InW(S9v1<{ehM zTcU;0ga)HesFK?C3AOJwq(*hoOJ)Nk5jKVawkOvf%A@VB}Jk!qL!&>fUB= z(T8T3(i7|KL6@Fe@%G#z9@|1PqO)^DrGH%+;Dfg!5*=^Y>t+L?7b;v z1tNz>OUtr-FTV#sFtBU2SqxRmhz!jy`_?>SFwDBUlG3bW_;%?LBY!IF7{KY@7PXIPu~61=`rX^E&IRsSc}~9$$|9%# z*&{gpyx*GHU}LQ@#Kq+sbA#gN4XCg);#x$ zk1V?$H3#z=5a%iihtIL=?{WiXkOVbb8rCvE+*7}MJIFr}@IrCB?kAc9o{=q+u2rljvp5bp^)55VId1n?dCB0!{j62Rx)1n>=d z6d>$g1@Ikx#IpdA?p*+%dlZzz<@`Bf%e^s@-BR)x6jYTFZqXph_aV{u=>8z z5~zpAGXG9mvdh>AU9}~!s*NDBmS0E1N}b zpzn*3GIvcc6Hq558&@QbYfuLl(N9O2mLad6YBRp-Pm!jUlxGE7+C1~_(Y7&55Wrc 
zNiDxao5ue;Rr+9c70V~HoJ^YV5LbKs)XIEcBhAX@pQO2wq(z57KXZ!uG@^s9Ra`=% zyX_{I)rC;_P8Sv%Bm7W~Uzu$Q#?q*2u{N_6R=zdd)}0Hq{aDGyynAN$MY3-3Vb|`` zNbu6R4Vj122YvA-kMe64s-X4@FNRJ2$Xq94#nSfbntOOzXpJK3I; zhD5=>WMEw->M@B@-&vLZ#$W z)|j6)7HsLM>)ij`_)d_Ukk(-2Hn9Fd_)SZlk1ZRk#pnDz95PMZ8r#FnkY?eFb<n0Axz8S)v)?YAk=%*RaemyYH>c?-jNi@!zc1l})Iu))@^R z$}A{^GAWad1fBw3%fw9rBm7ooNwKQ$6}@wc-b;$!X~vtcq^wxUmy3B$E_%lmz2!x( zujt*c*|y!t7QH)*-lvM*tp%?-u-a@c7zSn*`ZIT%ROV<;=9fk9UlzToqPMl^Z7F)M zE_$yjdS@2B(W3VoMeo@~Z%xsAe9?PM(L228J=~<88+hLo7062cGFnRN+h z{h4zVk$bEK5w7{x@|s(;T3MOlG|8f6j?79cGo|NIdchh8%7^E>@=^k>D6a5BnP020 zT^iez$+TL;cP{Ba29}3sX=O7^;-SpKVijrfxgxjXHJcEjg6A_4@_6an%$~LTF;i!v zctdmHlT=NyW^KoBs18b5%wR_1qhSVhsQVD+8*#uOYjHvu(9{g5qB|A0c@jvB2x)I_ zRs=z!WD#i=QGc*ekx3Sv5sQ_%2MysDN%j#dv_|aQ)ZJFqG(%yr`sY*lS(_Fkn~)r_ z3nM&3yoet5z z|7rYxiT@h@L;Ro4|0Mn=^Z#Z3&*1;eK>KviYs!u-+=dkZDg9TCE~yVG+O>=zBgun|j)_R=o%Vpx5b^?Zt?YT}jF7df z#S!d)>4=cc+=1g$b@YFN@i8}xz^cClKFG&YxhmwhU}oXh&B}Kt#RYyrdu?@~v2*uu zksFPSs}}bqqm1+kaqo*@4;R2WD3sm|b~bcGZE|6A#P|9+-`> zz#G^qFUx9ahE_B0l|qj;Nqx%RPMV;$n&$U3C5Vs;6bb{R4Inj&-$&v5M7O-NmTdOZ zC_*YLIL$JTeM8-l%!3R^y_I=LVPiuo^Y3{t=i0tZ3vK6*zkQkS5#nn_X5+t=N#=7J zdh0@yldZcyv)Ck_C6q+*_6(L1K-9a;3|m}Qz`qLpAllc-06Q<7mvq{DKR^}H)?`=hIvgp03=v`X$YJJ$j%Un_P z{z=h0z36Qydh3eb(~92Xir$K%cWBXjsG-o>yG8FCMelRQtCK?~;NP-l+?aLd6Z$j% zQS|^_^hT_Wn$Sgx~7`6!krT{&t|9N^=y|5`o=w4_P0bNwIl9DD&oBhDlCdWn%O6jR~ah8$Vur7;qETXidd+6pF~ADg$Vl4l-71u%Fhftzi5+ce@|J>=&F9lNd;hH5gO5T7?1 znL9BPYk+wJtN&?}fPH)~1sws&SFVBh+MpR|jbe^VH8Z=E{KQn-x0%6`V zGC@Kiq{Abr26%?*WJD+_clOZ)ScY3oSuP6hvu=_}d zt<3`!H*ax`N%206r2FZXJ5+neU(fF;r3lb2l1w%H?1wJv~C50yovYX>c$zMS{ zAq7Bz_azF%ncqqhL43?f7hpbX6ZSZcmRz}_-)Xr%M$732#6rIVMSIF)Fs%$v&X zMDB_Do|6313ds`PLlp<&KAwM&sG?S;;5O4&sxi4FRhM8s>{IEQ7}LlLKC28N9c1U= zOj84bYYD`ufDDoLD621toX-vt)>94gdQ>bO=}#Vw%ORlASV%Z=W0Yl22iMYh zzKZZN4-3?SJRstW(huW#S<`(FztSG@@%5hfViJYKSgFW)s?^Rtq@ir2ZqP%YCZiGc zdx9S!KBZ21G{bY3Ce@3j?l3sY%4PA44b>P!=|FCKE!*l9{geB4XjYgP>)un%D2$Es%$kDS(Hdc;J|%7|J?1cZ90l8ruW)x1m<#2rLH 
zUYo6N)=&pHf%018pjO0&tt=X&gik3ws7I*tAQp*YBDO2RGr(b}5K!caicCz)ZDtXZ z!yr4JRu1uvAN8$IQo;0X25E?evIVsF>?>(2!_7WZ>|&CQkXM;T5b<&WdLO^q!BK?# zXi6(-GtGh6HemNkQ#y+=LXPoTkgx%F6Itf-{Ddf`6RHCQz(X37f{3RXq1L_X82*^= zRY~};$7=Z&7LDmulnajz4=ZZN{zm}Rei@GR$4vDV0;bPG81wP?%M{rN9Zy^(wEi@C zWVsfxARmk5Db0r#on1eA>qWOi2!+L@ajHR2h^MirZgYc?es`TVpa@Y%*A>4csvIG+2)vIdB5UpoV>OPHmnTe9aQF3-2nt(*Y*t`r| ziBF@G(c>4w6;JAcu%ti4n|^M>#px;a97h53II4UJ77%h0d<5_rn=c&VG3oGlUQkaG zI@W8iZFo(rw7fc=e3OSaK^O_WMup?Gl2MQHJ#Jo3)ulPrtuem-g^^&1QYd3TuX__N zOqYi)mg1vC^dXl_0a?=7xvNl3Fsy%Uvp?P z0u#sDOaQZ$XH0BoPuzj+>~GsV{mr4r5N;Z1HhX=vYuALZZ`Y}+B8+Wp7KVtKNRr2} z8UEyqvZPm@oW*O^+{)zQsz`DLk4KHcNOEyCB+^cD4!Geg7@$v*IwP3{%F-+SdUMQ` z^3k*~gBvNmsZ8&VIUk=L<^#YV*jNgN?rz zo8svyo#`oyeS3^zOU?&_|Kkb1m-IB7D@DC$KT0~&hSE7-S7yAA*R=`045cEzK49`~ z#_QWA=zN_fmI(aBkmFh7H*HMpO#5f~_J)(2c<*44cQ+^B39}HJeTSQqZ!;RiJsv^r z^W*Wpr_5+Yd>fK4clA@vW66E$n{P*QyL#<=E}Yz~0crN_H5yT24M%T9ga^>BOjX-X zX#?HGBWOe#QNwryHJ+n+`wVyO9vg0Zb2z5>-z?z;)VzfbHLspLijLydlQ+>ZC=Q(n z>xH+)$cUZ1f6AXJHE*7HP_a4+wbEn5ZEv4P15$G8%O#qDG!=@s2tugy;5$VWBQn`J>%Nm8lLt)-7$Tvo+|;% zC_4dF6*5U}uLV$(+UXy$jkirHO!@V!iCeB$&ECp_&J|ixdelVI3UGMT!~;CMX=aXz zbW9(p@$`xN%piXMX0Evr2pxUxLDYGWFi1o)=+ICev22bK@e^u>HvJqY)NZ~ZK``Qi zbIC-o*#-5KL2#N2wkoKHEr=<3ao0Nc9_MyCcbju}I(M&gOMpq`DsVAU{bw>s3314y zeEJRpO!hki?`z?QTFDR%b#t@uqj{<346k)xrt4N&?RmvZ-)n%#_)gB{3YJ@x4_hy1 zJP@RKEVM847v#pqOOTg|#MjJo5IMYbhaNjLR_YILq3oX}Wgd?Nw-aUowe7r_k532M z5+pS94ZpSdqS+yJon~X~E@g4v%(Lppv}CGkR8^MdSFKA6_5Hrp0JA$*|M4<^-R|u5 zdg~ITNaRi~bnYz~6MEwo`;s|>R9)$+PtwcjhkEym*MGdUK_!*OQ|+MpEwmp`sFS%l zX5I_xYiFC(CK%kj7}kHZ5D&ioB9}a<{Tt_N9G16BqZU_iXMNsz7}QHdE2_nR!ZrgwrZ@ z*Vx5}lsivn?#aw0EQVJrr3$8!Zs5Do1J?0M7o1BqEZiGiaIp>EHp%Y0)zy2c zO~CCmtv+ZLQKtI@t*%@rRW4}=KlHGv+HS{B;2ZYT6l;I$mCPEIYTJYTq<+~?>X!Ya zhZ-q6XczJ!(0EAPKyimg-CSJOS}r@)63b|4tJ%w$@BfY|b_j+f0op3JX4D35Z-J0C zEJg567i?u@2(EI$#WuLW1+^MAnRhu?3{5k+&VAOo<~5moeJ)(4K_XnGL1L0?aP=qu zTJqH5$9oUnlKJ1Aw*ddW_^-QO%T@Cw;S!IGefyNc1 z|DN@)lwXgr)a%K=$og-P|7z>MTK;RS|3~s)YyGq2|GxEKFaHm$f1&)>S^o<8wKAwb 
zDE|+wf4Tfitbc+0ORayI{0ZxyDSwOgx5&TD`sd5P-1-ypudx0``LDPBrSkvC`oAOp z4c5Oz{*~50L;f4B|A+G5Wc_pHzuEd1|0g#cc0XX2g!gaq+0Q!$R$cxv$Uk+r{q&)f z4_+JU@r4$VBLf#%deoI^R#-hB#f`7p9cZ^Jk41RS0^+^&ZPl$*9812M8B3NxjlVij zV-5^&xL9TXzTueNLx7o^*u0fRmynZ zs7*g*VXl4n|DnGBhx&LQmz}ud^fOUE$Lr?={hX*}`(Jvy9)bE}d<0!Ytq2}3*A;E+ z8Q$2s=~Sr0qgpqFu=$|7mP!1YrF;Xc#vf1-dq4HsdV<3LKe6~f2ukqMtE*eV44Kgn z=5ma;;xz6jIrn7ep5k0tzgGIG&OOb!Uvh4Zb3@KO-MN#TJK4EkcJ3L@)z_R=Z>@9d zoO_mY&vvdXYAfG4&i#sW&voup=hi#-zt$&ztZ`=YvoO{^0hMfSM{VuEvyx=Zz65;m$2{?g;0Obna2kEqCrH=N|1`iGiy93g`a+^~tYRHx9Qr_g3e&Ik(-ptDO5| z=YmoU{CW*kKa$Snq&MM|b0ub^bVyL+qR?ktE4Aaqizb_YUX&opU8qpz^;kE;lcoJU8p-2l}~5Kjzy=H_CUdepc$o zd;{nP`L5Q_kMv`{t7Ja6wNOE0I5a5Va{VmO&ocd(Z!)#WH(x&q{WR)lseZnrpC$S+ z7F2&I-(3AH7MyDxyuWnr-#hmooO`Ep|IxX>a_;|d?p@CPC+GgNbN@?x6NmBJ?E1UK zxm%t4h;#qo+((`Jm~$U@?l$LcckT}7KH=Oao%@t?pLXsu&eh|I=J7e_KJVNYocp44 zUvlot&V9wX|E1}ny3SAV%eveBGa!W+Q2!;){S)V2>fD*m{ibs-bMEENo#otbIrrPn zo$cH?a#0i*<$Yaq-?Ex_ExiN`eAg=QkIZ|4r1v_?;6(8q`zNIBQIv1um9jhIogM4R zK&Wk84ChiYdlkb=_Uer*Tuh&0c#>bevDL(scY9}-W1(X8#twz0w(xafiFo7wAyng?tLF zWF8|BepvCyXc>FQ=Lag<8%p3$;91?!YCQ;_^_!SG8sJ0b0wFjJA;W2al^O`aNwvjz z1CTmTBcVt`tGqMt`Wxh%dqJwbp;i6lk$SNLrqcwF+&Pem07;R9QKNcmOueJ?;Ec|L zGdd5>Xa`P>X;K*l03li(EdcP&#p`d-OZnmpk`4d@D-;j_5a1PXb`1-9RgTWpb*|c- zh!*78pw;b+osBE#>&mfXwR+mt{>$SS*dS_!uNlBKMLIy*1JHb8z2(F8BGJm{qmime<5Q`OgL zEJAHY>LHf8Zf>*=Tb{i4q^8sM<&8W+pPt01&W`3}FX9DT9r;eVq|&Sr5wtvDEa1 z(K^2P+!=Hhd5{jd*duG1shJ00YTkm`y?7mO+v};Me29TN0$@cEl1#L28XnjYbrmNhW!F+CHn z`zYGAK~0ysrX>TCmtNUP-BsAS#HdEYF;`9|>UB;uQ z5hKZSFq&F6D~YJg4hlN~5F{k(9`R@k<5ieYf(_`~!5cboAb^1tx_4uBm`3TWHABnu zU*I?7py@|rXCvGa-M&5oCq|u z7E?|0bK%{OrANLkw8FGXfNwieHkZwc)jjWRM<5U3N-u@IIIj+KxtG;yXYcg#GhLWy zx6YX0TJ*aFmBtC)&B8EnFe}U)LQBgkVufLlFr{7Z4vy^z>a3^1Mo%At)+8HS zT3x^-bHa#O*=0o*M4V)FSZj+0X8Y_z_Y+Ce6jjqzcCTp$WZ!8})23a| zt2;znR3998ZViyf;LO@=P{c=GYMSRAHLa2V8T`)*v=0G?NYO2QSx}FTcae`rM`M5! 
zOq)3uO+RUkGEz~k04cNR51t7#ha)2w4P}=Mjn+T0w5hr7h3Ng;I8Qmw zN@Deog#&-J;llLfs^+>aDCp*BvkP_o_-HIazSUgU8L#h+wrvB3u3UWddY<_uooL}y zR>84ZaD-dC(DZ!@L4Yi6@eURZeME+yXg!L^&k(&mjzm%jN#(O+$tGmiW+R(?VO2bI zNO0{~`X}o0!$r9IfonaIO%+CyxkqqeKYv?%^h>}+B?h#Y1X?V4Lm6TLfQP8)HMZ6y z_7eKM&|a_Yxe}o3rt(;*i#pcBR<;9*SX}67YQZcs@tv{yXQ+&o^h_+&6ANJh#g7=Q z3_w{~UkSX0)-M?vtKYgb4ygAPLG1+67;klqV6?5%j3B!>IvNqKovKE%jb)rF_3wFZ zBqgj`;f*%tz^Y)ZHKN!NG}IMy!pmL8`@U94Q&0*^o|n3i6OI6#p$B#Qy-Bg^8R`~` z!)tq*5@bl-P%A?_p?#XjNr*+$t1hqpK(uW;87^g+-7po&-B`%TP0o!WkC9wDm7c0k zJxecS#s>DOG4NMCbgF((rw&F(ZYNlY5hM_WOJ|FiqW8%kKpIsHz2b*@xNWKkRE#>`c#q<&2xWK zBi8T^Bb9wZ@-On+lE{ryJi zqt#FgVzL&;WPW{(y7QpJQRY$ncA}Kgvl$JB+LyW2Wh^tXM!WDjm(gMoYD5lM)$&rN zMlu(Z9;IoaANmj)z zzkYJwq}4`1k|oq*me4cgwJLT(xS@WtgskwST}6k?DuTl+)-B5{hV?ey#5)C5Xnpke zc{C%+88plC)V7%=aavZITVmh*f=i5jlQ9*spR{MT(`_hbp&)NGBI8K%+KQ8pAs#Ni zvl$)W*$X`0B&%~KOe)~PcT&?66-;o0@Km(z2@vLL^La;v6yu@i!B??KR&JY2J@NYI zgtR1&EHoKUJ_quOAy&GUTH>MiB)$$Z2YE}9da6KbjE6p2GBjTQ{?f~ZVIL}@hXcZ} z2uXrrX}MeYfiHf5VV{Y~q&YPiTqg8GM-vEVw)I$KwlZIJ62a`f-iR)5#CzU|?Sc01 zaaih7wh-W?bLMeBF?v#mGDk1c;#?_7X`_7AztApLZ8l+;yKuE9-!kz?)0!RmbmnP= zQ>9)CStpRrLa*&*kWE+J4sQ|$9d!^1zw034V`5N@p=W^Fo5WE>e>%PT?FM_MF06_s zx5PutihcS8uRq~NQ@|r9k8zWY)E|cD`x+c3ba=QT8!E(c<*smx&spVd!zOc z^q8!+=b39ehWj*zelvoo^~6HkkZXyB7=xd6$LqI6&1u6SLfm6svYaClgUZ^hEQ*AV z$OAe~9$$LV;Jx1Ej+3trI!+EnIkNe31NGQ5l352B&3YG5n=_JDsMns4P)tTqYhR{j zfmU^%D`E5k4rGp1q&YM>x@$)KHV52;&|B5(}S9~+b&lR=VNjTt3ml4i`%jtJ^Wk`mk z=2!A%qi6GfDd(&CoU<)JU#fUA@6rY8ne4#fw<#g0g0P`a*G+v1Wld zmil%@vy3}sgI;P`l@SvpNI`wmpe%9(|jec6a9)C5P z9^ozN!$wjo@2H3-yU{N=fcexHTbtn@BwH8r%~^Qa+!w zrkJ$GCKXEEUQD{(COw%iwYHeF)+W7}Pr9p^beEFQH^?X5Q%t%?NdoOYuF?INo^IAL zu=2(GOuR4gVkb5~d@;U|(biuUSamG-0AndpH_}c{&aO;QR^&G8b2MGM@T24emUZ4| zWm#EcjlI2{QDY92#?YqXvj=8gP=wfP6j!2MuLomC&8cpUB~P^aYA#|;@`YIPsxrmV z4V!5#F)%-5s@lceLrj&|wKM3A0F>!LgpKf`BL$O&*ysq0Hw={bkFZiAf!{9kM*{0^!pzp&W5Zo<4QonIJU8&$i-uz;WklD`F~(>YxfYBJ z{C3)?z`DxZW4JeBJz1wsAZ}vU&dM-?;NcOS#2il( 
z7oEr#H*OAhy?RV@dg3h+6x}|MCoDU7CFm9)d_Erav`&mnA`>^QyldyNP3g(!${b(` z$_2Y+Os?dnQ1<-veki#E84QAN+>4_l& z<&flaUA+@DkqAPcO0J_rT}ClvWjOSla(hfvAAn5?`(6s0mWD)76Ud?#0LSEA6hzdC z@<=FiQM&T0aHpKY3bZThh+MCQlkb>3-w5qO$e#jl3z+86A;u!u?AsZ>rIUyu&B-35 z`_YnES^WWO5+*A2O8AzY_!=UiP04d3$>nGiTwIpCxI8(%B6(G1BzbvN^74tv>tJEs zTs;7183z7fetGzt7Uk54;0k*4hN16?rtD2)J!F8j^$HYsqp4a<5=YgQ#29M?c(^5;clsTnM)}; zS4K07%C<5+Sx zA#^OIb(X?#Cr!SDHQFnst_tEqA9KDLjTiaOk_?Wng@8XV25%$qo0rkUqDJP+7|GSp zf1u&%h+^AR-{g2sp(+5#Vk%gCo_kQ|A{YGted!A+@5lfzd^Mx%cAvFc-w!}A9AOzg z4Yp@c3BRfyNEc4bV>AV2BGOca*0tdzuVFm-+>@7@f-SP&K=A8>3RqjQdYSr+#OpN2 zbHGYYJ!$PhVw>+Y+$F@%#P(^0-K8aj<&h~B&iXE@X`BUR&IGUY<{*3$R6#xl9_DVW z_(4Ck)9eY+DJzpwor6=Gn}AXR21EVhWSCPbaWkblC#%KD9%bvdRdSN{O(0uo)Q6qS zPG#jQiF_o%?tIkPFqoYKjaGh@Pbr{aK@Nb6F)GP^i%P;FtYv;Q9{syXEb>V%k8JcN z`)rO7xTt>u-v_LNZa4No0VsLEBGxb;H3J5ar$6r7>0nTs#>ypUT>+Bri_-pc^=%Cc zg?I`eLNqMVj>?15hg!ynXxcx*ber!H$*k#w7>nR8nP$HSWw@iwD1~}1L|Y-&;8I0PdLP^X@yglEM!}x&Zgn$1xsRj-C}ia8e|J5Id7t%$4gL|yS_Y<3|Az- zRhhh`D*3~S$?L(7->T-&tQm+SKMg=HWJ_8;+5wotTM3N5Q!5)iZIq zW6zRvTeXCU(vzDt-#*lIc1wS)DINQ6i8)OUhceUBmETlqB>9w7+irP{0EgT&)6z3< zESa9Z^SgvmzI&5`oH}n1Vl}4wYfiUsIrW$b7!dIUsu(mso1J&KO$DLc}YOWe8+hx&|3 zBzC`{IfnSwOFa=4L&_SHxh01~PkUHoD`7!sYIQ@9DSxN$P z?&{41Wx#1IZjA6fN(MR{bx4yS)i#v#crp@73gMA+LC$-dZS--hdZmfQ=`lMSbDF}c^f++D)rnxOgQ zYD!P}5-59SXfOCDJ;mqSJe15nTQ$g+v?=5dY62w=9>!`XadXIjXhM4O5Z^YN-y!Qw z4PSmDGqY!$?Fz z>$U33$NKh}rhUn6rrG|`b_!Jiv!vC(9OAa^fKNEnahC}PMr_tf9XiAq?Sj>&?Y#w2g zXEHyN3Xj4}dVJ+9FX|+wVI#Q8IcbW|=KBy)$GHJD$%*dp>t6S-z7-w2-vX7>1 z5t;O$*$W?r|7+S&i zh?^z`z;xN1p4Lw2G|Y_WXAzBMm)vcKxje`S1~r151!f;Hu%PP+jB`Cn^)AU2)oA zW&?9fK;mzt#7y8>8)!5E3AT|k)dY6hK!XYB_}2rCAd!x$A#UJOr9sDzQ-0?<@!W2} zqRgH4K_Pv&fz{|e?#isP$%q0$X79=@Q?PBr48!Vz=lfb_zG80KC<Y zAzewlq|bTaI|2q%IK10rzWy$BQ)y{aO*lC$)#NX2njB6}^QW51N}Fo&m8F`>OPi+P zD^E35lr~MpSCML}ENyDQSD9+6DuoT0oK}@;npoNt!8Z{WXlav&FPLhoE^Uh8t4=l5 zOv3)bw3<}YWP1LisY{Bg9wep0uJFks2ba@Owm7Dspji>U#}D2Jy#;tWxB{aO%3ow7&)~ z-;J6MV*xshuXIK@6Y$Oqr>+Wyku2hS!~|zGr!KE1TdbLbS9Io_$RwVO4F;byrJ7^8 
z66`BrApxs|sRdz#lWbyS66SWqdI}2Xf>`HQS5HDrf=X&8@%YApH97PgIcp>AM2-cK zBvbVcoZOfBw|^H=e;GvmO7ZXeGru$u%T`3wm#+|Ezn0ySS*^&n9^T)&4YjZ1+8O2h zAxWGciV+>5gEqqEYBesVAaMzH;H0M3;PKfYWLGR%8BRCcU`@7!`8)t820%E=Eg|mY_UIzhS@xn4 zhQ=~>Kc0nAt>kBEDu3%Z1g#g&<8h$(QvPZd+H{Nc!=d+MI#)qU#955aJz;#hVSjqc zO$M8DuEnUck2<-ND$5YCco)YYVhk|m(f8R&n(`hFbqm32H^r(ONN6Pzg06zYPYX1) zvbvH*y27CS85Bxx4)sy@OuQ6qWGEL?vWgYC&>V8JY=XI{h}fk{A$yaiDX+Hd&yXlOtALO|niDlRV!h&mg%|knwpYl11^GLM0H8QHly`bXOJu{GIk z*TZ^dzLu<%xX+fj%~byybUCEFf1K}-qYkVoq{C)uG?G;a;cmIfz6gYei1=Z?y{g`4 zD;JTw+K?Hbj0ef72YD&>Bia|*4)C7>yBYond0FCM<*|(ISWAT^AESwgPn1go>6Rcf z*{2c&{7%!tgZ7@AJgDK}s+=F4AxS$rheIN8JQZQFnIi5goO~tn`-F4e{z}(5l zN79x1q0ijK9P+M*!zxDE^Z7;%hIaCF+g?$nVc)Z&k0+Dt=(Ypot|2JHRy%RAYqge9 zlJryELFU~-u7QAB2e z!hP6fF|DK>GL#C3_UHoXuk*d<7Rh67L2cGWvHYuUdVP6IS4@mXzH*qIbp||tvx1Gl z$dyqk151u^49U1W-f~^tS0JKw3#u#wwUwDHAnv^;Pk%^x%1tdS^~w{e#<%wEH$ZlsX29={ z*R--6K6PWW6%eb*n=U^gU3qrCB2mX$etuF0R9ZriaPYYN7JrgYFmjU#u2s) zx!5fxxRY2)2vHIPKKDB|N9oY#aW<^+R^<{$wccD+(J~ZS=-`IB&JB7HD)2iI zHzCJx0TDRS4ro9`owfdkD9Wc)BG>*&6a>d5jx#qB5sOl^JK~2t^M;^d&j;D=MBGb*^#7 zOvTB=Py2y(sk((PyL1Lj>+HnA?_*HfxiPtaqWG#865qwlqnA1kZd=tqX^vWsiKcF< zP!~pI{$gr>u9$Ef2|*1fZiAdQsR93bhIUy);~a{OIx%r9(?smhT}L(Z2$ z&I4hjq(o-4Z(a~MW9U(rVQFcc7GEn9Fv?Gm=*>(0`vQ|YlhbSV1x7O~R(`-d=c!|D zWT#X2Y44sgy{fIRqGc%zY>C~h+Ml>4mYNrguX-c#9cgf^KNka1N#4@l&iq#1`c2Eu zjphiyn;WxVBf6?H@s+4Vjj%gvZh1kvjNqi{=dj$E=g5QzO)z^b7C+=BBOFn0=KE0Px8H;&%BMc)zC#5 zG8qb{b&lV`bvL}g42-dW&D<2*3{Z?MsWEg zW}%>u60d)USx7hgOd9jGY`2%&oGq{G?EMwB*taTxsmNBTmldj7b&bsaTQv6vfe$f- zRAw$Cy*V12Bg-WJYKk&25UXXPXIRwCw&c?42JeT~hO*%D0GjW5W~}h)x3=ugBEPn^ zWrUZ$fK6>8w=W$=zLLbZ)y|L3>Bs!Z{=`(h8d5nx)giL>n%B0==LMR!#_D%^f%7+O ze+`c!I`1V*W65)>f2%3As^k zr`(Bo1icCYu52Oa8|=adM3ZIiv^f{4p5t9Te^9Ea=f@^B(5^#+kD>;Cd>hf2GHVK~ z^Eb5ho!k=BBP1=nOgF0v8|8}LFQ1JersgVDf}mPO_#sweN$zf@|Rc>l(s`9@6HY+9Y zlf(RaxlRPCtHP}f)#TVkOf<)qzkyMHC1WzcYw^C;R2FFSfyunoQLPQWD*~%@iSlw6 zYR~~WNQ1FtOGR&zw(`J|qok)%2cSuF-TA2+)nX;Oq;9vD9{QC_%(Q36w;lX$OT)nP 
z2~e;PmpGM?D{n|&vaRjV$R*kIB~P^-ny`F_H?MpRLIAl}f3Fs@PjID-rf>G;E(Jw( z2HHit{XfWVb`N;?{M!5TSo@nZgthM{(O~V12o|vR z=Hvx6J?B@Ikr)f@^n9?E%=BijgKt!rk*m$97G~szz$)oW4ww;>!Oh6hrFJ%M46Hgq z!)|6{H?z^GMv~vJuydPVETgGO5{${WbC=fimJ;G~d|unD{dJw$QAQ!IzAMn)hl<3& z5eT=g^!v_ec_rK(!OcDc{$agT^mdvSjUV|97IROlQ+WmzOF2efLxAM#)|tX1;HZ{^g9uK4NatOFiMg|AI8cpMj5i8`z#D3 zgFe96Yumxef%fH0McYA^r}XZCEk^#7}Wt8pwjb**>(D``soh&{0 zAUjljf!F$WX8W1_G69MG)xk(e1Y&_c)OpP7LI6T6F~jK|aty3r+T`W1)!ywLcv%NV zA%E-2(YfIAv52ejRUQA>D5I2HA8qUNwY-@fp^s;_9rCpdH&Zda!qI~hE`?^voCP>K z=ILYIj=C+gdw*DKPIy&ctY1mJmy_b?QM-di6wb?ULOH-*ZbSwq`OzRk;9*g4xiQDB z)pS{!piEJq`f5)2QsVWoI!1KVaFZtnYf!6iod%rs$=F}1HIjJ z0s^zKq;7xjbL{UU#!L9PiXN4$+P~r&qaqR((FZZS3xf@2z<}!#iR70Ra5rUwD&vN* zlbmh{hi){bBI2%^k*ikKorz&O!m~WT3FO79N8W;-M*nw9dP)}j z>9tFHw-9m5!B+AGR;|bHNOy2Lfy}Qbi!AN|;ZlqFzDd5KzchK51>He%%$NIfy*Co~ zkO%@b*_zLJEwSB=ei}?HY58vNtbv(k=QB+&WcoqNwb%BZP4NA2;HGJ{3%tNh;LOXn zfGA_t6BZcNZH>83juKC2js9--PU^BBTP!Krd$R}TUcDHMj@gPzeU`C8wT2&}){@@K zbr9-;V(1Oe5YSij{!bu@rb4^v*3Ehpppw5T;X`mz) zI@nD{--Q+N)K{SN0_~>|?UrT(i%Yv>k6$Zw5!=3ve`)Z*x$V5q#!Qh79xieGjs!ulXc&N|-!>2h22Y%k8wrGXNkL5zX znft%&#_u1gLYWf7x=SWA?rJa4{varTv>b}6~TcLEiTE6)sSOVt^ zucr<@q;UHmL94x+=*mOtk6O5#7JiT4uC{N`%z~bhD@uB0vdT8HQ;8b)TzBtt@~6*P z$~Mn`QtBR`E@QQsf34vX4ysI_Ge<}u&25GglQuWe<^tIhsMXOkl^Zm75h6w%zCaXN z&65iPGq>YDi~HQZz*SG|3(VPx|J8kgZ;e?y%}{MmE*-0$n2PDiJ-*C&wz<09;kN$q zfuF1bux9S4>1AfzZkcR4lsEIa7PeWg=~Rq z`l=l(nk2zk#wQkZbu8e1@)=s4Tis-i#J#s$e2%u{&=2q{bE7&{miZ>~ZEACaHuzYq zoK=|bf5G2LoyU>CD%Y~#u$tCbe$ix3l^Vn`$;T&i2A=0$imlX@L(N9#p1*xZ%kDw+ zhhYWOy`W~q$q@Y4pvzy89mNqADJiM%Pt1lzqjT>P@XS|Lg z=r^&&>~K$U!#!EUt?@lc^aGm`s%U=#aqR<{cNt-3ZKR!mmeukGLwEBA--97e* z>3b`CC(WX=4UYeLXMPg31sKY(z-drF&c&_-z1x!Dq41J2z1e`2b|g3|&+?L&aYO%M z8Di4QS+>iddqiB6m`r(oVv+wa1A?XgnLzvhB#R^)M7;c?@gJa}K>J_gmlDaU9f9^V zeh^jW8*~w#PM+;{^Uu|KXX1ytwd%Uc!}PAqKaq(GVsaDTZGe*hC*8YS0!D+GUciW( zK{2$DrzWqdil(L+Rt_YmS7va7`gW3+?iQY;Ps+4B71dtsQV4nPntC%h6HyQ*Ms?(W1|Ow_7_Z+?oaJV zxpdmNFz|~E#Gt-#5&~Uqxb>p(zHngluxS0uf%gAKU0Q~}J!#M~?E9G=EfDU2Wq9(* 
zmf`)0b(m*svko^?E}mR~P|gpk%oUlX{E3AaOV&k`7jZ~0W|qa_inFhxc3*w^LNwRE z;(0b;{1~+23`YcWlH-UW67S254|?;`lpo#1Sd{iO8Mc~E=9`3Nu8-BfpGc_t_XP)a z{~$LzEtXk>x_=5|^tromE5$ys3(wLn)LI`ma08AM2{e;72cXM2VYlx-u{pCZeWL#} z+{OC3F8ypm51+L8aJ8r%x|2l5}eBilaq-KUYqHUa$SPZ_uq z>BNBJajt`D@W%WpnLc;m>L~Wb2kv(hzv#dj`sLCBhF;I1t`Rf3x907qup`?(@|g=L zCQFiD70;lbiqGIA`>O{H;dQFg4dIgvlw+ngiG|uEj%rhoA7M7J2Ds#uS5wJ_wfRTH zQQcg_ijRmxA10_vnt4R*LjU?x#{AdE4I1-zR@+_;9`msb(&vu(? zd2-O~Y){#txt1yVR4iOUd2w66CT{Dc69$dzZLaW%9;U)X>hVUmEAtSUJn6>Qzuyw< zg+0bQNmCGqW$;QSNGkR_>fg5yy~W8Bb#6|wJ>v&8a;9r!@Z^1qYCd=JN+|ZlC-0}U z`GqFW?3eugsd#e#^rv(`Y962zs$&?4YUM@)O&^!L#r_nLc+G{)S>- zd={#y?hEa*&!88p9gv?+|EQx!oacj(-5Wpt0^=3@^zoWCX3%*3wW>Oj{vp%nj@Q)` z`{LvELFND5czpnK@3ZM2b<~cRr|QYBz6(A^YKFO} zR(dIJvs_4O1?manZDc2^(UgG{Xvl}ctd|UA{qh({N&u=)!b9Au{{C2)J^KS4e}gMf zI=Slhh~HP>-{ma-PP`S`FZ1dTCXg^exd4D@pGy$0V&gOvCws~B%d)|`TyKop-18zZ zT<%~f6AWPs!|>i3^5ppu=KPSn6PrP|=VzG-7OLT}{K5G7!0Y!a4BHL12rK6G?6?8+ z7W}nuJAUEy$b-Od+AV(J$BAg>l4A_N3GP|Ia+AJUVi|^y7><+WXTfipApY7on`Pw~ z@;HX!8`;0tL40zw{zW)0=&ZPYALS+K+EKa(((iCt_N2NOvZq9O2vdd;QHc|4?!`UG z4voTOwH+@t%tv=K%&&+1`DwbJXtZ6|dttuYiPPzFk-i+jughi5(-d+C`V@YhU~jN*3;f6A-h zr}up-7-`K7Q7H@6pa6%MlU(eON}J4WdfU@W_xH>4E_3LWi8~V_2jrb{yLCZ z8URg5 zmpYdL!B#O$CBeR5urn$kAJ&Ot^y)0ka~6p$yj6%fw>dk~tG~GbcZGp_c;=y_9nb}M z>$YUe=|LepfL_!Vgs=NFJ-|$bSruFH%P8mv$SsEVPa8j^B`|Mv`ESTT?b>KH9_qULaFIY;Bd7oW2sL|3^D z@mTai@r@u)@|o#wWZ1;Eizm;uhXep;+DG;0vX3oTV?LnM`?M7?(E2N~Fn@WIJyMT1 zr6e3_ge2k5>7pi)Rru3EyJ8CCJ)r!b9$B8Uszc=)ZTX7NE`QSJmJfb<`A^I@C(a3E zOn#`fD}njqJ!bulZW_&dWs>&~EBSQfoixbkN(xhtN6o3z+-3IGeEDAz4GBzf%+H;^ zk;m1*Xuh;L+M~A~y_FQ8z1DKqN0QL8Qe3x{t0oN;FoMm*+lX2+`t=Fx#u} zxa1a25uPK*b7f&X^Mgu=gc^;&(khGhcAv)|5t-xZM(sAH?iRV1kRVg1e~`O;QW$< z^H#yxdp*(;9Ah$7$qaE^agG}v^tKV7P9uK~zGMAgPap02mpEQ=zSjT9&DUi_Ghcsv z{cHcQ#4Cmkf&u_I`2>D zCg_K!=+VQ&J=3ckD=g4HjI25XBf|%sfvZ%V&cL^rm%;%EQz!FlGRBkN3rZjvBp0~# zL2~e1DKa_yFtt$~Vix)NyErvD|L9kjuk-K8L{&h@aRDMN@MU%EkfuAp572wa{AQR) z)lFUwmK<6TdzTxa5`Q9TJRQn=^d;*F1|Wt#n@T01$V_%aVyQKXBw_^7^ 
zr*Bigk$(!Si6!TjWlxSL4`&h9d^;}$su!qfrr}pI-@s+g4E`J-7Cyk9b8;UNe3!Ep zj^t08`;fjWCPut8Cma?G*=_{dn@B6nvO7ABj>3JiiXKK`L}K8n={0bMwJ)dFvf`U+ za#B~rm^IbBE-GvakpIW$Vy^&}L@1G7`RHus0 z8(dLqz9MbKL5tv5WYpbXrzX@A>+oi-c)3J8dP{!*+_!vQG>2L9H*_vFA;NVY)2;L~ z4bimU+}|=MF>#^>&lMQYL|MFMLbjXX?hIzDib&F4c98nwAnTm!_0XgsdL{FB{^A4; zd@e8G4>Jk+bRXg-?c5W_Xw2^`VUy#bii3t6E}n;hhxWDgpBeaxUP!#aKkQw4G}2?e zmf0`)7O$=UWUdX$vr0V=th>b5cIfoPkJ=6qBPtrgt#m+r>n_RZe)4yuAS-WQ?x`~S zR>wHROjTT9K?)&0)RjH9?a;}Ixh{c-_8o}@9peJ)Mz`A7sz^dc52`amXQ@YEvGMd&` z4JWpHxOB3NNRrh*D>3?H%mMQzeKg-E!b1jz_fM}XMXY0klP^+!|G;#@!B2!6KM`&< zVM~w9yj1&?h7psHsA*y}ND}J>fpr&F)pjLNXnKQ7|3)V+bH#?_Q-O7xoe1jpy$4dQ zB^nHj%KgLorB{US_3^r%cuexCSL)lVCo$G}`Bd$zY3!?t3wgh(8T0*W9Z*IL?RiAd zx)4FziZ~h~Y2P}2d{AjK*K>ejariGbnu9zqxO%k)_UHM~970CGQ!J5-32^NXn%l|w z1Txc0b#F#Ya%1l~%GuUkfij_b#ZB5t-&`)<#h+LqLa^MOrbx>xy?zSB(>EzI`%0Ut zKAKvNSnn(3+Veb332nas0%5Nq5KinuTXtLTW?~r%)9{1%)u_F@D=N@_m*Nm8qE(9h zGGE!BD_W+0RoQX2vdz6W5W8Sa?$mEDVOR?HgCaet^G>su8Wv0apyHx5=8mfVd*QCK>elt%0^~qQH?+-zKYt!8Z|-Z)Q)>b~}$RK%BuL08WuL`cSu5 z|5%{y=M+E-C2>wkTmPJvXE2dRp6-0=a#cq#w?2y%=Utp17zJm==XFtx2ree1lSmSZ zzune9B(S=QLg>!L(xZOKT*tXl9|O$cj^P|9D1HUn4se}k;rXi@=Cr)&B{R|+VYvUS zl!C;^s(tB8UT!;dY2YXCkuy54^3P+wt-0629f9n`aQE=C>{x!wvlaY~%#P%DRMyY$ z2;4<9uD89b_ow#GM4Z!!uP3J!b8IkQKV$~_kHVq?{&Ra*7d62w>{!}-1m6b z+kVW;g_HfwzHW3QPAx&8_Uk3ja?PIe^^zAE9LeP~WqVEl8yjjkd`o{TbuBsRzF|0o zN}x^yZTjHk1D(W}x8L5sl~0F&$(66=E^6sDEQEBV8a_p@7eapMAZu7`z||8d&^mQa zV3od3i6Tz*@Rq=;q4=W+*Op~;7X_4gRALIGJ4%AN+${$MbKusOXbcOvBV{!AaG>PW zoK_7Q=u-anl{~Ol0Qm(_IX~$i<{AU*&;~is*!uQ)jjfNjhC3S04R`Gs)^>160s~qD z;#Z!VBi!}wFt4q<9}Z38m`{``&z6Ef!y}$1CjlRcwe}?3mMuvu$mOrplO#@9y zw!=h@E?IhZ5uRSYWs_r>vSpD@bwpY$*%X(mZLC)k0MiS(kXnnBsmY9 zHc*$2(}nXBW^OvX&8*&sw5}RPE;%V3e)u6Riv|{BBpv=mUFSn~RrIlv{wGjjTX?!J zoNnAk?vrutdRO-5v`2Qc+QY2}$$l%@FD3uQL&CHZHVuV;=UO_ExQD~Pb8U?dr@uwv z@bAkZ&$cPgg6)r>8$0HoJ;eD$-XepJ_$;g1*NhzV%&im36)5_w8%;JtCt8 zU9bDYzC+DEzHYP)o=2YkHS%!qT*OBMSD%+>=8j{-c(;H;b2l$P$J_<$yOsp9r|09~ 
z6kUkMPQ%flhG>1yvZLB=2$m!&DU}`O`5tFBW68Z%ijvQ-(dot|M`QB%#PGwaGBGs! zRNMzHZ<5MPB7o##LD8iq%foGsc~T0OVH8`=Mm*kVwZJ%fScdsqYTLo4ZS~m%cs!0uwQlF=^9Cob(-B@a&Ae9mNeREB2gMeQ*B>*tXLQv27z^z zr=Wmz^0HoZSmB6`fyzm|9!<9lZ9No>12gn!agBxOZeKq0u`UVOWm)^V%4H5JzSy7wxq6YEx2SjW^Yudq(3TMC1#E~Att7&YP& z<{k;(uEI}6H|@K1t9uG(I4<}|Y#;i)B>mJ*cc+<+R$hx4c2GA|*^NIz7EjiNLmZr- zJb4GJtcK_Z!26+lD1nC(cqoB~5_l+qhZ1-wf&YyX@RB%X_u;W`Zas3%aQ3U*nIYaz z^Urtfy18^kq4BN99l;-`8>#?8=2 zr}mAENuBADnxr6{5*ZhfkPw|18$BgCB|0V^c@&<}`bZ&dOsYqGVpKG!M4LKWA03$# z740!CA~8NDI#sU#SFR3zNbsSXYBnW|EKfczlQftq4}odCiUTvO@}h6AA2|E7^p9=4}4+@_2==07eAps zeJbI(-qf#e?D4-#eLK7By!O<;{{922vR*r}cSwHFrx(v||B7~;#!md(h{iPIXPbLh z@4m73>t96_rUp0Cc~-feZw0K<4Vr%JZrK+fpZVhQZ+e&pHBMPWZKy*=}#anI#WtLRr)*;8F~aQ?)-H|=UF0#|MNeq7`stJtTXc3x!r zWl!I(ojsbS^%>pX!FPN}s%`Xc+t+7$e6xSSqCP3B*I(NE(Wscm3zL_P+P`63!og0L zRv&k@T61#I0Bu8Z{_9SU?JMY{Nf`8G@T{PoUz8O2d~V-m@W?HvUL12MZ}QC~kGQJ~ zSL%}@KTdN_tIX7>4rbk9rP*}}V+;P;aJ%$I+VqOZIkWzKA%$V+%dx@ zqtD}=6DQ@rH00#xPharN9DiK#hfC9ElVAU~bnwXF&|_8Cw%i>yy8oi#w^j4U4*jrj z+y=jutJnGdS+euTu1@Uci6i#yuS|JQ*P(pVyl10c?UWk6V9eQWy|R~%xVp>v+-r7c zYP_GZeZk5#Z2x!LrmlLYcJ^9A+6B>Sx{?~TRS7~k@T{ae1T>44>)Tlkn z-b#M%i9F@=K%D=YzK%s9 zyB||1RTdg27r*q*1Gxe5lFjSidky^9cdy@u4;I|~;qIIZgBPy#WJ`Qj4AYgLyZmj) zzjM4d?(hEW)!=vR0`44mQRA^b#{IPGj+v7`dSTkK<8CR9x1tuudE!(|7yrlL3on>&3m(xLCK|N8Ll zZ}P6?*9;lGz1N>bm36y^W{h7G(V@4+ysu`R?D2EFbL7ii?dRTif=Lpj6}f!c?~>Z~1-hS2k|aYdy9c+BQO=-Vy#$ z_y-Ml+wT4Y>Chw5*SeD)^&B$#3hC04GjA>+ed_x~&`r{*ovJK%l3raqrqidSTRSe8 z_9N-n0F`DbbwuFXYW2Xg!Jx!M-IA??#<~D ze~k3+iILYoART-m_gq@$g@i~EwEe!lD7T+-FG+H0kxulKJke3o>!+a}w$NpB}DRM(L1PEOR? zlKyV;JTZcFc#m&-gpnTqqgVMP(>re`g)(^K+4(E+d^@xMS8(((4zu{}D*KeaL`? 
z&ZOU?)YpSa$1A-8N=VPIdatpHbiG$;h!yGkjxSw1Njl$YbM(uk_j|cae}#1auH(DC zN&i@bYhiIlDN>uIgHGC1mSS})w6jOaT-)&5LfzdwTxFD|b#lvo-14DIG!sM`m@>CD7QVIY)DV=HMgn zbnZ#>h~@;{6QXh55uWA-jdPkO?cp8ZY3|~U7dQ844$>T^`AGACYzgwjPdYDf3afC1 z?+cGJXO0Ku?+pJmye~YJ-3h)o{B!VBx1R9N!qZ&u125Vu3)2yRdn=uTbOrsVP^9D4 zGUt$vds-mSZFqV*e*eU|r2>MUPLGfahr+9A`bihL^f%LaR12sQkOxJ&t?(e^PZ!He z7t)FK&A#Gy&L!;|vY(3@gKiW1~k$Y+Otl54Cvr-L7y{{&yUnVm-8F-T4qPLyew4vXKJU+mYxeP2G_+s`_!ks2)&YNiVh!#`4i%7|$wx@SB3;Bi_lXzNnZxh?!~Ev>{$2WV z{5_NApLl=A2CM%7yQKZalCZQMUrFN2gm4A$Jbd?B#zw%8g7=0W3;(k4OyPgZPXaGqU*KCPyqhl~ z=*QgRJ-GiB*LLq!&}ZYg@8m^uAForm_4DXAAXed)0FTE(v3Px`nBnH-lcrF1Ru7@e z40L^=_G~k&#&hlYX4V8OuW4q^SeR>pUciuFn%Nv+6fhguP}|HJfCaxcvoI`_dc348 z0cPH8X7*Ts3;t?malmrm9H35NV26R3Dg$%E;*^7%l3_r)N16<@z+L7{U?wmZSPtAq zVOs-h0P5@w%nwFF4t`RRMTnoLQ~_&&O+X*~G%6UzKs8Vg)Z_P-bT1+bUl*{zGGc+R zpNu7RH?SOF2(SoPi*L3y0n71iI~@!FS9}$4DbNRBv#bWz;!D7dz;b*Q*%`)1^*{p) z0X6{RfUZLftQc4htfY8+K+hhAOD)g^*zl}@MFB0IGq5zED=-Tf;%{K5h^{lRE--wY zfnLA{UZhO9xof#uuKUqI*2(O*Odx;_rM?8W#2=Iq1x0X6`0fz<~L ztQJ`NHRKATuAvg+4XFPK?E;nqOMtGw7?=jeVDSZvA7H_+$WP(R$Pe_ng8WY)KTroO zxQ2ECi-83|R~yFai4L^KH6Fb^W1+xWC#<)?oW~Il3~^)3=P8WmXK{A~7=?>{hk@lI z8SCf*`iqQ(0SjJXtPmJ7iLon0$2DZ%u8_wJ#`HiR{2E{vuwX7@UAlq32>gIKOBgEw z7UQahqC48Z0vGy#KDmtL0yEd3JYdcS| z5isOq#?AsWf%QP$M(}Y19jF6l0`-K2jL~1OtNnyA*B(d*`@6)FFf7MpgwTnPV1DR3e{@x7zCdNzYTHIP!`1nXgdaIT>`Z30a`DAt*^h< zFT*;^YPsbKiU(w&h?^^s7`DsP`(hjj8}`y7~pgjKgz?@T{69SGFPwY=@%mjacuu<5@AXO&fIDvBpKH=4v$u zDh?kxdW?%$tP-0yLau-Vl(2%AVFAkV`>B~7GS`+WrEx*(pOxBE%1~{MGC+F?zFzr) zwo0iglq*INjH+9JZxQSjCp!e>WL0A-s~D9WD}QYb_=JK*2r&8gX4c8V48LqwlltCK zYI9u!wE3ERx=w6zB2qwn7OIij7x5Ohk{2!V>7dnmw~iS^=dP9azYgfz{{(tnHA!h z9MN`xb{MoBG=JcTb{MoYLo=JsQFvMnXtkh01#nJ>e;Yt^!Y35JBuZO(N#>qvom|yu z4i81!mV!^2{2UI%2sDvZfo#w6`d}$|HNmzWE-!OQoz$MPLlAZHgRdr;s}1ZfKmlFR zqFbOtblQAnhEW}MQ2UNQCvmw_^4L8lEhVN+!#HyDtW#zQc8gyPEsc_ja~ z<0=pu)r48Pg*-DqWX}b!ZTNOcGukM_%Y@Hm%eHfn)}OQB5r>aFjlwhW$WWWN{g5{ceA1r4zJ=yfn{hj~ zb=az(RmzQ9YF8C_kUy=bfx*1zToC>(0xc6XS94>m 
z#&nEn%#1PO-vnN9*sreT*>k$X$p-onH2FH4=#I9P1|Q-Z0v=_342;A^&JUjU@Fk%A zCw#Yo$5QO8MZQ#48EBcHVe7~_l2J8i*`T?TK9ZY<^`?5+UREFR^>(p^t%yG}u~mLN znasoYWONMGF$z3tyj$z-0$LhqXF)rUXR0G~Tn1FD>Wnb9d_c}an%w>dwv*Dd%m?Ue z=Ir9#Pp?KX|)T-JmpD zJz$e^eI0_l`Mw7BqI6y4)}M54{SopHmP}+~!Re=5@Ypuk!0Zr~?ZYxto9vpkoL1p^ z)q+G%=7ZHhkY~(XBS{4!%L?vG8tQ{Bzaa`8|Q#mD0nUTo4gY5 z!K)Iy^1!RwoV@Z)<+aO<5g3HJI6J`J!nqLl%N)7+BeA#FnVCPtD;&HEUNNxO&<8T| z3e-yaF~Cl~ku;CCf!D672G#@5vyDq4Umpmeho93=Up*q zC3`V`{RfYrC<8k!&qJ~wQkR)odbl~13tnfV4eSrpt#$6BRBjGs8{2Rv+bkZ)Lh?Eb z9!p~kY%1}9BYD+>Rtwq@BDC#WiJhtXT&_2$Uws{6-^Uu*@A7Shtz&5-v!iAjOT03{ z%PHQ#M#$?csoeUSF-17JRe_iD8#u$GdbQ|GUT&zA%E*X#U7~w9+w^G$)*THlqJO}BDWaW@;EQfc^$$`$3FQvksr8dKnO+#nPdOglQiO?bkUcRle zRV_DK79dy7w&&(ZGV<3V{|I^eMW@B`V07@I?rTOE?kK*5>WA}Z(WYqKC#bEJEz;ok zD3_4-FwUqq$&ax>ZIP*doGm-Z{E>{GTfUFry*rfvjoUhY7Azm4(Sn)Iq+Uf<)q9Iu%Ae}qXg=4LfEwjv2XXxzZ>rb-Y1ztlx zHn1}^zTt>Q?DsTpX-4PK z*rj(T+qS^IA{&*T$J|T-jWJ+F zj`-7i8^wMDn_zB?LTwt)$!E;WNk6cmcR$V*u#cqc0=I4@8<~!&61Bsbeq4R%y;1l< z1G^&S!LP+QPcYRHRc7Q&ZL0woQPHkMPk ztPbJFY8rSo9TjYQ)RXF(1zO=T1KS|aKTs>N;g;Nk|0(dYKVe{G@{%L|S3uK%HVyB+ zWXF+Y{*CJ-KaS{Z!58n~`qUX%3ZD5nuSyne_YlNoBkq{Fc1ZT9pPOk1)j1!$oUa;K zgt`99w=j|Ua5H0*Yd?7T-oReY>^{I$|Ex5#5AZ|1d?AFqy9TzG7J#;Plu4bo+REFD zbHJmc72ao%K-==*%oXGDr0FQ%AJn!=@MwC1vE%0CRAg&Xr!VdyCufY0>~45JqA^pi zdeb$_&&*!J581=PtDqNayO)4I;LcN8_mbTs#I1tH7D>Ad zahttaWn@`$av|B4AaCV(#;(erbC}fOM+g^ITFXfqWv1iFTXrw+uPsyK4V5njj-(+Z$4Zn@GL^9xNscXc zKjf&>hKpw<>LJEr;!4b#@N{F`RV2P@7livqu7+%&@0jB{kGrGb_J8qBj6 zS{7(&l63i?=|RJ^ZAn)QTDnm>nue92%>m7WQgJqD0xd^e`L&44M%-+QgL8*(1g!}) z?(7r$nAW{R>pO-6NBR7&xOb79&X^~co8Jfdf|do^PNKDpH;fDnPosU%+eT9k0}hc$ z&b1%;?HAy#rogigFbgydXp}|~$n7!dy$9D(lf*t6e&FaaMAJAa5z^n4x09sr*pA7X zN&ed8aPZRLkhx?b?o^l?zozeUcgoGf;o7n4nPvtywK)N0?85z$*YHe^`i<_eSS)3% z8xcm(-XTP*8GeA1Q6AztB2Khnh({4<&Y+PhAxE+*2h9mI`uL}yU$`+I&cCiwJGP8* z-o_+8jmXnv#E0Z>?*ck#%kV72M>S8JDqutNWGK>y;+{-^iOe{iliZ@ZCn-%;_i$~p z`moxR23};^XA!>@xW|BJ1*WKKvQ|TNEpUMy1UkB#b41(a0(}FT4^QI8^kL9Wfwq)r 
z{FrPRKd5}mB&R%$NIP^TV<%hFVr@X(Y5a<3%N0h|#OQTJTCc9yza!sBA)nE_!5K7X z#As-Kg(97OHe<_M`M1_3qqQ#7mQ18gM%ouBEu0QM7qo4logzY8+ems&wO?*L(z;Ls zKDt$my+eFj-c5?_~x0qoS)CRg2RfxQWlvgUx4_ez0gy9<=&2v3?`Mt+jyig8~OEgqQ7U;R4kNSIM zxSA`2=JzS^%FD&wF)E{FeG_sjKT(G;@M>K1;C0ABzOnEB zGvlTPye!tX9XE}jcLDvM7@Io0kf@TBL3%9=^h=;mq&|QnS~6%pdAJiv1UOFrftC;2 zQl7+}H|K)ZWgTO;iPqA8ZS0HROzjJdUnH+a-Wufn*d(u{?_V_Sd$LSuY&!MCdD#bo ztp#BcOZr}Ck2zk{?}gxHzp2$0q;{8prU7kg-K`$poL0k>7a#3XgQ#b zmCy=73pS$pqJAZy1%b9*+J0dV!do{nMk)fv4dSS6jmTTFrFH*7^0e=b^%}I!G`ZT^ zFw*`~nKAXbIUEXJS=$-=4XhNZXl{ZpfN4c`h#qm_xSxHR;-q%fMA5E-MTTgqR{_#H z6*2a2X*>DzU}--`o9kz=dgvZX;GqN_O5mXc9!lV$1pW_7pq?|*VGj!^m~)Dmc6?*S z@Ipy=l_b1Y2-}M?^5P#Hgm9lhD(_YDjR z)4q?KNH4_xn4DP8I6-hWJh`Pj1LNx-B&26TSd??U5U#~J4Y}JwSZpUx{h$B$l+ZuY znhX^BeT0A$1dI`IrhtnDd{@AY0`3&BLco&(UKH?_fL8tZk{=VWhkyeG93kKY0b>N5 zDd1uO-xYA9fI9`O5b&gc7X`c}pp~~!zJNUh94O!j0VfC;Bj8K{7Yq2VfExweDPVU z7@-m-fhmYfiB5>148l`#f?k0yNyg(}JbJ_?;XxmrhJRx~MWUpr2z`XYBRXzMOiIMG z=qYhgU?PV2tWzRVQX*#aImBm_5HT%262&L!i8Ct4@tK;Ms_=+Rnl>#uQEyK24@M`w zNV-haf4;(lh&UcKapHNBNThcyx1g)U{Ncibh>b!9QQwMsQq1p;2daaZU)0|s(mhUc z#D>P6m|sx)+30})02k0hKE$FCTs^8rkNmimkOSHvhGgIIsD91({jKk0Tc zEb4a=!-ayR^%uyB5Ij(vm|t8^L=@K(v3zO!=^Y{E4}d2pu1_M0>lx+86|Xk(ivx|C zD6XHJKoJ&ET&Iadj>}NiAYyCwz z%#!5y6Y_~zrxqp4a}diH`OTH&7wr`hMLR}JFKvH@kiVrAKjBHhq3y+-JW9)7Cdof^ zJfC00l9rC)^GnNLBgx-5kiyuMEl~DkiTA-e`34Eaz#0&AP*%G^H+Yz*Sm5hk2JrHotR(5 zazq;E7wthsHlLq<<8JK4d?Fr^{WdJT;t4m7W0YZ?*y7yf3J`DeD14x%oiIf pWRL{8->*{@y+noriuo5x=wfq>Q7qdy=@y~= 0: + # Verify our LD_PRELOAD (netnanny+logintercept for v3, aegis for v4). 
+ if semcomp(instance.chutes_version or "0.0.0", "0.5.5") >= 0: + if dump["env"].get("LD_PRELOAD") != "/usr/local/lib/chutes-aegis.so": + logger.warning( + f"{log_prefix} Invalid environment found: LD_PRELOAD tampering (expected aegis)" + ) + return False + elif semcomp(instance.chutes_version or "0.0.0", "0.3.61") >= 0: if ( dump["env"].get("LD_PRELOAD") != "/usr/local/lib/chutes-netnanny.so:/usr/local/lib/chutes-logintercept.so" @@ -1475,8 +1481,6 @@ async def check_runint(instance: Instance) -> bool: return False try: - from ecdsa import VerifyingKey, SECP256k1, BadSignatureError - challenge = secrets.token_hex(16) payload = {"challenge": challenge} enc_payload, _ = encrypt_instance_request(json.dumps(payload), instance) @@ -1514,35 +1518,76 @@ async def check_runint(instance: Instance) -> bool: return False commitment_bytes = bytes.fromhex(instance.rint_commitment) - if len(commitment_bytes) != 162 or commitment_bytes[0] != 0x03: - logger.error( - f"RUNINT: {instance.instance_id=} {instance.miner_hotkey=} " - f"invalid commitment format: len={len(commitment_bytes)} prefix={commitment_bytes[0] if commitment_bytes else None}" - ) - return False - if commitment_bytes[1] != 0x03: - logger.error( - f"RUNINT: {instance.instance_id=} {instance.miner_hotkey=} " - f"invalid commitment version: {commitment_bytes[1]}" - ) - return False - pubkey_bytes = commitment_bytes[2:66] - vk = VerifyingKey.from_string(pubkey_bytes, curve=SECP256k1) - # Message format: challenge || epoch (8 bytes LE) - epoch_bytes = epoch.to_bytes(8, byteorder="little") - msg = challenge.encode() + epoch_bytes - msg_hash = hashlib.sha256(msg).digest() - sig_bytes = bytes.fromhex(signature_hex) + # Detect v4 (Ed25519) vs v3 (SECP256k1) commitment + is_v4 = commitment_bytes[0] == 0x04 - try: - vk.verify_digest(sig_bytes, msg_hash) - except BadSignatureError: - logger.error( - f"RUNINT: {instance.instance_id=} {instance.miner_hotkey=} " - f"signature verification failed" - ) - return False + if is_v4: + 
# v4: Ed25519 commitment (146 bytes) + from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PublicKey + + if len(commitment_bytes) != 146: + logger.error( + f"RUNINT v4: {instance.instance_id=} {instance.miner_hotkey=} " + f"invalid commitment length: {len(commitment_bytes)} != 146" + ) + return False + if commitment_bytes[1] != 0x04: + logger.error( + f"RUNINT v4: {instance.instance_id=} {instance.miner_hotkey=} " + f"invalid commitment version: {commitment_bytes[1]}" + ) + return False + pubkey_bytes = commitment_bytes[2:34] # Ed25519 (32 bytes) + + # Challenge-response: SHA256(challenge_string || epoch_bytes_8_LE) + epoch_bytes = epoch.to_bytes(8, byteorder="little") + msg_hash = hashlib.sha256(challenge.encode() + epoch_bytes).digest() + sig_bytes = bytes.fromhex(signature_hex) + + pk = Ed25519PublicKey.from_public_bytes(pubkey_bytes) + try: + # aegis pre-hashes with SHA256 then signs the hash + pk.verify(sig_bytes, msg_hash) + except Exception: + logger.error( + f"RUNINT v4: {instance.instance_id=} {instance.miner_hotkey=} " + f"signature verification failed" + ) + return False + else: + # v3: SECP256k1 commitment (162 bytes) + from ecdsa import VerifyingKey, SECP256k1, BadSignatureError + + if len(commitment_bytes) != 162 or commitment_bytes[0] != 0x03: + logger.error( + f"RUNINT: {instance.instance_id=} {instance.miner_hotkey=} " + f"invalid commitment format: len={len(commitment_bytes)} prefix={commitment_bytes[0] if commitment_bytes else None}" + ) + return False + if commitment_bytes[1] != 0x03: + logger.error( + f"RUNINT: {instance.instance_id=} {instance.miner_hotkey=} " + f"invalid commitment version: {commitment_bytes[1]}" + ) + return False + pubkey_bytes = commitment_bytes[2:66] + vk = VerifyingKey.from_string(pubkey_bytes, curve=SECP256k1) + + # Message format: challenge || epoch (8 bytes LE) + epoch_bytes = epoch.to_bytes(8, byteorder="little") + msg = challenge.encode() + epoch_bytes + msg_hash = hashlib.sha256(msg).digest() + 
sig_bytes = bytes.fromhex(signature_hex) + + try: + vk.verify_digest(sig_bytes, msg_hash) + except BadSignatureError: + logger.error( + f"RUNINT: {instance.instance_id=} {instance.miner_hotkey=} " + f"signature verification failed" + ) + return False # Check epoch is advancing (detect replay attacks) epoch_key = f"rint_epoch:{instance.instance_id}" @@ -1559,7 +1604,7 @@ async def check_runint(instance: Instance) -> bool: logger.success( f"RUNINT: {instance.instance_id=} {instance.miner_hotkey=} " - f"verification successful {epoch=}" + f"verification successful {epoch=} {'v4' if is_v4 else 'v3'}" ) return True @@ -1571,6 +1616,203 @@ async def check_runint(instance: Instance) -> bool: return False +async def verify_bytecode_integrity(instance: Instance, chute: Chute) -> bool: + """ + Verify bytecode integrity of a running instance. + + Flow: + 1. Generate random challenge + 2. Call miner's /_integrity_verify with target modules + 3. Download manifest from S3 (via graval_worker, cached) + 4. Compare miner's reported hashes against manifest ground truth + + NOT wired into automation — call manually or from a future watchtower check. + """ + if semcomp(chute.chutes_version or "0.0.0", "0.5.5") < 0: + return True # V2 manifest only for >= 0.5.5 + + challenge = secrets.token_hex(16) + + # Target critical modules based on template. + if "sglang" in (chute.standard_template or ""): + modules = "sglang.srt.entrypoints.openai.serving_chat,sglang.srt.server" + elif "vllm" in (chute.standard_template or ""): + modules = "vllm.entrypoints.openai.serving_chat,vllm.entrypoints.openai.api_server" + else: + modules = "" + + # Call miner's /_integrity_verify endpoint. 
+ payload = {"challenge": challenge, "modules": modules} + enc_payload, _ = encrypt_instance_request(json.dumps(payload), instance) + path, _ = encrypt_instance_request("/_integrity_verify", instance, hex_encode=True) + + try: + async with miner_client.post( + instance.miner_hotkey, + f"http://{instance.host}:{instance.port}/{path}", + enc_payload, + timeout=30.0, + ) as resp: + if resp.status != 200: + logger.error( + f"BYTECODE: {instance.instance_id=} {instance.miner_hotkey=} " + f"returned {resp.status}" + ) + return False + miner_result = await resp.json() + except Exception as exc: + logger.error( + f"BYTECODE: {instance.instance_id=} {instance.miner_hotkey=} " + f"error calling /_integrity_verify: {exc}" + ) + return False + + if "error" in miner_result: + logger.error( + f"BYTECODE: {instance.instance_id=} {instance.miner_hotkey=} " + f"miner error: {miner_result['error']}" + ) + return False + + # Get expected manifest data from S3 via graval_worker task (called directly). + try: + async with get_session() as session: + image_id = ( + ( + await session.execute( + select(Chute.image_id).where(Chute.chute_id == chute.chute_id) + ) + ) + .unique() + .scalar_one_or_none() + ) + patch_version = ( + ( + await session.execute( + select(Image.patch_version).where(Image.image_id == image_id) + ) + ) + .unique() + .scalar_one_or_none() + ) + + from api.graval_worker import verify_bytecode_integrity as verify_bytecode_integrity_task + + expected_result = await verify_bytecode_integrity_task( + image_id, patch_version, challenge, modules + ) + except Exception as exc: + logger.error(f"BYTECODE: {instance.instance_id=} failed to get expected manifest: {exc}") + return False + + # Cross-reference: compare miner's disk_hash against manifest for each module. 
+ for mod_name, mod_info in miner_result.get("modules", {}).items(): + if not mod_info.get("in_manifest"): + continue + + if not mod_info.get("disk_matches_manifest", False): + logger.error( + f"BYTECODE: integrity MISMATCH {mod_name} on " + f"{instance.instance_id=} {instance.miner_hotkey=}" + ) + return False + + # Verify the manifest_hash matches what we have on file from build time. + manifest_hash = mod_info.get("manifest_hash", "") + expected_entries = expected_result.get("entries", {}) + mod_path = mod_info.get("path", "") + if mod_path in expected_entries: + expected_hash = expected_entries[mod_path].get("hash_hex", "") + if expected_hash and expected_hash != manifest_hash: + logger.error( + f"BYTECODE: manifest hash doesn't match build-time manifest for " + f"{mod_name} on {instance.instance_id=}: " + f"miner={manifest_hash} expected={expected_hash}" + ) + return False + + logger.success( + f"BYTECODE: {instance.instance_id=} {instance.miner_hotkey=} verification successful" + ) + return True + + +async def verify_package_integrity(instance: Instance, chute: Chute) -> dict: + """ + Get package-level integrity summary from a running instance. + + Returns dict with manifest_version, total_packages, failed_packages, all_verified. + + NOT wired into automation — call manually or from a future watchtower check. 
+ """ + if semcomp(chute.chutes_version or "0.0.0", "0.5.5") < 0: + return { + "manifest_version": 0, + "total_packages": 0, + "failed_packages": {}, + "all_verified": True, + } + + challenge = secrets.token_hex(16) + payload = {"challenge": challenge} + enc_payload, _ = encrypt_instance_request(json.dumps(payload), instance) + path, _ = encrypt_instance_request("/_integrity_packages", instance, hex_encode=True) + + try: + async with miner_client.post( + instance.miner_hotkey, + f"http://{instance.host}:{instance.port}/{path}", + enc_payload, + timeout=60.0, + ) as resp: + if resp.status != 200: + logger.error( + f"PKGINTEG: {instance.instance_id=} {instance.miner_hotkey=} " + f"returned {resp.status}" + ) + return { + "manifest_version": 0, + "total_packages": 0, + "failed_packages": {}, + "all_verified": False, + } + result = await resp.json() + except Exception as exc: + logger.error( + f"PKGINTEG: {instance.instance_id=} {instance.miner_hotkey=} " + f"error calling /_integrity_packages: {exc}" + ) + return { + "manifest_version": 0, + "total_packages": 0, + "failed_packages": {}, + "all_verified": False, + } + + packages = result.get("packages", {}) + manifest_version = result.get("manifest_version", 0) + + failed_packages = {name: info for name, info in packages.items() if info.get("errors", 0) > 0} + + if failed_packages: + logger.warning( + f"PKGINTEG: package integrity errors on {instance.instance_id=}: " + f"{list(failed_packages.keys())}" + ) + else: + logger.success( + f"PKGINTEG: {instance.instance_id=} {instance.miner_hotkey=} " + f"all {len(packages)} packages verified" + ) + + return { + "manifest_version": manifest_version, + "total_packages": len(packages), + "failed_packages": failed_packages, + "all_verified": len(failed_packages) == 0, + } + + async def main(): """ Main loop, continuously check all chutes and instances. 
From 28941f285611d7bb58a9d92f1bb50ba18d0017ad Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Thu, 12 Feb 2026 10:41:06 -0500 Subject: [PATCH 02/58] cllmv v2 --- api/chute/util.py | 68 +++++++++++++++++++++++++++++------------ api/instance/router.py | 23 ++++++++++++++ api/instance/schemas.py | 1 + 3 files changed, 73 insertions(+), 19 deletions(-) diff --git a/api/chute/util.py b/api/chute/util.py index c722cba3..29c80242 100644 --- a/api/chute/util.py +++ b/api/chute/util.py @@ -82,7 +82,7 @@ track_request_completed, track_request_rate_limited, ) -from cllmv import validate as cllmv_validate +from cllmv import validate as cllmv_validate, validate_v2 as cllmv_validate_v2 # Tokenizer for input/output token estimation. @@ -842,15 +842,30 @@ async def _invoke_one( + target.rint_nonce + chute.image.package_hashes["hash"] ) - if not cllmv_validate( - data.get("id") or "bad", - data.get("created") or 0, - text, - verification_token, - challenge_val, - model_identifier, - chute.revision, - ): + # Try V2 (HMAC-SHA256 with session key) first, fall back to V1 + cllmv_v2_key = (target.extra or {}).get("cllmv_session_key") + if cllmv_v2_key: + cllmv_ok = cllmv_validate_v2( + data.get("id") or "bad", + data.get("created") or 0, + text, + verification_token, + cllmv_v2_key, + challenge_val, + model_identifier, + chute.revision, + ) + else: + cllmv_ok = cllmv_validate( + data.get("id") or "bad", + data.get("created") or 0, + text, + verification_token, + challenge_val, + model_identifier, + chute.revision, + ) + if not cllmv_ok: logger.warning( f"CLLMV FAILURE: STREAMED {target.instance_id=} {target.miner_hotkey=} {chute.name=}" ) @@ -1053,15 +1068,30 @@ async def _invoke_one( + target.rint_nonce + chute.image.package_hashes["hash"] ) - if not verification_token or not cllmv_validate( - json_data.get("id") or "bad", - json_data.get("created") or 0, - text, - verification_token, - challenge_val, - model_identifier, - chute.revision, - ): + # Try V2 (HMAC-SHA256 with session key) 
first, fall back to V1 + cllmv_v2_key = (target.extra or {}).get("cllmv_session_key") + if cllmv_v2_key: + cllmv_ok = verification_token and cllmv_validate_v2( + json_data.get("id") or "bad", + json_data.get("created") or 0, + text, + verification_token, + cllmv_v2_key, + challenge_val, + model_identifier, + chute.revision, + ) + else: + cllmv_ok = verification_token and cllmv_validate( + json_data.get("id") or "bad", + json_data.get("created") or 0, + text, + verification_token, + challenge_val, + model_identifier, + chute.revision, + ) + if not cllmv_ok: logger.warning( f"CLLMV FAILURE: {target.instance_id=} {target.miner_hotkey=} {chute.name=}" ) diff --git a/api/instance/router.py b/api/instance/router.py index bf577d69..d93c9f9a 100644 --- a/api/instance/router.py +++ b/api/instance/router.py @@ -16,6 +16,7 @@ import orjson as json # noqa from api.image.util import get_inspecto_hash import api.miner_client as miner_client +import cllmv as _cllmv from loguru import logger from typing import Optional, Tuple from datetime import datetime, timedelta @@ -1151,6 +1152,28 @@ async def _validate_launch_config_instance( detail=f"Session key derivation failed: {exc}", ) + # CLLMV V2: decrypt miner's ephemeral HMAC session key from init blob + cllmv_init = getattr(args, "cllmv_session_init", None) + if cllmv_init and semcomp(instance.chutes_version or "0.0.0", "0.5.5") >= 0: + x25519_priv = os.environ.get("CLLMV_X25519_PRIVATE_KEY") + if x25519_priv: + try: + cllmv_session_key = _cllmv.decrypt_session_key(cllmv_init, x25519_priv) + if cllmv_session_key: + if instance.extra is None: + instance.extra = {} + instance.extra["cllmv_session_key"] = cllmv_session_key + logger.info(f"CLLMV V2 session key decrypted for {instance.instance_id}") + else: + logger.warning( + f"CLLMV V2 session key decryption failed for {instance.instance_id} " + f"(invalid init blob or signature)" + ) + except Exception as exc: + logger.warning(f"CLLMV V2 session key decryption error: {exc}") + else: 
+ logger.debug("CLLMV_X25519_PRIVATE_KEY not set, skipping V2 session key decrypt") + return launch_config, nodes, instance, validator_pubkey diff --git a/api/instance/schemas.py b/api/instance/schemas.py index 7d8fb62c..24eabe36 100644 --- a/api/instance/schemas.py +++ b/api/instance/schemas.py @@ -66,6 +66,7 @@ class LaunchConfigArgs(BaseModel): tls_cert: Optional[str] = None tls_cert_sig: Optional[str] = None e2e_pubkey: Optional[str] = None + cllmv_session_init: Optional[str] = None env: str code: Optional[str] = None run_code: Optional[str] = None From fdb59aa24a90b11413c60af64009c07f5bc47c69 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Sat, 14 Feb 2026 07:35:49 -0500 Subject: [PATCH 03/58] https, mtls, httpx client change for http/2, E2E encryption, aegis cert validation, logging server tls, etc. --- api/chute/util.py | 107 ++++--- api/e2e/__init__.py | 0 api/e2e/router.py | 554 +++++++++++++++++++++++++++++++++++++ api/graval_worker.py | 6 +- api/instance/connection.py | 157 +++++++++++ api/instance/router.py | 226 +++++++++++++-- api/instance/schemas.py | 3 + api/job/router.py | 3 +- api/main.py | 2 + api/miner_client.py | 135 +++++++-- pyproject.toml | 1 + uv.lock | 66 +++++ 12 files changed, 1156 insertions(+), 104 deletions(-) create mode 100644 api/e2e/__init__.py create mode 100644 api/e2e/router.py create mode 100644 api/instance/connection.py diff --git a/api/chute/util.py b/api/chute/util.py index 29c80242..dfd49069 100644 --- a/api/chute/util.py +++ b/api/chute/util.py @@ -3,7 +3,7 @@ """ import os -import aiohttp +import httpx import asyncio import re import uuid @@ -234,15 +234,13 @@ async def safe_store_invocation(*args, **kwargs): logger.error(f"SAFE_STORE_INVOCATION: failed to insert new invocation record: {str(exc)}") -async def get_miner_session(instance: Instance, timeout: int = 600) -> aiohttp.ClientSession: +async def get_miner_session(instance: Instance, timeout: int = 600): """ - Get or create an aiohttp session for an instance. 
+ Get or create an httpx client for an instance (with TLS if available). """ - return aiohttp.ClientSession( - base_url=f"http://{instance.host}:{instance.port}", - timeout=aiohttp.ClientTimeout(connect=10.0, total=timeout), - read_bufsize=8 * 1024 * 1024, - ) + from api.instance.connection import get_instance_client + + return await get_instance_client(instance, timeout=timeout) async def selector_hourly_price(node_selector) -> float: @@ -698,7 +696,8 @@ async def _invoke_one( ) path = encrypted_path - session, response = None, None + response = None + stream_response = None timeout = 1800 if ( semcomp(target.chutes_version or "0.0.0", "0.4.3") >= 0 @@ -715,34 +714,48 @@ async def _invoke_one( session = await get_miner_session(target, timeout=timeout) headers, payload_string = sign_request(miner_ss58=target.miner_hotkey, payload=payload) headers["X-Chutes-Serialized"] = "true" - response = await session.post( - f"/{path}", - data=payload_string, - headers=headers, - ) - if response.status != 200: + + if stream: + # Use streaming request for streaming responses. + stream_response = await session.send( + session.build_request("POST", f"/{path}", content=payload_string, headers=headers), + stream=True, + ) + response = stream_response + else: + response = await session.post( + f"/{path}", + content=payload_string, + headers=headers, + ) + + if response.status_code != 200: logger.info( - f"Received response {response.status} from miner {target.miner_hotkey} instance_id={target.instance_id} of chute_id={target.chute_id}" + f"Received response {response.status_code} from miner {target.miner_hotkey} instance_id={target.instance_id} of chute_id={target.chute_id}" ) # Check if the instance restarted and is using encryption V2. - if response.status == status.HTTP_426_UPGRADE_REQUIRED: + if response.status_code == status.HTTP_426_UPGRADE_REQUIRED: raise KeyExchangeRequired( f"Instance {target.instance_id} responded with 426, new key exchange required." 
) # Check if the instance is overwhelmed. - if response.status == status.HTTP_429_TOO_MANY_REQUESTS: + if response.status_code == status.HTTP_429_TOO_MANY_REQUESTS: raise InstanceRateLimit( f"Instance {target.instance_id=} has returned a rate limit error!" ) # Handle bad client requests. - if response.status == status.HTTP_400_BAD_REQUEST: - raise BadRequest("Invalid request: " + await response.text()) + if response.status_code == status.HTTP_400_BAD_REQUEST: + if stream_response: + await stream_response.aread() + raise BadRequest("Invalid request: " + response.text) - if response.status == 451: - logger.info(f"BAD ENCRYPTION: {await response.text()} from {payload=}") + if response.status_code == 451: + if stream_response: + await stream_response.aread() + logger.info(f"BAD ENCRYPTION: {response.text} from {payload=}") response.raise_for_status() @@ -752,7 +765,7 @@ async def _invoke_one( any_chunks = False chunk_idx = 0 cllmv_verified = False - async for raw_chunk in response.content: + async for raw_chunk in stream_response.aiter_bytes(): chunk = await asyncio.to_thread(decrypt_instance_response, raw_chunk, target, iv) # Track time to first token and (approximate) token count; approximate @@ -974,7 +987,7 @@ async def _invoke_one( else: # Non-streamed responses - always encrypted. 
headers = response.headers - body_bytes = await response.read() + body_bytes = response.content data = {} response_data = json.loads(body_bytes) if "json" in response_data: @@ -1202,20 +1215,9 @@ async def _invoke_one( yield data finally: - if response: + if stream_response: try: - async for _ in response.content: - pass - except Exception: - pass - finally: - try: - response.close() - except Exception: - pass - if session: - try: - await session.close() + await stream_response.aclose() except Exception: pass @@ -1584,7 +1586,7 @@ async def invoke( elif isinstance(exc, InvalidResponse): error_message = "INVALID_RESPONSE" instant_delete = True - elif isinstance(exc, aiohttp.ClientResponseError) and exc.status >= 500: + elif isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code >= 500: error_message = f"HTTP_{exc.status}: {error_message}" # Server returned an error - connection worked, server is broken # skip_disable_loop = True @@ -1718,24 +1720,17 @@ async def load_llm_details(chute, target): } payload, iv = await asyncio.to_thread(encrypt_instance_request, json.dumps(payload), target) - async with aiohttp.ClientSession( - timeout=aiohttp.ClientTimeout(connect=5.0, total=60.0), - read_bufsize=8 * 1024 * 1024, - raise_for_status=True, - ) as session: - headers, payload_string = sign_request(miner_ss58=target.miner_hotkey, payload=payload) - headers["X-Chutes-Serialized"] = "true" - async with session.post( - f"http://{target.host}:{target.port}/{path}", data=payload_string, headers=headers - ) as resp: - raw_data = await resp.json() - logger.info( - f"{target.chute_id=} {target.instance_id=} {target.miner_hotkey=}: {raw_data=}" - ) - info = json.loads( - await asyncio.to_thread(decrypt_instance_response, raw_data["json"], target, iv) - ) - return info["data"][0] + session = await get_miner_session(target, timeout=60) + headers, payload_string = sign_request(miner_ss58=target.miner_hotkey, payload=payload) + headers["X-Chutes-Serialized"] = "true" + 
resp = await session.post(f"/{path}", content=payload_string, headers=headers) + resp.raise_for_status() + raw_data = resp.json() + logger.info(f"{target.chute_id=} {target.instance_id=} {target.miner_hotkey=}: {raw_data=}") + info = json.loads( + await asyncio.to_thread(decrypt_instance_response, raw_data["json"], target, iv) + ) + return info["data"][0] async def get_mtoken_price(user_id: str, chute_id: str) -> tuple[float, float, float]: diff --git a/api/e2e/__init__.py b/api/e2e/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/api/e2e/router.py b/api/e2e/router.py new file mode 100644 index 00000000..865e0b93 --- /dev/null +++ b/api/e2e/router.py @@ -0,0 +1,554 @@ +""" +E2E encryption router — instance discovery with nonces and encrypted invocation relay. +""" + +import math +import time +import uuid +import secrets +import asyncio +import random +import traceback +import orjson as json +from loguru import logger +from fastapi import APIRouter, Depends, Header, HTTPException, Request, status +from fastapi.responses import StreamingResponse, Response +from api.config import settings +from api.user.service import get_current_user +from api.user.schemas import User +from api.chute.util import ( + get_one, + get_miner_session, + get_mtoken_price, + update_usage_data, + safe_store_invocation, +) +from api.chute.schemas import NodeSelector +from api.instance.util import ( + load_chute_targets, + load_chute_target, + is_instance_disabled, + MANAGERS, + update_shutdown_timestamp, + clear_instance_disable_state, +) +from api.util import ( + encrypt_instance_request, + decrypt_instance_response, + has_legacy_private_billing, +) +from api.miner_client import sign_request +from api.rate_limit import rate_limit +from api.gpu import COMPUTE_UNIT_PRICE_BASIS +from api.user.service import chutes_user_id + +router = APIRouter() + +# Lua script for atomic nonce consumption: verify instance_id match + delete +NONCE_CONSUME_LUA = """ +local val = 
redis.call('HGET', KEYS[1], ARGV[1]) +if val == false then return nil end +if val ~= ARGV[2] then return nil end +redis.call('HDEL', KEYS[1], ARGV[1]) +return val +""" + +NONCES_PER_INSTANCE = 10 +NONCE_REDIS_TTL = 75 +NONCE_CLIENT_TTL = 60 +MAX_INSTANCES_RETURNED = 5 + + +@router.get("/instances/{chute_id}") +async def get_e2e_instances( + chute_id: str, + current_user: User = Depends(get_current_user(raise_not_found=True, allow_api_key=True)), + _rate_limit: None = Depends(rate_limit("e2e_instances", requests_per_minute=10)), +): + """ + Discover E2E-capable instances for a chute and get nonces for invocation. + """ + # Load chute and verify access. + chute = await get_one(chute_id) + if not chute: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chute not found") + + # Load active instances. + instances = await load_chute_targets(chute_id, nonce=0) + if not instances: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="No active instances found for this chute", + ) + + # Filter to E2E-capable, non-disabled instances. + eligible = [] + for inst in instances: + e2e_pubkey = (inst.extra or {}).get("e2e_pubkey") + if not e2e_pubkey: + continue + if await is_instance_disabled(inst.instance_id): + continue + eligible.append(inst) + + if not eligible: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="No E2E-capable instances available", + ) + + # Select random subset. + selected = random.sample(eligible, min(len(eligible), MAX_INSTANCES_RETURNED)) + + # Generate nonces and store in Redis hash. 
+ user_id = current_user.user_id + hash_key = f"e2e_nonces:{user_id}:{chute_id}" + redis = settings.redis_client + + # Build nonce mappings: token -> instance_id + nonce_map = {} # token -> instance_id + instance_nonces = {} # instance_id -> [tokens] + for inst in selected: + tokens = [] + for _ in range(NONCES_PER_INSTANCE): + token = secrets.token_urlsafe(24) + nonce_map[token] = inst.instance_id + tokens.append(token) + instance_nonces[inst.instance_id] = tokens + + # Store all nonces in a single HSET call, then set TTL. + if nonce_map: + await redis.hset(hash_key, mapping=nonce_map) + await redis.expire(hash_key, NONCE_REDIS_TTL) + + now = int(time.time()) + result_instances = [] + for inst in selected: + result_instances.append( + { + "instance_id": inst.instance_id, + "e2e_pubkey": (inst.extra or {}).get("e2e_pubkey"), + "nonces": instance_nonces[inst.instance_id], + } + ) + + return { + "instances": result_instances, + "nonce_expires_in": NONCE_CLIENT_TTL, + "nonce_expires_at": now + NONCE_CLIENT_TTL, + } + + +@router.post("/invoke") +async def e2e_invoke( + request: Request, + current_user: User = Depends(get_current_user(raise_not_found=True, allow_api_key=True)), + x_chute_id: str = Header(..., alias="X-Chute-Id"), + x_instance_id: str = Header(..., alias="X-Instance-Id"), + x_e2e_nonce: str = Header(..., alias="X-E2E-Nonce"), + x_e2e_stream: str = Header("false", alias="X-E2E-Stream"), + x_e2e_path: str = Header("/", alias="X-E2E-Path"), +): + """ + Relay an E2E encrypted invocation to a specific instance. + """ + user_id = current_user.user_id + chute_id = x_chute_id + instance_id = x_instance_id + nonce_token = x_e2e_nonce + is_stream = x_e2e_stream.lower() == "true" + + # Validate + consume nonce atomically via Lua script. 
+ hash_key = f"e2e_nonces:{user_id}:{chute_id}" + redis = settings.redis_client + result = await redis.eval(NONCE_CONSUME_LUA, 1, hash_key, nonce_token, instance_id) + if result is None: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Invalid, expired, or already-used nonce", + ) + + # Load instance and verify it's valid. + instance = await load_chute_target(instance_id) + if not instance: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Instance not found", + ) + if instance.chute_id != chute_id: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Instance does not belong to the specified chute", + ) + if not instance.active or not instance.verified: + raise HTTPException( + status_code=status.HTTP_410_GONE, + detail="Instance is no longer active", + ) + + # Load chute for billing info. + chute = await get_one(chute_id) + if not chute: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chute not found") + + # Read raw E2E blob from request body. + e2e_blob = await request.body() + if not e2e_blob: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Empty request body", + ) + + # Transport-encrypt the raw E2E blob. + encrypted_payload, _ = await asyncio.to_thread(encrypt_instance_request, e2e_blob, instance) + # Encrypt the path for routing. + request_path = x_e2e_path if x_e2e_path else "/" + encrypted_path, _ = await asyncio.to_thread( + encrypt_instance_request, request_path.ljust(24, "?"), instance, True + ) + + # Connection tracking. 
+ conn_id = str(uuid.uuid4()) + manager = MANAGERS.get(chute_id) + if manager: + try: + key = f"conn:{chute_id}:{instance_id}" + await asyncio.wait_for( + manager.redis_client.eval( + manager.lua_add_connection, + 1, + key, + conn_id, + int(time.time()), + manager.connection_expiry, + ), + timeout=3.0, + ) + except Exception as e: + logger.warning(f"E2E: Error tracking connection: {e}") + + invocation_id = str(uuid.uuid4()) + parent_invocation_id = str(uuid.uuid4()) + started_at = time.time() + + session = None + response = None + try: + # Send to instance. + session = await get_miner_session(instance, timeout=1800) + headers, payload_string = sign_request( + miner_ss58=instance.miner_hotkey, payload=encrypted_payload + ) + headers["X-Chutes-Serialized"] = "true" + headers["X-E2E-Encrypted"] = "true" + if is_stream: + headers["X-E2E-Stream"] = "true" + + if is_stream: + # Use streaming request for streaming responses. + response = await session.send( + session.build_request( + "POST", f"/{encrypted_path}", content=payload_string, headers=headers + ), + stream=True, + ) + else: + response = await session.post( + f"/{encrypted_path}", + content=payload_string, + headers=headers, + ) + + # Handle transport-level errors. 
+ if response.status_code == 400: + if is_stream: + await response.aread() + raise HTTPException(status_code=400, detail=response.text) + if response.status_code == 429: + if is_stream: + await response.aread() + raise HTTPException( + status_code=429, + detail="Instance is at maximum capacity, try again later", + ) + if response.status_code == 426: + if is_stream: + await response.aread() + raise HTTPException( + status_code=502, + detail="Instance requires key exchange, try a different instance", + ) + if response.status_code != 200: + if is_stream: + await response.aread() + raise HTTPException( + status_code=response.status_code, + detail=f"Instance returned status {response.status_code}", + ) + + multiplier = NodeSelector(**chute.node_selector).compute_multiplier + if chute.boost: + multiplier *= chute.boost + + if is_stream: + return StreamingResponse( + _stream_e2e_response( + response, + session, + instance, + chute, + current_user, + multiplier, + started_at, + invocation_id, + parent_invocation_id, + manager, + conn_id, + request, + ), + media_type="text/event-stream", + ) + else: + # Non-streaming: read full response, transport-decrypt, relay. + raw_body = response.content + decrypted = await asyncio.to_thread(decrypt_instance_response, raw_body, instance) + + # Time-based billing. + duration = time.time() - started_at + compute_units = multiplier * math.ceil(duration) + await _do_billing( + chute, + current_user, + instance, + duration, + compute_units, + multiplier, + None, + invocation_id, + parent_invocation_id, + request, + ) + + # Cleanup. 
+ asyncio.create_task( + settings.redis_client.delete(f"consecutive_failures:{instance.instance_id}") + ) + asyncio.create_task(clear_instance_disable_state(instance.instance_id)) + + return Response(content=decrypted, media_type="application/octet-stream") + + except HTTPException: + raise + except Exception as exc: + logger.error(f"E2E invoke error: {exc}\n{traceback.format_exc()}") + raise HTTPException(status_code=500, detail="Internal error during E2E invocation") + finally: + if not is_stream: + # For streaming, cleanup happens in the generator. + await _cleanup(session, response, manager, chute_id, instance_id, conn_id) + + +async def _stream_e2e_response( + response, + session, + instance, + chute, + user, + multiplier, + started_at, + invocation_id, + parent_invocation_id, + manager, + conn_id, + request, +): + """ + Stream E2E response chunks, extracting usage events for billing. + """ + metrics = {} + try: + async for raw_chunk in response.aiter_bytes(): + # Transport-decrypt each chunk. + try: + decrypted = await asyncio.to_thread(decrypt_instance_response, raw_chunk, instance) + except Exception as exc: + logger.warning(f"E2E stream: transport decrypt failed: {exc}") + continue + + # Parse SSE lines to extract usage data for billing. 
+ # Decrypted chunks are SSE lines like: + # data: {"e2e_init": "..."} - ML-KEM ciphertext + # data: {"e2e": "..."} - encrypted content + # data: {"usage": {...}} - plaintext usage for billing + # data: {"e2e_error": {...}} - error with encrypted message + chunk_str = ( + decrypted.decode("utf-8", errors="replace") + if isinstance(decrypted, bytes) + else decrypted + ) + + if chunk_str.startswith("data: "): + try: + obj = json.loads(chunk_str[6:].encode()) + if isinstance(obj, dict) and "usage" in obj: + usage = obj["usage"] + metrics["it"] = usage.get("prompt_tokens", 0) + metrics["ot"] = usage.get("completion_tokens", 0) + metrics["ct"] = (usage.get("prompt_tokens_details") or {}).get( + "cached_tokens", 0 + ) + except Exception: + pass + + # Relay the decrypted chunk to client as-is. + yield decrypted + + # Billing after stream completes. + duration = time.time() - started_at + compute_units = multiplier * math.ceil(duration) + await _do_billing( + chute, + user, + instance, + duration, + compute_units, + multiplier, + metrics if metrics else None, + invocation_id, + parent_invocation_id, + request, + ) + + # Clear failure tracking on success. + asyncio.create_task( + settings.redis_client.delete(f"consecutive_failures:{instance.instance_id}") + ) + asyncio.create_task(clear_instance_disable_state(instance.instance_id)) + + except Exception as exc: + logger.error(f"E2E stream error: {exc}\n{traceback.format_exc()}") + raise + finally: + await _cleanup(session, response, manager, chute.chute_id, instance.instance_id, conn_id) + + +async def _do_billing( + chute, + user, + instance, + duration, + compute_units, + multiplier, + metrics, + invocation_id, + parent_invocation_id, + request, +): + """ + Handle billing for an E2E invocation. + """ + user_id = user.user_id + balance_used = 0.0 + free_invocation = getattr(request.state, "free_invocation", False) + + if compute_units and not free_invocation: + # Per megatoken pricing for vLLM chutes. 
+ if chute.standard_template == "vllm" and metrics and metrics.get("it"): + per_million_in, per_million_out, cache_discount = await get_mtoken_price( + user_id, chute.chute_id + ) + prompt_tokens = metrics.get("it", 0) or 0 + output_tokens = metrics.get("ot", 0) or 0 + cached_tokens = metrics.get("ct", 0) or 0 + balance_used = ( + prompt_tokens / 1000000.0 * per_million_in + - cached_tokens / 1000000.0 * per_million_in * cache_discount + + output_tokens / 1000000.0 * per_million_out + ) + else: + # Time-based pricing. + discount = 0.0 + if chute.discount and -3 < chute.discount <= 1: + discount = chute.discount + if discount < 1.0: + balance_used = compute_units * COMPUTE_UNIT_PRICE_BASIS / 3600.0 + balance_used -= balance_used * discount + + # Don't charge for private instances. + if ( + not chute.public + and not has_legacy_private_billing(chute) + and chute.user_id != await chutes_user_id() + ): + balance_used = 0 + + if metrics is None: + metrics = {} + metrics["b"] = balance_used + + # Store invocation record. + asyncio.create_task( + safe_store_invocation( + parent_invocation_id, + invocation_id, + chute.chute_id, + chute.user_id, + "e2e_invoke", + user_id, + chute.image_id, + chute.image.user_id, + instance.instance_id, + instance.miner_uid, + instance.miner_hotkey, + duration, + multiplier, + error_message=None, + metrics=metrics, + ) + ) + + # Deduct balance. + asyncio.create_task( + update_usage_data( + user_id, + chute.chute_id, + balance_used, + metrics if chute.standard_template == "vllm" else None, + compute_time=duration, + ) + ) + + # Push back instance shutdown for private chutes. + if ( + not chute.public + and not has_legacy_private_billing(chute) + and chute.user_id != await chutes_user_id() + ): + asyncio.create_task(update_shutdown_timestamp(instance.instance_id)) + + +async def _cleanup(session, response, manager, chute_id, instance_id, conn_id): + """ + Clean up httpx response and connection tracking. 
+ """ + if response: + try: + await response.aclose() + except Exception: + pass + # Don't close session — it's a pooled httpx client. + if manager: + try: + key = f"conn:{chute_id}:{instance_id}" + await asyncio.shield( + manager.redis_client.eval( + manager.lua_remove_connection, + 1, + key, + conn_id, + int(time.time()), + manager.connection_expiry, + ) + ) + except Exception as e: + logger.warning(f"E2E: Error cleaning up connection {conn_id}: {e}") diff --git a/api/graval_worker.py b/api/graval_worker.py index 62e4a1ff..f357a59c 100644 --- a/api/graval_worker.py +++ b/api/graval_worker.py @@ -530,8 +530,9 @@ async def check_live_code(instance: Instance) -> bool: path, _ = encrypt_instance_request("/_slurp", instance, hex_encode=True) async with miner_client.post( instance.miner_hotkey, - f"http://{instance.host}:{instance.port}/{path}", + f"/{path}", payload, + instance=instance, timeout=15.0, ) as resp: data = await resp.json() @@ -560,8 +561,9 @@ async def check_live_code(instance: Instance) -> bool: path, _ = encrypt_instance_request("/_slurp", instance, hex_encode=True) async with miner_client.post( instance.miner_hotkey, - f"http://{instance.host}:{instance.port}/{path}", + f"/{path}", payload, + instance=instance, timeout=12.0, ) as resp: data = await resp.json() diff --git a/api/instance/connection.py b/api/instance/connection.py new file mode 100644 index 00000000..c0bec16b --- /dev/null +++ b/api/instance/connection.py @@ -0,0 +1,157 @@ +"""Instance connection helpers — httpx + HTTP/2 with TLS cert verification.""" + +import ssl +import httpx +import httpcore +from cryptography import x509 +from cryptography.x509.oid import NameOID +from cryptography.hazmat.primitives import serialization +from cryptography.hazmat.primitives.serialization import load_pem_private_key + + +# Cache SSL contexts and cert CNs per instance_id. +_ssl_cache: dict[str, tuple[ssl.SSLContext, str]] = {} + +# Pooled httpx clients per instance (reuse TCP+TLS connections). 
+_client_cache: dict[str, httpx.AsyncClient] = {} + + +def _get_ssl_and_cn(instance) -> tuple[ssl.SSLContext, str]: + """Get or create cached SSL context + CN for an instance.""" + iid = str(instance.instance_id) + if iid in _ssl_cache: + return _ssl_cache[iid] + + ctx = ssl.create_default_context() + ctx.load_verify_locations(cadata=instance.cacert) + + # Load mTLS client cert if available. + extra = instance.extra or {} + client_cert_pem = extra.get("client_cert") + client_key_pem = extra.get("client_key") + client_key_password = extra.get("client_key_password") + if client_cert_pem and client_key_pem: + # Decrypt the client key and load into SSL context. + password_bytes = client_key_password.encode() if client_key_password else None + client_key = load_pem_private_key(client_key_pem.encode(), password=password_bytes) + # Re-serialize unencrypted (in memory only, never written to disk). + client_key_unencrypted = client_key.private_bytes( + encoding=serialization.Encoding.PEM, + format=serialization.PrivateFormat.PKCS8, + encryption_algorithm=serialization.NoEncryption(), + ) + # Write to temporary in-memory for ssl context (load_cert_chain requires files). 
+ import tempfile + import os + + with tempfile.NamedTemporaryFile(mode="wb", suffix=".pem", delete=False) as cf: + cf.write(client_cert_pem.encode()) + cert_tmp = cf.name + with tempfile.NamedTemporaryFile(mode="wb", suffix=".pem", delete=False) as kf: + kf.write(client_key_unencrypted) + key_tmp = kf.name + try: + ctx.load_cert_chain(certfile=cert_tmp, keyfile=key_tmp) + finally: + os.unlink(cert_tmp) + os.unlink(key_tmp) + + cert = x509.load_pem_x509_certificate(instance.cacert.encode()) + cn = cert.subject.get_attributes_for_oid(NameOID.COMMON_NAME)[0].value + _ssl_cache[iid] = (ctx, cn) + return ctx, cn + + +def evict_instance_ssl(instance_id: str): + """Remove cached SSL context and client when an instance is destroyed.""" + iid = str(instance_id) + _ssl_cache.pop(iid, None) + client = _client_cache.pop(iid, None) + if client and not client.is_closed: + import asyncio + + try: + loop = asyncio.get_running_loop() + loop.create_task(client.aclose()) + except RuntimeError: + pass + + +def get_instance_url(instance, port: int | None = None) -> str: + """Build the correct URL (https with CN or http with IP) for an instance.""" + p = port or instance.port + if instance.cacert: + _, cn = _get_ssl_and_cn(instance) + return f"https://{cn}:{p}" + return f"http://{instance.host}:{p}" + + +class _InstanceNetworkBackend(httpcore.AsyncNetworkBackend): + """Resolves cert CN hostnames to instance IPs without external DNS lookups. + + httpx uses the URL hostname for TLS SNI and cert verification, then calls + connect_tcp(hostname, port) for the actual TCP connection. We intercept + connect_tcp and remap the CN hostname to the real IP. 
This means: + - TLS SNI = hostname (correct, matches cert CN) + - Cert verification = hostname vs cert CN (correct) + - TCP connection = actual instance IP (correct, no DNS needed) + """ + + def __init__(self, hostname: str, ip: str): + self._hostname = hostname + self._ip = ip + self._backend = httpcore.AnyIOBackend() + + async def connect_tcp(self, host, port, timeout=None, local_address=None, socket_options=None): + actual_host = self._ip if host == self._hostname else host + return await self._backend.connect_tcp( + actual_host, + port, + timeout=timeout, + local_address=local_address, + socket_options=socket_options, + ) + + async def connect_unix_socket(self, path, timeout=None, socket_options=None): + return await self._backend.connect_unix_socket( + path, + timeout=timeout, + socket_options=socket_options, + ) + + async def sleep(self, seconds): + await self._backend.sleep(seconds) + + +async def get_instance_client(instance, timeout: int = 600) -> httpx.AsyncClient: + """Get or create a pooled httpx AsyncClient for an instance (HTTP/2 if TLS).""" + iid = str(instance.instance_id) + if iid in _client_cache: + client = _client_cache[iid] + if not client.is_closed: + return client + + if instance.cacert: + ssl_ctx, cn = _get_ssl_and_cn(instance) + # Build httpcore pool with our custom resolver that maps CN → IP. 
+ pool = httpcore.AsyncConnectionPool( + ssl_context=ssl_ctx, + http2=True, + network_backend=_InstanceNetworkBackend(hostname=cn, ip=instance.host), + ) + client = httpx.AsyncClient( + transport=pool, + base_url=f"https://{cn}:{instance.port}", + timeout=httpx.Timeout( + connect=10.0, read=float(timeout) if timeout else None, write=30.0, pool=10.0 + ), + ) + else: + client = httpx.AsyncClient( + base_url=f"http://{instance.host}:{instance.port}", + timeout=httpx.Timeout( + connect=10.0, read=float(timeout) if timeout else None, write=30.0, pool=10.0 + ), + ) + _client_cache[iid] = client + return client diff --git a/api/instance/router.py b/api/instance/router.py index d93c9f9a..0ab96aee 100644 --- a/api/instance/router.py +++ b/api/instance/router.py @@ -241,6 +241,103 @@ def _verify_rint_commitment(commitment_hex: str, expected_nonce: str) -> bool: return False +def _validate_tls_cert( + tls_cert_pem: str, tls_cert_sig_hex: str, rint_commitment_hex: str, nonce: str | None = None +) -> bool: + """Validate TLS cert signature against the aegis Ed25519 key from rint_commitment. + + For v4 commitments, verifies sign(cert_pem || nonce) using the Ed25519 pubkey + embedded at bytes 2:34 of the commitment. Also verifies the nonce is embedded + in the cert as an X.509 extension if present. + """ + from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PublicKey + from cryptography import x509 + + try: + commitment_bytes = bytes.fromhex(rint_commitment_hex) + if commitment_bytes[0] != 0x04: + logger.error("TLS cert validation: not a v4 commitment") + return False + pubkey_bytes = commitment_bytes[2:34] + + pk = Ed25519PublicKey.from_public_bytes(pubkey_bytes) + sig_bytes = bytes.fromhex(tls_cert_sig_hex) + + # Verify signature over cert_pem || nonce (nonce-bound) or just cert_pem (legacy). 
+ signed_data = tls_cert_pem.encode() + if nonce: + signed_data += nonce.encode() + pk.verify(sig_bytes, signed_data) + + # If nonce provided, verify it's embedded in the cert as X.509 extension. + if nonce: + CHUTES_NONCE_OID = x509.ObjectIdentifier("1.3.6.1.4.1.59888.1") + cert = x509.load_pem_x509_certificate(tls_cert_pem.encode()) + try: + ext = cert.extensions.get_extension_for_oid(CHUTES_NONCE_OID) + raw = ext.value.value + # Extension value is DER: UTF8String(nonce). Parse tag+length. + if raw[0] == 0x0C: # UTF8String tag + # Short form length + if raw[1] < 0x80: + cert_nonce = raw[2 : 2 + raw[1]].decode() + elif raw[1] == 0x81: + cert_nonce = raw[3 : 3 + raw[2]].decode() + else: + cert_nonce = raw.decode() # fallback + else: + cert_nonce = raw.decode() # fallback for raw OCTET STRING + if cert_nonce != nonce: + logger.error(f"TLS cert nonce mismatch: cert={cert_nonce} expected={nonce}") + return False + except x509.ExtensionNotFound: + # Legacy cert without nonce extension — allow if sig verified. 
+ logger.warning("TLS cert has no nonce extension, skipping nonce embedding check") + + logger.info("TLS cert signature validation successful") + return True + except Exception as e: + logger.error(f"TLS cert validation failed: {e}") + return False + + +async def _verify_instance_tls_live(host: str, port: int, expected_cert_pem: str) -> bool: + """Connect to the instance's logging port and verify the served cert matches expected.""" + import ssl + from cryptography import x509 + from cryptography.hazmat.primitives import hashes + + try: + expected_cert = x509.load_pem_x509_certificate(expected_cert_pem.encode()) + expected_fingerprint = expected_cert.fingerprint(hashes.SHA256()) + + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + + reader, writer = await asyncio.wait_for( + asyncio.open_connection(host, port, ssl=ctx), + timeout=10.0, + ) + ssl_object = writer.get_extra_info("ssl_object") + served_der = ssl_object.getpeercert(binary_form=True) + served_cert = x509.load_der_x509_certificate(served_der) + served_fingerprint = served_cert.fingerprint(hashes.SHA256()) + writer.close() + await writer.wait_closed() + + if served_fingerprint != expected_fingerprint: + logger.warning( + f"TLS cert mismatch: served {served_fingerprint.hex()} != expected {expected_fingerprint.hex()}" + ) + return False + logger.info(f"Live TLS cert verification passed for {host}:{port}") + return True + except Exception as e: + logger.warning(f"Failed to verify TLS cert live at {host}:{port}: {e}") + return False + + async def _load_chute(db, chute_id: str) -> Chute: chute = ( (await db.execute(select(Chute).where(Chute.chute_id == chute_id))) @@ -871,9 +968,18 @@ async def _validate_launch_config_instance( f"{log_prefix} aegis-verify hash challenge success: {launch_config.config_id=} {args.netnanny_hash=}" ) else: - # aegis-verify .so not available, allow through (commitment already verified) - logger.warning( - f"{log_prefix} aegis-verify 
not available, skipping hash verification" + # aegis-verify .so must be deployed for v4 instances — hard fail. + logger.error( + f"{log_prefix} aegis-verify library not available, cannot verify v4 instance" + ) + launch_config.failed_at = func.now() + launch_config.verification_error = ( + "aegis-verify library not available for v4 verification" + ) + await db.commit() + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="aegis-verify library not available, cannot verify v4 instances", ) elif chute.allow_external_egress != args.egress or not args.netnanny_hash: nn_valid = False @@ -965,8 +1071,62 @@ async def _validate_launch_config_instance( detail=f"Job {launch_config.job_id} has already been claimed by another miner!", ) + # Validate TLS certificate for v4 instances (>= 0.5.5). + validated_cacert = None + is_v4 = semcomp(chute.chutes_version or "0.0.0", "0.5.5") >= 0 + tls_cert = getattr(args, "tls_cert", None) + tls_cert_sig = getattr(args, "tls_cert_sig", None) + rint_commitment = getattr(args, "rint_commitment", None) + + if is_v4 and rint_commitment and rint_commitment[:2] == "04": + if not tls_cert or not tls_cert_sig: + logger.error(f"{log_prefix} v4 instance missing tls_cert or tls_cert_sig") + launch_config.failed_at = func.now() + launch_config.verification_error = "v4 instance must provide TLS certificate" + await db.commit() + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="v4 instances must provide a TLS certificate and signature", + ) + if not _validate_tls_cert(tls_cert, tls_cert_sig, rint_commitment, launch_config.nonce): + logger.error(f"{log_prefix} TLS cert signature validation failed") + launch_config.failed_at = func.now() + launch_config.verification_error = "TLS certificate signature validation failed" + await db.commit() + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="TLS certificate signature validation failed", + ) + validated_cacert = tls_cert + + # Live 
TLS verification: connect to logging port and verify served cert. + log_port_mapping = next((p for p in args.port_mappings if p.internal_port == 8001), None) + if log_port_mapping: + live_ok = await _verify_instance_tls_live( + args.host, log_port_mapping.external_port, tls_cert + ) + if not live_ok: + logger.error(f"{log_prefix} live TLS cert verification failed") + launch_config.failed_at = func.now() + launch_config.verification_error = "Live TLS certificate verification failed" + await db.commit() + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Live TLS certificate verification failed — cert mismatch or unreachable", + ) + # Create the instance now that we've verified the envdump/k8s env. node_selector = NodeSelector(**chute.node_selector) + extra_fields = { + "e2e_pubkey": getattr(args, "e2e_pubkey", None), + } + # Store mTLS client cert + key for API-to-instance connections. + tls_client_cert = getattr(args, "tls_client_cert", None) + if tls_client_cert: + extra_fields["client_cert"] = tls_client_cert + extra_fields["client_key"] = getattr(args, "tls_client_key", None) + extra_fields["client_key_password"] = getattr(args, "tls_client_key_password", None) + instance = Instance( instance_id=new_instance_id, host=args.host, @@ -988,19 +1148,11 @@ async def _validate_launch_config_instance( hourly_rate=(await node_selector.current_estimated_price())["usd"]["hour"], inspecto=getattr(args, "inspecto", None), env_creation=args.model_dump(), - rint_commitment=getattr(args, "rint_commitment", None), + rint_commitment=rint_commitment, rint_nonce=getattr(args, "rint_nonce", None), rint_pubkey=getattr(args, "rint_pubkey", None), - extra={ - k: v - for k, v in { - "tls_cert": getattr(args, "tls_cert", None), - "tls_cert_sig": getattr(args, "tls_cert_sig", None), - "e2e_pubkey": getattr(args, "e2e_pubkey", None), - }.items() - if v is not None - } - or None, + cacert=validated_cacert, + extra={k: v for k, v in extra_fields.items() if v is not 
None} or None, ) if launch_config.job_id or ( not chute.public @@ -2375,15 +2527,38 @@ async def _stream(): log_port = next(p for p in instance.port_mappings if p["internal_port"] == 8001)[ "external_port" ] - async with miner_client.get( - instance.miner_hotkey, - f"http://{instance.host}:{log_port}/logs/stream", - timeout=0, - purpose="chutes", - params={"backfill": str(backfill)}, - ) as resp: - async for chunk in resp.content: - yield chunk + # Build a temporary client for the log port (different from main port). + import httpx as _httpx + + if instance.cacert: + from api.instance.connection import _get_ssl_and_cn, _InstanceTransport + + ssl_ctx, cn = _get_ssl_and_cn(instance) + transport = _InstanceTransport(hostname=cn, ip=instance.host, ssl_context=ssl_ctx) + client = _httpx.AsyncClient( + transport=transport, + base_url=f"https://{cn}:{log_port}", + timeout=_httpx.Timeout(connect=10.0, read=None, write=30.0, pool=10.0), + http2=True, + ) + else: + client = _httpx.AsyncClient( + base_url=f"http://{instance.host}:{log_port}", + timeout=_httpx.Timeout(connect=10.0, read=None, write=30.0, pool=10.0), + ) + + headers, _ = miner_client.sign_request(instance.miner_hotkey, purpose="chutes") + try: + async with client.stream( + "GET", + "/logs/stream", + headers=headers, + params={"backfill": str(backfill)}, + ) as resp: + async for chunk in resp.aiter_bytes(): + yield chunk + finally: + await client.aclose() return StreamingResponse( _stream(), @@ -2476,6 +2651,11 @@ async def delete_instance( {"instance_id": instance_id, "penalty": compute_multiplier_penalty}, ) + # Evict cached SSL context and httpx client for this instance. + from api.instance.connection import evict_instance_ssl + + evict_instance_ssl(instance_id) + await db.delete(instance) # Update instance audit table. 
diff --git a/api/instance/schemas.py b/api/instance/schemas.py index 24eabe36..6d2e2687 100644 --- a/api/instance/schemas.py +++ b/api/instance/schemas.py @@ -65,6 +65,9 @@ class LaunchConfigArgs(BaseModel): rint_pubkey: Optional[str] = None tls_cert: Optional[str] = None tls_cert_sig: Optional[str] = None + tls_client_cert: Optional[str] = None + tls_client_key: Optional[str] = None + tls_client_key_password: Optional[str] = None e2e_pubkey: Optional[str] = None cllmv_session_init: Optional[str] = None env: str diff --git a/api/job/router.py b/api/job/router.py index 8e2ccd7c..fb681c2b 100644 --- a/api/job/router.py +++ b/api/job/router.py @@ -243,8 +243,9 @@ async def delete_job( path, _ = encrypt_instance_request("/_shutdown", instance, hex_encode=True) async with miner_client.post( instance.miner_hotkey, - f"http://{instance.host}:{instance.port}/{path}", + f"/{path}", enc_payload, + instance=instance, timeout=30.0, ) as resp: resp.raise_for_status() diff --git a/api/main.py b/api/main.py index 5aeb044c..cdb43a4f 100644 --- a/api/main.py +++ b/api/main.py @@ -37,6 +37,7 @@ from api.server.router import router as servers_router from api.misc.router import router as misc_router from api.idp.router import router as idp_router +from api.e2e.router import router as e2e_router from api.chute.util import chute_id_by_slug from api.database import Base, engine, get_session from api.config import settings @@ -195,6 +196,7 @@ async def lifespan(_: FastAPI): default_router.include_router(misc_router, prefix="/misc", tags=["Miscellaneous"]) default_router.include_router(servers_router, prefix="/servers", tags=["Servers"]) default_router.include_router(idp_router, prefix="/idp", tags=["Identity Provider"]) +default_router.include_router(e2e_router, prefix="/e2e", tags=["E2E Encryption"]) # Do not use app for this, else middleware picks it up diff --git a/api/miner_client.py b/api/miner_client.py index 6487b9ae..ef451c3f 100644 --- a/api/miner_client.py +++ 
b/api/miner_client.py @@ -2,9 +2,9 @@ Helper to send requests to miners. """ -import aiohttp import hashlib import time +import httpx import orjson as json from contextlib import asynccontextmanager from typing import Any, Dict @@ -68,40 +68,131 @@ def sign_request(miner_ss58: str, payload: Dict[str, Any] | str | None = None, p return headers, payload_string +class _HttpxResponseWrapper: + """Wraps an httpx.Response to provide aiohttp-compatible attribute access. + + This enables gradual migration — callers can use either style: + - response.status (aiohttp) or response.status_code (httpx) + - await response.text() or response.text (httpx) + - await response.json() or response.json() (httpx) + """ + + def __init__(self, response: httpx.Response): + self._response = response + + @property + def status(self) -> int: + return self._response.status_code + + @property + def status_code(self) -> int: + return self._response.status_code + + @property + def headers(self): + return self._response.headers + + @property + def content(self): + return self._response.content + + async def text(self) -> str: + return self._response.text + + async def json(self): + return self._response.json() + + async def read(self) -> bytes: + return self._response.content + + def raise_for_status(self): + self._response.raise_for_status() + + def close(self): + pass + + def __getattr__(self, name): + return getattr(self._response, name) + + @asynccontextmanager -async def post(miner_ss58: str, url: str, payload: Dict[str, Any], **kwargs): +async def post(miner_ss58: str, url: str, payload: Dict[str, Any], instance=None, **kwargs): """ Perform a post request to a miner. 
""" - async with aiohttp.ClientSession() as session: - headers = kwargs.pop("headers", {}) - new_headers, payload_data = sign_request(miner_ss58, payload=payload) - headers.update(new_headers) - async with session.post(url, data=payload_data, headers=headers, **kwargs) as response: - yield response + headers = kwargs.pop("headers", {}) + new_headers, payload_data = sign_request(miner_ss58, payload=payload) + headers.update(new_headers) + timeout_val = kwargs.pop("timeout", 600) + kwargs.pop("params", None) # httpx uses params kwarg natively + + if instance: + from api.instance.connection import get_instance_client + + client = await get_instance_client( + instance, timeout=int(timeout_val) if timeout_val else 600 + ) + response = await client.post(url, content=payload_data, headers=headers) + yield _HttpxResponseWrapper(response) + else: + timeout = httpx.Timeout( + connect=10.0, read=float(timeout_val) if timeout_val else None, write=30.0, pool=10.0 + ) + async with httpx.AsyncClient(timeout=timeout) as client: + response = await client.post(url, content=payload_data, headers=headers) + yield _HttpxResponseWrapper(response) @asynccontextmanager -async def patch(miner_ss58: str, url: str, payload: Dict[str, Any], **kwargs): +async def patch(miner_ss58: str, url: str, payload: Dict[str, Any], instance=None, **kwargs): """ Perform a patch request to a miner. 
""" - async with aiohttp.ClientSession() as session: - headers = kwargs.pop("headers", {}) - new_headers, payload_data = sign_request(miner_ss58, payload=payload) - headers.update(new_headers) - async with session.patch(url, data=payload_data, headers=headers, **kwargs) as response: - yield response + headers = kwargs.pop("headers", {}) + new_headers, payload_data = sign_request(miner_ss58, payload=payload) + headers.update(new_headers) + timeout_val = kwargs.pop("timeout", 600) + + if instance: + from api.instance.connection import get_instance_client + + client = await get_instance_client( + instance, timeout=int(timeout_val) if timeout_val else 600 + ) + response = await client.patch(url, content=payload_data, headers=headers) + yield _HttpxResponseWrapper(response) + else: + timeout = httpx.Timeout( + connect=10.0, read=float(timeout_val) if timeout_val else None, write=30.0, pool=10.0 + ) + async with httpx.AsyncClient(timeout=timeout) as client: + response = await client.patch(url, content=payload_data, headers=headers) + yield _HttpxResponseWrapper(response) @asynccontextmanager -async def get(miner_ss58: str, url: str, purpose: str, **kwargs): +async def get(miner_ss58: str, url: str, purpose: str, instance=None, **kwargs): """ Perform a get request to a miner. 
""" - async with aiohttp.ClientSession() as session: - headers = kwargs.pop("headers", {}) - new_headers, payload_data = sign_request(miner_ss58, purpose=purpose) - headers.update(new_headers) - async with session.get(url, headers=headers, **kwargs) as response: - yield response + headers = kwargs.pop("headers", {}) + new_headers, _ = sign_request(miner_ss58, purpose=purpose) + headers.update(new_headers) + timeout_val = kwargs.pop("timeout", 600) + params = kwargs.pop("params", None) + + if instance: + from api.instance.connection import get_instance_client + + client = await get_instance_client( + instance, timeout=int(timeout_val) if timeout_val else 600 + ) + response = await client.get(url, headers=headers, params=params) + yield _HttpxResponseWrapper(response) + else: + timeout = httpx.Timeout( + connect=10.0, read=float(timeout_val) if timeout_val else None, write=30.0, pool=10.0 + ) + async with httpx.AsyncClient(timeout=timeout) as client: + response = await client.get(url, headers=headers, params=params) + yield _HttpxResponseWrapper(response) diff --git a/pyproject.toml b/pyproject.toml index 561b0e45..257eb0c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,6 +54,7 @@ dependencies = [ "httptools>=0.7.1", "bittensor-drand>=1.2.0", "pyyaml>=6.0.1,<7.0.0", + "httpx[http2]>=0.28.0,<0.29.0", ] # Dev dependencies go under optional-dependencies diff --git a/uv.lock b/uv.lock index 62bc303e..c90e86e5 100644 --- a/uv.lock +++ b/uv.lock @@ -804,6 +804,7 @@ dependencies = [ { name = "fastapi" }, { name = "fickling" }, { name = "httptools" }, + { name = "httpx", extra = ["http2"] }, { name = "huggingface-hub" }, { name = "jinja2" }, { name = "loguru" }, @@ -862,6 +863,7 @@ requires-dist = [ { name = "fickling", specifier = ">=0.1.5,<0.2.0" }, { name = "greenlet", marker = "extra == 'dev'", specifier = ">=3.1.1,<4.0.0" }, { name = "httptools", specifier = ">=0.7.1" }, + { name = "httpx", extras = ["http2"], specifier = ">=0.28.0,<0.29.0" }, { name = 
"huggingface-hub", specifier = "==0.34.6" }, { name = "ipykernel", marker = "extra == 'dev'", specifier = ">=6.29.5,<7.0.0" }, { name = "jinja2", specifier = ">=3.1.6,<4.0.0" }, @@ -1407,6 +1409,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, ] +[[package]] +name = "h2" +version = "4.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "hpack" }, + { name = "hyperframe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1d/17/afa56379f94ad0fe8defd37d6eb3f89a25404ffc71d4d848893d270325fc/h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1", size = 2152026, upload-time = "2025-08-23T18:12:19.778Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779, upload-time = "2025-08-23T18:12:17.779Z" }, +] + [[package]] name = "hf-xet" version = "1.2.0" @@ -1422,6 +1437,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/44/870d44b30e1dcfb6a65932e3e1506c103a8a5aea9103c337e7a53180322c/hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69", size = 2905735, upload-time = "2025-10-24T19:04:35.928Z" }, ] +[[package]] +name = "hpack" +version = "4.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276, upload-time = 
"2025-01-22T21:44:58.347Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357, upload-time = "2025-01-22T21:44:56.92Z" }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, +] + [[package]] name = "httptools" version = "0.7.1" @@ -1451,6 +1488,26 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6d/de/40a8f202b987d43afc4d54689600ff03ce65680ede2f31df348d7f368b8f/httptools-0.7.1-cp312-cp312-win_amd64.whl", hash = "sha256:3e14f530fefa7499334a79b0cf7e7cd2992870eb893526fb097d51b4f2d0f321", size = 86694, upload-time = "2025-10-10T03:54:45.923Z" }, ] +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } +wheels = [ + { 
url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, +] + +[package.optional-dependencies] +http2 = [ + { name = "h2" }, +] + [[package]] name = "huggingface-hub" version = "0.34.6" @@ -1470,6 +1527,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/92/1e/4157be4835fd0c064ca4c1a2cea577b3b33defa4b677ed7119372244357a/huggingface_hub-0.34.6-py3-none-any.whl", hash = "sha256:3387ec9045f9dc5b5715e4e7392c25b0d23fd539eb925111a1b301e60f2b4883", size = 562617, upload-time = "2025-09-16T08:10:49.372Z" }, ] +[[package]] +name = "hyperframe" +version = "6.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566, upload-time = "2025-01-22T21:41:49.302Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007, upload-time = "2025-01-22T21:41:47.295Z" }, +] + [[package]] name = "idna" version = "3.11" From 381aaad38787120b9897067d1ef036c4e19e63e2 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Sat, 14 Feb 2026 08:25:17 -0500 Subject: [PATCH 04/58] gzip --- api/chute/util.py | 26 +++++++++++++++++++++++--- api/e2e/router.py | 1 - api/invocation/router.py | 13 ++++++++++++- 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/api/chute/util.py b/api/chute/util.py index dfd49069..4a2113dc 100644 --- a/api/chute/util.py +++ b/api/chute/util.py @@ -672,6 +672,7 @@ async def _invoke_one( 
metrics: dict = {}, prefixes: list = None, manager: LeastConnManager = None, + raw_payload: dict = None, ): """ Try invoking a chute/cord with a single instance. @@ -679,7 +680,6 @@ async def _invoke_one( # Call the miner's endpoint. path = path.lstrip("/") response = None - payload = {"args": args, "kwargs": kwargs} # Set the 'p' private flag on invocations. private_billing = ( @@ -690,7 +690,18 @@ async def _invoke_one( plain_path = path.lstrip("/").rstrip("/") path = "/" + path.lstrip("/") - payload, iv = await asyncio.to_thread(encrypt_instance_request, json.dumps(payload), target) + + # Version-gate payload format: >= 0.5.5 uses plain JSON + gzip, < 0.5.5 uses pickle. + if raw_payload is not None and semcomp(target.chutes_version or "0.0.0", "0.5.5") >= 0: + # >= 0.5.5: plain JSON + gzip, no pickle + payload_bytes = gzip.compress(json.dumps(raw_payload).encode()) + use_serialized = False + else: + # < 0.5.5: pickle-wrapped args/kwargs, no gzip + payload_bytes = json.dumps({"args": args, "kwargs": kwargs}) + use_serialized = True + + payload, iv = await asyncio.to_thread(encrypt_instance_request, payload_bytes, target) encrypted_path, _ = await asyncio.to_thread( encrypt_instance_request, path.ljust(24, "?"), target, True ) @@ -713,7 +724,8 @@ async def _invoke_one( try: session = await get_miner_session(target, timeout=timeout) headers, payload_string = sign_request(miner_ss58=target.miner_hotkey, payload=payload) - headers["X-Chutes-Serialized"] = "true" + if use_serialized: + headers["X-Chutes-Serialized"] = "true" if stream: # Use streaming request for streaming responses. 
@@ -767,6 +779,8 @@ async def _invoke_one( cllmv_verified = False async for raw_chunk in stream_response.aiter_bytes(): chunk = await asyncio.to_thread(decrypt_instance_response, raw_chunk, target, iv) + if not use_serialized: + chunk = gzip.decompress(chunk) # Track time to first token and (approximate) token count; approximate # here because in speculative decoding multiple tokens may be returned. @@ -994,6 +1008,8 @@ async def _invoke_one( plaintext = await asyncio.to_thread( decrypt_instance_response, response_data["json"], target, iv ) + if not use_serialized: + plaintext = gzip.decompress(plaintext) if chute.standard_template == "vllm" and plaintext.startswith( b'{"object":"error","message":"input_ids cannot be empty."' ): @@ -1013,6 +1029,8 @@ async def _invoke_one( plaintext = await asyncio.to_thread( decrypt_instance_response, response_data["body"], target, iv ) + if not use_serialized: + plaintext = gzip.decompress(plaintext) headers = response_data["headers"] data = { "content_type": response_data.get( @@ -1246,6 +1264,7 @@ async def invoke( metrics: dict = {}, request: Request = None, prefixes: list = None, + raw_payload: dict = None, ): """ Helper to actual perform function invocations, retrying when a target fails. 
@@ -1329,6 +1348,7 @@ async def invoke( metrics, prefixes, manager, + raw_payload, ): try: if "input_ids cannot be empty" in str(data): diff --git a/api/e2e/router.py b/api/e2e/router.py index 865e0b93..42d77a47 100644 --- a/api/e2e/router.py +++ b/api/e2e/router.py @@ -239,7 +239,6 @@ async def e2e_invoke( headers, payload_string = sign_request( miner_ss58=instance.miner_hotkey, payload=encrypted_payload ) - headers["X-Chutes-Serialized"] = "true" headers["X-E2E-Encrypted"] = "true" if is_stream: headers["X-E2E-Stream"] = "true" diff --git a/api/invocation/router.py b/api/invocation/router.py index 1a7e2ef5..d17d58e3 100644 --- a/api/invocation/router.py +++ b/api/invocation/router.py @@ -555,7 +555,16 @@ async def _invoke( # Load prompt prefixes so we can do more intelligent routing. prefix_hashes = get_prompt_prefix_hashes(request_body) - if chute.standard_template in ("vllm", "tei") or selected_cord.get("passthrough", False): + is_passthrough = chute.standard_template in ("vllm", "tei") or selected_cord.get( + "passthrough", False + ) + if is_passthrough: + raw_payload = {"json": request_body, "params": request_params} + else: + raw_payload = request_body + + # Keep pickle for < 0.5.5 backwards compat + if is_passthrough: request_body = {"json": request_body, "params": request_params} args = base64.b64encode(gzip.compress(pickle.dumps(tuple()))).decode() kwargs = base64.b64encode(gzip.compress(pickle.dumps(request_body))).decode() @@ -621,6 +630,7 @@ async def _buffered_stream_response(): metrics=metrics, request=request, prefixes=prefix_hashes, + raw_payload=raw_payload, ): if include_trace: if not first_chunk_processed: @@ -741,6 +751,7 @@ async def _stream_with_first_chunk(): metrics=metrics, request=request, prefixes=prefix_hashes, + raw_payload=raw_payload, ): if response: continue From 2da8b1ff49d1a06d2751908aea6ff3f0197cc891 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Sat, 14 Feb 2026 13:26:45 -0500 Subject: [PATCH 05/58] Better disconnect handling. 
--- api/chute/util.py | 30 ++++++++++++++++++++++++++++++ api/e2e/router.py | 10 ++++++++++ api/invocation/router.py | 1 + 3 files changed, 41 insertions(+) diff --git a/api/chute/util.py b/api/chute/util.py index 4a2113dc..b558c76e 100644 --- a/api/chute/util.py +++ b/api/chute/util.py @@ -673,6 +673,7 @@ async def _invoke_one( prefixes: list = None, manager: LeastConnManager = None, raw_payload: dict = None, + request: Request = None, ): """ Try invoking a chute/cord with a single instance. @@ -777,6 +778,8 @@ async def _invoke_one( any_chunks = False chunk_idx = 0 cllmv_verified = False + last_usage = None + disconnect_chunk_check = 0 async for raw_chunk in stream_response.aiter_bytes(): chunk = await asyncio.to_thread(decrypt_instance_response, raw_chunk, target, iv) if not use_serialized: @@ -909,10 +912,36 @@ async def _invoke_one( raise InvalidCLLMV( f"BAD_RESPONSE {target.instance_id=} {chute.name=} returned invalid chunk (failed cllmv check)" ) + # Track running usage from continuous_usage_stats. + if isinstance(data, dict) and "usage" in data and data["usage"]: + last_usage = data["usage"] + last_chunk = chunk if b"data:" in chunk: any_chunks = True + # Periodic disconnect check (every 5 chunks). 
+ disconnect_chunk_check += 1 + if request and disconnect_chunk_check % 5 == 0: + if await request.is_disconnected(): + logger.info( + f"Client disconnected mid-stream for {chute.name} " + f"{target.instance_id=}, populating partial metrics" + ) + if last_usage and metrics is not None: + metrics["it"] = last_usage.get("prompt_tokens", 0) + metrics["ot"] = last_usage.get("completion_tokens", 0) + metrics["ct"] = (last_usage.get("prompt_tokens_details") or {}).get( + "cached_tokens", 0 + ) + total_time = time.time() - started_at + metrics["tt"] = round(total_time, 3) + ot = metrics["ot"] or 1 + metrics["tps"] = round(ot / total_time, 3) + metrics["ctps"] = round((metrics["it"] + ot) / total_time, 3) + await stream_response.aclose() + return + yield chunk.decode() if chute.standard_template == "vllm" and plain_path in LLM_PATHS and metrics: @@ -1349,6 +1378,7 @@ async def invoke( prefixes, manager, raw_payload, + request, ): try: if "input_ids cannot be empty" in str(data): diff --git a/api/e2e/router.py b/api/e2e/router.py index 42d77a47..cee752e6 100644 --- a/api/e2e/router.py +++ b/api/e2e/router.py @@ -365,6 +365,7 @@ async def _stream_e2e_response( Stream E2E response chunks, extracting usage events for billing. """ metrics = {} + chunk_count = 0 try: async for raw_chunk in response.aiter_bytes(): # Transport-decrypt each chunk. @@ -399,6 +400,15 @@ async def _stream_e2e_response( except Exception: pass + # Periodic disconnect check (every 5 chunks). + chunk_count += 1 + if chunk_count % 5 == 0 and await request.is_disconnected(): + logger.info( + f"E2E client disconnected mid-stream for {chute.name} {instance.instance_id=}" + ) + await response.aclose() + break + # Relay the decrypted chunk to client as-is. 
yield decrypted diff --git a/api/invocation/router.py b/api/invocation/router.py index d17d58e3..af3defc2 100644 --- a/api/invocation/router.py +++ b/api/invocation/router.py @@ -507,6 +507,7 @@ async def _invoke( request_body["stream_options"] = {} if not request_body["stream_options"].get("include_usage"): request_body["stream_options"]["include_usage"] = True + request_body["stream_options"]["continuous_usage_stats"] = True if request_body.get("logprobs"): if not request_body.get("top_logprobs"): request_body["top_logprobs"] = 1 From 70be33e1754c0b81400a4b46cefd45ac873eb95e Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Sun, 15 Feb 2026 05:07:16 -0500 Subject: [PATCH 06/58] stricter epoch validation for _rint --- watchtower.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/watchtower.py b/watchtower.py index 24261760..0b7eab58 100644 --- a/watchtower.py +++ b/watchtower.py @@ -1589,9 +1589,12 @@ async def check_runint(instance: Instance) -> bool: ) return False - # Check epoch is advancing (detect replay attacks) + # Check epoch is advancing (detect replay attacks and ticker-stall) epoch_key = f"rint_epoch:{instance.instance_id}" + epoch_ts_key = f"rint_epoch_ts:{instance.instance_id}" last_epoch = await settings.redis_client.get(epoch_key) + last_ts = await settings.redis_client.get(epoch_ts_key) + now = time.time() if last_epoch is not None: last_epoch = int(last_epoch) if epoch < last_epoch: @@ -1600,7 +1603,30 @@ async def check_runint(instance: Instance) -> bool: f"epoch went backwards: {epoch} < {last_epoch}" ) return False + if epoch == last_epoch: + logger.error( + f"RUNINT: {instance.instance_id=} {instance.miner_hotkey=} " + f"epoch stalled (ticker frozen?): {epoch} == {last_epoch}" + ) + return False + # Ticker runs at 10 Hz, so epoch should advance ~10 per second. + # Check that it advanced at least 10% of expected to catch + # severe stalls while being very generous with tolerance. 
+ if last_ts is not None: + elapsed = now - float(last_ts) + if elapsed > 30.0: + expected_advance = elapsed * 10 # _BUF_EPOCH_RATE = 10 + actual_advance = epoch - last_epoch + rate = actual_advance / expected_advance if expected_advance > 0 else 1.0 + if rate < 0.10: + logger.error( + f"RUNINT: {instance.instance_id=} {instance.miner_hotkey=} " + f"epoch advancing too slowly: {actual_advance} in {elapsed:.0f}s " + f"(expected ~{expected_advance:.0f}, rate={rate:.2%})" + ) + return False await settings.redis_client.set(epoch_key, str(epoch), ex=86400) + await settings.redis_client.set(epoch_ts_key, str(now), ex=86400) logger.success( f"RUNINT: {instance.instance_id=} {instance.miner_hotkey=} " From d4961a7294306768bf4b698807385390e5aab9bf Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Wed, 18 Feb 2026 12:05:44 -0500 Subject: [PATCH 07/58] Ensure no connection re-use for non-https. --- api/chute/util.py | 43 ++++++++++++++++++++++++++------------ api/e2e/router.py | 19 ++++++++++++----- api/instance/connection.py | 27 ++++++++++++++++++------ api/miner_client.py | 39 ++++++++++++++++++++++++++-------- 4 files changed, 95 insertions(+), 33 deletions(-) diff --git a/api/chute/util.py b/api/chute/util.py index b558c76e..e9e58357 100644 --- a/api/chute/util.py +++ b/api/chute/util.py @@ -234,9 +234,13 @@ async def safe_store_invocation(*args, **kwargs): logger.error(f"SAFE_STORE_INVOCATION: failed to insert new invocation record: {str(exc)}") -async def get_miner_session(instance: Instance, timeout: int = 600): +async def get_miner_session( + instance: Instance, timeout: int = 600 +) -> tuple[httpx.AsyncClient, bool]: """ Get or create an httpx client for an instance (with TLS if available). + + Returns (client, pooled) — caller must close the client when done if not pooled. 
""" from api.instance.connection import get_instance_client @@ -722,8 +726,9 @@ async def _invoke_one( timeout = 600 elif semcomp(target.chutes_version or "0.0.0", "0.4.2") < 0: timeout = 900 + pooled = True try: - session = await get_miner_session(target, timeout=timeout) + session, pooled = await get_miner_session(target, timeout=timeout) headers, payload_string = sign_request(miner_ss58=target.miner_hotkey, payload=payload) if use_serialized: headers["X-Chutes-Serialized"] = "true" @@ -1267,6 +1272,11 @@ async def _invoke_one( await stream_response.aclose() except Exception: pass + if not pooled: + try: + await session.aclose() + except Exception: + pass async def _s3_upload(data: io.BytesIO, path: str): @@ -1770,17 +1780,24 @@ async def load_llm_details(chute, target): } payload, iv = await asyncio.to_thread(encrypt_instance_request, json.dumps(payload), target) - session = await get_miner_session(target, timeout=60) - headers, payload_string = sign_request(miner_ss58=target.miner_hotkey, payload=payload) - headers["X-Chutes-Serialized"] = "true" - resp = await session.post(f"/{path}", content=payload_string, headers=headers) - resp.raise_for_status() - raw_data = resp.json() - logger.info(f"{target.chute_id=} {target.instance_id=} {target.miner_hotkey=}: {raw_data=}") - info = json.loads( - await asyncio.to_thread(decrypt_instance_response, raw_data["json"], target, iv) - ) - return info["data"][0] + session, pooled = await get_miner_session(target, timeout=60) + try: + headers, payload_string = sign_request(miner_ss58=target.miner_hotkey, payload=payload) + headers["X-Chutes-Serialized"] = "true" + resp = await session.post(f"/{path}", content=payload_string, headers=headers) + resp.raise_for_status() + raw_data = resp.json() + logger.info(f"{target.chute_id=} {target.instance_id=} {target.miner_hotkey=}: {raw_data=}") + info = json.loads( + await asyncio.to_thread(decrypt_instance_response, raw_data["json"], target, iv) + ) + return info["data"][0] + 
finally: + if not pooled: + try: + await session.aclose() + except Exception: + pass async def get_mtoken_price(user_id: str, chute_id: str) -> tuple[float, float, float]: diff --git a/api/e2e/router.py b/api/e2e/router.py index cee752e6..dc28e7a0 100644 --- a/api/e2e/router.py +++ b/api/e2e/router.py @@ -232,10 +232,11 @@ async def e2e_invoke( started_at = time.time() session = None + pooled = True response = None try: # Send to instance. - session = await get_miner_session(instance, timeout=1800) + session, pooled = await get_miner_session(instance, timeout=1800) headers, payload_string = sign_request( miner_ss58=instance.miner_hotkey, payload=encrypted_payload ) @@ -304,6 +305,7 @@ async def e2e_invoke( manager, conn_id, request, + pooled, ), media_type="text/event-stream", ) @@ -344,7 +346,7 @@ async def e2e_invoke( finally: if not is_stream: # For streaming, cleanup happens in the generator. - await _cleanup(session, response, manager, chute_id, instance_id, conn_id) + await _cleanup(session, response, manager, chute_id, instance_id, conn_id, pooled) async def _stream_e2e_response( @@ -360,6 +362,7 @@ async def _stream_e2e_response( manager, conn_id, request, + pooled, ): """ Stream E2E response chunks, extracting usage events for billing. @@ -438,7 +441,9 @@ async def _stream_e2e_response( logger.error(f"E2E stream error: {exc}\n{traceback.format_exc()}") raise finally: - await _cleanup(session, response, manager, chute.chute_id, instance.instance_id, conn_id) + await _cleanup( + session, response, manager, chute.chute_id, instance.instance_id, conn_id, pooled + ) async def _do_billing( @@ -536,7 +541,7 @@ async def _do_billing( asyncio.create_task(update_shutdown_timestamp(instance.instance_id)) -async def _cleanup(session, response, manager, chute_id, instance_id, conn_id): +async def _cleanup(session, response, manager, chute_id, instance_id, conn_id, pooled=True): """ Clean up httpx response and connection tracking. 
""" @@ -545,7 +550,11 @@ async def _cleanup(session, response, manager, chute_id, instance_id, conn_id): await response.aclose() except Exception: pass - # Don't close session — it's a pooled httpx client. + if not pooled and session: + try: + await session.aclose() + except Exception: + pass if manager: try: key = f"conn:{chute_id}:{instance_id}" diff --git a/api/instance/connection.py b/api/instance/connection.py index c0bec16b..b898beff 100644 --- a/api/instance/connection.py +++ b/api/instance/connection.py @@ -7,6 +7,7 @@ from cryptography.x509.oid import NameOID from cryptography.hazmat.primitives import serialization from cryptography.hazmat.primitives.serialization import load_pem_private_key +from api.util import semcomp # Cache SSL contexts and cert CNs per instance_id. @@ -16,6 +17,11 @@ _client_cache: dict[str, httpx.AsyncClient] = {} +def _should_pool(instance) -> bool: + """Only pool/cache when the instance has TLS (cacert) AND chutes_version >= 0.5.5.""" + return bool(instance.cacert) and semcomp(instance.chutes_version or "0.0.0", "0.5.5") >= 0 + + def _get_ssl_and_cn(instance) -> tuple[ssl.SSLContext, str]: """Get or create cached SSL context + CN for an instance.""" iid = str(instance.instance_id) @@ -123,13 +129,19 @@ async def sleep(self, seconds): await self._backend.sleep(seconds) -async def get_instance_client(instance, timeout: int = 600) -> httpx.AsyncClient: - """Get or create a pooled httpx AsyncClient for an instance (HTTP/2 if TLS).""" +async def get_instance_client(instance, timeout: int = 600) -> tuple[httpx.AsyncClient, bool]: + """Get or create an httpx AsyncClient for an instance. + + Returns (client, pooled) — caller must close the client when done if not pooled. + Only HTTPS instances with chutes_version >= 0.5.5 are pooled (HTTP/2 multiplexing). 
+ """ + pooled = _should_pool(instance) iid = str(instance.instance_id) - if iid in _client_cache: + + if pooled and iid in _client_cache: client = _client_cache[iid] if not client.is_closed: - return client + return client, True if instance.cacert: ssl_ctx, cn = _get_ssl_and_cn(instance) @@ -153,5 +165,8 @@ async def get_instance_client(instance, timeout: int = 600) -> httpx.AsyncClient connect=10.0, read=float(timeout) if timeout else None, write=30.0, pool=10.0 ), ) - _client_cache[iid] = client - return client + + if pooled: + _client_cache[iid] = client + + return client, pooled diff --git a/api/miner_client.py b/api/miner_client.py index ef451c3f..182b40b8 100644 --- a/api/miner_client.py +++ b/api/miner_client.py @@ -129,11 +129,18 @@ async def post(miner_ss58: str, url: str, payload: Dict[str, Any], instance=None if instance: from api.instance.connection import get_instance_client - client = await get_instance_client( + client, pooled = await get_instance_client( instance, timeout=int(timeout_val) if timeout_val else 600 ) - response = await client.post(url, content=payload_data, headers=headers) - yield _HttpxResponseWrapper(response) + try: + response = await client.post(url, content=payload_data, headers=headers) + yield _HttpxResponseWrapper(response) + finally: + if not pooled: + try: + await client.aclose() + except Exception: + pass else: timeout = httpx.Timeout( connect=10.0, read=float(timeout_val) if timeout_val else None, write=30.0, pool=10.0 @@ -156,11 +163,18 @@ async def patch(miner_ss58: str, url: str, payload: Dict[str, Any], instance=Non if instance: from api.instance.connection import get_instance_client - client = await get_instance_client( + client, pooled = await get_instance_client( instance, timeout=int(timeout_val) if timeout_val else 600 ) - response = await client.patch(url, content=payload_data, headers=headers) - yield _HttpxResponseWrapper(response) + try: + response = await client.patch(url, content=payload_data, 
headers=headers) + yield _HttpxResponseWrapper(response) + finally: + if not pooled: + try: + await client.aclose() + except Exception: + pass else: timeout = httpx.Timeout( connect=10.0, read=float(timeout_val) if timeout_val else None, write=30.0, pool=10.0 @@ -184,11 +198,18 @@ async def get(miner_ss58: str, url: str, purpose: str, instance=None, **kwargs): if instance: from api.instance.connection import get_instance_client - client = await get_instance_client( + client, pooled = await get_instance_client( instance, timeout=int(timeout_val) if timeout_val else 600 ) - response = await client.get(url, headers=headers, params=params) - yield _HttpxResponseWrapper(response) + try: + response = await client.get(url, headers=headers, params=params) + yield _HttpxResponseWrapper(response) + finally: + if not pooled: + try: + await client.aclose() + except Exception: + pass else: timeout = httpx.Timeout( connect=10.0, read=float(timeout_val) if timeout_val else None, write=30.0, pool=10.0 From fe60747d327702235e2ab3d6912440608566f78b Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Wed, 18 Feb 2026 14:44:03 -0500 Subject: [PATCH 08/58] model aliasing/failover --- api/database/orms.py | 1 + api/invocation/router.py | 83 ++++-- api/main.py | 2 + .../20260218120000_model_aliases.sql | 15 ++ api/model_alias/__init__.py | 0 api/model_alias/router.py | 92 +++++++ api/model_alias/schemas.py | 56 +++++ api/model_routing.py | 238 ++++++++++++++++++ 8 files changed, 461 insertions(+), 26 deletions(-) create mode 100644 api/migrations/20260218120000_model_aliases.sql create mode 100644 api/model_alias/__init__.py create mode 100644 api/model_alias/router.py create mode 100644 api/model_alias/schemas.py create mode 100644 api/model_routing.py diff --git a/api/database/orms.py b/api/database/orms.py index 7536ec63..e03ce1fa 100644 --- a/api/database/orms.py +++ b/api/database/orms.py @@ -16,3 +16,4 @@ import api.node.events # noqa: F401 import api.server.schemas # noqa: F401 import 
api.idp.schemas # noqa: F401 +import api.model_alias.schemas # noqa: F401 diff --git a/api/invocation/router.py b/api/invocation/router.py index 0e8eca60..d2189bd4 100644 --- a/api/invocation/router.py +++ b/api/invocation/router.py @@ -991,6 +991,7 @@ async def hostname_invocation( model = payload.get("model") chute = None + fallback_chutes = [] template = ( "vllm" if "llm" in request.state.chute_id @@ -999,35 +1000,65 @@ async def hostname_invocation( else "diffusion" ) if model: - if (chute := await get_one(model)) is None: - raise HTTPException( - status_code=status.HTTP_404_NOT_FOUND, - detail=f"model not found: {model}", - ) - if chute.standard_template != template or ( - not chute.public - and ( - chute.user_id != current_user.user_id - and not await is_shared(chute.chute_id, current_user.user_id) - ) - and not subnet_role_accessible(chute, current_user) - ): + from api.model_routing import resolve_model_parameter + + ranked_chutes, routing_mode = await resolve_model_parameter( + model, current_user.user_id, template + ) + if not ranked_chutes: raise HTTPException( status_code=status.HTTP_404_NOT_FOUND, detail=f"model not found: {model}", ) - request.state.chute_id = chute.chute_id - request.state.auth_object_id = chute.chute_id - - # # Model disabled temporarily? 
- # if ( - # await settings.redis_client.get(f"model_disabled:{request.state.chute_id}") - # and current_user.user_id != "dff3e6bb-3a6b-5a2b-9c48-da3abcd5ca5f" - # ): - # logger.warning(f"MODEL DISABLED: {request.state.chute_id}") - # raise HTTPException( - # status_code=status.HTTP_429_TOO_MANY_REQUESTS, - # detail="model is under maintenance", - # ) + chute = ranked_chutes[0] + fallback_chutes = ranked_chutes[1:] + if fallback_chutes or routing_mode: + payload["model"] = chute.name + + if chute is not None: + if chute.standard_template != template or ( + not chute.public + and ( + chute.user_id != current_user.user_id + and not await is_shared(chute.chute_id, current_user.user_id) + ) + and not subnet_role_accessible(chute, current_user) + ): + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail=f"model not found: {model}", + ) + request.state.chute_id = chute.chute_id + request.state.auth_object_id = chute.chute_id + + # Try invocation with cross-chute failover for multi-model routing. + if fallback_chutes: + try: + return await _invoke(request, current_user) + except HTTPException as exc: + if exc.status_code != status.HTTP_429_TOO_MANY_REQUESTS: + raise + # Try each fallback chute on infra_overload. + for fallback in fallback_chutes: + if fallback.standard_template != template or ( + not fallback.public + and ( + fallback.user_id != current_user.user_id + and not await is_shared(fallback.chute_id, current_user.user_id) + ) + and not subnet_role_accessible(fallback, current_user) + ): + continue + request.state.chute_id = fallback.chute_id + request.state.auth_object_id = fallback.chute_id + payload["model"] = fallback.name + try: + return await _invoke(request, current_user) + except HTTPException as inner_exc: + if inner_exc.status_code != status.HTTP_429_TOO_MANY_REQUESTS: + raise + continue + # All chutes exhausted. 
+ raise return await _invoke(request, current_user) diff --git a/api/main.py b/api/main.py index d33e96fa..fb5e46fc 100644 --- a/api/main.py +++ b/api/main.py @@ -37,6 +37,7 @@ from api.server.router import router as servers_router from api.misc.router import router as misc_router from api.idp.router import router as idp_router +from api.model_alias.router import router as model_alias_router from api.chute.util import chute_id_by_slug from api.database import Base, engine, get_session from api.config import settings @@ -192,6 +193,7 @@ async def lifespan(_: FastAPI): default_router.include_router(misc_router, prefix="/misc", tags=["Miscellaneous"]) default_router.include_router(servers_router, prefix="/servers", tags=["Servers"]) default_router.include_router(idp_router, prefix="/idp", tags=["Identity Provider"]) +default_router.include_router(model_alias_router, prefix="/model_aliases", tags=["Model Aliases"]) # Do not use app for this, else middleware picks it up diff --git a/api/migrations/20260218120000_model_aliases.sql b/api/migrations/20260218120000_model_aliases.sql new file mode 100644 index 00000000..03bf1439 --- /dev/null +++ b/api/migrations/20260218120000_model_aliases.sql @@ -0,0 +1,15 @@ +-- migrate:up +CREATE TABLE IF NOT EXISTS model_aliases ( + user_id VARCHAR NOT NULL, + alias VARCHAR(64) NOT NULL, + chute_ids JSONB NOT NULL, + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW(), + PRIMARY KEY (user_id, alias), + CONSTRAINT alias_ascii_no_colon CHECK (alias ~ '^[\x21-\x39\x3B-\x7E]+$') +); + +CREATE UNIQUE INDEX IF NOT EXISTS idx_model_aliases_user_alias_lower ON model_aliases (user_id, LOWER(alias)); + +-- migrate:down +DROP TABLE IF EXISTS model_aliases; diff --git a/api/model_alias/__init__.py b/api/model_alias/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/api/model_alias/router.py b/api/model_alias/router.py new file mode 100644 index 00000000..5962c08a --- /dev/null +++ 
b/api/model_alias/router.py @@ -0,0 +1,92 @@ +""" +CRUD endpoints for user model aliases. +""" + +from fastapi import APIRouter, Depends, HTTPException, status +from sqlalchemy import select, delete +from sqlalchemy.dialects.postgresql import insert +from sqlalchemy.ext.asyncio import AsyncSession +from api.database import get_db_session +from api.user.schemas import User +from api.user.service import get_current_user +from api.chute.util import get_one +from api.config import settings +from api.model_alias.schemas import ModelAlias, ModelAliasCreate, ModelAliasResponse + +router = APIRouter() + + +@router.get("/", response_model=list[ModelAliasResponse]) +async def list_aliases( + db: AsyncSession = Depends(get_db_session), + current_user: User = Depends(get_current_user()), +): + result = await db.execute(select(ModelAlias).where(ModelAlias.user_id == current_user.user_id)) + return result.scalars().all() + + +@router.post("/", response_model=ModelAliasResponse, status_code=status.HTTP_201_CREATED) +async def create_or_update_alias( + body: ModelAliasCreate, + db: AsyncSession = Depends(get_db_session), + current_user: User = Depends(get_current_user()), +): + # Validate all chute_ids exist. + for cid in body.chute_ids: + chute = await get_one(cid) + if chute is None: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"chute not found: {cid}", + ) + + stmt = ( + insert(ModelAlias) + .values( + user_id=current_user.user_id, + alias=body.alias, + chute_ids=body.chute_ids, + ) + .on_conflict_do_update( + index_elements=["user_id", "alias"], + set_={"chute_ids": body.chute_ids, "updated_at": ModelAlias.updated_at.default.arg}, + ) + ) + await db.execute(stmt) + await db.commit() + + # Invalidate cache. + cache_key = f"malias:{current_user.user_id}:{body.alias.lower()}" + await settings.redis_client.delete(cache_key) + + # Re-fetch the row. 
+ result = await db.execute( + select(ModelAlias).where( + ModelAlias.user_id == current_user.user_id, + ModelAlias.alias == body.alias, + ) + ) + return result.scalar_one() + + +@router.delete("/{alias}", status_code=status.HTTP_204_NO_CONTENT) +async def delete_alias( + alias: str, + db: AsyncSession = Depends(get_db_session), + current_user: User = Depends(get_current_user()), +): + result = await db.execute( + delete(ModelAlias).where( + ModelAlias.user_id == current_user.user_id, + ModelAlias.alias == alias, + ) + ) + if result.rowcount == 0: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="alias not found", + ) + await db.commit() + + cache_key = f"malias:{current_user.user_id}:{alias.lower()}" + await settings.redis_client.delete(cache_key) diff --git a/api/model_alias/schemas.py b/api/model_alias/schemas.py new file mode 100644 index 00000000..f89fad2e --- /dev/null +++ b/api/model_alias/schemas.py @@ -0,0 +1,56 @@ +""" +ORM and Pydantic models for user model aliases. 
+""" + +import re +from datetime import datetime +from typing import Optional +from pydantic import BaseModel, field_validator +from sqlalchemy import Column, String, DateTime, ForeignKey, func +from sqlalchemy.dialects.postgresql import JSONB +from api.database import Base + + +class ModelAlias(Base): + __tablename__ = "model_aliases" + + user_id = Column(String, ForeignKey("users.user_id", ondelete="CASCADE"), primary_key=True) + alias = Column(String(64), primary_key=True) + chute_ids = Column(JSONB, nullable=False) + created_at = Column(DateTime(timezone=True), server_default=func.now()) + updated_at = Column(DateTime(timezone=True), server_default=func.now(), onupdate=func.now()) + + +_ALIAS_PATTERN = re.compile(r"^[\x21-\x39\x3B-\x7E]+$") + + +class ModelAliasCreate(BaseModel): + alias: str + chute_ids: list[str] + + @field_validator("alias") + @classmethod + def validate_alias(cls, v: str) -> str: + if not 1 <= len(v) <= 64: + raise ValueError("alias must be 1-64 characters") + if not _ALIAS_PATTERN.match(v): + raise ValueError("alias must be ASCII printable (no spaces or colons)") + if ":latency" in v.lower() or ":throughput" in v.lower(): + raise ValueError("alias must not contain ':latency' or ':throughput'") + return v + + @field_validator("chute_ids") + @classmethod + def validate_chute_ids(cls, v: list[str]) -> list[str]: + if not 1 <= len(v) <= 20: + raise ValueError("chute_ids must have 1-20 items") + return v + + +class ModelAliasResponse(BaseModel): + alias: str + chute_ids: list[str] + created_at: Optional[datetime] = None + updated_at: Optional[datetime] = None + + model_config = {"from_attributes": True} diff --git a/api/model_routing.py b/api/model_routing.py new file mode 100644 index 00000000..27112aa2 --- /dev/null +++ b/api/model_routing.py @@ -0,0 +1,238 @@ +""" +Multi-model routing: failover, latency-based, and throughput-based selection. 
+""" + +import time +import pickle +from sqlalchemy import select, func +from api.config import settings +from api.chute.schemas import Chute +from api.chute.util import get_one +from api.database import get_session +from api.instance.util import load_chute_target_ids +from api.metrics.perf import otps_tracker, ptps_tracker +from api.model_alias.schemas import ModelAlias + + +ROUTING_SUFFIXES = (":latency", ":throughput") + + +def parse_model_parameter(model_str: str) -> tuple[str, str | None]: + """ + Strip :latency or :throughput suffix from model string. + Returns (model_str_without_suffix, routing_mode). + routing_mode is None (failover), "latency", or "throughput". + """ + lower = model_str.lower() + for suffix in ROUTING_SUFFIXES: + if lower.endswith(suffix): + return model_str[: -len(suffix)], suffix[1:] # strip the colon + return model_str, None + + +async def get_user_alias(user_id: str, alias: str) -> list[str] | None: + """ + Look up a user's model alias. Redis-cached with 120s TTL. + Returns ordered list of chute_ids, or None if alias doesn't exist. + """ + cache_key = f"malias:{user_id}:{alias.lower()}" + cached = await settings.redis_client.get(cache_key) + if cached is not None: + if cached == b"__none__": + return None + return pickle.loads(cached) + + async with get_session() as session: + result = await session.execute( + select(ModelAlias.chute_ids).where( + ModelAlias.user_id == user_id, + func.lower(ModelAlias.alias) == alias.lower(), + ) + ) + row = result.scalar_one_or_none() + + if row is not None: + await settings.redis_client.set(cache_key, pickle.dumps(row), ex=120) + return row + else: + await settings.redis_client.set(cache_key, b"__none__", ex=120) + return None + + +async def check_chute_availability(chute_id: str) -> bool: + """ + Lightweight check: does this chute have at least one instance with capacity? + Uses Redis connection tracking keys; falls back to load_chute_target_ids for cold chutes. 
+ """ + instance_ids = await settings.redis_client.smembers(f"cc_inst:{chute_id}") + if not instance_ids: + nonce = int(time.time()) + nonce -= nonce % 30 + db_ids = await load_chute_target_ids(chute_id, nonce=nonce) + return len(db_ids) > 0 + + conc_raw = await settings.redis_client.get(f"cc_conc:{chute_id}") + concurrency = int(conc_raw) if conc_raw else 1 + + keys = [ + f"cc:{chute_id}:{iid.decode() if isinstance(iid, bytes) else iid}" for iid in instance_ids + ] + values = await settings.redis_client.mget(keys) + for v in values: + if int(v or 0) < concurrency: + return True + + return False + + +async def get_chute_perf(chute_id: str) -> dict[str, float | None]: + """ + Get current otps and ptps EMA values for a chute. + """ + otps_info = await otps_tracker().get_info(chute_id) + ptps_info = await ptps_tracker().get_info(chute_id) + return { + "otps": otps_info["ema"] if otps_info and otps_info.get("ready") else None, + "ptps": ptps_info["ema"] if ptps_info and ptps_info.get("ready") else None, + } + + +async def _load_chutes_map(chute_ids: list[str]) -> dict[str, Chute]: + """Load chute objects for a list of IDs/names, returning a map of id->Chute.""" + result = {} + for cid in chute_ids: + chute = await get_one(cid) + if chute is not None: + result[cid] = chute + return result + + +async def _rank_failover(chute_ids: list[str], chutes_map: dict[str, Chute]) -> list[Chute]: + """ + Failover ranking: available chutes first (in order), then at-capacity chutes + that have instances (in order). Chutes with no instances at all are excluded. 
+ """ + available = [] + at_capacity = [] + + for cid in chute_ids: + chute = chutes_map.get(cid) + if chute is None: + continue + if await check_chute_availability(chute.chute_id): + available.append(chute) + else: + nonce = int(time.time()) + nonce -= nonce % 30 + ids = await load_chute_target_ids(chute.chute_id, nonce=nonce) + if ids: + at_capacity.append(chute) + + return available + at_capacity + + +async def _rank_by_metric( + chute_ids: list[str], chutes_map: dict[str, Chute], metric: str +) -> list[Chute]: + """ + Rank chutes by metric value (descending) among available chutes. + metric is "otps" for throughput, "ptps" for latency. + The other metric is used as tiebreaker. + Chutes without metrics are appended in original order after ranked ones. + """ + tiebreaker = "ptps" if metric == "otps" else "otps" + scored: list[tuple[float, float, Chute]] = [] + unscored: list[Chute] = [] + + for cid in chute_ids: + chute = chutes_map.get(cid) + if chute is None: + continue + if not await check_chute_availability(chute.chute_id): + continue + perf = await get_chute_perf(chute.chute_id) + score = perf.get(metric) + if score is None: + unscored.append(chute) + else: + tie = perf.get(tiebreaker) or 0.0 + scored.append((score, tie, chute)) + + scored.sort(key=lambda x: (x[0], x[1]), reverse=True) + ranked = [chute for _, _, chute in scored] + unscored + + # If nothing was available, fall back to failover ordering. + if not ranked: + return await _rank_failover(chute_ids, chutes_map) + return ranked + + +def _check_chute_access(chute: Chute, template: str, user_id: str) -> bool: + """Check that chute matches template. Access checks happen downstream.""" + return chute.standard_template == template + + +async def resolve_model_parameter( + model_str: str, user_id: str, template: str +) -> tuple[list[Chute], str | None]: + """ + Main entry point for multi-model resolution. + Returns (ranked_chutes, routing_mode). 
+ ranked_chutes is an ordered list — caller should try each in sequence, + falling back to the next on infra_overload. + + Resolution order: + 1. Try exact get_one(model_str) first — handles names with colons/commas + 2. Strip :latency/:throughput suffix + 3. If contains comma -> comma-separated list of chute names + 4. Else try get_one(stripped) for single-chute lookup + 5. Else look up as user alias -> expand to ordered chute_ids list + """ + # 1. Always try exact match first — colons and commas can appear in real model names. + exact = await get_one(model_str) + if exact is not None and _check_chute_access(exact, template, user_id): + return [exact], None + + raw_model, routing_mode = parse_model_parameter(model_str) + + chute_ids: list[str] | None = None + + if "," in raw_model: + chute_ids = [s.strip() for s in raw_model.split(",") if s.strip()] + else: + # Try single lookup on suffix-stripped name. + if routing_mode is not None: + chute = await get_one(raw_model) + if chute is not None and _check_chute_access(chute, template, user_id): + return [chute], routing_mode + + # Try as alias. 
+ alias_ids = await get_user_alias(user_id, raw_model) + if alias_ids is not None: + chute_ids = alias_ids + else: + return [], routing_mode + + if not chute_ids: + return [], routing_mode + + chutes_map = await _load_chutes_map(chute_ids) + + valid_ids = [ + cid + for cid in chute_ids + if cid in chutes_map and _check_chute_access(chutes_map[cid], template, user_id) + ] + if not valid_ids: + return [], routing_mode + + valid_map = {cid: chutes_map[cid] for cid in valid_ids} + + if routing_mode == "throughput": + ranked = await _rank_by_metric(valid_ids, valid_map, "otps") + elif routing_mode == "latency": + ranked = await _rank_by_metric(valid_ids, valid_map, "ptps") + else: + ranked = await _rank_failover(valid_ids, valid_map) + + return ranked, routing_mode From eef2680963a5cccddc214a5f7eaaccd601480759 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Thu, 19 Feb 2026 08:16:30 +0000 Subject: [PATCH 09/58] Fixes. --- Dockerfile | 7 +------ api/image/forge.py | 8 ++++---- chute_autoscaler.py | 5 ++++- pyproject.toml | 2 +- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/Dockerfile b/Dockerfile index 00596828..5d8edb35 100644 --- a/Dockerfile +++ b/Dockerfile @@ -39,11 +39,6 @@ RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/s # Set Python 3.12 as default python3 RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 -# NN verification lib. 
-ADD data/chutes-nnverify.so /usr/local/lib/chutes-nnverify.so -ADD data/chutes-aegis-verify.so /usr/local/lib/chutes-aegis-verify.so -RUN chmod 755 /usr/local/lib/chutes-*.so - ### # FORGE ### @@ -84,7 +79,7 @@ ADD data/containers.conf /etc/containers/containers.conf RUN mkdir -p /root/build /forge /var/lib/containers # Install trivy -RUN curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin v0.68.2 +RUN curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin v0.69.1 # Install cosign ENV COSIGN_VERSION=2.5.3 diff --git a/api/image/forge.py b/api/image/forge.py index 05bf9937..5d822959 100644 --- a/api/image/forge.py +++ b/api/image/forge.py @@ -260,8 +260,8 @@ async def _capture_logs(stream, name, capture=True): RUN PS_OP="${{PS_OP}}" chutes run does_not_exist:chute --generate-inspecto-hash > /tmp/inspecto.hash COPY cfsv /cfsv USER root -RUN find / -type f -name '*.pyc' -exec rm -f {{}} || true -RUN find / -type d -name __pycache__ -exec rm -rf {{}} || true +RUN find / -type f -name '*.pyc' -exec rm -f {{}} \\; || true +RUN find / -type d -name __pycache__ -exec rm -rf {{}} \\; || true USER chutes RUN uv cache clean --force RUN CFSV_OP="${{CFSV_OP}}" /cfsv index / /tmp/chutesfs.index @@ -1108,8 +1108,8 @@ async def _capture_logs(stream, name, capture=True): RUN PS_OP="${{PS_OP}}" chutes run does_not_exist:chute --generate-inspecto-hash > /tmp/inspecto.hash COPY cfsv /cfsv USER root -RUN find / -type f -name '*.pyc' -exec rm -f {{}} || true -RUN find / -type d -name __pycache__ -exec rm -rf {{}} || true +RUN find / -type f -name '*.pyc' -exec rm -f {{}} \\; || true +RUN find / -type d -name __pycache__ -exec rm -rf {{}} \\; || true USER chutes RUN uv cache clean --force RUN CFSV_OP="${{CFSV_OP}}" /cfsv index / /tmp/chutesfs.index diff --git a/chute_autoscaler.py b/chute_autoscaler.py index f212691b..186c9655 100644 --- a/chute_autoscaler.py +++ 
b/chute_autoscaler.py @@ -224,10 +224,13 @@ async def get_scale_down_permission( "0d7184a2-32a3-53e0-9607-058c37edaab5": 40, "722df757-203b-58df-b54b-22130fd1fc53": 20, "398651e1-5f85-5e50-a513-7c5324e8e839": 15, + "08a7a60f-6956-5a9e-9983-5603c3ac5a38": 15, "e51e818e-fa63-570d-9f68-49d7d1b4d12f": 10, - "08a7a60f-6956-5a9e-9983-5603c3ac5a38": 10, "9976df15-f78f-55ee-bfc7-89d7abbbecc8": 10, "6320ab82-9e94-5d63-8e38-d136f61dc157": 10, + "579ca543-dda4-51d0-83ef-5667d1a5ed5f": 8, + "1aec6931-3bb8-5f5e-a61c-b112276ed140": 8, + "572bd930-0438-520c-a4a4-56874be296f8": 8, "2ff25e81-4586-5ec8-b892-3a6f342693d7": 8, "8f3bb827-b9e6-5487-88bc-ee8f0c6f5810": 8, "4f82321e-3e58-55da-ba44-051686ddbfe5": 8, diff --git a/pyproject.toml b/pyproject.toml index 257eb0c7..a4245360 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "python-slugify[unidecode]>=8.0.4,<9.0.0", "async-lru>=2.0.5,<3.0.0", "aiodns>=3.6.0,<4.0.0", - "chutes==0.5.4rc4", + "chutes==0.5.5.rc0", "python-socketio[asyncio-client]>=5.15.0,<6.0.0", "pillow>=12.0.0,<13.0.0", "aioboto3==15.5.0", From 67a01370e2324003f5eaad319b4623040fc1b52b Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Thu, 19 Feb 2026 03:53:58 -0500 Subject: [PATCH 10/58] Fixes. --- Dockerfile | 4 ++++ api/e2e/router.py | 19 ++++++++++++++++-- api/instance/router.py | 44 +++++++++++++++++++++++++++--------------- api/metrics/util.py | 7 +++---- conn_prober.py | 8 ++++---- log_prober.py | 43 ++++++++++++++++++++++++++++++++--------- uv.lock | 6 +++--- watchtower.py | 38 +++++++++++++++++++++++------------- 8 files changed, 118 insertions(+), 51 deletions(-) diff --git a/Dockerfile b/Dockerfile index 5d8edb35..2369f30a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -39,6 +39,10 @@ RUN curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/s # Set Python 3.12 as default python3 RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 +# NN verification lib (v3 instances). 
+ADD data/chutes-nnverify.so /usr/local/lib/chutes-nnverify.so +RUN chmod 755 /usr/local/lib/chutes-nnverify.so + ### # FORGE ### diff --git a/api/e2e/router.py b/api/e2e/router.py index dc28e7a0..747856c4 100644 --- a/api/e2e/router.py +++ b/api/e2e/router.py @@ -18,6 +18,7 @@ from api.user.schemas import User from api.chute.util import ( get_one, + is_shared, get_miner_session, get_mtoken_price, update_usage_data, @@ -40,7 +41,7 @@ from api.miner_client import sign_request from api.rate_limit import rate_limit from api.gpu import COMPUTE_UNIT_PRICE_BASIS -from api.user.service import chutes_user_id +from api.user.service import chutes_user_id, subnet_role_accessible router = APIRouter() @@ -72,6 +73,13 @@ async def get_e2e_instances( chute = await get_one(chute_id) if not chute: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chute not found") + if not ( + chute.public + or chute.user_id == current_user.user_id + or await is_shared(chute.chute_id, current_user.user_id) + or subnet_role_accessible(chute, current_user) + ): + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chute not found") # Load active instances. instances = await load_chute_targets(chute_id, nonce=0) @@ -186,10 +194,17 @@ async def e2e_invoke( detail="Instance is no longer active", ) - # Load chute for billing info. + # Load chute and verify access. chute = await get_one(chute_id) if not chute: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chute not found") + if not ( + chute.public + or chute.user_id == current_user.user_id + or await is_shared(chute.chute_id, current_user.user_id) + or subnet_role_accessible(chute, current_user) + ): + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chute not found") # Read raw E2E blob from request body. 
e2e_blob = await request.body() diff --git a/api/instance/router.py b/api/instance/router.py index ee9061d3..84400fca 100644 --- a/api/instance/router.py +++ b/api/instance/router.py @@ -116,17 +116,14 @@ NETNANNY.verify.argtypes = [ctypes.c_char_p, ctypes.c_char_p, ctypes.c_uint8] NETNANNY.verify.restype = ctypes.c_int -# Aegis v4 verification library (optional — may not be deployed yet) -AEGIS_VERIFY = None -try: - AEGIS_VERIFY = ctypes.CDLL("/usr/local/lib/chutes-aegis-verify.so") - AEGIS_VERIFY.verify.argtypes = [ctypes.c_char_p, ctypes.c_char_p, ctypes.c_uint8] - AEGIS_VERIFY.verify.restype = ctypes.c_int - logger.info("Loaded chutes-aegis-verify.so") -except OSError: - logger.warning( - "chutes-aegis-verify.so not found, v4 netnanny verification will fall back to commitment-only" - ) +# Aegis v4 verification library is required. +import chutes as _chutes_pkg # noqa: E402 + +_aegis_verify_path = os.path.join(os.path.dirname(_chutes_pkg.__file__), "chutes-aegis-verify.so") +AEGIS_VERIFY = ctypes.CDLL(_aegis_verify_path) +AEGIS_VERIFY.verify.argtypes = [ctypes.c_char_p, ctypes.c_char_p, ctypes.c_uint8] +AEGIS_VERIFY.verify.restype = ctypes.c_int +logger.info(f"Loaded chutes-aegis-verify.so from {_aegis_verify_path}") def _verify_rint_commitment_v4(commitment_hex: str) -> bool: @@ -1110,7 +1107,18 @@ async def _validate_launch_config_instance( tls_cert_sig = getattr(args, "tls_cert_sig", None) rint_commitment = getattr(args, "rint_commitment", None) - if is_v4 and rint_commitment and rint_commitment[:2] == "04": + if is_v4: + if not rint_commitment or rint_commitment[:2] != "04": + logger.error( + f"{log_prefix} v4 instance (>= 0.5.5) must provide v4 (04-prefix) rint_commitment" + ) + launch_config.failed_at = func.now() + launch_config.verification_error = "v4 instance must provide v4 rint_commitment" + await db.commit() + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="chutes >= 0.5.5 must provide a v4 runtime integrity commitment", + ) 
if not tls_cert or not tls_cert_sig: logger.error(f"{log_prefix} v4 instance missing tls_cert or tls_cert_sig") launch_config.failed_at = func.now() @@ -2561,17 +2569,21 @@ async def _stream(): ] # Build a temporary client for the log port (different from main port). import httpx as _httpx + import httpcore as _httpcore if instance.cacert: - from api.instance.connection import _get_ssl_and_cn, _InstanceTransport + from api.instance.connection import _get_ssl_and_cn, _InstanceNetworkBackend ssl_ctx, cn = _get_ssl_and_cn(instance) - transport = _InstanceTransport(hostname=cn, ip=instance.host, ssl_context=ssl_ctx) + pool = _httpcore.AsyncConnectionPool( + ssl_context=ssl_ctx, + http2=True, + network_backend=_InstanceNetworkBackend(hostname=cn, ip=instance.host), + ) client = _httpx.AsyncClient( - transport=transport, + transport=pool, base_url=f"https://{cn}:{log_port}", timeout=_httpx.Timeout(connect=10.0, read=None, write=30.0, pool=10.0), - http2=True, ) else: client = _httpx.AsyncClient( diff --git a/api/metrics/util.py b/api/metrics/util.py index 51a7f231..922aa4f2 100644 --- a/api/metrics/util.py +++ b/api/metrics/util.py @@ -12,7 +12,6 @@ from api.instance.util import load_chute_target from api.miner_client import get as miner_get from api.metrics.capacity import track_capacity -import aiohttp CONNECTION_EXPIRY = 3600 GAUGE_REFRESH_INTERVAL = 10 # seconds @@ -21,12 +20,12 @@ async def _query_conn_stats(instance) -> dict | None: """Query an instance's /_conn_stats endpoint for ground-truth connection info.""" try: - url = f"http://{instance.host}:{instance.port}/_conn_stats" async with miner_get( miner_ss58=instance.miner_hotkey, - url=url, + url="/_conn_stats", + instance=instance, purpose="conn_stats", - timeout=aiohttp.ClientTimeout(total=5), + timeout=5, ) as resp: if resp.status == 200: return await resp.json() diff --git a/conn_prober.py b/conn_prober.py index 70609c6c..d9569d34 100644 --- a/conn_prober.py +++ b/conn_prober.py @@ -32,12 +32,12 @@ async 
def _post_connectivity(instance: Instance, endpoint: str) -> Dict[str, Any]: enc_path, _ = encrypt_instance_request("/_connectivity", instance, hex_encode=True) - url = f"http://{instance.host}:{instance.port}/{enc_path}" payload, _ = encrypt_instance_request(json.dumps({"endpoint": endpoint}), instance) async with miner_client.post( instance.miner_hotkey, - url, + f"/{enc_path}", payload, + instance=instance, timeout=30.0, ) as resp: resp.raise_for_status() @@ -46,12 +46,12 @@ async def _post_connectivity(instance: Instance, endpoint: str) -> Dict[str, Any async def _post_netnanny_challenge(instance: Instance, challenge: str) -> Dict[str, Any]: enc_path, _ = encrypt_instance_request("/_netnanny_challenge", instance, hex_encode=True) - url = f"http://{instance.host}:{instance.port}/{enc_path}" payload, _ = encrypt_instance_request(json.dumps({"challenge": challenge}), instance) async with miner_client.post( instance.miner_hotkey, - url, + f"/{enc_path}", payload, + instance=instance, timeout=15.0, ) as resp: resp.raise_for_status() diff --git a/log_prober.py b/log_prober.py index d3091a54..b1bc612e 100644 --- a/log_prober.py +++ b/log_prober.py @@ -1,6 +1,7 @@ import gc import asyncio import traceback +import httpx as _httpx import api.database.orms # noqa from loguru import logger from api.config import settings @@ -24,14 +25,34 @@ async def check_instance_logging_server(instance: Instance) -> bool: log_port = next(p for p in instance.port_mappings if p["internal_port"] == 8001)[ "external_port" ] - async with miner_client.get( - instance.miner_hotkey, - f"http://{instance.host}:{log_port}/logs", - timeout=10, - purpose="chutes", - ) as resp: + + # Build a TLS-aware client for the log port when instance has cacert. 
+ if instance.cacert: + import httpcore as _httpcore + from api.instance.connection import _get_ssl_and_cn, _InstanceNetworkBackend + + ssl_ctx, cn = _get_ssl_and_cn(instance) + pool = _httpcore.AsyncConnectionPool( + ssl_context=ssl_ctx, + http2=True, + network_backend=_InstanceNetworkBackend(hostname=cn, ip=instance.host), + ) + client = _httpx.AsyncClient( + transport=pool, + base_url=f"https://{cn}:{log_port}", + timeout=_httpx.Timeout(connect=10.0, read=10.0, write=10.0, pool=10.0), + ) + else: + client = _httpx.AsyncClient( + base_url=f"http://{instance.host}:{log_port}", + timeout=_httpx.Timeout(connect=10.0, read=10.0, write=10.0, pool=10.0), + ) + + headers, _ = miner_client.sign_request(instance.miner_hotkey, purpose="chutes") + try: + resp = await client.get("/logs", headers=headers) resp.raise_for_status() - json_data = await resp.json() + json_data = resp.json() if "logs" not in json_data: raise ValueError("Missing 'logs' key in response") has_required_log = any( @@ -39,13 +60,17 @@ async def check_instance_logging_server(instance: Instance) -> bool: ) if not has_required_log: raise ValueError("No log entry with path '/tmp/_chute.log' found") + proto = "https" if instance.cacert else "http" logger.success( - f"✅ logging server running for {instance.instance_id=} of {instance.miner_hotkey=} for {instance.chute_id=} on http://{instance.host}:{log_port}" + f"✅ logging server running for {instance.instance_id=} of {instance.miner_hotkey=} for {instance.chute_id=} on {proto}://{instance.host}:{log_port}" ) return True + finally: + await client.aclose() except Exception as exc: + proto = "https" if instance.cacert else "http" logger.error( - f"❌ logging server check failure for {instance.instance_id=} of {instance.miner_hotkey=} for {instance.chute_id=} on http://{instance.host}:{log_port or '???'}: {str(exc)}\n{traceback.format_exc()}" + f"❌ logging server check failure for {instance.instance_id=} of {instance.miner_hotkey=} for {instance.chute_id=} on 
{proto}://{instance.host}:{log_port or '???'}: {str(exc)}\n{traceback.format_exc()}" ) return False diff --git a/uv.lock b/uv.lock index c90e86e5..ea06097a 100644 --- a/uv.lock +++ b/uv.lock @@ -749,7 +749,7 @@ wheels = [ [[package]] name = "chutes" -version = "0.5.4rc4" +version = "0.5.5rc0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiofiles" }, @@ -779,7 +779,7 @@ dependencies = [ { name = "uvicorn" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/fc/5f/8c73918484c2b87266112633102f7d24b3eabf4c6259958fe85855de632d/chutes-0.5.4rc4-py3-none-any.whl", hash = "sha256:b75b34a464c84d954d2941dfc9775f356b9bbc9418be50b070484bc233a0955b", size = 7361555, upload-time = "2026-02-06T15:58:49.695Z" }, + { url = "https://files.pythonhosted.org/packages/50/f7/3595a0ce87d8cbae446e996ae2f2f8f1c60be9b0aec009571e3e950024ec/chutes-0.5.5rc0-py3-none-any.whl", hash = "sha256:447b893dadf1f0de4395dccc5298da29ff6ca78c55aba788cd2f58fff7149438", size = 8918289, upload-time = "2026-02-19T08:13:43.108Z" }, ] [[package]] @@ -856,7 +856,7 @@ requires-dist = [ { name = "backoff", specifier = ">=2.2.1,<3.0.0" }, { name = "bittensor-drand", specifier = ">=1.2.0" }, { name = "bittensor-wallet", specifier = ">=4.0.1" }, - { name = "chutes", specifier = "==0.5.4rc4" }, + { name = "chutes", specifier = "==0.5.5rc0" }, { name = "dcap-qvl", specifier = "==0.3.12" }, { name = "dnslib", specifier = ">=0.9.26,<0.10.0" }, { name = "fastapi", specifier = ">=0.124.0,<0.125.0" }, diff --git a/watchtower.py b/watchtower.py index 0b7eab58..30adb494 100644 --- a/watchtower.py +++ b/watchtower.py @@ -157,8 +157,9 @@ async def do_slurp(instance, payload, encrypted_slurp): path, _ = encrypt_instance_request("/_slurp", instance, hex_encode=True) async with miner_client.post( instance.miner_hotkey, - f"http://{instance.host}:{instance.port}/{path}", + f"/{path}", enc_payload, + instance=instance, timeout=15.0, ) as resp: if resp.status == 404: @@ -499,8 +500,9 @@ 
async def check_ping(chute, instance): path, _ = encrypt_instance_request("/_ping", instance, hex_encode=True) async with miner_client.post( instance.miner_hotkey, - f"http://{instance.host}:{instance.port}/{path}", + f"/{path}", payload, + instance=instance, timeout=10.0, ) as resp: raw_content = await resp.read() @@ -837,7 +839,7 @@ async def check_chute(chute_id): chute, miner_hotkey=instance.miner_hotkey, seed=instance.nodes[0].seed, - tls=False, + tls=semcomp(instance.chutes_version or "0.0.0", "0.5.5") >= 0, ) except AssertionError as exc: logger.error(f"{log_prefix} failed running command check: {exc=}") @@ -1097,8 +1099,9 @@ async def procs_check(): try: async with miner_client.get( instance.miner_hotkey, - f"http://{instance.host}:{instance.port}/{path}", + f"/{path}", purpose="chutes", + instance=instance, timeout=15.0, ) as resp: data = await resp.json() @@ -1137,8 +1140,9 @@ async def get_env_dump(instance): path, _ = encrypt_instance_request("/_env_dump", instance, hex_encode=True) async with miner_client.post( instance.miner_hotkey, - f"http://{instance.host}:{instance.port}/{path}", + f"/{path}", enc_payload, + instance=instance, timeout=30.0, ) as resp: if resp.status != 200: @@ -1157,8 +1161,9 @@ async def get_env_sig(instance, salt): path, _ = encrypt_instance_request("/_env_sig", instance, hex_encode=True) async with miner_client.post( instance.miner_hotkey, - f"http://{instance.host}:{instance.port}/{path}", + f"/{path}", enc_payload, + instance=instance, timeout=5.0, ) as resp: if resp.status != 200: @@ -1182,8 +1187,9 @@ async def get_dump(instance, outdir: str = None): try: async with miner_client.post( instance.miner_hotkey, - f"http://{instance.host}:{instance.port}/{path}", + f"/{path}", enc_payload, + instance=instance, timeout=400.0, ) as resp: if resp.status != 200: @@ -1247,8 +1253,9 @@ async def get_sig(instance): logger.info(f"Querying {instance.instance_id=} envdump (sig)") async with miner_client.post( instance.miner_hotkey, - 
f"http://{instance.host}:{instance.port}/{path}", + f"/{path}", enc_payload, + instance=instance, timeout=15.0, ) as resp: if resp.status != 200: @@ -1273,8 +1280,9 @@ async def slurp(instance, path, offset: int = 0, length: int = 0): logger.info(f"Querying {instance.instance_id=} envdump (slurp) {payload=}") async with miner_client.post( instance.miner_hotkey, - f"http://{instance.host}:{instance.port}/{path}", + f"/{path}", enc_payload, + instance=instance, timeout=30.0, ) as resp: if resp.status != 200: @@ -1450,8 +1458,9 @@ async def verify_fs_hash(instance): try: async with miner_client.post( instance.miner_hotkey, - f"http://{instance.host}:{instance.port}/{path}", + f"/{path}", enc_payload, + instance=instance, timeout=90.0, ) as resp: fs_hash = (await resp.json())["result"] @@ -1488,8 +1497,9 @@ async def check_runint(instance: Instance) -> bool: async with miner_client.post( instance.miner_hotkey, - f"http://{instance.host}:{instance.port}/{path}", + f"/{path}", enc_payload, + instance=instance, timeout=15.0, ) as resp: if resp.status != 200: @@ -1675,8 +1685,9 @@ async def verify_bytecode_integrity(instance: Instance, chute: Chute) -> bool: try: async with miner_client.post( instance.miner_hotkey, - f"http://{instance.host}:{instance.port}/{path}", + f"/{path}", enc_payload, + instance=instance, timeout=30.0, ) as resp: if resp.status != 200: @@ -1787,8 +1798,9 @@ async def verify_package_integrity(instance: Instance, chute: Chute) -> dict: try: async with miner_client.post( instance.miner_hotkey, - f"http://{instance.host}:{instance.port}/{path}", + f"/{path}", enc_payload, + instance=instance, timeout=60.0, ) as resp: if resp.status != 200: From a949e36d585e49c7c9441188e761cd7f06bd128e Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Thu, 19 Feb 2026 04:03:07 -0500 Subject: [PATCH 11/58] Fixes. 
--- api/chute/util.py | 20 ++++++++++++++++++-- api/instance/router.py | 43 +++++++++++++++++++++++++++++++++--------- conn_prober.py | 23 ++++++++++++++++++---- 3 files changed, 71 insertions(+), 15 deletions(-) diff --git a/api/chute/util.py b/api/chute/util.py index 5800af70..24448b6f 100644 --- a/api/chute/util.py +++ b/api/chute/util.py @@ -882,8 +882,9 @@ async def _invoke_one( + target.rint_nonce + chute.image.package_hashes["hash"] ) - # Try V2 (HMAC-SHA256 with session key) first, fall back to V1 + # V2 (HMAC-SHA256 with session key) required for >= 0.5.5, V1 fallback for older cllmv_v2_key = (target.extra or {}).get("cllmv_session_key") + is_v4_cllmv = semcomp(target.chutes_version or "0.0.0", "0.5.5") >= 0 if cllmv_v2_key: cllmv_ok = cllmv_validate_v2( data.get("id") or "bad", @@ -895,6 +896,13 @@ async def _invoke_one( model_identifier, chute.revision, ) + elif is_v4_cllmv: + # >= 0.5.5 must use V2; missing key means launch was broken + logger.error( + f"CLLMV FAILURE: STREAMED {target.instance_id=} {target.miner_hotkey=} " + f"v4 instance missing cllmv_session_key" + ) + cllmv_ok = False else: cllmv_ok = cllmv_validate( data.get("id") or "bad", @@ -1138,8 +1146,9 @@ async def _invoke_one( + target.rint_nonce + chute.image.package_hashes["hash"] ) - # Try V2 (HMAC-SHA256 with session key) first, fall back to V1 + # V2 (HMAC-SHA256 with session key) required for >= 0.5.5, V1 fallback for older cllmv_v2_key = (target.extra or {}).get("cllmv_session_key") + is_v4_cllmv = semcomp(target.chutes_version or "0.0.0", "0.5.5") >= 0 if cllmv_v2_key: cllmv_ok = verification_token and cllmv_validate_v2( json_data.get("id") or "bad", @@ -1151,6 +1160,13 @@ async def _invoke_one( model_identifier, chute.revision, ) + elif is_v4_cllmv: + # >= 0.5.5 must use V2; missing key means launch was broken + logger.error( + f"CLLMV FAILURE: {target.instance_id=} {target.miner_hotkey=} " + f"v4 instance missing cllmv_session_key" + ) + cllmv_ok = False else: cllmv_ok = 
verification_token and cllmv_validate( json_data.get("id") or "bad", diff --git a/api/instance/router.py b/api/instance/router.py index 84400fca..6815dd0f 100644 --- a/api/instance/router.py +++ b/api/instance/router.py @@ -1346,7 +1346,39 @@ async def _validate_launch_config_instance( # CLLMV V2: decrypt miner's ephemeral HMAC session key from init blob cllmv_init = getattr(args, "cllmv_session_init", None) - if cllmv_init and semcomp(instance.chutes_version or "0.0.0", "0.5.5") >= 0: + is_v4_instance = semcomp(instance.chutes_version or "0.0.0", "0.5.5") >= 0 + if is_v4_instance: + if not cllmv_init: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="cllmv_session_init required for chutes >= 0.5.5", + ) + x25519_priv = os.environ.get("CLLMV_X25519_PRIVATE_KEY") + if not x25519_priv: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + detail="CLLMV V2 not configured on validator", + ) + try: + cllmv_session_key = _cllmv.decrypt_session_key(cllmv_init, x25519_priv) + if not cllmv_session_key: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="CLLMV V2 session key decryption failed (invalid init blob or signature)", + ) + if instance.extra is None: + instance.extra = {} + instance.extra["cllmv_session_key"] = cllmv_session_key + logger.info(f"CLLMV V2 session key decrypted for {instance.instance_id}") + except HTTPException: + raise + except Exception as exc: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"CLLMV V2 session key decryption error: {exc}", + ) + elif cllmv_init: + # Pre-0.5.5 instance sent cllmv_init anyway — best-effort decrypt x25519_priv = os.environ.get("CLLMV_X25519_PRIVATE_KEY") if x25519_priv: try: @@ -1356,15 +1388,8 @@ async def _validate_launch_config_instance( instance.extra = {} instance.extra["cllmv_session_key"] = cllmv_session_key logger.info(f"CLLMV V2 session key decrypted for {instance.instance_id}") - else: - logger.warning( - 
f"CLLMV V2 session key decryption failed for {instance.instance_id} " - f"(invalid init blob or signature)" - ) except Exception as exc: - logger.warning(f"CLLMV V2 session key decryption error: {exc}") - else: - logger.debug("CLLMV_X25519_PRIVATE_KEY not set, skipping V2 session key decrypt") + logger.warning(f"CLLMV V2 session key decryption error (pre-0.5.5): {exc}") return launch_config, nodes, instance, validator_pubkey diff --git a/conn_prober.py b/conn_prober.py index d9569d34..1562fbbd 100644 --- a/conn_prober.py +++ b/conn_prober.py @@ -1,4 +1,5 @@ import gc +import os import uuid import orjson as json import asyncio @@ -29,6 +30,13 @@ NETNANNY.verify.argtypes = [ctypes.c_char_p, ctypes.c_char_p, ctypes.c_uint8] NETNANNY.verify.restype = ctypes.c_int +import chutes as _chutes_pkg # noqa: E402 + +_aegis_verify_path = os.path.join(os.path.dirname(_chutes_pkg.__file__), "chutes-aegis-verify.so") +AEGIS_VERIFY = ctypes.CDLL(_aegis_verify_path) +AEGIS_VERIFY.verify.argtypes = [ctypes.c_char_p, ctypes.c_char_p, ctypes.c_uint8] +AEGIS_VERIFY.verify.restype = ctypes.c_int + async def _post_connectivity(instance: Instance, endpoint: str) -> Dict[str, Any]: enc_path, _ = encrypt_instance_request("/_connectivity", instance, hex_encode=True) @@ -136,10 +144,17 @@ async def _verify_netnanny(instance: Instance, allow_external_egress: bool) -> N f"Netnanny reported allow_external_egress={miner_egress} " f"but chute requires {allow_external_egress}." ) - if not NETNANNY.verify( - challenge.encode(), miner_hash.encode(), ctypes.c_uint8(allow_external_egress) - ): - raise RuntimeError("Netnanny verify() returned failure.") + # Use aegis-verify for >= 0.5.5, netnanny for older instances. 
+ if semcomp(instance.chutes_version or "0.0.0", "0.5.5") >= 0: + if not AEGIS_VERIFY.verify( + challenge.encode(), miner_hash.encode(), ctypes.c_uint8(allow_external_egress) + ): + raise RuntimeError("Aegis verify() returned failure.") + else: + if not NETNANNY.verify( + challenge.encode(), miner_hash.encode(), ctypes.c_uint8(allow_external_egress) + ): + raise RuntimeError("Netnanny verify() returned failure.") async def check_instance_connectivity( From d28c774ce8afaa33b5c9d5a3205e3d5009c45d29 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Thu, 19 Feb 2026 04:26:34 -0500 Subject: [PATCH 12/58] cllmv fixes --- api/chute/util.py | 73 +++++++++++++++++++++++++++++++++++++++++- api/instance/router.py | 26 +++++++++++++-- 2 files changed, 95 insertions(+), 4 deletions(-) diff --git a/api/chute/util.py b/api/chute/util.py index 24448b6f..f0ee9ba9 100644 --- a/api/chute/util.py +++ b/api/chute/util.py @@ -3,6 +3,7 @@ """ import os +import ctypes import httpx import asyncio import re @@ -82,7 +83,77 @@ track_request_completed, track_request_rate_limited, ) -from cllmv import validate as cllmv_validate, validate_v2 as cllmv_validate_v2 +import chutes as _chutes_pkg + +_aegis_verify_path = os.path.join(os.path.dirname(_chutes_pkg.__file__), "chutes-aegis-verify.so") +_AEGIS_VERIFY = ctypes.CDLL(_aegis_verify_path) +_AEGIS_VERIFY.validate.argtypes = [ + ctypes.c_char_p, + ctypes.c_int, + ctypes.c_char_p, + ctypes.c_char_p, + ctypes.c_char_p, + ctypes.c_char_p, + ctypes.c_char_p, +] +_AEGIS_VERIFY.validate.restype = ctypes.c_int +_AEGIS_VERIFY.validate_v2.argtypes = [ + ctypes.c_char_p, + ctypes.c_int, + ctypes.c_char_p, + ctypes.c_char_p, + ctypes.c_char_p, + ctypes.c_char_p, + ctypes.c_char_p, + ctypes.c_char_p, +] +_AEGIS_VERIFY.validate_v2.restype = ctypes.c_int + + +def cllmv_validate( + id: str, + created: int, + value: str, + expected_hash: str, + salt: str, + model: str, + revision: str, +) -> bool: + return bool( + _AEGIS_VERIFY.validate( + id.encode(), + created, 
+ value.encode() if value else None, + expected_hash.encode(), + salt.encode(), + model.encode(), + revision.encode(), + ) + ) + + +def cllmv_validate_v2( + id: str, + created: int, + value: str, + expected_token: str, + session_key_hex: str, + sub: str, + model: str, + revision: str, +) -> bool: + return bool( + _AEGIS_VERIFY.validate_v2( + id.encode(), + created, + value.encode() if value else None, + expected_token.encode(), + session_key_hex.encode(), + sub.encode(), + model.encode(), + revision.encode(), + ) + ) # Tokenizer for input/output token estimation. diff --git a/api/instance/router.py b/api/instance/router.py index 6815dd0f..0085946c 100644 --- a/api/instance/router.py +++ b/api/instance/router.py @@ -16,7 +16,6 @@ import orjson as json # noqa from api.image.util import get_inspecto_hash import api.miner_client as miner_client -import cllmv as _cllmv from loguru import logger from typing import Optional, Tuple from datetime import datetime, timedelta @@ -123,9 +122,30 @@ AEGIS_VERIFY = ctypes.CDLL(_aegis_verify_path) AEGIS_VERIFY.verify.argtypes = [ctypes.c_char_p, ctypes.c_char_p, ctypes.c_uint8] AEGIS_VERIFY.verify.restype = ctypes.c_int +AEGIS_VERIFY.decrypt_session_key.argtypes = [ + ctypes.c_char_p, + ctypes.c_char_p, + ctypes.c_char_p, + ctypes.c_size_t, +] +AEGIS_VERIFY.decrypt_session_key.restype = ctypes.c_int logger.info(f"Loaded chutes-aegis-verify.so from {_aegis_verify_path}") +def _decrypt_cllmv_session_key(blob_hex: str, x25519_priv_hex: str) -> str | None: + """Decrypt miner's ephemeral HMAC key from the CLLMV V2 init blob.""" + key_buf = ctypes.create_string_buffer(65) + ret = AEGIS_VERIFY.decrypt_session_key( + blob_hex.encode(), + x25519_priv_hex.encode(), + key_buf, + 65, + ) + if ret != 0: + return None + return key_buf.value.decode() + + def _verify_rint_commitment_v4(commitment_hex: str) -> bool: """Verify a v4 runtime integrity commitment (aegis/Ed25519).""" try: @@ -1360,7 +1380,7 @@ async def _validate_launch_config_instance( 
detail="CLLMV V2 not configured on validator", ) try: - cllmv_session_key = _cllmv.decrypt_session_key(cllmv_init, x25519_priv) + cllmv_session_key = _decrypt_cllmv_session_key(cllmv_init, x25519_priv) if not cllmv_session_key: raise HTTPException( status_code=status.HTTP_400_BAD_REQUEST, @@ -1382,7 +1402,7 @@ async def _validate_launch_config_instance( x25519_priv = os.environ.get("CLLMV_X25519_PRIVATE_KEY") if x25519_priv: try: - cllmv_session_key = _cllmv.decrypt_session_key(cllmv_init, x25519_priv) + cllmv_session_key = _decrypt_cllmv_session_key(cllmv_init, x25519_priv) if cllmv_session_key: if instance.extra is None: instance.extra = {} From e2f521cd3b64d0c24730304637c4b516bbd37acb Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Thu, 19 Feb 2026 09:35:00 +0000 Subject: [PATCH 13/58] chutes lib bump --- pyproject.toml | 2 +- uv.lock | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index a4245360..3a8aab6c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "python-slugify[unidecode]>=8.0.4,<9.0.0", "async-lru>=2.0.5,<3.0.0", "aiodns>=3.6.0,<4.0.0", - "chutes==0.5.5.rc0", + "chutes==0.5.5.rc1", "python-socketio[asyncio-client]>=5.15.0,<6.0.0", "pillow>=12.0.0,<13.0.0", "aioboto3==15.5.0", diff --git a/uv.lock b/uv.lock index ea06097a..51271906 100644 --- a/uv.lock +++ b/uv.lock @@ -749,7 +749,7 @@ wheels = [ [[package]] name = "chutes" -version = "0.5.5rc0" +version = "0.5.5rc1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiofiles" }, @@ -779,7 +779,7 @@ dependencies = [ { name = "uvicorn" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/50/f7/3595a0ce87d8cbae446e996ae2f2f8f1c60be9b0aec009571e3e950024ec/chutes-0.5.5rc0-py3-none-any.whl", hash = "sha256:447b893dadf1f0de4395dccc5298da29ff6ca78c55aba788cd2f58fff7149438", size = 8918289, upload-time = "2026-02-19T08:13:43.108Z" }, + { url = 
"https://files.pythonhosted.org/packages/b4/78/519cf70ae864d804a222d2fc1cb9493647c4097bc1644fede8d9e649475d/chutes-0.5.5rc1-py3-none-any.whl", hash = "sha256:a0b16d061c8f990a1dfff173a4316243df925b6a2e5fa38323fb195e221371db", size = 8918289, upload-time = "2026-02-19T09:33:58.111Z" }, ] [[package]] @@ -856,7 +856,7 @@ requires-dist = [ { name = "backoff", specifier = ">=2.2.1,<3.0.0" }, { name = "bittensor-drand", specifier = ">=1.2.0" }, { name = "bittensor-wallet", specifier = ">=4.0.1" }, - { name = "chutes", specifier = "==0.5.5rc0" }, + { name = "chutes", specifier = "==0.5.5rc1" }, { name = "dcap-qvl", specifier = "==0.3.12" }, { name = "dnslib", specifier = ">=0.9.26,<0.10.0" }, { name = "fastapi", specifier = ">=0.124.0,<0.125.0" }, @@ -912,13 +912,13 @@ wheels = [ [[package]] name = "cllmv" -version = "0.1.1" +version = "0.1.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "setuptools" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/59/f0/6ebce9a19a3b70241ebcd5579004e838d6e2d3d26db743d05a526d63115c/cllmv-0.1.1-py3-none-any.whl", hash = "sha256:cd8c8f7d7c7e6c272884cf4d853007be10848fa069f4a9f4f64f69f1804f4ea2", size = 18375, upload-time = "2026-01-30T20:16:32.286Z" }, + { url = "https://files.pythonhosted.org/packages/2a/e7/da6c1525948f8608945b3665f81d72daff9614368c52ffb6ec116db8e41a/cllmv-0.1.2-py3-none-any.whl", hash = "sha256:bb9bb6dc87717e0d632374d06e56f8e77e1e2115355f97d57b168f448d356216", size = 4459, upload-time = "2026-02-19T09:19:25.375Z" }, ] [[package]] From 1e7cc574ce7c0f42d1b55bce2e5c22410fb83db6 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Thu, 19 Feb 2026 07:55:32 -0500 Subject: [PATCH 14/58] more httpx fixes --- api/chute/util.py | 12 +++++++++-- api/instance/connection.py | 42 ++++++++++++++++++++++++++------------ api/miner_client.py | 38 ++++++++++++++++++++-------------- log_prober.py | 2 +- 4 files changed, 63 insertions(+), 31 deletions(-) diff --git a/api/chute/util.py 
b/api/chute/util.py index f0ee9ba9..8751636f 100644 --- a/api/chute/util.py +++ b/api/chute/util.py @@ -798,6 +798,9 @@ async def _invoke_one( elif semcomp(target.chutes_version or "0.0.0", "0.4.2") < 0: timeout = 900 pooled = True + req_timeout = httpx.Timeout( + connect=10.0, read=float(timeout) if timeout else None, write=30.0, pool=10.0 + ) try: session, pooled = await get_miner_session(target, timeout=timeout) headers, payload_string = sign_request(miner_ss58=target.miner_hotkey, payload=payload) @@ -809,6 +812,7 @@ async def _invoke_one( stream_response = await session.send( session.build_request("POST", f"/{path}", content=payload_string, headers=headers), stream=True, + timeout=req_timeout, ) response = stream_response else: @@ -816,6 +820,7 @@ async def _invoke_one( f"/{path}", content=payload_string, headers=headers, + timeout=req_timeout, ) if response.status_code != 200: @@ -1751,7 +1756,7 @@ async def invoke( error_message = "INVALID_RESPONSE" instant_delete = True elif isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code >= 500: - error_message = f"HTTP_{exc.status}: {error_message}" + error_message = f"HTTP_{exc.response.status_code}: {error_message}" # Server returned an error - connection worked, server is broken # skip_disable_loop = True @@ -1885,10 +1890,13 @@ async def load_llm_details(chute, target): payload, iv = await asyncio.to_thread(encrypt_instance_request, json.dumps(payload), target) session, pooled = await get_miner_session(target, timeout=60) + llm_timeout = httpx.Timeout(connect=10.0, read=60.0, write=30.0, pool=10.0) try: headers, payload_string = sign_request(miner_ss58=target.miner_hotkey, payload=payload) headers["X-Chutes-Serialized"] = "true" - resp = await session.post(f"/{path}", content=payload_string, headers=headers) + resp = await session.post( + f"/{path}", content=payload_string, headers=headers, timeout=llm_timeout + ) resp.raise_for_status() raw_data = resp.json() logger.info(f"{target.chute_id=} 
{target.instance_id=} {target.miner_hotkey=}: {raw_data=}") diff --git a/api/instance/connection.py b/api/instance/connection.py index b898beff..a539ec38 100644 --- a/api/instance/connection.py +++ b/api/instance/connection.py @@ -1,20 +1,21 @@ """Instance connection helpers — httpx + HTTP/2 with TLS cert verification.""" import ssl +import asyncio import httpx import httpcore +from collections import OrderedDict from cryptography import x509 from cryptography.x509.oid import NameOID from cryptography.hazmat.primitives import serialization from cryptography.hazmat.primitives.serialization import load_pem_private_key from api.util import semcomp +_POOL_MAX = 2048 -# Cache SSL contexts and cert CNs per instance_id. -_ssl_cache: dict[str, tuple[ssl.SSLContext, str]] = {} - -# Pooled httpx clients per instance (reuse TCP+TLS connections). -_client_cache: dict[str, httpx.AsyncClient] = {} +# LRU caches keyed by instance_id — oldest entries evicted when full. +_ssl_cache: OrderedDict[str, tuple[ssl.SSLContext, str]] = OrderedDict() +_client_cache: OrderedDict[str, httpx.AsyncClient] = OrderedDict() def _should_pool(instance) -> bool: @@ -26,6 +27,7 @@ def _get_ssl_and_cn(instance) -> tuple[ssl.SSLContext, str]: """Get or create cached SSL context + CN for an instance.""" iid = str(instance.instance_id) if iid in _ssl_cache: + _ssl_cache.move_to_end(iid) return _ssl_cache[iid] ctx = ssl.create_default_context() @@ -65,6 +67,8 @@ def _get_ssl_and_cn(instance) -> tuple[ssl.SSLContext, str]: cert = x509.load_pem_x509_certificate(instance.cacert.encode()) cn = cert.subject.get_attributes_for_oid(NameOID.COMMON_NAME)[0].value _ssl_cache[iid] = (ctx, cn) + if len(_ssl_cache) > _POOL_MAX: + _ssl_cache.popitem(last=False) return ctx, cn @@ -74,8 +78,6 @@ def evict_instance_ssl(instance_id: str): _ssl_cache.pop(iid, None) client = _client_cache.pop(iid, None) if client and not client.is_closed: - import asyncio - try: loop = asyncio.get_running_loop() 
loop.create_task(client.aclose()) @@ -134,6 +136,10 @@ async def get_instance_client(instance, timeout: int = 600) -> tuple[httpx.Async Returns (client, pooled) — caller must close the client when done if not pooled. Only HTTPS instances with chutes_version >= 0.5.5 are pooled (HTTP/2 multiplexing). + + For pooled clients the timeout baked into the client is generous (read=None); + callers should pass per-request timeouts to .post()/.get() etc. + For ephemeral clients the caller's timeout is set on the client directly. """ pooled = _should_pool(instance) iid = str(instance.instance_id) @@ -141,7 +147,13 @@ async def get_instance_client(instance, timeout: int = 600) -> tuple[httpx.Async if pooled and iid in _client_cache: client = _client_cache[iid] if not client.is_closed: + _client_cache.move_to_end(iid) return client, True + _client_cache.pop(iid, None) + + # Pooled clients use read=None so per-request timeouts can override. + # Ephemeral clients bake in the caller's timeout directly. 
+ read_timeout = None if pooled else (float(timeout) if timeout else None) if instance.cacert: ssl_ctx, cn = _get_ssl_and_cn(instance) @@ -154,19 +166,23 @@ async def get_instance_client(instance, timeout: int = 600) -> tuple[httpx.Async client = httpx.AsyncClient( transport=pool, base_url=f"https://{cn}:{instance.port}", - timeout=httpx.Timeout( - connect=10.0, read=float(timeout) if timeout else None, write=30.0, pool=10.0 - ), + timeout=httpx.Timeout(connect=10.0, read=read_timeout, write=30.0, pool=10.0), ) else: client = httpx.AsyncClient( base_url=f"http://{instance.host}:{instance.port}", - timeout=httpx.Timeout( - connect=10.0, read=float(timeout) if timeout else None, write=30.0, pool=10.0 - ), + timeout=httpx.Timeout(connect=10.0, read=read_timeout, write=30.0, pool=10.0), ) if pooled: _client_cache[iid] = client + if len(_client_cache) > _POOL_MAX: + _, evicted = _client_cache.popitem(last=False) + if evicted and not evicted.is_closed: + try: + loop = asyncio.get_running_loop() + loop.create_task(evicted.aclose()) + except RuntimeError: + pass return client, pooled diff --git a/api/miner_client.py b/api/miner_client.py index 182b40b8..7f9261d2 100644 --- a/api/miner_client.py +++ b/api/miner_client.py @@ -126,6 +126,11 @@ async def post(miner_ss58: str, url: str, payload: Dict[str, Any], instance=None timeout_val = kwargs.pop("timeout", 600) kwargs.pop("params", None) # httpx uses params kwarg natively + # Build per-request timeout for overriding pooled client defaults. 
+ req_timeout = httpx.Timeout( + connect=10.0, read=float(timeout_val) if timeout_val else None, write=30.0, pool=10.0 + ) + if instance: from api.instance.connection import get_instance_client @@ -133,7 +138,9 @@ async def post(miner_ss58: str, url: str, payload: Dict[str, Any], instance=None instance, timeout=int(timeout_val) if timeout_val else 600 ) try: - response = await client.post(url, content=payload_data, headers=headers) + response = await client.post( + url, content=payload_data, headers=headers, timeout=req_timeout + ) yield _HttpxResponseWrapper(response) finally: if not pooled: @@ -142,10 +149,7 @@ async def post(miner_ss58: str, url: str, payload: Dict[str, Any], instance=None except Exception: pass else: - timeout = httpx.Timeout( - connect=10.0, read=float(timeout_val) if timeout_val else None, write=30.0, pool=10.0 - ) - async with httpx.AsyncClient(timeout=timeout) as client: + async with httpx.AsyncClient(timeout=req_timeout) as client: response = await client.post(url, content=payload_data, headers=headers) yield _HttpxResponseWrapper(response) @@ -160,6 +164,10 @@ async def patch(miner_ss58: str, url: str, payload: Dict[str, Any], instance=Non headers.update(new_headers) timeout_val = kwargs.pop("timeout", 600) + req_timeout = httpx.Timeout( + connect=10.0, read=float(timeout_val) if timeout_val else None, write=30.0, pool=10.0 + ) + if instance: from api.instance.connection import get_instance_client @@ -167,7 +175,9 @@ async def patch(miner_ss58: str, url: str, payload: Dict[str, Any], instance=Non instance, timeout=int(timeout_val) if timeout_val else 600 ) try: - response = await client.patch(url, content=payload_data, headers=headers) + response = await client.patch( + url, content=payload_data, headers=headers, timeout=req_timeout + ) yield _HttpxResponseWrapper(response) finally: if not pooled: @@ -176,10 +186,7 @@ async def patch(miner_ss58: str, url: str, payload: Dict[str, Any], instance=Non except Exception: pass else: - timeout = 
httpx.Timeout( - connect=10.0, read=float(timeout_val) if timeout_val else None, write=30.0, pool=10.0 - ) - async with httpx.AsyncClient(timeout=timeout) as client: + async with httpx.AsyncClient(timeout=req_timeout) as client: response = await client.patch(url, content=payload_data, headers=headers) yield _HttpxResponseWrapper(response) @@ -195,6 +202,10 @@ async def get(miner_ss58: str, url: str, purpose: str, instance=None, **kwargs): timeout_val = kwargs.pop("timeout", 600) params = kwargs.pop("params", None) + req_timeout = httpx.Timeout( + connect=10.0, read=float(timeout_val) if timeout_val else None, write=30.0, pool=10.0 + ) + if instance: from api.instance.connection import get_instance_client @@ -202,7 +213,7 @@ async def get(miner_ss58: str, url: str, purpose: str, instance=None, **kwargs): instance, timeout=int(timeout_val) if timeout_val else 600 ) try: - response = await client.get(url, headers=headers, params=params) + response = await client.get(url, headers=headers, params=params, timeout=req_timeout) yield _HttpxResponseWrapper(response) finally: if not pooled: @@ -211,9 +222,6 @@ async def get(miner_ss58: str, url: str, purpose: str, instance=None, **kwargs): except Exception: pass else: - timeout = httpx.Timeout( - connect=10.0, read=float(timeout_val) if timeout_val else None, write=30.0, pool=10.0 - ) - async with httpx.AsyncClient(timeout=timeout) as client: + async with httpx.AsyncClient(timeout=req_timeout) as client: response = await client.get(url, headers=headers, params=params) yield _HttpxResponseWrapper(response) diff --git a/log_prober.py b/log_prober.py index b1bc612e..5abcd02b 100644 --- a/log_prober.py +++ b/log_prober.py @@ -48,8 +48,8 @@ async def check_instance_logging_server(instance: Instance) -> bool: timeout=_httpx.Timeout(connect=10.0, read=10.0, write=10.0, pool=10.0), ) - headers, _ = miner_client.sign_request(instance.miner_hotkey, purpose="chutes") try: + headers, _ = miner_client.sign_request(instance.miner_hotkey, 
purpose="chutes") resp = await client.get("/logs", headers=headers) resp.raise_for_status() json_data = resp.json() From 3d6268af6fc18cfc34673e6cac3128b6e7375698 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Thu, 19 Feb 2026 10:04:17 -0500 Subject: [PATCH 15/58] fix httpx timeouts --- api/chute/util.py | 3 ++- api/e2e/router.py | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/api/chute/util.py b/api/chute/util.py index 8751636f..81d150ae 100644 --- a/api/chute/util.py +++ b/api/chute/util.py @@ -809,10 +809,11 @@ async def _invoke_one( if stream: # Use streaming request for streaming responses. + # .send() doesn't accept timeout= kwarg; set it on the client directly. + session.timeout = req_timeout stream_response = await session.send( session.build_request("POST", f"/{path}", content=payload_string, headers=headers), stream=True, - timeout=req_timeout, ) response = stream_response else: diff --git a/api/e2e/router.py b/api/e2e/router.py index 747856c4..f588c492 100644 --- a/api/e2e/router.py +++ b/api/e2e/router.py @@ -9,6 +9,7 @@ import asyncio import random import traceback +import httpx import orjson as json from loguru import logger from fastapi import APIRouter, Depends, Header, HTTPException, Request, status @@ -259,8 +260,11 @@ async def e2e_invoke( if is_stream: headers["X-E2E-Stream"] = "true" + e2e_timeout = httpx.Timeout(connect=10.0, read=1800.0, write=30.0, pool=10.0) if is_stream: # Use streaming request for streaming responses. + # .send() doesn't accept timeout= kwarg; set it on the client directly. + session.timeout = e2e_timeout response = await session.send( session.build_request( "POST", f"/{encrypted_path}", content=payload_string, headers=headers @@ -272,6 +276,7 @@ async def e2e_invoke( f"/{encrypted_path}", content=payload_string, headers=headers, + timeout=e2e_timeout, ) # Handle transport-level errors. 
From d2668ac085d5d54b5fde190bb22b90304952254c Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Thu, 19 Feb 2026 10:26:35 -0500 Subject: [PATCH 16/58] Fix stream response handling. --- api/chute/util.py | 4 +++- api/e2e/router.py | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/api/chute/util.py b/api/chute/util.py index 81d150ae..318ab43f 100644 --- a/api/chute/util.py +++ b/api/chute/util.py @@ -867,7 +867,9 @@ async def _invoke_one( cllmv_verified = False last_usage = None disconnect_chunk_check = 0 - async for raw_chunk in stream_response.aiter_bytes(): + async for raw_chunk in stream_response.aiter_lines(): + if not raw_chunk: + continue chunk = await asyncio.to_thread(decrypt_instance_response, raw_chunk, target, iv) if not use_serialized: chunk = gzip.decompress(chunk) diff --git a/api/e2e/router.py b/api/e2e/router.py index f588c492..794b1d1b 100644 --- a/api/e2e/router.py +++ b/api/e2e/router.py @@ -390,7 +390,9 @@ async def _stream_e2e_response( metrics = {} chunk_count = 0 try: - async for raw_chunk in response.aiter_bytes(): + async for raw_chunk in response.aiter_lines(): + if not raw_chunk: + continue # Transport-decrypt each chunk. try: decrypted = await asyncio.to_thread(decrypt_instance_response, raw_chunk, instance) From 21d08fbe2e7cddb72fee5e2100ad5b4f25c0dbd8 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Thu, 19 Feb 2026 10:45:07 -0500 Subject: [PATCH 17/58] alias routing pref fixes. --- api/model_routing.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/api/model_routing.py b/api/model_routing.py index 27112aa2..e8e1bd85 100644 --- a/api/model_routing.py +++ b/api/model_routing.py @@ -23,6 +23,7 @@ def parse_model_parameter(model_str: str) -> tuple[str, str | None]: Returns (model_str_without_suffix, routing_mode). routing_mode is None (failover), "latency", or "throughput". 
""" + model_str = model_str.strip() lower = model_str.lower() for suffix in ROUTING_SUFFIXES: if lower.endswith(suffix): @@ -30,6 +31,18 @@ def parse_model_parameter(model_str: str) -> tuple[str, str | None]: return model_str, None +def _dedupe_keep_order(items: list[str]) -> list[str]: + """Remove duplicates while preserving original order.""" + seen = set() + out = [] + for item in items: + if item in seen: + continue + seen.add(item) + out.append(item) + return out + + async def get_user_alias(user_id: str, alias: str) -> list[str] | None: """ Look up a user's model alias. Redis-cached with 120s TTL. @@ -198,7 +211,19 @@ async def resolve_model_parameter( chute_ids: list[str] | None = None if "," in raw_model: - chute_ids = [s.strip() for s in raw_model.split(",") if s.strip()] + tokens = [s.strip() for s in raw_model.split(",") if s.strip()] + expanded: list[str] = [] + for token in tokens: + # Prefer direct model lookup over alias when names collide. + if await get_one(token) is not None: + expanded.append(token) + continue + alias_ids = await get_user_alias(user_id, token) + if alias_ids is not None: + expanded.extend(alias_ids) + else: + expanded.append(token) + chute_ids = _dedupe_keep_order(expanded) else: # Try single lookup on suffix-stripped name. if routing_mode is not None: From a907d1b12b426872c761f818e76f7e0266f72c82 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Thu, 19 Feb 2026 11:02:52 -0500 Subject: [PATCH 18/58] misc edge cases. 
--- api/invocation/router.py | 66 ++++++++++++++++++++++---------------- api/model_alias/router.py | 6 ++-- api/model_alias/schemas.py | 9 ++++-- 3 files changed, 48 insertions(+), 33 deletions(-) diff --git a/api/invocation/router.py b/api/invocation/router.py index 0a4ef369..15611edd 100644 --- a/api/invocation/router.py +++ b/api/invocation/router.py @@ -69,6 +69,30 @@ async def initialize_quota_cache(cache_key: str) -> None: await settings.redis_client.incrbyfloat(cache_key, 0.0) +def _derive_upstream_status(error: object) -> int | None: + """ + Map upstream error payloads to HTTP statuses used by retry/failover logic. + """ + if isinstance(error, dict): + code = error.get("code") + if isinstance(code, int): + if 500 <= code < 600: + return status.HTTP_503_SERVICE_UNAVAILABLE + if 400 <= code < 500: + return code + return None + + if isinstance(error, str): + if error in {"infra_overload", "no_targets"}: + return status.HTTP_429_TOO_MANY_REQUESTS + if error == "bad_request": + return status.HTTP_400_BAD_REQUEST + if error.startswith("HTTP_5"): + return status.HTTP_503_SERVICE_UNAVAILABLE + + return None + + @router.get("/usage") async def get_usage(request: Request): """ @@ -646,28 +670,14 @@ async def _buffered_stream_response(): # If the error occurred on the first chunk, we can raise an HTTP exception. if not first_chunk_processed: - # SGLang errors. 
- if isinstance(error, dict): - if ( - isinstance(error.get("code"), int) - and 400 <= error["code"] < 500 - ): + mapped_status = _derive_upstream_status(error) + if mapped_status is not None: + if isinstance(error, dict) and 400 <= error.get("code", 0) < 500: logger.warning( f"Received error code from upstream streaming response: {error=}" ) - raise HTTPException( - status_code=error["code"], - detail=error, - ) - - if error in ("infra_overload", "no_targets"): raise HTTPException( - status_code=status.HTTP_429_TOO_MANY_REQUESTS, - detail=chunk_data.get("detail") or error, - ) - elif error == "bad_request": - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, + status_code=mapped_status, detail=chunk_data.get("detail") or error, ) raise HTTPException( @@ -787,14 +797,10 @@ async def _streamfile(): elif chunk.startswith('data: {"error"'): chunk_data = json.loads(chunk[6:]) error = chunk_data["error"] - if error in ("infra_overload", "no_targets"): - raise HTTPException( - status_code=status.HTTP_429_TOO_MANY_REQUESTS, - detail=chunk_data.get("detail") or error, - ) - elif error == "bad_request": + mapped_status = _derive_upstream_status(error) + if mapped_status is not None: raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, + status_code=mapped_status, detail=chunk_data.get("detail") or error, ) raise HTTPException( @@ -1048,7 +1054,10 @@ async def hostname_invocation( try: return await _invoke(request, current_user) except HTTPException as exc: - if exc.status_code != status.HTTP_429_TOO_MANY_REQUESTS: + if exc.status_code not in ( + status.HTTP_429_TOO_MANY_REQUESTS, + status.HTTP_503_SERVICE_UNAVAILABLE, + ): raise # Try each fallback chute on infra_overload. 
for fallback in fallback_chutes: @@ -1067,7 +1076,10 @@ async def hostname_invocation( try: return await _invoke(request, current_user) except HTTPException as inner_exc: - if inner_exc.status_code != status.HTTP_429_TOO_MANY_REQUESTS: + if inner_exc.status_code not in ( + status.HTTP_429_TOO_MANY_REQUESTS, + status.HTTP_503_SERVICE_UNAVAILABLE, + ): raise continue # All chutes exhausted. diff --git a/api/model_alias/router.py b/api/model_alias/router.py index 5962c08a..eebe35e8 100644 --- a/api/model_alias/router.py +++ b/api/model_alias/router.py @@ -3,7 +3,7 @@ """ from fastapi import APIRouter, Depends, HTTPException, status -from sqlalchemy import select, delete +from sqlalchemy import select, delete, func from sqlalchemy.dialects.postgresql import insert from sqlalchemy.ext.asyncio import AsyncSession from api.database import get_db_session @@ -63,7 +63,7 @@ async def create_or_update_alias( result = await db.execute( select(ModelAlias).where( ModelAlias.user_id == current_user.user_id, - ModelAlias.alias == body.alias, + func.lower(ModelAlias.alias) == body.alias, ) ) return result.scalar_one() @@ -78,7 +78,7 @@ async def delete_alias( result = await db.execute( delete(ModelAlias).where( ModelAlias.user_id == current_user.user_id, - ModelAlias.alias == alias, + func.lower(ModelAlias.alias) == alias.lower(), ) ) if result.rowcount == 0: diff --git a/api/model_alias/schemas.py b/api/model_alias/schemas.py index f89fad2e..d96ec47c 100644 --- a/api/model_alias/schemas.py +++ b/api/model_alias/schemas.py @@ -35,16 +35,19 @@ def validate_alias(cls, v: str) -> str: raise ValueError("alias must be 1-64 characters") if not _ALIAS_PATTERN.match(v): raise ValueError("alias must be ASCII printable (no spaces or colons)") - if ":latency" in v.lower() or ":throughput" in v.lower(): + v = v.lower() + if ":latency" in v or ":throughput" in v: raise ValueError("alias must not contain ':latency' or ':throughput'") return v @field_validator("chute_ids") @classmethod def 
validate_chute_ids(cls, v: list[str]) -> list[str]: - if not 1 <= len(v) <= 20: + # De-duplicate while preserving order to avoid repeated fallback attempts. + deduped = list(dict.fromkeys(v)) + if not 1 <= len(deduped) <= 20: raise ValueError("chute_ids must have 1-20 items") - return v + return deduped class ModelAliasResponse(BaseModel): From dff57d9811e6bd12a3d72de49ec2498465f76c99 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Thu, 19 Feb 2026 16:48:51 +0000 Subject: [PATCH 19/58] Fix update time. --- api/model_alias/router.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/model_alias/router.py b/api/model_alias/router.py index eebe35e8..e7730838 100644 --- a/api/model_alias/router.py +++ b/api/model_alias/router.py @@ -49,7 +49,7 @@ async def create_or_update_alias( ) .on_conflict_do_update( index_elements=["user_id", "alias"], - set_={"chute_ids": body.chute_ids, "updated_at": ModelAlias.updated_at.default.arg}, + set_={"chute_ids": body.chute_ids, "updated_at": func.now()}, ) ) await db.execute(stmt) From 23f77627abd547f2c9a2083c3d81ade073c2cc7c Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Fri, 20 Feb 2026 06:34:44 -0500 Subject: [PATCH 20/58] pyc cleanup timing issue --- api/image/forge.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/api/image/forge.py b/api/image/forge.py index 5d822959..e0625f43 100644 --- a/api/image/forge.py +++ b/api/image/forge.py @@ -193,7 +193,10 @@ async def _capture_logs(stream, name, capture=True): RUN usermod -aG root chutes || true RUN chmod g+rwx /usr/local/lib /usr/local/bin /usr/local/share /usr/local/share/man RUN chmod g+rwx /usr/local/lib/python3.12/dist-packages || true +RUN find / -type f -name '*.pyc' -exec rm -f {{}} \\; || true +RUN find / -type d -name __pycache__ -exec rm -rf {{}} \\; || true USER chutes +ENV PYTHONDONTWRITEBYTECODE=1 RUN pip install chutes=={image.chutes_version} """ # v4 (aegis) vs v3 (netnanny+logintercept) .so injection @@ -259,10 
+262,6 @@ async def _capture_logs(stream, name, capture=True): RUN rm -rf does_not_exist.py does_not_exist RUN PS_OP="${{PS_OP}}" chutes run does_not_exist:chute --generate-inspecto-hash > /tmp/inspecto.hash COPY cfsv /cfsv -USER root -RUN find / -type f -name '*.pyc' -exec rm -f {{}} \\; || true -RUN find / -type d -name __pycache__ -exec rm -rf {{}} \\; || true -USER chutes RUN uv cache clean --force RUN CFSV_OP="${{CFSV_OP}}" /cfsv index / /tmp/chutesfs.index USER root @@ -1042,6 +1041,8 @@ async def _capture_logs(stream, name, capture=True): RUN usermod -aG root chutes || true RUN chmod g+rwx /usr/local/lib /usr/local/bin /usr/local/share /usr/local/share/man RUN chmod g+rwx /usr/local/lib/python3.12/dist-packages || true +RUN find / -type f -name '*.pyc' -exec rm -f {{}} \\; || true +RUN find / -type d -name __pycache__ -exec rm -rf {{}} \\; || true USER chutes RUN pip install chutes=={chutes_version} """ @@ -1107,9 +1108,6 @@ async def _capture_logs(stream, name, capture=True): RUN rm -rf does_not_exist.py does_not_exist RUN PS_OP="${{PS_OP}}" chutes run does_not_exist:chute --generate-inspecto-hash > /tmp/inspecto.hash COPY cfsv /cfsv -USER root -RUN find / -type f -name '*.pyc' -exec rm -f {{}} \\; || true -RUN find / -type d -name __pycache__ -exec rm -rf {{}} \\; || true USER chutes RUN uv cache clean --force RUN CFSV_OP="${{CFSV_OP}}" /cfsv index / /tmp/chutesfs.index From c83cb1f6614ca6f4424d603577b9802211d10cf9 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Fri, 20 Feb 2026 06:38:12 -0500 Subject: [PATCH 21/58] Find updates --- api/image/forge.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/api/image/forge.py b/api/image/forge.py index e0625f43..134b6576 100644 --- a/api/image/forge.py +++ b/api/image/forge.py @@ -193,8 +193,8 @@ async def _capture_logs(stream, name, capture=True): RUN usermod -aG root chutes || true RUN chmod g+rwx /usr/local/lib /usr/local/bin /usr/local/share /usr/local/share/man RUN chmod g+rwx 
/usr/local/lib/python3.12/dist-packages || true -RUN find / -type f -name '*.pyc' -exec rm -f {{}} \\; || true -RUN find / -type d -name __pycache__ -exec rm -rf {{}} \\; || true +RUN find / -xdev -type f -name '*.pyc' -exec rm -f {{}} \\; || true +RUN find / -xdev -type d -name __pycache__ -exec rm -rf {{}} \\; || true USER chutes ENV PYTHONDONTWRITEBYTECODE=1 RUN pip install chutes=={image.chutes_version} @@ -1041,9 +1041,10 @@ async def _capture_logs(stream, name, capture=True): RUN usermod -aG root chutes || true RUN chmod g+rwx /usr/local/lib /usr/local/bin /usr/local/share /usr/local/share/man RUN chmod g+rwx /usr/local/lib/python3.12/dist-packages || true -RUN find / -type f -name '*.pyc' -exec rm -f {{}} \\; || true -RUN find / -type d -name __pycache__ -exec rm -rf {{}} \\; || true +RUN find / -xdev -type f -name '*.pyc' -exec rm -f {{}} \\; || true +RUN find / -xdev -type d -name __pycache__ -exec rm -rf {{}} \\; || true USER chutes +ENV PYTHONDONTWRITEBYTECODE=1 RUN pip install chutes=={chutes_version} """ # v4 (aegis) vs v3 (netnanny+logintercept) .so injection From 00f32f26a4401d1cc3a556a0a12a0932bc84b059 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Fri, 20 Feb 2026 07:35:13 -0500 Subject: [PATCH 22/58] Fix py path. 
--- api/image/forge.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/image/forge.py b/api/image/forge.py index 134b6576..b4f71e40 100644 --- a/api/image/forge.py +++ b/api/image/forge.py @@ -289,7 +289,7 @@ async def _capture_logs(stream, name, capture=True): shutil.copy2(MANIFEST_DRIVER_PATH, build_driver_path) fsv_dockerfile_content += """COPY chutes-bcm.so /tmp/chutes-bcm.so COPY generate_manifest_driver.py /tmp/generate_manifest_driver.py -RUN CFSV_OP="${CFSV_OP}" python3 /tmp/generate_manifest_driver.py \ +RUN CFSV_OP="${CFSV_OP}" python /tmp/generate_manifest_driver.py \ --output /tmp/bytecode.manifest \ --json-output /tmp/bytecode.manifest.json \ --lib /tmp/chutes-bcm.so \ @@ -1137,7 +1137,7 @@ async def _capture_logs(stream, name, capture=True): shutil.copy2(MANIFEST_DRIVER_PATH, build_driver_path) fsv_dockerfile_content += """COPY chutes-bcm.so /tmp/chutes-bcm.so COPY generate_manifest_driver.py /tmp/generate_manifest_driver.py -RUN CFSV_OP="${CFSV_OP}" python3 /tmp/generate_manifest_driver.py \ +RUN CFSV_OP="${CFSV_OP}" python /tmp/generate_manifest_driver.py \ --output /tmp/bytecode.manifest \ --json-output /tmp/bytecode.manifest.json \ --lib /tmp/chutes-bcm.so \ From 252f8d0872d58edd53e8ff9e8824d07caa10524c Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Fri, 20 Feb 2026 14:39:05 +0000 Subject: [PATCH 23/58] temp disable inspecto --- api/instance/router.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/instance/router.py b/api/instance/router.py index 0085946c..b07f69f4 100644 --- a/api/instance/router.py +++ b/api/instance/router.py @@ -839,7 +839,7 @@ async def _validate_launch_config_inspecto( detail=launch_config.verification_error, ) - enforce_inspecto = "PS_OP" in os.environ + enforce_inspecto = "PS_OP" in os.environ and semcomp(chute.chutes_version, "0.5.5") < 0 inspecto_valid = True fail_reason = None if enforce_inspecto: From cb502d7d0cd52cf07414171deb498930367963a5 Mon Sep 17 00:00:00 2001 
From: Jon Durbin Date: Fri, 20 Feb 2026 10:17:14 -0500 Subject: [PATCH 24/58] fix cache clean timing issue with cfsv --- api/image/forge.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/image/forge.py b/api/image/forge.py index b4f71e40..b25c92f3 100644 --- a/api/image/forge.py +++ b/api/image/forge.py @@ -198,6 +198,7 @@ async def _capture_logs(stream, name, capture=True): USER chutes ENV PYTHONDONTWRITEBYTECODE=1 RUN pip install chutes=={image.chutes_version} +RUN uv cache clean --force """ # v4 (aegis) vs v3 (netnanny+logintercept) .so injection if semcomp(image.chutes_version or "0.0.0", "0.5.5") >= 0: @@ -262,7 +263,6 @@ async def _capture_logs(stream, name, capture=True): RUN rm -rf does_not_exist.py does_not_exist RUN PS_OP="${{PS_OP}}" chutes run does_not_exist:chute --generate-inspecto-hash > /tmp/inspecto.hash COPY cfsv /cfsv -RUN uv cache clean --force RUN CFSV_OP="${{CFSV_OP}}" /cfsv index / /tmp/chutesfs.index USER root RUN cp -f /tmp/chutesfs.index /etc/chutesfs.index && chmod a+r /etc/chutesfs.index @@ -1046,6 +1046,7 @@ async def _capture_logs(stream, name, capture=True): USER chutes ENV PYTHONDONTWRITEBYTECODE=1 RUN pip install chutes=={chutes_version} +RUN uv cache clean --force """ # v4 (aegis) vs v3 (netnanny+logintercept) .so injection if semcomp(chutes_version or "0.0.0", "0.5.5") >= 0: @@ -1110,7 +1111,6 @@ async def _capture_logs(stream, name, capture=True): RUN PS_OP="${{PS_OP}}" chutes run does_not_exist:chute --generate-inspecto-hash > /tmp/inspecto.hash COPY cfsv /cfsv USER chutes -RUN uv cache clean --force RUN CFSV_OP="${{CFSV_OP}}" /cfsv index / /tmp/chutesfs.index USER root RUN cp -f /tmp/chutesfs.index /etc/chutesfs.index && chmod a+r /etc/chutesfs.index From 33d26c14b8bebe5c3be5807e411533b22a853b2c Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Fri, 20 Feb 2026 15:25:41 +0000 Subject: [PATCH 25/58] Update chutes lib. 
--- pyproject.toml | 2 +- uv.lock | 11 +++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3a8aab6c..45940f48 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "python-slugify[unidecode]>=8.0.4,<9.0.0", "async-lru>=2.0.5,<3.0.0", "aiodns>=3.6.0,<4.0.0", - "chutes==0.5.5.rc1", + "chutes==0.5.5rc12", "python-socketio[asyncio-client]>=5.15.0,<6.0.0", "pillow>=12.0.0,<13.0.0", "aioboto3==15.5.0", diff --git a/uv.lock b/uv.lock index 51271906..a8f923dc 100644 --- a/uv.lock +++ b/uv.lock @@ -749,7 +749,7 @@ wheels = [ [[package]] name = "chutes" -version = "0.5.5rc1" +version = "0.5.5rc12" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiofiles" }, @@ -758,7 +758,6 @@ dependencies = [ { name = "cllmv" }, { name = "cryptography" }, { name = "fastapi" }, - { name = "fickling" }, { name = "graval" }, { name = "huggingface-hub" }, { name = "loguru" }, @@ -779,7 +778,7 @@ dependencies = [ { name = "uvicorn" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/b4/78/519cf70ae864d804a222d2fc1cb9493647c4097bc1644fede8d9e649475d/chutes-0.5.5rc1-py3-none-any.whl", hash = "sha256:a0b16d061c8f990a1dfff173a4316243df925b6a2e5fa38323fb195e221371db", size = 8918289, upload-time = "2026-02-19T09:33:58.111Z" }, + { url = "https://files.pythonhosted.org/packages/f7/a3/9ab64f1d8cf18063d80666a2012fc177332c1b07e7ac2bc87be0136f4c67/chutes-0.5.5rc12-py3-none-any.whl", hash = "sha256:fa9b20e4484c2fd3307f4f88269567e013a2eb23539733e913ca6b1fed50f64e", size = 8927094, upload-time = "2026-02-20T14:02:34.74Z" }, ] [[package]] @@ -856,7 +855,7 @@ requires-dist = [ { name = "backoff", specifier = ">=2.2.1,<3.0.0" }, { name = "bittensor-drand", specifier = ">=1.2.0" }, { name = "bittensor-wallet", specifier = ">=4.0.1" }, - { name = "chutes", specifier = "==0.5.5rc1" }, + { name = "chutes", specifier = "==0.5.5rc12" }, { name = "dcap-qvl", specifier = "==0.3.12" }, { 
name = "dnslib", specifier = ">=0.9.26,<0.10.0" }, { name = "fastapi", specifier = ">=0.124.0,<0.125.0" }, @@ -912,13 +911,13 @@ wheels = [ [[package]] name = "cllmv" -version = "0.1.2" +version = "0.1.3" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "setuptools" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/2a/e7/da6c1525948f8608945b3665f81d72daff9614368c52ffb6ec116db8e41a/cllmv-0.1.2-py3-none-any.whl", hash = "sha256:bb9bb6dc87717e0d632374d06e56f8e77e1e2115355f97d57b168f448d356216", size = 4459, upload-time = "2026-02-19T09:19:25.375Z" }, + { url = "https://files.pythonhosted.org/packages/de/bc/c794a38145ca902da5bd6f8349fece930060386e282b522bd2da1f2c0a1f/cllmv-0.1.3-py3-none-any.whl", hash = "sha256:aef678f61a7da9126a4690aa83bb7e76a3e6564bb79879d0498040361717cb4c", size = 4487, upload-time = "2026-02-19T12:50:34.224Z" }, ] [[package]] From ca351c1edde5ac107db250bdb57d73ead5de9bc4 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Fri, 20 Feb 2026 10:50:36 -0500 Subject: [PATCH 26/58] cfsv fixes --- api/image/forge.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/api/image/forge.py b/api/image/forge.py index b25c92f3..087da0b7 100644 --- a/api/image/forge.py +++ b/api/image/forge.py @@ -256,12 +256,16 @@ async def _capture_logs(stream, name, capture=True): verification_tag = f"{short_tag}-fsv-{uuid.uuid4().hex[:8]}" logger.info(f"Building filesystem verification image as {verification_tag}") fsv_dockerfile_content = f"""FROM {chutes_tag} +USER chutes ARG CFSV_OP ARG PS_OP ENV LD_PRELOAD="" ENV PYTHONDONTWRITEBYTECODE=1 RUN rm -rf does_not_exist.py does_not_exist RUN PS_OP="${{PS_OP}}" chutes run does_not_exist:chute --generate-inspecto-hash > /tmp/inspecto.hash +USER root +RUN rm -f /etc/bytecode.manifest /tmp/chutesfs.index /etc/chutesfs.index /tmp/chutesfs.data +USER chutes COPY cfsv /cfsv RUN CFSV_OP="${{CFSV_OP}}" /cfsv index / /tmp/chutesfs.index USER root @@ -1105,12 +1109,15 @@ 
async def _capture_logs(stream, name, capture=True): fsv_dockerfile_content = f"""FROM {updated_tag} ARG CFSV_OP ARG PS_OP +USER chutes ENV LD_PRELOAD="" ENV PYTHONDONTWRITEBYTECODE=1 RUN rm -rf does_not_exist.py does_not_exist RUN PS_OP="${{PS_OP}}" chutes run does_not_exist:chute --generate-inspecto-hash > /tmp/inspecto.hash -COPY cfsv /cfsv +USER root +RUN rm -f /etc/bytecode.manifest /tmp/chutesfs.index /etc/chutesfs.index /tmp/chutesfs.data USER chutes +COPY cfsv /cfsv RUN CFSV_OP="${{CFSV_OP}}" /cfsv index / /tmp/chutesfs.index USER root RUN cp -f /tmp/chutesfs.index /etc/chutesfs.index && chmod a+r /etc/chutesfs.index From bb0fb73b38a07d04d2776361c061fc64e6cbac0e Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Fri, 20 Feb 2026 11:53:01 -0500 Subject: [PATCH 27/58] tls cert validation fixes --- api/instance/router.py | 54 +++++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 16 deletions(-) diff --git a/api/instance/router.py b/api/instance/router.py index b07f69f4..d4861c7e 100644 --- a/api/instance/router.py +++ b/api/instance/router.py @@ -1159,22 +1159,6 @@ async def _validate_launch_config_instance( ) validated_cacert = tls_cert - # Live TLS verification: connect to logging port and verify served cert. - log_port_mapping = next((p for p in args.port_mappings if p.internal_port == 8001), None) - if log_port_mapping: - live_ok = await _verify_instance_tls_live( - args.host, log_port_mapping.external_port, tls_cert - ) - if not live_ok: - logger.error(f"{log_prefix} live TLS cert verification failed") - launch_config.failed_at = func.now() - launch_config.verification_error = "Live TLS certificate verification failed" - await db.commit() - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail="Live TLS certificate verification failed — cert mismatch or unreachable", - ) - # Create the instance now that we've verified the envdump/k8s env. 
node_selector = NodeSelector(**chute.node_selector) extra_fields = { @@ -1895,6 +1879,42 @@ async def claim_graval_launch_config( return response +async def delayed_instance_tls_check(instance_id: str): + """Verify the chute port serves the expected TLS cert after activation.""" + await asyncio.sleep(10) # Wait for uvicorn to be listening. + + async with get_session() as session: + instance = ( + (await session.execute(select(Instance).where(Instance.instance_id == instance_id))) + .unique() + .scalar_one_or_none() + ) + if not instance or not instance.active: + return + if not instance.cacert: + return + live_ok = await _verify_instance_tls_live(instance.host, instance.port, instance.cacert) + if not live_ok: + reason = ( + f"Live TLS cert verification failed: " + f"{instance.instance_id=} {instance.miner_hotkey=} {instance.chute_id=}" + ) + logger.error(reason) + await session.delete(instance) + await asyncio.create_task(notify_deleted(instance)) + await session.execute( + text( + "UPDATE instance_audit SET deletion_reason = :reason WHERE instance_id = :instance_id" + ), + {"instance_id": instance.instance_id, "reason": reason}, + ) + await invalidate_instance_cache(instance.chute_id, instance_id=instance.instance_id) + else: + logger.success( + f"Live TLS cert verification passed: {instance.instance_id=} on {instance.host}:{instance.port}" + ) + + async def delayed_instance_fs_check(instance_id: str): await asyncio.sleep(10) # XXX wait for uvicorn to be listening. 
@@ -2079,6 +2099,8 @@ async def activate_launch_config_instance( await invalidate_instance_cache(instance.chute_id, instance_id=instance.instance_id) await delete_bounty(chute.chute_id) asyncio.create_task(notify_activated(instance)) + if instance.cacert: + asyncio.create_task(delayed_instance_tls_check(instance.instance_id)) return {"ok": True} From 6c2cfd02c4ae0fd32a6e1724b458cbd64f4e83ed Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Fri, 20 Feb 2026 17:33:35 +0000 Subject: [PATCH 28/58] chutes lib update --- pyproject.toml | 2 +- uv.lock | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 45940f48..7590cb52 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "python-slugify[unidecode]>=8.0.4,<9.0.0", "async-lru>=2.0.5,<3.0.0", "aiodns>=3.6.0,<4.0.0", - "chutes==0.5.5rc12", + "chutes==0.5.5rc16", "python-socketio[asyncio-client]>=5.15.0,<6.0.0", "pillow>=12.0.0,<13.0.0", "aioboto3==15.5.0", diff --git a/uv.lock b/uv.lock index a8f923dc..a0601d54 100644 --- a/uv.lock +++ b/uv.lock @@ -749,7 +749,7 @@ wheels = [ [[package]] name = "chutes" -version = "0.5.5rc12" +version = "0.5.5rc16" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiofiles" }, @@ -778,7 +778,7 @@ dependencies = [ { name = "uvicorn" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/f7/a3/9ab64f1d8cf18063d80666a2012fc177332c1b07e7ac2bc87be0136f4c67/chutes-0.5.5rc12-py3-none-any.whl", hash = "sha256:fa9b20e4484c2fd3307f4f88269567e013a2eb23539733e913ca6b1fed50f64e", size = 8927094, upload-time = "2026-02-20T14:02:34.74Z" }, + { url = "https://files.pythonhosted.org/packages/1f/9a/3659310acaf671449509b23695e56ca9515c0cdb1c79c3046e16cef0522a/chutes-0.5.5rc16-py3-none-any.whl", hash = "sha256:219ec114f90bbd60f44f784df429a4359244f577164c93d7d151551613969818", size = 8926653, upload-time = "2026-02-20T17:22:34.248Z" }, ] [[package]] @@ -855,7 +855,7 @@ requires-dist = [ 
{ name = "backoff", specifier = ">=2.2.1,<3.0.0" }, { name = "bittensor-drand", specifier = ">=1.2.0" }, { name = "bittensor-wallet", specifier = ">=4.0.1" }, - { name = "chutes", specifier = "==0.5.5rc12" }, + { name = "chutes", specifier = "==0.5.5rc16" }, { name = "dcap-qvl", specifier = "==0.3.12" }, { name = "dnslib", specifier = ">=0.9.26,<0.10.0" }, { name = "fastapi", specifier = ">=0.124.0,<0.125.0" }, From a581528e68fdcb89ca5487583bc7836c0c32d713 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Fri, 20 Feb 2026 18:53:17 +0000 Subject: [PATCH 29/58] Already bytes --- api/chute/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/chute/util.py b/api/chute/util.py index 318ab43f..e6e7001d 100644 --- a/api/chute/util.py +++ b/api/chute/util.py @@ -770,7 +770,7 @@ async def _invoke_one( # Version-gate payload format: >= 0.5.5 uses plain JSON + gzip, < 0.5.5 uses pickle. if raw_payload is not None and semcomp(target.chutes_version or "0.0.0", "0.5.5") >= 0: # >= 0.5.5: plain JSON + gzip, no pickle - payload_bytes = gzip.compress(json.dumps(raw_payload).encode()) + payload_bytes = gzip.compress(json.dumps(raw_payload)) use_serialized = False else: # < 0.5.5: pickle-wrapped args/kwargs, no gzip From 8d8b60d0915bdb91cb3d281682ccb15e2c7ec053 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Fri, 20 Feb 2026 14:08:44 -0500 Subject: [PATCH 30/58] Fix httpcore/httpx updated lib issues... 
--- api/instance/connection.py | 37 ++++++++++++++++++++++++++++++++++++- api/instance/router.py | 4 ++-- log_prober.py | 27 +++++---------------------- 3 files changed, 43 insertions(+), 25 deletions(-) diff --git a/api/instance/connection.py b/api/instance/connection.py index a539ec38..d7a728a4 100644 --- a/api/instance/connection.py +++ b/api/instance/connection.py @@ -131,6 +131,41 @@ async def sleep(self, seconds): await self._backend.sleep(seconds) +class _CoreTransport(httpx.AsyncBaseTransport): + """Wrap a raw httpcore pool so httpx <-> httpcore request mapping works. + + httpx 0.28+ passes string-based URLs to transports, but httpcore 1.0 + expects bytes-based URLs. httpx.AsyncHTTPTransport handles this internally + but doesn't expose the network_backend param we need for custom DNS. + """ + + def __init__(self, pool: httpcore.AsyncConnectionPool): + self._pool = pool + + async def handle_async_request(self, request: httpx.Request) -> httpx.Response: + core_request = httpcore.Request( + method=request.method.encode() if isinstance(request.method, str) else request.method, + url=httpcore.URL( + scheme=request.url.raw_scheme, + host=request.url.raw_host, + port=request.url.port, + target=request.url.raw_path, + ), + headers=request.headers.raw, + content=request.stream, + ) + core_response = await self._pool.handle_async_request(core_request) + return httpx.Response( + status_code=core_response.status, + headers=core_response.headers, + stream=core_response.stream, + extensions=core_response.extensions, + ) + + async def aclose(self) -> None: + await self._pool.aclose() + + async def get_instance_client(instance, timeout: int = 600) -> tuple[httpx.AsyncClient, bool]: """Get or create an httpx AsyncClient for an instance. 
@@ -164,7 +199,7 @@ async def get_instance_client(instance, timeout: int = 600) -> tuple[httpx.Async network_backend=_InstanceNetworkBackend(hostname=cn, ip=instance.host), ) client = httpx.AsyncClient( - transport=pool, + transport=_CoreTransport(pool), base_url=f"https://{cn}:{instance.port}", timeout=httpx.Timeout(connect=10.0, read=read_timeout, write=30.0, pool=10.0), ) diff --git a/api/instance/router.py b/api/instance/router.py index d4861c7e..34845337 100644 --- a/api/instance/router.py +++ b/api/instance/router.py @@ -2639,7 +2639,7 @@ async def _stream(): import httpcore as _httpcore if instance.cacert: - from api.instance.connection import _get_ssl_and_cn, _InstanceNetworkBackend + from api.instance.connection import _get_ssl_and_cn, _InstanceNetworkBackend, _CoreTransport ssl_ctx, cn = _get_ssl_and_cn(instance) pool = _httpcore.AsyncConnectionPool( @@ -2648,7 +2648,7 @@ async def _stream(): network_backend=_InstanceNetworkBackend(hostname=cn, ip=instance.host), ) client = _httpx.AsyncClient( - transport=pool, + transport=_CoreTransport(pool), base_url=f"https://{cn}:{log_port}", timeout=_httpx.Timeout(connect=10.0, read=None, write=30.0, pool=10.0), ) diff --git a/log_prober.py b/log_prober.py index 5abcd02b..81f0c01c 100644 --- a/log_prober.py +++ b/log_prober.py @@ -26,27 +26,10 @@ async def check_instance_logging_server(instance: Instance) -> bool: "external_port" ] - # Build a TLS-aware client for the log port when instance has cacert. 
- if instance.cacert: - import httpcore as _httpcore - from api.instance.connection import _get_ssl_and_cn, _InstanceNetworkBackend - - ssl_ctx, cn = _get_ssl_and_cn(instance) - pool = _httpcore.AsyncConnectionPool( - ssl_context=ssl_ctx, - http2=True, - network_backend=_InstanceNetworkBackend(hostname=cn, ip=instance.host), - ) - client = _httpx.AsyncClient( - transport=pool, - base_url=f"https://{cn}:{log_port}", - timeout=_httpx.Timeout(connect=10.0, read=10.0, write=10.0, pool=10.0), - ) - else: - client = _httpx.AsyncClient( - base_url=f"http://{instance.host}:{log_port}", - timeout=_httpx.Timeout(connect=10.0, read=10.0, write=10.0, pool=10.0), - ) + client = _httpx.AsyncClient( + base_url=f"http://{instance.host}:{log_port}", + timeout=_httpx.Timeout(connect=10.0, read=10.0, write=10.0, pool=10.0), + ) try: headers, _ = miner_client.sign_request(instance.miner_hotkey, purpose="chutes") @@ -60,7 +43,7 @@ async def check_instance_logging_server(instance: Instance) -> bool: ) if not has_required_log: raise ValueError("No log entry with path '/tmp/_chute.log' found") - proto = "https" if instance.cacert else "http" + proto = "http" logger.success( f"✅ logging server running for {instance.instance_id=} of {instance.miner_hotkey=} for {instance.chute_id=} on {proto}://{instance.host}:{log_port}" ) From adcd17a64eb1ed589912cc340720a49acf0e1314 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Fri, 20 Feb 2026 15:34:09 -0500 Subject: [PATCH 31/58] more mtls fixes --- api/instance/connection.py | 6 ++++-- api/instance/router.py | 10 +++++++++- api/instance/schemas.py | 1 + 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/api/instance/connection.py b/api/instance/connection.py index d7a728a4..ed585876 100644 --- a/api/instance/connection.py +++ b/api/instance/connection.py @@ -31,10 +31,12 @@ def _get_ssl_and_cn(instance) -> tuple[ssl.SSLContext, str]: return _ssl_cache[iid] ctx = ssl.create_default_context() - 
ctx.load_verify_locations(cadata=instance.cacert) + extra = instance.extra or {} + # Use CA cert for chain verification when available, fall back to server cert. + ca_pem = extra.get("ca_cert") or instance.cacert + ctx.load_verify_locations(cadata=ca_pem) # Load mTLS client cert if available. - extra = instance.extra or {} client_cert_pem = extra.get("client_cert") client_key_pem = extra.get("client_key") client_key_password = extra.get("client_key_password") diff --git a/api/instance/router.py b/api/instance/router.py index 34845337..295df42d 100644 --- a/api/instance/router.py +++ b/api/instance/router.py @@ -1164,6 +1164,10 @@ async def _validate_launch_config_instance( extra_fields = { "e2e_pubkey": getattr(args, "e2e_pubkey", None), } + # Store CA cert for SSL verification (separate from server cert in cacert). + tls_ca_cert = getattr(args, "tls_ca_cert", None) + if tls_ca_cert: + extra_fields["ca_cert"] = tls_ca_cert # Store mTLS client cert + key for API-to-instance connections. tls_client_cert = getattr(args, "tls_client_cert", None) if tls_client_cert: @@ -2639,7 +2643,11 @@ async def _stream(): import httpcore as _httpcore if instance.cacert: - from api.instance.connection import _get_ssl_and_cn, _InstanceNetworkBackend, _CoreTransport + from api.instance.connection import ( + _get_ssl_and_cn, + _InstanceNetworkBackend, + _CoreTransport, + ) ssl_ctx, cn = _get_ssl_and_cn(instance) pool = _httpcore.AsyncConnectionPool( diff --git a/api/instance/schemas.py b/api/instance/schemas.py index 6d2e2687..34d0c04f 100644 --- a/api/instance/schemas.py +++ b/api/instance/schemas.py @@ -65,6 +65,7 @@ class LaunchConfigArgs(BaseModel): rint_pubkey: Optional[str] = None tls_cert: Optional[str] = None tls_cert_sig: Optional[str] = None + tls_ca_cert: Optional[str] = None tls_client_cert: Optional[str] = None tls_client_key: Optional[str] = None tls_client_key_password: Optional[str] = None From b0eb7a630385b1f2eaba79963eb225fd1964f3f0 Mon Sep 17 00:00:00 2001 From: Jon 
Durbin Date: Fri, 20 Feb 2026 16:16:35 -0500 Subject: [PATCH 32/58] fix stream --- api/instance/connection.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/api/instance/connection.py b/api/instance/connection.py index ed585876..f47a64e3 100644 --- a/api/instance/connection.py +++ b/api/instance/connection.py @@ -133,6 +133,20 @@ async def sleep(self, seconds): await self._backend.sleep(seconds) +class _AsyncStreamWrapper(httpx.AsyncByteStream): + """Wrap an httpcore async stream as an httpx AsyncByteStream.""" + + def __init__(self, core_stream): + self._stream = core_stream + + async def __aiter__(self): + async for chunk in self._stream: + yield chunk + + async def aclose(self): + await self._stream.aclose() + + class _CoreTransport(httpx.AsyncBaseTransport): """Wrap a raw httpcore pool so httpx <-> httpcore request mapping works. @@ -160,7 +174,7 @@ async def handle_async_request(self, request: httpx.Request) -> httpx.Response: return httpx.Response( status_code=core_response.status, headers=core_response.headers, - stream=core_response.stream, + stream=_AsyncStreamWrapper(core_response.stream), extensions=core_response.extensions, ) From 58b8d9d5c4e33c506a6a3f9f4ec4349cd106766b Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Fri, 20 Feb 2026 16:23:44 -0500 Subject: [PATCH 33/58] fix cllmv session keys --- api/instance/router.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/instance/router.py b/api/instance/router.py index cf7a207e..d96dcbd1 100644 --- a/api/instance/router.py +++ b/api/instance/router.py @@ -1376,7 +1376,7 @@ async def _validate_launch_config_instance( ) if instance.extra is None: instance.extra = {} - instance.extra["cllmv_session_key"] = cllmv_session_key + instance.extra = {**instance.extra, "cllmv_session_key": cllmv_session_key} logger.info(f"CLLMV V2 session key decrypted for {instance.instance_id}") except HTTPException: raise @@ -1394,7 +1394,7 @@ async def 
_validate_launch_config_instance( if cllmv_session_key: if instance.extra is None: instance.extra = {} - instance.extra["cllmv_session_key"] = cllmv_session_key + instance.extra = {**instance.extra, "cllmv_session_key": cllmv_session_key} logger.info(f"CLLMV V2 session key decrypted for {instance.instance_id}") except Exception as exc: logger.warning(f"CLLMV V2 session key decryption error (pre-0.5.5): {exc}") From 4b8fdb5148251c348a7346e12c94882bf7b06bc5 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Fri, 20 Feb 2026 20:54:43 -0500 Subject: [PATCH 34/58] more cleanup --- api/metrics/util.py | 5 +++-- conn_prober.py | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/api/metrics/util.py b/api/metrics/util.py index 922aa4f2..ae3a4311 100644 --- a/api/metrics/util.py +++ b/api/metrics/util.py @@ -9,7 +9,7 @@ import asyncio from loguru import logger from api.config import settings -from api.instance.util import load_chute_target +from api.instance.util import load_chute_target, cleanup_instance_conn_tracking from api.miner_client import get as miner_get from api.metrics.capacity import track_capacity @@ -39,7 +39,8 @@ async def _reconcile_instance(chute_id: str, instance_id: str) -> bool: redis_client = settings.redis_client instance = await load_chute_target(instance_id) if not instance: - return False + await cleanup_instance_conn_tracking(chute_id, instance_id) + return True stats = await _query_conn_stats(instance) key = f"cc:{chute_id}:{instance_id}" diff --git a/conn_prober.py b/conn_prober.py index b383903d..89c49a9e 100644 --- a/conn_prober.py +++ b/conn_prober.py @@ -16,7 +16,7 @@ from api.chute.schemas import RollingUpdate, Chute from api.database import get_session from api.instance.schemas import Instance -from api.instance.util import invalidate_instance_cache +from api.instance.util import invalidate_instance_cache, cleanup_instance_conn_tracking from api.util import encrypt_instance_request, notify_deleted, semcomp from api.chute.util 
import get_one from watchtower import check_runint @@ -106,6 +106,7 @@ async def _hard_delete_instance(session, instance: Instance, reason: str) -> Non await notify_deleted(instance, message=reason) await invalidate_instance_cache(instance.chute_id, instance_id=instance.instance_id) await session.commit() + await cleanup_instance_conn_tracking(instance.chute_id, instance.instance_id) async def _record_failure_or_delete(session, instance: Instance, hard_reason: str | None) -> None: From ccb9b14cb89002fb2ad67b955ad6d9ae45d28c4b Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Sat, 21 Feb 2026 03:59:31 -0500 Subject: [PATCH 35/58] client cert fixes --- api/instance/connection.py | 17 +++-------------- api/instance/router.py | 2 +- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/api/instance/connection.py b/api/instance/connection.py index f47a64e3..cf2420ac 100644 --- a/api/instance/connection.py +++ b/api/instance/connection.py @@ -7,8 +7,7 @@ from collections import OrderedDict from cryptography import x509 from cryptography.x509.oid import NameOID -from cryptography.hazmat.primitives import serialization -from cryptography.hazmat.primitives.serialization import load_pem_private_key + from api.util import semcomp _POOL_MAX = 2048 @@ -37,20 +36,10 @@ def _get_ssl_and_cn(instance) -> tuple[ssl.SSLContext, str]: ctx.load_verify_locations(cadata=ca_pem) # Load mTLS client cert if available. + # Client key is sent unencrypted (no passphrase) from the miner. client_cert_pem = extra.get("client_cert") client_key_pem = extra.get("client_key") - client_key_password = extra.get("client_key_password") if client_cert_pem and client_key_pem: - # Decrypt the client key and load into SSL context. - password_bytes = client_key_password.encode() if client_key_password else None - client_key = load_pem_private_key(client_key_pem.encode(), password=password_bytes) - # Re-serialize unencrypted (in memory only, never written to disk). 
- client_key_unencrypted = client_key.private_bytes( - encoding=serialization.Encoding.PEM, - format=serialization.PrivateFormat.PKCS8, - encryption_algorithm=serialization.NoEncryption(), - ) - # Write to temporary in-memory for ssl context (load_cert_chain requires files). import tempfile import os @@ -58,7 +47,7 @@ def _get_ssl_and_cn(instance) -> tuple[ssl.SSLContext, str]: cf.write(client_cert_pem.encode()) cert_tmp = cf.name with tempfile.NamedTemporaryFile(mode="wb", suffix=".pem", delete=False) as kf: - kf.write(client_key_unencrypted) + kf.write(client_key_pem.encode()) key_tmp = kf.name try: ctx.load_cert_chain(certfile=cert_tmp, keyfile=key_tmp) diff --git a/api/instance/router.py b/api/instance/router.py index d96dcbd1..b24d4cfd 100644 --- a/api/instance/router.py +++ b/api/instance/router.py @@ -1169,11 +1169,11 @@ async def _validate_launch_config_instance( if tls_ca_cert: extra_fields["ca_cert"] = tls_ca_cert # Store mTLS client cert + key for API-to-instance connections. + # Client key is unencrypted (no passphrase). 
tls_client_cert = getattr(args, "tls_client_cert", None) if tls_client_cert: extra_fields["client_cert"] = tls_client_cert extra_fields["client_key"] = getattr(args, "tls_client_key", None) - extra_fields["client_key_password"] = getattr(args, "tls_client_key_password", None) instance = Instance( instance_id=new_instance_id, From 929e3369b1eac0338f07bc3217ee08ba2bee7304 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Sat, 21 Feb 2026 04:45:35 -0500 Subject: [PATCH 36/58] pkg_hash after BCM --- api/image/forge.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/api/image/forge.py b/api/image/forge.py index 087da0b7..aa507bda 100644 --- a/api/image/forge.py +++ b/api/image/forge.py @@ -272,13 +272,6 @@ async def _capture_logs(stream, name, capture=True): RUN cp -f /tmp/chutesfs.index /etc/chutesfs.index && chmod a+r /etc/chutesfs.index USER chutes RUN CFSV_OP="${{CFSV_OP}}" /cfsv collect / /etc/chutesfs.index /tmp/chutesfs.data -""" - if semcomp(image.chutes_version, "0.5.3") >= 0 and image.name in ("sglang", "vllm"): - from api.user.service import chutes_user_id - - if image.user_id == await chutes_user_id(): - fsv_dockerfile_content += """ -RUN python -m cllmv.pkg_hash > /tmp/package_hashes.json """ # Generate bytecode manifest (V2) for chutes >= 0.5.5. 
@@ -298,6 +291,13 @@ async def _capture_logs(stream, name, capture=True): --json-output /tmp/bytecode.manifest.json \ --lib /tmp/chutes-bcm.so \ --extra-dirs /usr/local/lib/python3.12/site-packages +""" + if semcomp(image.chutes_version, "0.5.3") >= 0 and image.name in ("sglang", "vllm"): + from api.user.service import chutes_user_id + + if image.user_id == await chutes_user_id(): + fsv_dockerfile_content += """ +RUN CFSV_OP="${CFSV_OP}" python -m cllmv.pkg_hash > /tmp/package_hashes.json """ fsv_dockerfile_path = os.path.join(build_dir, "Dockerfile.fsv") @@ -1123,13 +1123,6 @@ async def _capture_logs(stream, name, capture=True): RUN cp -f /tmp/chutesfs.index /etc/chutesfs.index && chmod a+r /etc/chutesfs.index USER chutes RUN CFSV_OP="${{CFSV_OP}}" /cfsv collect / /etc/chutesfs.index /tmp/chutesfs.data -""" - if semcomp(chutes_version, "0.5.3") >= 0 and image.name in ("sglang", "vllm"): - from api.user.service import chutes_user_id - - if image.user_id == await chutes_user_id(): - fsv_dockerfile_content += """ -RUN python -m cllmv.pkg_hash > /tmp/package_hashes.json """ # Generate bytecode manifest (V2) for chutes >= 0.5.5. 
@@ -1151,6 +1144,14 @@ async def _capture_logs(stream, name, capture=True): --extra-dirs /usr/local/lib/python3.12/site-packages """ + if semcomp(chutes_version, "0.5.3") >= 0 and image.name in ("sglang", "vllm"): + from api.user.service import chutes_user_id + + if image.user_id == await chutes_user_id(): + fsv_dockerfile_content += """ +RUN CFSV_OP="${CFSV_OP}" python -m cllmv.pkg_hash > /tmp/package_hashes.json +""" + fsv_dockerfile_path = os.path.join(build_dir, "Dockerfile.fsv") with open(fsv_dockerfile_path, "w") as f: f.write(fsv_dockerfile_content) From a4f118fb6168becaf6f99bfc78efc2f26c9d9cb5 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Sat, 21 Feb 2026 06:40:57 -0500 Subject: [PATCH 37/58] f --- api/image/forge.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/api/image/forge.py b/api/image/forge.py index aa507bda..1d2fd672 100644 --- a/api/image/forge.py +++ b/api/image/forge.py @@ -264,7 +264,7 @@ async def _capture_logs(stream, name, capture=True): RUN rm -rf does_not_exist.py does_not_exist RUN PS_OP="${{PS_OP}}" chutes run does_not_exist:chute --generate-inspecto-hash > /tmp/inspecto.hash USER root -RUN rm -f /etc/bytecode.manifest /tmp/chutesfs.index /etc/chutesfs.index /tmp/chutesfs.data +RUN rm -f /etc/ld.so.preload /etc/bytecode.manifest /tmp/chutesfs.index /etc/chutesfs.index /tmp/chutesfs.data USER chutes COPY cfsv /cfsv RUN CFSV_OP="${{CFSV_OP}}" /cfsv index / /tmp/chutesfs.index @@ -364,7 +364,6 @@ async def _capture_logs(stream, name, capture=True): FROM {chutes_tag} COPY --from=fsv /etc/chutesfs.index /etc/chutesfs.index ENV PYTHONDONTWRITEBYTECODE=1 -ENTRYPOINT [] """ # Include bytecode manifest in final image if it was generated. 
if bytecode_manifest_path and os.path.exists(bytecode_manifest_path): @@ -372,6 +371,14 @@ async def _capture_logs(stream, name, capture=True): final_dockerfile_content += ( "COPY --from=fsv /tmp/bytecode.manifest /etc/bytecode.manifest\n" ) + if semcomp(image.chutes_version or "0.0.0", "0.5.5") >= 0: + final_dockerfile_content += ( + "USER root\n" + "RUN printf '/usr/local/lib/chutes-aegis.so\\\\n' > /etc/ld.so.preload && chmod 0644 /etc/ld.so.preload\n" + "USER chutes\n" + "ENV LD_PRELOAD=/usr/local/lib/chutes-aegis.so\n" + ) + final_dockerfile_content += "ENTRYPOINT []\n" final_dockerfile_path = os.path.join(build_dir, "Dockerfile.final") with open(final_dockerfile_path, "w") as f: f.write(final_dockerfile_content) @@ -1115,7 +1122,7 @@ async def _capture_logs(stream, name, capture=True): RUN rm -rf does_not_exist.py does_not_exist RUN PS_OP="${{PS_OP}}" chutes run does_not_exist:chute --generate-inspecto-hash > /tmp/inspecto.hash USER root -RUN rm -f /etc/bytecode.manifest /tmp/chutesfs.index /etc/chutesfs.index /tmp/chutesfs.data +RUN rm -f /etc/ld.so.preload /etc/bytecode.manifest /tmp/chutesfs.index /etc/chutesfs.index /tmp/chutesfs.data USER chutes COPY cfsv /cfsv RUN CFSV_OP="${{CFSV_OP}}" /cfsv index / /tmp/chutesfs.index @@ -1234,7 +1241,6 @@ async def _capture_logs(stream, name, capture=True): FROM {updated_tag} as base COPY --from=fsv /tmp/chutesfs.index /etc/chutesfs.index ENV PYTHONDONTWRITEBYTECODE=1 -ENTRYPOINT [] """ # Include bytecode manifest in final image if it was generated. 
if bytecode_manifest_path and os.path.exists(bytecode_manifest_path): @@ -1242,6 +1248,14 @@ async def _capture_logs(stream, name, capture=True): final_dockerfile_content += ( "COPY --from=fsv /tmp/bytecode.manifest /etc/bytecode.manifest\n" ) + if semcomp(chutes_version or "0.0.0", "0.5.5") >= 0: + final_dockerfile_content += ( + "USER root\n" + "RUN printf '/usr/local/lib/chutes-aegis.so\\\\n' > /etc/ld.so.preload && chmod 0644 /etc/ld.so.preload\n" + "USER chutes\n" + "ENV LD_PRELOAD=/usr/local/lib/chutes-aegis.so\n" + ) + final_dockerfile_content += "ENTRYPOINT []\n" final_dockerfile_path = os.path.join(build_dir, "Dockerfile.final") with open(final_dockerfile_path, "w") as f: f.write(final_dockerfile_content) From 170a2e14e20c52961579092619ae8d98f31f85e8 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Sat, 21 Feb 2026 06:51:11 -0500 Subject: [PATCH 38/58] fix preload --- api/image/forge.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/image/forge.py b/api/image/forge.py index 1d2fd672..537370e7 100644 --- a/api/image/forge.py +++ b/api/image/forge.py @@ -374,7 +374,7 @@ async def _capture_logs(stream, name, capture=True): if semcomp(image.chutes_version or "0.0.0", "0.5.5") >= 0: final_dockerfile_content += ( "USER root\n" - "RUN printf '/usr/local/lib/chutes-aegis.so\\\\n' > /etc/ld.so.preload && chmod 0644 /etc/ld.so.preload\n" + "RUN echo '/usr/local/lib/chutes-aegis.so' > /etc/ld.so.preload\n" "USER chutes\n" "ENV LD_PRELOAD=/usr/local/lib/chutes-aegis.so\n" ) @@ -1251,7 +1251,7 @@ async def _capture_logs(stream, name, capture=True): if semcomp(chutes_version or "0.0.0", "0.5.5") >= 0: final_dockerfile_content += ( "USER root\n" - "RUN printf '/usr/local/lib/chutes-aegis.so\\\\n' > /etc/ld.so.preload && chmod 0644 /etc/ld.so.preload\n" + "RUN echo '/usr/local/lib/chutes-aegis.so' > /etc/ld.so.preload\n" "USER chutes\n" "ENV LD_PRELOAD=/usr/local/lib/chutes-aegis.so\n" ) From 4f8eddc59136f86df69128109c6b9b148081105d Mon Sep 17 
00:00:00 2001 From: Jon Durbin Date: Sat, 21 Feb 2026 08:10:17 -0500 Subject: [PATCH 39/58] deletion redis subs --- api/instance/util.py | 71 +++++++++++++++++++++++++++++++++++++++++++- api/main.py | 2 ++ 2 files changed, 72 insertions(+), 1 deletion(-) diff --git a/api/instance/util.py b/api/instance/util.py index a3c46e9e..dfc3f62c 100644 --- a/api/instance/util.py +++ b/api/instance/util.py @@ -320,6 +320,70 @@ async def clear_instance_disable_state(instance_id: str) -> None: MANAGERS = {} +async def remove_instance_from_manager(chute_id: str, instance_id: str): + """Remove a deleted instance from the local MANAGERS dict.""" + manager = MANAGERS.get(chute_id) + if manager: + async with manager.lock: + manager.instances.pop(instance_id, None) + + +async def start_instance_invalidation_listener(): + """ + Subscribe to Redis 'events' channel and invalidate local caches + when instances are deleted, so all API pods stay in sync. + """ + import redis.asyncio as aioredis + import orjson + + while True: + pubsub = None + try: + client = aioredis.Redis( + host=settings.redis_host, + port=settings.redis_port, + db=settings.redis_db, + password=settings.redis_password, + socket_connect_timeout=2.5, + socket_timeout=60, + socket_keepalive=True, + retry_on_timeout=True, + ) + pubsub = client.pubsub() + await pubsub.subscribe("events") + logger.info("Instance invalidation listener subscribed to 'events' channel") + + async for message in pubsub.listen(): + if message["type"] != "message": + continue + try: + data = orjson.loads(message["data"]) + if data.get("reason") != "instance_deleted": + continue + payload = data.get("data", {}) + chute_id = payload.get("chute_id") + instance_id = payload.get("instance_id") + if not chute_id or not instance_id: + continue + logger.info( + f"Pubsub: invalidating cache for deleted instance {instance_id} " + f"of chute {chute_id}" + ) + await invalidate_instance_cache(chute_id, instance_id=instance_id) + await 
remove_instance_from_manager(chute_id, instance_id) + except Exception as exc: + logger.warning(f"Error processing pubsub message: {exc}") + except Exception as exc: + logger.warning(f"Instance invalidation listener error: {exc}, reconnecting in 2s") + finally: + if pubsub: + try: + await pubsub.close() + except Exception: + pass + await asyncio.sleep(2) + + class LeastConnManager: def __init__( self, @@ -480,7 +544,12 @@ async def get_target(self, avoid=[], prefixes=None): break if not instance: - yield None, "infra_overload" + # Check if there are actually any active instances (bypass LRU cache). + real_ids = await load_chute_target_ids(self.chute_id, nonce=int(time.time())) + if not real_ids: + yield None, "No infrastructure available to serve request" + else: + yield None, "infra_overload" return key = f"cc:{self.chute_id}:{instance.instance_id}" diff --git a/api/main.py b/api/main.py index 3e0dfd74..a8cd3958 100644 --- a/api/main.py +++ b/api/main.py @@ -43,6 +43,7 @@ from api.database import Base, engine, get_session from api.config import settings from api.metrics.util import keep_gauges_fresh +from api.instance.util import start_instance_invalidation_listener async def loop_lag_monitor(interval: float = 0.1, warn_threshold: float = 0.2): @@ -103,6 +104,7 @@ async def lifespan(_: FastAPI): asyncio.create_task(loop_lag_monitor()) asyncio.create_task(keep_gauges_fresh()) + asyncio.create_task(start_instance_invalidation_listener()) # Prom multi-proc dir. 
os.makedirs("/tmp/prometheus_multiproc", exist_ok=True) From 59fd82b3beb51aec9b4f0084a8451f496a000d48 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Sat, 21 Feb 2026 08:11:39 -0500 Subject: [PATCH 40/58] update bytecode manifest before pkg_hash --- api/image/forge.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/api/image/forge.py b/api/image/forge.py index 537370e7..d34be205 100644 --- a/api/image/forge.py +++ b/api/image/forge.py @@ -297,6 +297,9 @@ async def _capture_logs(stream, name, capture=True): if image.user_id == await chutes_user_id(): fsv_dockerfile_content += """ +USER root +RUN cp -f /tmp/bytecode.manifest /etc/bytecode.manifest || true +USER chutes RUN CFSV_OP="${CFSV_OP}" python -m cllmv.pkg_hash > /tmp/package_hashes.json """ @@ -1156,6 +1159,9 @@ async def _capture_logs(stream, name, capture=True): if image.user_id == await chutes_user_id(): fsv_dockerfile_content += """ +USER root +RUN cp -f /tmp/bytecode.manifest /etc/bytecode.manifest || true +USER chutes RUN CFSV_OP="${CFSV_OP}" python -m cllmv.pkg_hash > /tmp/package_hashes.json """ From b68d20616fc1f830908ac245241c400f536a412e Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Sat, 21 Feb 2026 09:52:58 -0500 Subject: [PATCH 41/58] optimizations --- api/instance/util.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/api/instance/util.py b/api/instance/util.py index dfc3f62c..5ace0a90 100644 --- a/api/instance/util.py +++ b/api/instance/util.py @@ -153,6 +153,21 @@ async def is_instance_disabled(instance_id: str) -> bool: return disabled is not None +async def batch_check_disabled(instance_ids: list[str]) -> set[str]: + """Return the set of instance IDs that are currently disabled (single MGET).""" + if not instance_ids: + return set() + keys = [f"instance_disabled:{iid}" for iid in instance_ids] + try: + values = await settings.redis_client.client.mget(keys) + if not values: + return set() + return {iid for iid, v in zip(instance_ids, 
values) if v is not None} + except Exception as e: + logger.error(f"Error batch checking disabled instances: {e}") + return set() + + async def get_instance_disable_count(instance_id: str) -> int: count = await settings.redis_client.get(f"instance_disable_count:{instance_id}") if count is None: @@ -536,10 +551,11 @@ async def get_target(self, avoid=[], prefixes=None): yield None, "No infrastructure available to serve request" return - # Find first non-disabled instance (lazy check with caching) + # Find first non-disabled instance (single MGET instead of N GETs) + disabled_ids = await batch_check_disabled([t.instance_id for t in targets]) instance = None for candidate in targets: - if not await is_instance_disabled(candidate.instance_id): + if candidate.instance_id not in disabled_ids: instance = candidate break @@ -554,11 +570,10 @@ async def get_target(self, avoid=[], prefixes=None): key = f"cc:{self.chute_id}:{instance.instance_id}" try: - await asyncio.wait_for( - self.redis_client.client.incr(key), - timeout=3.0, - ) - await self.redis_client.expire(key, self.connection_expiry) + pipe = self.redis_client.client.pipeline() + pipe.incr(key) + pipe.expire(key, self.connection_expiry) + await asyncio.wait_for(pipe.execute(), timeout=3.0) except asyncio.TimeoutError: logger.warning( f"Timeout incrementing connection count for {instance.instance_id}, proceeding anyway" From ea989067e1981cfe74785a213bd96df48f2b63d2 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Sat, 21 Feb 2026 12:23:52 -0500 Subject: [PATCH 42/58] timeout/socket death detection --- api/chute/util.py | 23 ++++++++++++++++++++++- api/instance/connection.py | 11 +++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/api/chute/util.py b/api/chute/util.py index 22219b34..2d81639f 100644 --- a/api/chute/util.py +++ b/api/chute/util.py @@ -5,6 +5,7 @@ import os import ctypes import httpx +import httpcore import asyncio import re import uuid @@ -792,7 +793,9 @@ async def _invoke_one( and 
chute.standard_template == "vllm" and plain_path.endswith("_stream") ): - # No timeouts for streaming LLM calls with newer chutes lib versions. + # No read timeout for streaming LLM calls — prefill on large prompts + # can legitimately take minutes. Dead connections are caught by TCP + # keepalive probes on the socket instead (see connection.py). timeout = None if semcomp(target.chutes_version or "0.0.0", "0.3.59") < 0: timeout = 600 @@ -1761,6 +1764,24 @@ async def invoke( return except Exception as exc: avoid.append(target.instance_id) + + # Evict cached connection on transport/connection errors so + # subsequent retries or requests don't reuse a dead socket. + if isinstance( + exc, + ( + httpx.NetworkError, + httpx.RemoteProtocolError, + httpcore.NetworkError, + httpcore.RemoteProtocolError, + ConnectionError, + OSError, + ), + ): + from api.instance.connection import evict_instance_ssl + + evict_instance_ssl(str(target.instance_id)) + error_message = f"{exc}\n{traceback.format_exc()}" error_message = error_message.replace( f"{target.host}:{target.port}", "[host redacted]" diff --git a/api/instance/connection.py b/api/instance/connection.py index cf2420ac..5e74bc8a 100644 --- a/api/instance/connection.py +++ b/api/instance/connection.py @@ -1,6 +1,7 @@ """Instance connection helpers — httpx + HTTP/2 with TLS cert verification.""" import ssl +import socket import asyncio import httpx import httpcore @@ -12,6 +13,15 @@ _POOL_MAX = 2048 +# Aggressive TCP keepalive: detect dead peers in ~40s. +# 15s idle before first probe, then probe every 5s, give up after 5 failures. +_KEEPALIVE_SOCK_OPTS = [ + (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1), + (socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 15), + (socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 5), + (socket.IPPROTO_TCP, socket.TCP_KEEPCNT, 5), +] + # LRU caches keyed by instance_id — oldest entries evicted when full. 
_ssl_cache: OrderedDict[str, tuple[ssl.SSLContext, str]] = OrderedDict() _client_cache: OrderedDict[str, httpx.AsyncClient] = OrderedDict() @@ -202,6 +212,7 @@ async def get_instance_client(instance, timeout: int = 600) -> tuple[httpx.Async ssl_context=ssl_ctx, http2=True, network_backend=_InstanceNetworkBackend(hostname=cn, ip=instance.host), + socket_options=_KEEPALIVE_SOCK_OPTS, ) client = httpx.AsyncClient( transport=_CoreTransport(pool), From cdbb97c70296a7248ce0133c175b9861ca1c7994 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Sun, 22 Feb 2026 03:04:24 -0500 Subject: [PATCH 43/58] Sharded connection manager, other fixes. --- api/chute/util.py | 2 -- api/e2e/router.py | 38 ++++++++++++++------------------------ api/instance/util.py | 37 ++++++++++++++++++++++++++++++------- api/metrics/aema.py | 7 +++---- api/metrics/util.py | 19 +++++++++++++------ api/model_routing.py | 4 ++-- 6 files changed, 62 insertions(+), 45 deletions(-) diff --git a/api/chute/util.py b/api/chute/util.py index 2d81639f..66494048 100644 --- a/api/chute/util.py +++ b/api/chute/util.py @@ -220,8 +220,6 @@ def cllmv_validate_v2( ) UNIFIED_INVOCATION_INSERT = text( f"""{BASE_UNIFIED_INVOCATION_INSERT} -ON CONFLICT (invocation_id, started_at) - DO UPDATE SET invocation_id = EXCLUDED.invocation_id {UNIFIED_INVOCATION_RV}""".format(table_name="invocations") ) diff --git a/api/e2e/router.py b/api/e2e/router.py index 794b1d1b..1fd72194 100644 --- a/api/e2e/router.py +++ b/api/e2e/router.py @@ -223,23 +223,16 @@ async def e2e_invoke( encrypt_instance_request, request_path.ljust(24, "?"), instance, True ) - # Connection tracking. + # Connection tracking (INCR/DECR, same as LeastConnManager.get_target). 
conn_id = str(uuid.uuid4()) manager = MANAGERS.get(chute_id) if manager: try: - key = f"conn:{chute_id}:{instance_id}" - await asyncio.wait_for( - manager.redis_client.eval( - manager.lua_add_connection, - 1, - key, - conn_id, - int(time.time()), - manager.connection_expiry, - ), - timeout=3.0, - ) + key = f"cc:{chute_id}:{instance_id}" + pipe = manager.redis_client.client.pipeline() + pipe.incr(key) + pipe.expire(key, manager.connection_expiry) + await asyncio.wait_for(pipe.execute(), timeout=3.0) except Exception as e: logger.warning(f"E2E: Error tracking connection: {e}") @@ -579,16 +572,13 @@ async def _cleanup(session, response, manager, chute_id, instance_id, conn_id, p pass if manager: try: - key = f"conn:{chute_id}:{instance_id}" - await asyncio.shield( - manager.redis_client.eval( - manager.lua_remove_connection, - 1, - key, - conn_id, - int(time.time()), - manager.connection_expiry, - ) - ) + key = f"cc:{chute_id}:{instance_id}" + + async def _decr(): + val = await manager.redis_client.client.decr(key) + if val < 0: + await manager.redis_client.client.set(key, 0, ex=manager.connection_expiry) + + await asyncio.shield(_decr()) except Exception as e: logger.warning(f"E2E: Error cleaning up connection {conn_id}: {e}") diff --git a/api/instance/util.py b/api/instance/util.py index 5ace0a90..8afbd040 100644 --- a/api/instance/util.py +++ b/api/instance/util.py @@ -185,13 +185,20 @@ def __init__(self, instance_id: str, miner_hotkey: str, chute_id: str, config_id self.config_id = config_id +def cm_redis_shard(chute_id: str): + """Get the sharded cm_redis client for a chute's connection counting. 
+ Uses first 8 hex chars of the UUID for deterministic sharding + (Python's hash() is randomized per-process via PYTHONHASHSEED).""" + clients = settings.cm_redis_client + return clients[int(chute_id[:8], 16) % len(clients)] + + async def cleanup_instance_conn_tracking(chute_id: str, instance_id: str): """Remove a deleted instance from Redis connection tracking sets/keys.""" try: - pipe = settings.redis_client.client.pipeline() - pipe.srem(f"cc_inst:{chute_id}", instance_id) - pipe.delete(f"cc:{chute_id}:{instance_id}") - await pipe.execute() + # Enumeration key on primary redis. + await settings.redis_client.client.srem(f"cc_inst:{chute_id}", instance_id) + await cm_redis_shard(chute_id).delete(f"cc:{chute_id}:{instance_id}") except Exception as e: logger.warning(f"Failed to clean up connection tracking for {instance_id}: {e}") @@ -409,7 +416,8 @@ def __init__( ): self.concurrency = concurrency or 1 self.chute_id = chute_id - self.redis_client = settings.redis_client + # Shard connection counting across cm_redis backends. + self.redis_client = cm_redis_shard(chute_id) self.instances = {instance.instance_id: instance for instance in instances} self.connection_expiry = connection_expiry self.mean_count = None @@ -528,9 +536,10 @@ async def _handle_prefix_routing(self, counts, grouped_by_count, min_count, pref return result async def _track_active(self, instance_id: str): - """Fire-and-forget tracking of active chutes/instances for gauge enumeration.""" + """Fire-and-forget tracking of active chutes/instances for gauge enumeration. 
+ Uses primary redis for enumeration keys (low-throughput metadata).""" try: - pipe = self.redis_client.client.pipeline() + pipe = settings.redis_client.client.pipeline() pipe.sadd("active_chutes", self.chute_id) pipe.expire("active_chutes", self.connection_expiry) pipe.sadd(f"cc_inst:{self.chute_id}", instance_id) @@ -542,6 +551,20 @@ async def _track_active(self, instance_id: str): @asynccontextmanager async def get_target(self, avoid=[], prefixes=None): + # Single-instance fast path: skip connection counting, just check disabled. + if len(self.instances) == 1: + instance = next(iter(self.instances.values())) + if instance.instance_id in avoid: + yield None, "No infrastructure available to serve request" + else: + disabled_ids = await batch_check_disabled([instance.instance_id]) + if instance.instance_id in disabled_ids: + yield None, "infra_overload" + else: + asyncio.create_task(self._track_active(instance.instance_id)) + yield instance, None + return + instance = None try: targets = await asyncio.wait_for( diff --git a/api/metrics/aema.py b/api/metrics/aema.py index fdde3330..a9c9a79b 100644 --- a/api/metrics/aema.py +++ b/api/metrics/aema.py @@ -3,9 +3,8 @@ """ import time -import uuid import redis -from api.config import settings +from api.instance.util import cm_redis_shard class AdaptiveEMA: @@ -98,7 +97,7 @@ async def update(self, key: str, new_value: float): """ Update the adaptive EMA metrics. """ - client = settings.cm_redis_client[uuid.UUID(key).int % len(settings.cm_redis_client)] + client = cm_redis_shard(key) await self._ensure_script(client) try: result = await client.evalsha( @@ -134,7 +133,7 @@ async def get_info(self, key: str): """ Get current state information. 
""" - client = settings.cm_redis_client[uuid.UUID(key).int % len(settings.cm_redis_client)] + client = cm_redis_shard(key) data = await client.hgetall(f"{self.key_prefix}:{key}") if not data: return None diff --git a/api/metrics/util.py b/api/metrics/util.py index ae3a4311..b5ebcae4 100644 --- a/api/metrics/util.py +++ b/api/metrics/util.py @@ -9,7 +9,7 @@ import asyncio from loguru import logger from api.config import settings -from api.instance.util import load_chute_target, cleanup_instance_conn_tracking +from api.instance.util import load_chute_target, cleanup_instance_conn_tracking, cm_redis_shard from api.miner_client import get as miner_get from api.metrics.capacity import track_capacity @@ -34,9 +34,14 @@ async def _query_conn_stats(instance) -> dict | None: return None +def _get_cm_redis(chute_id: str): + """Get the sharded cm_redis client for a chute's connection counting.""" + return cm_redis_shard(chute_id) + + async def _reconcile_instance(chute_id: str, instance_id: str) -> bool: """Reconcile a single instance. Returns True if corrected.""" - redis_client = settings.redis_client + cm_redis = _get_cm_redis(chute_id) instance = await load_chute_target(instance_id) if not instance: await cleanup_instance_conn_tracking(chute_id, instance_id) @@ -53,11 +58,11 @@ async def _reconcile_instance(chute_id: str, instance_id: str) -> bool: if in_flight is None: return False - current = await redis_client.get(key) + current = await cm_redis.get(key) current = int(current or 0) if current != in_flight: - await redis_client.set(key, in_flight, ex=CONNECTION_EXPIRY) + await cm_redis.set(key, in_flight, ex=CONNECTION_EXPIRY) return True return False @@ -111,7 +116,8 @@ async def bounded(coro): async def _refresh_gauges_once(): """ Read connection counts + concurrency from Redis and update prometheus gauges. - Pure Redis reads — no DB queries. Safe to run on every API replica. 
+ Enumeration keys (active_chutes, cc_inst, cc_conc) on primary redis, + cc: counters on sharded cm_redis. No DB queries. """ redis_client = settings.redis_client chute_ids_raw = await redis_client.smembers("active_chutes") @@ -138,7 +144,8 @@ async def _refresh_gauges_once(): iid = raw_iid if isinstance(raw_iid, str) else raw_iid.decode() keys.append(f"cc:{chute_id}:{iid}") - values = await redis_client.mget(keys) + cm_redis = _get_cm_redis(chute_id) + values = await cm_redis.mget(keys) total_conns = sum(int(v or 0) for v in values) instance_count = len(keys) mean_conn = total_conns / instance_count if instance_count else 0 diff --git a/api/model_routing.py b/api/model_routing.py index e8e1bd85..0d02bf44 100644 --- a/api/model_routing.py +++ b/api/model_routing.py @@ -9,7 +9,7 @@ from api.chute.schemas import Chute from api.chute.util import get_one from api.database import get_session -from api.instance.util import load_chute_target_ids +from api.instance.util import load_chute_target_ids, cm_redis_shard from api.metrics.perf import otps_tracker, ptps_tracker from api.model_alias.schemas import ModelAlias @@ -90,7 +90,7 @@ async def check_chute_availability(chute_id: str) -> bool: keys = [ f"cc:{chute_id}:{iid.decode() if isinstance(iid, bytes) else iid}" for iid in instance_ids ] - values = await settings.redis_client.mget(keys) + values = await cm_redis_shard(chute_id).mget(keys) for v in values: if int(v or 0) < concurrency: return True From 85c84700710964a242d575f212314e471ff5352f Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Sun, 22 Feb 2026 08:05:22 +0000 Subject: [PATCH 44/58] Dep updates --- pyproject.toml | 2 +- uv.lock | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7590cb52..d4da6332 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "python-slugify[unidecode]>=8.0.4,<9.0.0", "async-lru>=2.0.5,<3.0.0", "aiodns>=3.6.0,<4.0.0", - "chutes==0.5.5rc16", + 
"chutes==0.5.5rc39", "python-socketio[asyncio-client]>=5.15.0,<6.0.0", "pillow>=12.0.0,<13.0.0", "aioboto3==15.5.0", diff --git a/uv.lock b/uv.lock index a0601d54..0aa7db22 100644 --- a/uv.lock +++ b/uv.lock @@ -749,7 +749,7 @@ wheels = [ [[package]] name = "chutes" -version = "0.5.5rc16" +version = "0.5.5rc39" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiofiles" }, @@ -778,7 +778,7 @@ dependencies = [ { name = "uvicorn" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/1f/9a/3659310acaf671449509b23695e56ca9515c0cdb1c79c3046e16cef0522a/chutes-0.5.5rc16-py3-none-any.whl", hash = "sha256:219ec114f90bbd60f44f784df429a4359244f577164c93d7d151551613969818", size = 8926653, upload-time = "2026-02-20T17:22:34.248Z" }, + { url = "https://files.pythonhosted.org/packages/fb/00/4c178f2a623359ae57c54f7a7a7838b131abcdf7fe72f7612103c093a89e/chutes-0.5.5rc39-py3-none-any.whl", hash = "sha256:d74bd6b06d8100e928d437812f276187ba53724d2f89b0e59550dbcf0e6b345a", size = 8956649, upload-time = "2026-02-21T18:56:17.327Z" }, ] [[package]] @@ -855,7 +855,7 @@ requires-dist = [ { name = "backoff", specifier = ">=2.2.1,<3.0.0" }, { name = "bittensor-drand", specifier = ">=1.2.0" }, { name = "bittensor-wallet", specifier = ">=4.0.1" }, - { name = "chutes", specifier = "==0.5.5rc16" }, + { name = "chutes", specifier = "==0.5.5rc39" }, { name = "dcap-qvl", specifier = "==0.3.12" }, { name = "dnslib", specifier = ">=0.9.26,<0.10.0" }, { name = "fastapi", specifier = ">=0.124.0,<0.125.0" }, From 6dd928c7d0d5d044a6f4b266751dcddb3ca75c78 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Sun, 22 Feb 2026 03:09:14 -0500 Subject: [PATCH 45/58] Fixes --- api/metrics/aema.py | 149 ++++++++++++++------------------------------ api/metrics/perf.py | 10 ++- api/metrics/util.py | 2 + 3 files changed, 54 insertions(+), 107 deletions(-) diff --git a/api/metrics/aema.py b/api/metrics/aema.py index a9c9a79b..a36b39d8 100644 --- a/api/metrics/aema.py +++ 
b/api/metrics/aema.py @@ -1,9 +1,9 @@ """ Helper for redis-based adaptive exponential moving average tracking. +Uses plain HGETALL/HSET instead of lua scripts to avoid evalsha timeouts. """ import time -import redis from api.instance.util import cm_redis_shard @@ -21,113 +21,60 @@ def __init__( self.min_alpha = min_alpha self.max_alpha = max_alpha self.min_count = min_count - self.script_sha = None - self.script = """ - local key = KEYS[1] - local new_value = tonumber(ARGV[1]) - local current_time = tonumber(ARGV[2]) - local target_window = tonumber(ARGV[3]) - local min_alpha = tonumber(ARGV[4]) - local max_alpha = tonumber(ARGV[5]) - local min_count = tonumber(ARGV[6]) + async def update(self, key: str, new_value: float): + """ + Update the adaptive EMA metrics using plain HGETALL + HSET. + """ + client = cm_redis_shard(key) + redis_key = f"{self.key_prefix}:{key}" + current_time = time.time() - local data = redis.call('HMGET', key, 'ema', 'count', 'last_time', 'recent_rate') - local current_ema = data[1] - local current_count = data[2] - local last_time = data[3] - local recent_rate = data[4] + data = await client.hgetall(redis_key) - local new_ema, new_count, new_rate, alpha + if not data or b"ema" not in data: + new_ema = new_value + new_count = 1 + new_rate = 1.0 + alpha = self.max_alpha + else: + current_ema = float(data[b"ema"]) + current_count = int(data[b"count"]) + new_count = current_count + 1 - if not current_ema then - new_ema = new_value - new_count = 1 - new_rate = 1 - alpha = max_alpha - else - current_count = tonumber(current_count) - new_count = current_count + 1 - if last_time then - local time_diff = current_time - tonumber(last_time) - if time_diff > 0 then - local instant_rate = 1.0 / time_diff - if recent_rate then - new_rate = 0.1 * instant_rate + 0.9 * tonumber(recent_rate) - else - new_rate = instant_rate - end - else - new_rate = tonumber(recent_rate) or 1 - end - else - new_rate = 1 - end - local target_samples = new_rate * target_window 
- alpha = 1.0 / target_samples - if alpha < min_alpha then - alpha = min_alpha - elseif alpha > max_alpha then - alpha = max_alpha - end - new_ema = (alpha * new_value) + ((1 - alpha) * tonumber(current_ema)) - end - redis.call('HSET', key, - 'ema', new_ema, - 'count', new_count, - 'last_time', current_time, - 'recent_rate', new_rate, - 'last_alpha', alpha - ) - if new_count >= min_count then - return {new_ema, alpha, new_rate} - else - return {nil, alpha, new_rate} - end - """ + if b"last_time" in data: + time_diff = current_time - float(data[b"last_time"]) + if time_diff > 0: + instant_rate = 1.0 / time_diff + if b"recent_rate" in data: + new_rate = 0.1 * instant_rate + 0.9 * float(data[b"recent_rate"]) + else: + new_rate = instant_rate + else: + new_rate = float(data[b"recent_rate"]) if b"recent_rate" in data else 1.0 + else: + new_rate = 1.0 - async def _ensure_script(self, client): - """ - Load the lua script if it's not already loaded. - """ - if not hasattr(client, "_aema_script_sha"): - setattr(client, "_aema_script_sha", await client.script_load(self.script)) + target_samples = new_rate * self.target_window + alpha = 1.0 / target_samples + alpha = max(self.min_alpha, min(self.max_alpha, alpha)) + new_ema = (alpha * new_value) + ((1 - alpha) * current_ema) - async def update(self, key: str, new_value: float): - """ - Update the adaptive EMA metrics. 
- """ - client = cm_redis_shard(key) - await self._ensure_script(client) - try: - result = await client.evalsha( - client._aema_script_sha, - 1, - f"{self.key_prefix}:{key}", - new_value, - time.time(), - self.target_window, - self.min_alpha, - self.max_alpha, - self.min_count, - ) - except redis.NoScriptError: - client._aema_script_sha = await client.script_load(self.script) - result = await client.evalsha( - client._aema_script_sha, - 1, - f"{self.key_prefix}:{key}", - new_value, - time.time(), - self.target_window, - self.min_alpha, - self.max_alpha, - self.min_count, - ) - if result[0] is not None: - return (float(result[0]), float(result[1]), float(result[2])) + await client.hset( + redis_key, + mapping={ + "ema": new_ema, + "count": new_count, + "last_time": current_time, + "recent_rate": new_rate, + "last_alpha": alpha, + }, + ) + + if new_count >= self.min_count: + return (new_ema, alpha, new_rate) else: - return (None, float(result[1]), float(result[2])) + return (None, alpha, new_rate) async def get_info(self, key: str): """ diff --git a/api/metrics/perf.py b/api/metrics/perf.py index 06e4b961..5723b70b 100644 --- a/api/metrics/perf.py +++ b/api/metrics/perf.py @@ -91,9 +91,9 @@ async def update_invocation_metrics( try: ptps = metrics["it"] / metrics["ttft"] pema, _, _ = await ptps_tracker().update(chute_id, ptps) - except Exception: + except Exception as exc: logger.warning( - "Failed to update adaptive EMA for prompt processing TPS: {str(exc)}" + f"Failed to update adaptive EMA for prompt processing TPS: {exc}" ) # Completion tokens. 
@@ -101,10 +101,8 @@ async def update_invocation_metrics( try: otps = metrics["ot"] / (duration - metrics["ttft"]) oema, _, _ = await otps_tracker().update(chute_id, otps) - except Exception: - logger.warning( - "Failed to update adaptive EMA for completion TPS: {str(exc)}" - ) + except Exception as exc: + logger.warning(f"Failed to update adaptive EMA for completion TPS: {exc}") if pema and oema: updates.update( { diff --git a/api/metrics/util.py b/api/metrics/util.py index b5ebcae4..08892633 100644 --- a/api/metrics/util.py +++ b/api/metrics/util.py @@ -146,6 +146,8 @@ async def _refresh_gauges_once(): cm_redis = _get_cm_redis(chute_id) values = await cm_redis.mget(keys) + if values is None: + continue total_conns = sum(int(v or 0) for v in values) instance_count = len(keys) mean_conn = total_conns / instance_count if instance_count else 0 From 11461f2fb8582822b869f469900d00657bdb1a98 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Sun, 22 Feb 2026 04:41:15 -0500 Subject: [PATCH 46/58] fix e2e sse --- api/e2e/router.py | 41 ++++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/api/e2e/router.py b/api/e2e/router.py index 1fd72194..192a8251 100644 --- a/api/e2e/router.py +++ b/api/e2e/router.py @@ -379,32 +379,26 @@ async def _stream_e2e_response( ): """ Stream E2E response chunks, extracting usage events for billing. + + E2E streaming chunks are SSE events sent in plaintext over the mTLS + tunnel (no per-chunk transport encryption). The events are already + E2E-encrypted by the instance, so we relay them directly and only + parse usage data for billing. """ metrics = {} chunk_count = 0 try: async for raw_chunk in response.aiter_lines(): - if not raw_chunk: - continue - # Transport-decrypt each chunk. 
- try: - decrypted = await asyncio.to_thread(decrypt_instance_response, raw_chunk, instance) - except Exception as exc: - logger.warning(f"E2E stream: transport decrypt failed: {exc}") - continue - - # Parse SSE lines to extract usage data for billing. - # Decrypted chunks are SSE lines like: - # data: {"e2e_init": "..."} - ML-KEM ciphertext - # data: {"e2e": "..."} - encrypted content - # data: {"usage": {...}} - plaintext usage for billing - # data: {"e2e_error": {...}} - error with encrypted message + # aiter_lines() strips newlines; relay every line (including + # empty ones) with a trailing \n to reconstruct the original + # SSE framing (data: {...}\n\n). chunk_str = ( - decrypted.decode("utf-8", errors="replace") - if isinstance(decrypted, bytes) - else decrypted + raw_chunk.decode("utf-8", errors="replace") + if isinstance(raw_chunk, bytes) + else raw_chunk ) + # Parse non-empty SSE data lines to extract usage for billing. if chunk_str.startswith("data: "): try: obj = json.loads(chunk_str[6:].encode()) @@ -418,17 +412,18 @@ async def _stream_e2e_response( except Exception: pass - # Periodic disconnect check (every 5 chunks). - chunk_count += 1 - if chunk_count % 5 == 0 and await request.is_disconnected(): + # Periodic disconnect check (every 5 data lines). + if chunk_str: + chunk_count += 1 + if chunk_count % 5 == 0 and chunk_count > 0 and await request.is_disconnected(): logger.info( f"E2E client disconnected mid-stream for {chute.name} {instance.instance_id=}" ) await response.aclose() break - # Relay the decrypted chunk to client as-is. - yield decrypted + # Relay the line with newline to preserve SSE framing. + yield f"{chunk_str}\n".encode() # Billing after stream completes. duration = time.time() - started_at From e07d2d6d9866d181bc4ad0789b060093f497f597 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Sun, 22 Feb 2026 05:03:39 -0500 Subject: [PATCH 47/58] Subscription info headers. 
--- api/invocation/router.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/api/invocation/router.py b/api/invocation/router.py index 8819e750..af76a83a 100644 --- a/api/invocation/router.py +++ b/api/invocation/router.py @@ -93,6 +93,16 @@ def _derive_upstream_status(error: object) -> int | None: return None +def _quota_headers(request, base_headers=None): + headers = dict(base_headers or {}) + if getattr(request.state, "quota_total", None) is not None: + headers["X-Chutes-Quota-Total"] = str(int(request.state.quota_total)) + headers["X-Chutes-Quota-Used"] = str(int(request.state.quota_used)) + remaining = max(0, request.state.quota_total - request.state.quota_used) + headers["X-Chutes-Quota-Remaining"] = str(int(remaining)) + return headers + + @router.get("/usage") async def get_usage(request: Request): """ @@ -482,6 +492,10 @@ async def _invoke( # When within the quota, mark the invocation as "free" so no balance is deducted when finished. request.state.free_invocation = True + # Store quota info for response headers. + request.state.quota_total = quota + request.state.quota_used = request_count + # Identify the cord that we'll trying to access by the public API path and method. 
selected_cord = None request_body = None @@ -735,7 +749,7 @@ async def _stream_with_first_chunk(): return StreamingResponse( _stream_with_first_chunk(), media_type="text/event-stream", - headers={"X-Chutes-InvocationID": parent_invocation_id}, + headers=_quota_headers(request, {"X-Chutes-InvocationID": parent_invocation_id}), ) except HTTPException: @@ -777,22 +791,22 @@ async def _streamfile(): response = StreamingResponse( _streamfile(), media_type=result["content_type"], - headers={"X-Chutes-InvocationID": parent_invocation_id}, + headers=_quota_headers(request, {"X-Chutes-InvocationID": parent_invocation_id}), ) elif "text" in result: response = Response( content=result["text"], media_type=result["content_type"], - headers={"X-Chutes-InvocationID": parent_invocation_id}, + headers=_quota_headers(request, {"X-Chutes-InvocationID": parent_invocation_id}), ) else: response = Response( content=json.dumps(result.get("json", result)).decode(), media_type="application/json", - headers={ + headers=_quota_headers(request, { "Content-type": "application/json", "X-Chutes-InvocationID": parent_invocation_id, - }, + }), ) elif chunk.startswith('data: {"error"'): chunk_data = json.loads(chunk[6:]) From 03f8225335e0aa6560dbdd6d16a401df0db82a4b Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Sun, 22 Feb 2026 06:00:55 -0500 Subject: [PATCH 48/58] More response headers, rate limit info, etc. 
--- api/invocation/router.py | 44 ++++++++++++++++++++++++++++++++++------ api/user/schemas.py | 5 ++++- 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/api/invocation/router.py b/api/invocation/router.py index af76a83a..0bd99602 100644 --- a/api/invocation/router.py +++ b/api/invocation/router.py @@ -93,6 +93,9 @@ def _derive_upstream_status(error: object) -> int | None: return None +DEFAULT_RATE_LIMIT = 60 + + def _quota_headers(request, base_headers=None): headers = dict(base_headers or {}) if getattr(request.state, "quota_total", None) is not None: @@ -100,6 +103,14 @@ def _quota_headers(request, base_headers=None): headers["X-Chutes-Quota-Used"] = str(int(request.state.quota_used)) remaining = max(0, request.state.quota_total - request.state.quota_used) headers["X-Chutes-Quota-Remaining"] = str(int(remaining)) + rl_user = getattr(request.state, "rl_user", None) + rl_chute = getattr(request.state, "rl_chute", None) + if rl_user is not None: + headers["X-Chutes-RL-User"] = str(rl_user) if rl_user == "inf" else str(int(rl_user)) + if rl_chute is not None: + headers["X-Chutes-RL-Chute"] = str(int(rl_chute)) + if getattr(request.state, "invoice_billing", False): + headers["X-Chutes-Invoice-Billing"] = "true" return headers @@ -341,6 +352,20 @@ async def _invoke( status_code=status.HTTP_404_NOT_FOUND, detail="No matching chute found!" ) + # Resolve per-user rate limit headers. + request.state.invoice_billing = current_user.has_role(Permissioning.invoice_billing) + if not chute.public: + request.state.rl_user = "inf" + else: + overrides = current_user.rate_limit_overrides or {} + chute_rl = overrides.get(chute.chute_id) + global_rl = overrides.get("*") + if chute_rl is not None: + request.state.rl_chute = chute_rl + request.state.rl_user = global_rl if global_rl is not None else DEFAULT_RATE_LIMIT + else: + request.state.rl_user = global_rl if global_rl is not None else DEFAULT_RATE_LIMIT + # Check if the chute is disabled. 
     if chute.disabled:
         raise HTTPException(
@@ -791,22 +816,29 @@ async def _streamfile():
             response = StreamingResponse(
                 _streamfile(),
                 media_type=result["content_type"],
-                headers=_quota_headers(request, {"X-Chutes-InvocationID": parent_invocation_id}),
+                headers=_quota_headers(
+                    request, {"X-Chutes-InvocationID": parent_invocation_id}
+                ),
             )
         elif "text" in result:
             response = Response(
                 content=result["text"],
                 media_type=result["content_type"],
-                headers=_quota_headers(request, {"X-Chutes-InvocationID": parent_invocation_id}),
+                headers=_quota_headers(
+                    request, {"X-Chutes-InvocationID": parent_invocation_id}
+                ),
             )
         else:
             response = Response(
                 content=json.dumps(result.get("json", result)).decode(),
                 media_type="application/json",
-                headers=_quota_headers(request, {
-                    "Content-type": "application/json",
-                    "X-Chutes-InvocationID": parent_invocation_id,
-                }),
+                headers=_quota_headers(
+                    request,
+                    {
+                        "Content-type": "application/json",
+                        "X-Chutes-InvocationID": parent_invocation_id,
+                    },
+                ),
             )
         elif chunk.startswith('data: {"error"'):
             chunk_data = json.loads(chunk[6:])
diff --git a/api/user/schemas.py b/api/user/schemas.py
index 7392a152..c9816fc5 100644
--- a/api/user/schemas.py
+++ b/api/user/schemas.py
@@ -19,7 +19,7 @@
     select,
     case,
 )
-from sqlalchemy.dialects.postgresql import ARRAY
+from sqlalchemy.dialects.postgresql import ARRAY, JSONB
 from sqlalchemy.orm import relationship, validates
 from api.database import Base
 import hashlib
@@ -109,6 +109,9 @@ class User(Base):
     # Logo/avatar.
     logo_id = Column(String, ForeignKey("logos.logo_id", ondelete="SET NULL"), nullable=True)
 
+    # Per-user rate limit overrides (JSONB: {"*": N, "<chute_id>": M}).
+ rate_limit_overrides = Column(JSONB, nullable=True) + chutes = relationship("Chute", back_populates="user") images = relationship("Image", back_populates="user") api_keys = relationship("APIKey", back_populates="user", cascade="all, delete-orphan") From 88a807ec82e0602d3678bc36ec5ae5c4548494dc Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Sun, 22 Feb 2026 06:04:02 -0500 Subject: [PATCH 49/58] Copy new headers to e2e path. --- api/e2e/router.py | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/api/e2e/router.py b/api/e2e/router.py index 192a8251..62996407 100644 --- a/api/e2e/router.py +++ b/api/e2e/router.py @@ -43,6 +43,8 @@ from api.rate_limit import rate_limit from api.gpu import COMPUTE_UNIT_PRICE_BASIS from api.user.service import chutes_user_id, subnet_role_accessible +from api.invocation.router import _quota_headers, DEFAULT_RATE_LIMIT +from api.permissions import Permissioning router = APIRouter() @@ -207,6 +209,20 @@ async def e2e_invoke( ): raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chute not found") + # Resolve per-user rate limit headers. + request.state.invoice_billing = current_user.has_role(Permissioning.invoice_billing) + if not chute.public: + request.state.rl_user = "inf" + else: + overrides = current_user.rate_limit_overrides or {} + chute_rl = overrides.get(chute.chute_id) + global_rl = overrides.get("*") + if chute_rl is not None: + request.state.rl_chute = chute_rl + request.state.rl_user = global_rl if global_rl is not None else DEFAULT_RATE_LIMIT + else: + request.state.rl_user = global_rl if global_rl is not None else DEFAULT_RATE_LIMIT + # Read raw E2E blob from request body. e2e_blob = await request.body() if not e2e_blob: @@ -321,6 +337,7 @@ async def e2e_invoke( pooled, ), media_type="text/event-stream", + headers=_quota_headers(request), ) else: # Non-streaming: read full response, transport-decrypt, relay. 
@@ -349,7 +366,11 @@ async def e2e_invoke( ) asyncio.create_task(clear_instance_disable_state(instance.instance_id)) - return Response(content=decrypted, media_type="application/octet-stream") + return Response( + content=decrypted, + media_type="application/octet-stream", + headers=_quota_headers(request), + ) except HTTPException: raise From 66f52491123033a0d8e23301b713ab8fa29405d3 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Sun, 22 Feb 2026 07:38:08 -0500 Subject: [PATCH 50/58] e2e quotas/billing/etc. --- api/e2e/router.py | 26 ++--- api/invocation/router.py | 208 ++++----------------------------------- api/invocation/util.py | 204 +++++++++++++++++++++++++++++++++++++- 3 files changed, 230 insertions(+), 208 deletions(-) diff --git a/api/e2e/router.py b/api/e2e/router.py index 62996407..1861b9d0 100644 --- a/api/e2e/router.py +++ b/api/e2e/router.py @@ -43,8 +43,11 @@ from api.rate_limit import rate_limit from api.gpu import COMPUTE_UNIT_PRICE_BASIS from api.user.service import chutes_user_id, subnet_role_accessible -from api.invocation.router import _quota_headers, DEFAULT_RATE_LIMIT -from api.permissions import Permissioning +from api.invocation.util import ( + resolve_rate_limit_headers, + build_response_headers, + check_quota_and_balance, +) router = APIRouter() @@ -209,19 +212,8 @@ async def e2e_invoke( ): raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chute not found") - # Resolve per-user rate limit headers. 
- request.state.invoice_billing = current_user.has_role(Permissioning.invoice_billing) - if not chute.public: - request.state.rl_user = "inf" - else: - overrides = current_user.rate_limit_overrides or {} - chute_rl = overrides.get(chute.chute_id) - global_rl = overrides.get("*") - if chute_rl is not None: - request.state.rl_chute = chute_rl - request.state.rl_user = global_rl if global_rl is not None else DEFAULT_RATE_LIMIT - else: - request.state.rl_user = global_rl if global_rl is not None else DEFAULT_RATE_LIMIT + resolve_rate_limit_headers(request, current_user, chute) + await check_quota_and_balance(request, current_user, chute) # Read raw E2E blob from request body. e2e_blob = await request.body() @@ -337,7 +329,7 @@ async def e2e_invoke( pooled, ), media_type="text/event-stream", - headers=_quota_headers(request), + headers=build_response_headers(request), ) else: # Non-streaming: read full response, transport-decrypt, relay. @@ -369,7 +361,7 @@ async def e2e_invoke( return Response( content=decrypted, media_type="application/octet-stream", - headers=_quota_headers(request), + headers=build_response_headers(request), ) except HTTPException: diff --git a/api/invocation/router.py b/api/invocation/router.py index 0bd99602..80972b4e 100644 --- a/api/invocation/router.py +++ b/api/invocation/router.py @@ -15,7 +15,7 @@ import traceback from loguru import logger from pydantic import BaseModel, ValidationError, Field -from datetime import date, datetime, timedelta, UTC +from datetime import date, datetime from io import BytesIO, StringIO from typing import Optional from fastapi import APIRouter, Depends, HTTPException, status, Request, Response @@ -30,18 +30,19 @@ is_shared, count_prompt_tokens, ) -from api.util import ( - recreate_vlm_payload, - has_legacy_private_billing, -) -from api.user.schemas import User, InvocationQuota -from api.user.service import get_current_user, chutes_user_id, subnet_role_accessible +from api.util import recreate_vlm_payload +from 
api.user.schemas import User +from api.user.service import get_current_user, subnet_role_accessible from api.report.schemas import Report, ReportArgs from api.database import get_db_session, get_session, get_inv_session, get_db_ro_session from api.instance.util import get_chute_target_manager -from api.invocation.util import get_prompt_prefix_hashes, get_sponsored_chute_ids +from api.invocation.util import ( + get_prompt_prefix_hashes, + resolve_rate_limit_headers, + build_response_headers, + check_quota_and_balance, +) from api.util import validate_tool_call_arguments -from api.permissions import Permissioning router = APIRouter() host_invocation_router = APIRouter() @@ -65,10 +66,6 @@ class Config: extra = "forbid" -async def initialize_quota_cache(cache_key: str) -> None: - await settings.redis_client.incrbyfloat(cache_key, 0.0) - - def _derive_upstream_status(error: object) -> int | None: """ Map upstream error payloads to HTTP statuses used by retry/failover logic. @@ -93,27 +90,6 @@ def _derive_upstream_status(error: object) -> int | None: return None -DEFAULT_RATE_LIMIT = 60 - - -def _quota_headers(request, base_headers=None): - headers = dict(base_headers or {}) - if getattr(request.state, "quota_total", None) is not None: - headers["X-Chutes-Quota-Total"] = str(int(request.state.quota_total)) - headers["X-Chutes-Quota-Used"] = str(int(request.state.quota_used)) - remaining = max(0, request.state.quota_total - request.state.quota_used) - headers["X-Chutes-Quota-Remaining"] = str(int(remaining)) - rl_user = getattr(request.state, "rl_user", None) - rl_chute = getattr(request.state, "rl_chute", None) - if rl_user is not None: - headers["X-Chutes-RL-User"] = str(rl_user) if rl_user == "inf" else str(int(rl_user)) - if rl_chute is not None: - headers["X-Chutes-RL-Chute"] = str(int(rl_chute)) - if getattr(request.state, "invoice_billing", False): - headers["X-Chutes-Invoice-Billing"] = "true" - return headers - - @router.get("/usage") async def 
get_usage(request: Request): """ @@ -352,19 +328,7 @@ async def _invoke( status_code=status.HTTP_404_NOT_FOUND, detail="No matching chute found!" ) - # Resolve per-user rate limit headers. - request.state.invoice_billing = current_user.has_role(Permissioning.invoice_billing) - if not chute.public: - request.state.rl_user = "inf" - else: - overrides = current_user.rate_limit_overrides or {} - chute_rl = overrides.get(chute.chute_id) - global_rl = overrides.get("*") - if chute_rl is not None: - request.state.rl_chute = chute_rl - request.state.rl_user = global_rl if global_rl is not None else DEFAULT_RATE_LIMIT - else: - request.state.rl_user = global_rl if global_rl is not None else DEFAULT_RATE_LIMIT + resolve_rate_limit_headers(request, current_user, chute) # Check if the chute is disabled. if chute.disabled: @@ -381,145 +345,7 @@ async def _invoke( detail="This chute does not have TEE enabled. Use the /teeify endpoint to promote the chute to TEE, or remove the X-TEE-Only header.", ) - quota_date = date.today() - if chute.discount == 1.0: - request.state.free_invocation = True - - # Limit free model usage independently of quota. 
- if current_user.permissions_bitmask == 0: - effective_balance = ( - current_user.current_balance.effective_balance - if current_user.current_balance - else 0.0 - ) - unlimited = False - if effective_balance >= 10: - unlimited = True - else: - quota = await InvocationQuota.get(current_user.user_id, "__anychute__") - if quota > 2000: - unlimited = True - if not unlimited: - free_usage = 0 - try: - qkey = f"free_usage:{quota_date}:{current_user.user_id}" - free_usage = await settings.redis_client.incr(qkey) - if free_usage <= 3: - tomorrow = datetime.combine(quota_date, datetime.min.time()) + timedelta( - days=1 - ) - exp = max(int((tomorrow - datetime.now()).total_seconds()), 1) # noqa - except Exception as exc: - logger.warning( - f"Error checking free usage for {current_user.user_id=}: {str(exc)}" - ) - if free_usage > 100: - logger.warning( - f"{current_user.user_id=} {current_user.username=} has hit daily free limit: {chute.name=} {effective_balance=}" - ) - raise HTTPException( - status_code=status.HTTP_429_TOO_MANY_REQUESTS, - detail="Free models limit reached for today - maintain >= $10 balance or upgrade subscription to pro to unlock more.", - ) - elif current_user.user_id == settings.or_free_user_id: - sponsored_chutes = await get_sponsored_chute_ids(current_user.user_id) - if chute.chute_id not in sponsored_chutes: - logger.warning( - f"Attempt to invoke {chute.chute_id=} {chute.name=} from openrouter free account." - ) - raise HTTPException( - status_code=status.HTTP_429_TOO_MANY_REQUESTS, - detail="Invalid free model, please select from the updated list of current chutes free models", - ) - - # Check account balance. - origin_ip = request.headers.get("x-forwarded-for", "").split(",")[0] - - # Prevent calling private chutes when the owner has no balance. 
- if ( - not chute.public - and not has_legacy_private_billing(chute) - and chute.user_id != await chutes_user_id() - ): - owner_balance = ( - chute.user.current_balance.effective_balance if chute.user.current_balance else 0.0 - ) - if owner_balance <= 0: - logger.warning( - f"Preventing execution of chute {chute.name=} {chute.chute_id=}, " - f"creator has insufficient balance {owner_balance=}" - ) - raise HTTPException( - status_code=status.HTTP_402_PAYMENT_REQUIRED, - detail="Chute unavailable because the creator of this chute {chute.user_id=} has zero balance.", - ) - request.state.free_invocation = True - - # Check account quotas if not free/invoiced. - quota_date = date.today() - if not ( - current_user.has_role(Permissioning.free_account) - or current_user.has_role(Permissioning.invoice_billing) - or request.state.free_invocation - ): - quota = await InvocationQuota.get(current_user.user_id, chute.chute_id) - key = await InvocationQuota.quota_key(current_user.user_id, chute.chute_id) - client_success, cached = await settings.redis_client.get_with_status(key) - request_count = 0.0 - if cached is not None: - try: - request_count = float(cached.decode()) - except ValueError: - await settings.redis_client.delete(key) - elif client_success: - asyncio.create_task(initialize_quota_cache(key)) - - # No quota for private/user-created chutes. - effective_balance = ( - current_user.current_balance.effective_balance if current_user.current_balance else 0.0 - ) - if ( - not chute.public - and not has_legacy_private_billing(chute) - and chute.user_id != await chutes_user_id() - ): - quota = 0 - - # Automatically switch to paygo when the quota is exceeded. 
- if request_count >= quota: - if effective_balance <= 0 and not request.state.free_invocation: - logger.warning( - f"Payment required: attempted invocation of {chute.name} " - f"from user {current_user.username} [{origin_ip}] with no balance " - f"and {request_count=} of {quota=}" - ) - error_kwargs = { - "status_code": status.HTTP_402_PAYMENT_REQUIRED, - "detail": { - "message": ( - f"Quota exceeded and account balance is ${current_user.current_balance.effective_balance}, " - f"please pay with fiat or send tao to {current_user.payment_address}" - ), - }, - } - if quota: - quota_reset = quota_date + timedelta(days=1) - quota_reset = quota_reset = datetime( - year=quota_reset.year, - month=quota_reset.month, - day=quota_reset.day, - tzinfo=UTC, - ).isoformat() - error_kwargs["detail"]["quota_reset_timestamp"] = quota_reset - - raise HTTPException(**error_kwargs) - else: - # When within the quota, mark the invocation as "free" so no balance is deducted when finished. - request.state.free_invocation = True - - # Store quota info for response headers. - request.state.quota_total = quota - request.state.quota_used = request_count + await check_quota_and_balance(request, current_user, chute) # Identify the cord that we'll trying to access by the public API path and method. 
selected_cord = None @@ -774,7 +600,9 @@ async def _stream_with_first_chunk(): return StreamingResponse( _stream_with_first_chunk(), media_type="text/event-stream", - headers=_quota_headers(request, {"X-Chutes-InvocationID": parent_invocation_id}), + headers=build_response_headers( + request, {"X-Chutes-InvocationID": parent_invocation_id} + ), ) except HTTPException: @@ -816,7 +644,7 @@ async def _streamfile(): response = StreamingResponse( _streamfile(), media_type=result["content_type"], - headers=_quota_headers( + headers=build_response_headers( request, {"X-Chutes-InvocationID": parent_invocation_id} ), ) @@ -824,7 +652,7 @@ async def _streamfile(): response = Response( content=result["text"], media_type=result["content_type"], - headers=_quota_headers( + headers=build_response_headers( request, {"X-Chutes-InvocationID": parent_invocation_id} ), ) @@ -832,7 +660,7 @@ async def _streamfile(): response = Response( content=json.dumps(result.get("json", result)).decode(), media_type="application/json", - headers=_quota_headers( + headers=build_response_headers( request, { "Content-type": "application/json", diff --git a/api/invocation/util.py b/api/invocation/util.py index 5c67e169..59ce7cf9 100644 --- a/api/invocation/util.py +++ b/api/invocation/util.py @@ -3,17 +3,21 @@ """ import os +import asyncio import hashlib import aiohttp import orjson as json -from datetime import datetime, timezone +from datetime import date, datetime, timedelta, timezone from typing import Dict from async_lru import alru_cache from loguru import logger +from fastapi import HTTPException, status from api.gpu import COMPUTE_UNIT_PRICE_BASIS from api.config import settings from api.database import get_session, get_inv_session from api.chute.schemas import NodeSelector +from api.permissions import Permissioning +from api.util import has_legacy_private_billing from sqlalchemy import text TOKEN_METRICS_QUERY = """ @@ -263,3 +267,201 @@ async def generate_invocation_history_metrics(): await 
session.execute(text("TRUNCATE TABLE diffusion_metrics RESTART IDENTITY")) await session.execute(text(TOKEN_METRICS_QUERY)) await session.execute(text(DIFFUSION_METRICS_QUERY)) + + +DEFAULT_RATE_LIMIT = 60 + + +async def _initialize_quota_cache(cache_key: str) -> None: + await settings.redis_client.incrbyfloat(cache_key, 0.0) + + +def resolve_rate_limit_headers(request, current_user, chute): + """ + Set rate limit and invoice billing headers on request.state. + """ + request.state.invoice_billing = current_user.has_role(Permissioning.invoice_billing) + if not chute.public: + request.state.rl_user = "inf" + else: + overrides = current_user.rate_limit_overrides or {} + chute_rl = overrides.get(chute.chute_id) + global_rl = overrides.get("*") + if chute_rl is not None: + request.state.rl_chute = chute_rl + request.state.rl_user = global_rl if global_rl is not None else DEFAULT_RATE_LIMIT + else: + request.state.rl_user = global_rl if global_rl is not None else DEFAULT_RATE_LIMIT + + +def build_response_headers(request, base_headers=None): + """ + Build response headers dict with quota, rate limit, and invoice billing info. 
+ """ + headers = dict(base_headers or {}) + if getattr(request.state, "quota_total", None) is not None: + headers["X-Chutes-Quota-Total"] = str(int(request.state.quota_total)) + headers["X-Chutes-Quota-Used"] = str(int(request.state.quota_used)) + remaining = max(0, request.state.quota_total - request.state.quota_used) + headers["X-Chutes-Quota-Remaining"] = str(int(remaining)) + rl_user = getattr(request.state, "rl_user", None) + rl_chute = getattr(request.state, "rl_chute", None) + if rl_user is not None: + headers["X-Chutes-RL-User"] = str(rl_user) if rl_user == "inf" else str(int(rl_user)) + if rl_chute is not None: + headers["X-Chutes-RL-Chute"] = str(int(rl_chute)) + if getattr(request.state, "invoice_billing", False): + headers["X-Chutes-Invoice-Billing"] = "true" + return headers + + +async def check_quota_and_balance(request, current_user, chute): + """ + Enforce free-model limits, private chute owner balance, and subscriber + invocation quotas. Sets request.state.free_invocation and quota state + used by build_response_headers(). + + Must be called AFTER resolve_rate_limit_headers(). + """ + from api.user.schemas import InvocationQuota + from api.user.service import chutes_user_id + + quota_date = date.today() + + # Fully discounted chutes are free but have usage caps for unprivileged users. 
+ if chute.discount == 1.0: + request.state.free_invocation = True + + if current_user.permissions_bitmask == 0: + effective_balance = ( + current_user.current_balance.effective_balance + if current_user.current_balance + else 0.0 + ) + unlimited = False + if effective_balance >= 10: + unlimited = True + else: + quota = await InvocationQuota.get(current_user.user_id, "__anychute__") + if quota > 2000: + unlimited = True + if not unlimited: + free_usage = 0 + try: + qkey = f"free_usage:{quota_date}:{current_user.user_id}" + free_usage = await settings.redis_client.incr(qkey) + if free_usage <= 3: + tomorrow = datetime.combine(quota_date, datetime.min.time()) + timedelta( + days=1 + ) + exp = max(int((tomorrow - datetime.now()).total_seconds()), 1) # noqa + except Exception as exc: + logger.warning( + f"Error checking free usage for {current_user.user_id=}: {str(exc)}" + ) + if free_usage > 100: + logger.warning( + f"{current_user.user_id=} {current_user.username=} has hit daily free limit: {chute.name=} {effective_balance=}" + ) + raise HTTPException( + status_code=status.HTTP_429_TOO_MANY_REQUESTS, + detail="Free models limit reached for today - maintain >= $10 balance or upgrade subscription to pro to unlock more.", + ) + + elif current_user.user_id == settings.or_free_user_id: + sponsored_chutes = await get_sponsored_chute_ids(current_user.user_id) + if chute.chute_id not in sponsored_chutes: + logger.warning( + f"Attempt to invoke {chute.chute_id=} {chute.name=} from openrouter free account." + ) + raise HTTPException( + status_code=status.HTTP_429_TOO_MANY_REQUESTS, + detail="Invalid free model, please select from the updated list of current chutes free models", + ) + + # Prevent calling private chutes when the owner has no balance. 
+    origin_ip = request.headers.get("x-forwarded-for", "").split(",")[0]
+    if (
+        not chute.public
+        and not has_legacy_private_billing(chute)
+        and chute.user_id != await chutes_user_id()
+    ):
+        owner_balance = (
+            chute.user.current_balance.effective_balance if chute.user.current_balance else 0.0
+        )
+        if owner_balance <= 0:
+            logger.warning(
+                f"Preventing execution of chute {chute.name=} {chute.chute_id=}, "
+                f"creator has insufficient balance {owner_balance=}"
+            )
+            raise HTTPException(
+                status_code=status.HTTP_402_PAYMENT_REQUIRED,
+                detail=f"Chute unavailable because the creator of this chute {chute.user_id=} has zero balance.",
+            )
+        request.state.free_invocation = True
+
+    # Check account quotas if not free/invoiced.
+    quota_date = date.today()
+    if not (
+        current_user.has_role(Permissioning.free_account)
+        or current_user.has_role(Permissioning.invoice_billing)
+        or request.state.free_invocation
+    ):
+        quota = await InvocationQuota.get(current_user.user_id, chute.chute_id)
+        key = await InvocationQuota.quota_key(current_user.user_id, chute.chute_id)
+        client_success, cached = await settings.redis_client.get_with_status(key)
+        request_count = 0.0
+        if cached is not None:
+            try:
+                request_count = float(cached.decode())
+            except ValueError:
+                await settings.redis_client.delete(key)
+        elif client_success:
+            asyncio.create_task(_initialize_quota_cache(key))
+
+        # No quota for private/user-created chutes.
+        effective_balance = (
+            current_user.current_balance.effective_balance if current_user.current_balance else 0.0
+        )
+        if (
+            not chute.public
+            and not has_legacy_private_billing(chute)
+            and chute.user_id != await chutes_user_id()
+        ):
+            quota = 0
+
+        # Automatically switch to paygo when the quota is exceeded.
+ if request_count >= quota: + if effective_balance <= 0 and not request.state.free_invocation: + logger.warning( + f"Payment required: attempted invocation of {chute.name} " + f"from user {current_user.username} [{origin_ip}] with no balance " + f"and {request_count=} of {quota=}" + ) + error_kwargs = { + "status_code": status.HTTP_402_PAYMENT_REQUIRED, + "detail": { + "message": ( + f"Quota exceeded and account balance is ${current_user.current_balance.effective_balance}, " + f"please pay with fiat or send tao to {current_user.payment_address}" + ), + }, + } + if quota: + quota_reset = quota_date + timedelta(days=1) + quota_reset = datetime( + year=quota_reset.year, + month=quota_reset.month, + day=quota_reset.day, + tzinfo=timezone.utc, + ).isoformat() + error_kwargs["detail"]["quota_reset_timestamp"] = quota_reset + + raise HTTPException(**error_kwargs) + else: + # When within the quota, mark the invocation as "free" so no balance is deducted when finished. + request.state.free_invocation = True + + # Store quota info for response headers. + request.state.quota_total = quota + request.state.quota_used = request_count From 6396133843f56cc7aa31ccff83a8b826e3f9025e Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Sun, 22 Feb 2026 07:50:16 -0500 Subject: [PATCH 51/58] quota usage fixes, more e2e updates to match normal invocation path, etc. 
--- api/e2e/router.py | 79 ++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 74 insertions(+), 5 deletions(-) diff --git a/api/e2e/router.py b/api/e2e/router.py index 1861b9d0..2197b7d9 100644 --- a/api/e2e/router.py +++ b/api/e2e/router.py @@ -16,7 +16,7 @@ from fastapi.responses import StreamingResponse, Response from api.config import settings from api.user.service import get_current_user -from api.user.schemas import User +from api.user.schemas import User, PriceOverride, InvocationDiscount, InvocationQuota from api.chute.util import ( get_one, is_shared, @@ -24,6 +24,7 @@ get_mtoken_price, update_usage_data, safe_store_invocation, + selector_hourly_price, ) from api.chute.schemas import NodeSelector from api.instance.util import ( @@ -42,12 +43,14 @@ from api.miner_client import sign_request from api.rate_limit import rate_limit from api.gpu import COMPUTE_UNIT_PRICE_BASIS +from api.constants import DIFFUSION_PRICE_MULT_PER_STEP from api.user.service import chutes_user_id, subnet_role_accessible from api.invocation.util import ( resolve_rate_limit_headers, build_response_headers, check_quota_and_balance, ) +from api.metrics.capacity import track_request_completed, track_capacity router = APIRouter() @@ -211,6 +214,11 @@ async def e2e_invoke( or subnet_role_accessible(chute, current_user) ): raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Chute not found") + if chute.disabled: + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="This chute is currently disabled.", + ) resolve_rate_limit_headers(request, current_user, chute) await check_quota_and_balance(request, current_user, chute) @@ -350,6 +358,7 @@ async def e2e_invoke( invocation_id, parent_invocation_id, request, + manager, ) # Cleanup. @@ -452,6 +461,7 @@ async def _stream_e2e_response( invocation_id, parent_invocation_id, request, + manager, ) # Clear failure tracking on success. 
@@ -480,15 +490,19 @@ async def _do_billing( invocation_id, parent_invocation_id, request, + manager=None, ): """ Handle billing for an E2E invocation. """ user_id = user.user_id balance_used = 0.0 + override_applied = False free_invocation = getattr(request.state, "free_invocation", False) if compute_units and not free_invocation: + hourly_price = await selector_hourly_price(chute.node_selector) + # Per megatoken pricing for vLLM chutes. if chute.standard_template == "vllm" and metrics and metrics.get("it"): per_million_in, per_million_out, cache_discount = await get_mtoken_price( @@ -502,14 +516,40 @@ async def _do_billing( - cached_tokens / 1000000.0 * per_million_in * cache_discount + output_tokens / 1000000.0 * per_million_out ) - else: - # Time-based pricing. + override_applied = True + + elif (price_override := await PriceOverride.get(user_id, chute.chute_id)) is not None: + if chute.standard_template == "diffusion" and price_override.per_step is not None: + balance_used = (metrics.get("steps", 0) or 0) * price_override.per_step + override_applied = True + elif price_override.per_request is not None: + balance_used = price_override.per_request + override_applied = True + + # If no override was applied, use standard pricing. + if not override_applied: discount = 0.0 if chute.discount and -3 < chute.discount <= 1: discount = chute.discount if discount < 1.0: - balance_used = compute_units * COMPUTE_UNIT_PRICE_BASIS / 3600.0 - balance_used -= balance_used * discount + if chute.standard_template == "diffusion": + balance_used = ( + (metrics.get("steps", 0) or 0) + * hourly_price + * DIFFUSION_PRICE_MULT_PER_STEP + ) + balance_used -= balance_used * discount + + default_balance_used = compute_units * COMPUTE_UNIT_PRICE_BASIS / 3600.0 + default_balance_used -= default_balance_used * discount + if not balance_used: + balance_used = default_balance_used + + # User discounts. 
+ if balance_used and not override_applied: + user_discount = await InvocationDiscount.get(user_id, chute.chute_id) + if user_discount: + balance_used -= balance_used * user_discount # Don't charge for private instances. if ( @@ -555,6 +595,35 @@ async def _do_billing( ) ) + # Increment quota usage value. + if ( + free_invocation + and chute.discount < 1.0 + and ( + chute.public + or has_legacy_private_billing(chute) + or chute.user_id == await chutes_user_id() + ) + ): + key = await InvocationQuota.quota_key(user.user_id, chute.chute_id) + asyncio.create_task(settings.redis_client.incrbyfloat(key, 1.0)) + + # Prometheus metrics. + track_request_completed(chute.chute_id) + if manager and hasattr(manager, "mean_count"): + try: + instance_util = getattr(manager, "_last_instance_utilization", None) + if instance_util is not None: + instance_util = float(instance_util) + await track_capacity( + chute.chute_id, + manager.mean_count or 0, + chute.concurrency or 1, + instance_utilization=instance_util, + ) + except Exception: + pass + # Push back instance shutdown for private chutes. 
if ( not chute.public From e858ad81a8f79646e0b6de787e6718f677203a1b Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Sun, 22 Feb 2026 08:03:59 -0500 Subject: [PATCH 52/58] Forge fixes --- api/image/forge.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/api/image/forge.py b/api/image/forge.py index d34be205..ea83e990 100644 --- a/api/image/forge.py +++ b/api/image/forge.py @@ -377,7 +377,7 @@ async def _capture_logs(stream, name, capture=True): if semcomp(image.chutes_version or "0.0.0", "0.5.5") >= 0: final_dockerfile_content += ( "USER root\n" - "RUN echo '/usr/local/lib/chutes-aegis.so' > /etc/ld.so.preload\n" + "RUN echo '/usr/local/lib/chutes-aegis.so' > /etc/ld.so.preload && chmod 0644 /etc/ld.so.preload\n" "USER chutes\n" "ENV LD_PRELOAD=/usr/local/lib/chutes-aegis.so\n" ) @@ -1051,7 +1051,7 @@ async def _capture_logs(stream, name, capture=True): dockerfile_content = f"""FROM {full_source_tag} USER root ENV LD_PRELOAD="" -RUN rm -f /etc/chutesfs.index +RUN rm -f /etc/chutesfs.index /usr/bin/cautious-launcher /etc/ld.so.preload RUN usermod -aG root chutes || true RUN chmod g+rwx /usr/local/lib /usr/local/bin /usr/local/share /usr/local/share/man RUN chmod g+rwx /usr/local/lib/python3.12/dist-packages || true @@ -1257,7 +1257,7 @@ async def _capture_logs(stream, name, capture=True): if semcomp(chutes_version or "0.0.0", "0.5.5") >= 0: final_dockerfile_content += ( "USER root\n" - "RUN echo '/usr/local/lib/chutes-aegis.so' > /etc/ld.so.preload\n" + "RUN echo '/usr/local/lib/chutes-aegis.so' > /etc/ld.so.preload && chmod 0644 /etc/ld.so.preload\n" "USER chutes\n" "ENV LD_PRELOAD=/usr/local/lib/chutes-aegis.so\n" ) From 5ed449cad824138a49c7ef49e9472b810af5a539 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Sun, 22 Feb 2026 11:10:26 -0500 Subject: [PATCH 53/58] Example e2e client script. 
--- scripts/test_e2e_client.py | 243 +++++++++++++++++++++++++++++++++++++ 1 file changed, 243 insertions(+) create mode 100644 scripts/test_e2e_client.py diff --git a/scripts/test_e2e_client.py b/scripts/test_e2e_client.py new file mode 100644 index 00000000..538caa40 --- /dev/null +++ b/scripts/test_e2e_client.py @@ -0,0 +1,243 @@ +#!/usr/bin/env python3 +""" +E2E encrypted client test — ML-KEM-768 + HKDF + ChaCha20-Poly1305. + +Usage: CHUTES_API_KEY=... python scripts/test_e2e_client.py +""" + +import base64 +import gzip +import json +import os +import sys + +import httpx +from cryptography.hazmat.primitives.ciphers.aead import ChaCha20Poly1305 +from cryptography.hazmat.primitives.kdf.hkdf import HKDF +from cryptography.hazmat.primitives import hashes +from loguru import logger +from pqcrypto.kem.ml_kem_768 import generate_keypair, encrypt, decrypt + +API_BASE = "https://api.chutes.dev" +CHUTE_ID = "edd0810e-51f8-5119-b480-09c950812833" +MODEL = "unsloth/Llama-3.2-1B-Instruct" +E2E_PATH = "/v1/chat/completions" + +API_KEY = os.environ.get("CHUTES_API_KEY") +if not API_KEY: + logger.error("CHUTES_API_KEY env var is required") + sys.exit(1) + +AUTH_HEADERS = {"Authorization": f"Bearer {API_KEY}"} +MLKEM_CT_SIZE = 1088 +TAG_SIZE = 16 + + +def derive_key(shared_secret: bytes, mlkem_ct: bytes, info: bytes) -> bytes: + return HKDF(algorithm=hashes.SHA256(), length=32, salt=mlkem_ct[:16], info=info).derive( + shared_secret + ) + + +def chacha_encrypt(key: bytes, nonce: bytes, plaintext: bytes) -> tuple[bytes, bytes]: + ct_tag = ChaCha20Poly1305(key).encrypt(nonce, plaintext, None) + return ct_tag[:-TAG_SIZE], ct_tag[-TAG_SIZE:] + + +def chacha_decrypt(key: bytes, nonce: bytes, ciphertext: bytes, tag: bytes) -> bytes: + return ChaCha20Poly1305(key).decrypt(nonce, ciphertext + tag, None) + + +def discover_instances() -> dict: + url = f"{API_BASE}/e2e/instances/{CHUTE_ID}" + resp = httpx.get(url, headers=AUTH_HEADERS, timeout=30) + resp.raise_for_status() + data = 
resp.json() + logger.info( + f"Discovered {len(data['instances'])} instance(s), nonces expire in {data['nonce_expires_in']}s" + ) + return data + + +def build_e2e_blob(e2e_pubkey_b64: str, payload: dict, response_pk: bytes) -> bytes: + e2e_pubkey = base64.b64decode(e2e_pubkey_b64) + mlkem_ct, shared_secret = encrypt(e2e_pubkey) + sym_key = derive_key(shared_secret, mlkem_ct, b"e2e-req-v1") + logger.info("ML-KEM encapsulated, HKDF derived request key (info=e2e-req-v1)") + + payload["e2e_response_pk"] = base64.b64encode(response_pk).decode() + compressed = gzip.compress(json.dumps(payload).encode()) + + nonce = os.urandom(12) + ciphertext, tag = chacha_encrypt(sym_key, nonce, compressed) + blob = mlkem_ct + nonce + ciphertext + tag + logger.info( + f"E2E blob: {len(blob)} bytes (ct={len(mlkem_ct)}, nonce=12, encrypted={len(ciphertext)}, tag=16)" + ) + return blob + + +def decrypt_response_blob(response_blob: bytes, response_sk: bytes) -> dict: + mlkem_ct = response_blob[:MLKEM_CT_SIZE] + nonce = response_blob[MLKEM_CT_SIZE : MLKEM_CT_SIZE + 12] + ciphertext = response_blob[MLKEM_CT_SIZE + 12 : -TAG_SIZE] + tag = response_blob[-TAG_SIZE:] + + shared_secret = decrypt(response_sk, mlkem_ct) + sym_key = derive_key(shared_secret, mlkem_ct, b"e2e-resp-v1") + logger.info("ML-KEM decapsulated, HKDF derived response key (info=e2e-resp-v1)") + + plaintext = gzip.decompress(chacha_decrypt(sym_key, nonce, ciphertext, tag)) + return json.loads(plaintext) + + +def decrypt_stream_init(response_sk: bytes, mlkem_ct: bytes) -> bytes: + shared_secret = decrypt(response_sk, mlkem_ct) + stream_key = derive_key(shared_secret, mlkem_ct, b"e2e-stream-v1") + logger.info("Stream key exchange complete (info=e2e-stream-v1)") + return stream_key + + +def decrypt_stream_chunk(enc_chunk_b64: str, stream_key: bytes) -> str: + raw = base64.b64decode(enc_chunk_b64) + return chacha_decrypt(stream_key, raw[:12], raw[12:-TAG_SIZE], raw[-TAG_SIZE:]).decode() + + +def invoke_headers(instance_id: str, 
nonce: str, stream: bool) -> dict: + return { + **AUTH_HEADERS, + "X-Chute-Id": CHUTE_ID, + "X-Instance-Id": instance_id, + "X-E2E-Nonce": nonce, + "X-E2E-Stream": str(stream).lower(), + "X-E2E-Path": E2E_PATH, + "Content-Type": "application/octet-stream", + } + + +def test_non_streaming(): + logger.info("--- Non-streaming E2E chat completion ---") + data = discover_instances() + instance = data["instances"][0] + + response_pk, response_sk = generate_keypair() + payload = { + "model": MODEL, + "messages": [{"role": "user", "content": "Say 'hello world' and nothing else."}], + } + e2e_blob = build_e2e_blob(instance["e2e_pubkey"], payload, response_pk) + + resp = httpx.post( + f"{API_BASE}/e2e/invoke", + headers=invoke_headers(instance["instance_id"], instance["nonces"][0], stream=False), + content=e2e_blob, + timeout=60, + ) + if resp.status_code != 200: + logger.error(f"Request failed: {resp.status_code} {resp.text[:500]}") + return False + + result = decrypt_response_blob(resp.content, response_sk) + logger.success(f"Response:\n{json.dumps(result, indent=2)}") + return True + + +def test_streaming(): + logger.info("--- Streaming E2E chat completion ---") + data = discover_instances() + instance = data["instances"][0] + + response_pk, response_sk = generate_keypair() + payload = { + "model": MODEL, + "messages": [{"role": "user", "content": "Count from 1 to 5, one number per line."}], + "stream": True, + } + e2e_blob = build_e2e_blob(instance["e2e_pubkey"], payload, response_pk) + + stream_key = None + chunks = 0 + content = "" + + with httpx.stream( + "POST", + f"{API_BASE}/e2e/invoke", + headers=invoke_headers(instance["instance_id"], instance["nonces"][0], stream=True), + content=e2e_blob, + timeout=60, + ) as resp: + if resp.status_code != 200: + logger.error(f"Stream failed: {resp.status_code} {resp.read()[:500]}") + return False + + for line in resp.iter_lines(): + if not line.startswith("data: "): + continue + raw = line[6:].strip() + if raw == "[DONE]": + 
break + + try: + event = json.loads(raw) + except json.JSONDecodeError: + continue + + if "e2e_init" in event: + mlkem_ct = base64.b64decode(event["e2e_init"]) + stream_key = decrypt_stream_init(response_sk, mlkem_ct) + + elif "e2e" in event: + if stream_key is None: + logger.error("Received e2e chunk before e2e_init") + return False + chunk_text = decrypt_stream_chunk(event["e2e"], stream_key) + chunks += 1 + try: + chunk_data = json.loads(chunk_text) + for choice in chunk_data.get("choices", []): + c = choice.get("delta", {}).get("content", "") + if c: + content += c + print(c, end="", flush=True) + except json.JSONDecodeError: + content += chunk_text + print(chunk_text, end="", flush=True) + + elif "usage" in event: + logger.info(f"Usage: {event['usage']}") + + elif "e2e_error" in event: + logger.error(f"E2E error: {event['e2e_error']}") + return False + + print() + logger.success(f"Streamed {chunks} chunks: '{content.strip()}'") + return True + + +def main(): + logger.remove() + logger.add( + sys.stderr, + level="INFO", + format="{time:HH:mm:ss} | {level:<8} | {message}", + ) + + logger.info(f"E2E Client Test — {API_BASE} — {MODEL}") + + results = {} + for name, test in [("non_streaming", test_non_streaming), ("streaming", test_streaming)]: + try: + results[name] = test() + except Exception: + logger.exception(f"{name} test failed") + results[name] = False + + logger.info( + "Results: " + ", ".join(f"{k}={'PASS' if v else 'FAIL'}" for k, v in results.items()) + ) + sys.exit(0 if all(results.values()) else 1) + + +if __name__ == "__main__": + main() From 0ad0d3ec3e836aae47cec3a2e3e3738d63bd40ff Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Sun, 22 Feb 2026 12:30:18 -0500 Subject: [PATCH 54/58] handle new format for llm details --- api/chute/util.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/api/chute/util.py b/api/chute/util.py index 66494048..b654e72e 100644 --- a/api/chute/util.py +++ b/api/chute/util.py @@ 
-1936,26 +1936,34 @@ async def load_llm_details(chute, target): path, _ = await asyncio.to_thread( encrypt_instance_request, "/get_models".ljust(24, "?"), target, True ) - payload = { - "args": base64.b64encode(gzip.compress(pickle.dumps(tuple()))).decode(), - "kwargs": base64.b64encode(gzip.compress(pickle.dumps({}))).decode(), - } - payload, iv = await asyncio.to_thread(encrypt_instance_request, json.dumps(payload), target) + use_new_format = semcomp(target.chutes_version or "0.0.0", "0.5.5") >= 0 + if use_new_format: + payload_bytes = gzip.compress(json.dumps({}).encode()) + else: + payload_bytes = json.dumps( + { + "args": base64.b64encode(gzip.compress(pickle.dumps(tuple()))).decode(), + "kwargs": base64.b64encode(gzip.compress(pickle.dumps({}))).decode(), + } + ) + payload, iv = await asyncio.to_thread(encrypt_instance_request, payload_bytes, target) session, pooled = await get_miner_session(target, timeout=60) llm_timeout = httpx.Timeout(connect=10.0, read=60.0, write=30.0, pool=10.0) try: headers, payload_string = sign_request(miner_ss58=target.miner_hotkey, payload=payload) - headers["X-Chutes-Serialized"] = "true" + if not use_new_format: + headers["X-Chutes-Serialized"] = "true" resp = await session.post( f"/{path}", content=payload_string, headers=headers, timeout=llm_timeout ) resp.raise_for_status() raw_data = resp.json() logger.info(f"{target.chute_id=} {target.instance_id=} {target.miner_hotkey=}: {raw_data=}") - info = json.loads( - await asyncio.to_thread(decrypt_instance_response, raw_data["json"], target, iv) - ) + plaintext = await asyncio.to_thread(decrypt_instance_response, raw_data["json"], target, iv) + if use_new_format: + plaintext = gzip.decompress(plaintext) + info = json.loads(plaintext) return info["data"][0] finally: if not pooled: From f14509b2ebeb1fbeb258800ca246cb6a8d16b21c Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Mon, 23 Feb 2026 12:09:44 +0000 Subject: [PATCH 55/58] chutes lib update --- pyproject.toml | 2 +- uv.lock | 6 +++--- 2
files changed, 4 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d4da6332..f1bdf83c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,7 @@ dependencies = [ "python-slugify[unidecode]>=8.0.4,<9.0.0", "async-lru>=2.0.5,<3.0.0", "aiodns>=3.6.0,<4.0.0", - "chutes==0.5.5rc39", + "chutes==0.5.5rc45", "python-socketio[asyncio-client]>=5.15.0,<6.0.0", "pillow>=12.0.0,<13.0.0", "aioboto3==15.5.0", diff --git a/uv.lock b/uv.lock index 0aa7db22..a40d629f 100644 --- a/uv.lock +++ b/uv.lock @@ -749,7 +749,7 @@ wheels = [ [[package]] name = "chutes" -version = "0.5.5rc39" +version = "0.5.5rc45" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiofiles" }, @@ -778,7 +778,7 @@ dependencies = [ { name = "uvicorn" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/fb/00/4c178f2a623359ae57c54f7a7a7838b131abcdf7fe72f7612103c093a89e/chutes-0.5.5rc39-py3-none-any.whl", hash = "sha256:d74bd6b06d8100e928d437812f276187ba53724d2f89b0e59550dbcf0e6b345a", size = 8956649, upload-time = "2026-02-21T18:56:17.327Z" }, + { url = "https://files.pythonhosted.org/packages/d1/48/fb0a4bdf35989a0c6bd0dece3d637c3203ffb5a820feca63a8cc4637e629/chutes-0.5.5rc45-py3-none-any.whl", hash = "sha256:f78c617a0270b82c1a12b58d3ed847d872f1e04cbf2750a58989632587916cd9", size = 8962379, upload-time = "2026-02-23T12:06:29.39Z" }, ] [[package]] @@ -855,7 +855,7 @@ requires-dist = [ { name = "backoff", specifier = ">=2.2.1,<3.0.0" }, { name = "bittensor-drand", specifier = ">=1.2.0" }, { name = "bittensor-wallet", specifier = ">=4.0.1" }, - { name = "chutes", specifier = "==0.5.5rc39" }, + { name = "chutes", specifier = "==0.5.5rc45" }, { name = "dcap-qvl", specifier = "==0.3.12" }, { name = "dnslib", specifier = ">=0.9.26,<0.10.0" }, { name = "fastapi", specifier = ">=0.124.0,<0.125.0" }, From 0c693d782eb2319ed0d3af6c2376c1a2e355f5e3 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Mon, 23 Feb 2026 12:24:48 +0000 Subject: [PATCH 
56/58] Fix tls command check. --- watchtower.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/watchtower.py b/watchtower.py index 09631c62..8e45927c 100644 --- a/watchtower.py +++ b/watchtower.py @@ -593,13 +593,6 @@ def get_expected_command(chute, miner_hotkey: str, seed: int = None, tls: bool = "--validator-ss58", settings.validator_ss58, ] - if tls: - parts += [ - "--keyfile", - "/app/.chutetls/key.pem", - "--certfile", - "/app/.chutetls/cert.pem", - ] return " ".join(parts).strip() # Legacy format. From 33ff6c4883c7cbcec2515ee6737ca68376ea5476 Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Mon, 23 Feb 2026 12:59:31 -0500 Subject: [PATCH 57/58] lock modules flag --- api/chute/response.py | 1 + api/chute/router.py | 11 +++++++++++ api/chute/schemas.py | 2 ++ api/instance/router.py | 9 +++++++-- api/instance/schemas.py | 1 + api/instance/util.py | 6 +++++- 6 files changed, 27 insertions(+), 3 deletions(-) diff --git a/api/chute/response.py b/api/chute/response.py index 52f82af1..c6614acc 100644 --- a/api/chute/response.py +++ b/api/chute/response.py @@ -61,6 +61,7 @@ class ChuteResponse(BaseModel): preemptible: bool allow_external_egress: Optional[bool] = True tee: Optional[bool] = False + lock_modules: Optional[bool] = None effective_compute_multiplier: Optional[float] = None compute_multiplier_factors: Optional[Dict[str, float]] = None bounty: Optional[int] = None diff --git a/api/chute/router.py b/api/chute/router.py index 3ed2252b..3a1e44db 100644 --- a/api/chute/router.py +++ b/api/chute/router.py @@ -1098,6 +1098,14 @@ async def _deploy_chute( if "affine" in chute_args.name.lower() or "turbovision" in chute_args.name.lower(): allow_egress = False + # Module locking: standard templates are always locked, otherwise default False. 
+ if chute_args.standard_template: + lock_modules = True + elif chute_args.lock_modules is not None: + lock_modules = chute_args.lock_modules + else: + lock_modules = False + # Cache encryption, currently not fully function so disabled. if chute_args.encrypted_fs is None: chute_args.encrypted_fs = False @@ -1326,6 +1334,7 @@ async def _deploy_chute( ) chute.allow_external_egress = allow_egress chute.tee = chute_args.tee + chute.lock_modules = lock_modules chute.encrypted_fs = chute.encrypted_fs and chute_args.encrypted_fs # XX prevent changing else: try: @@ -1371,6 +1380,7 @@ async def _deploy_chute( allow_external_egress=allow_egress, encrypted_fs=chute_args.encrypted_fs, tee=chute_args.tee, + lock_modules=lock_modules, ) except ValueError as exc: raise HTTPException( @@ -2029,6 +2039,7 @@ async def teeify_chute( allow_external_egress=chute.allow_external_egress, encrypted_fs=chute.encrypted_fs, tee=True, + lock_modules=chute.lock_modules if chute.lock_modules is not None else False, immutable=True, ) except ValueError as exc: diff --git a/api/chute/schemas.py b/api/chute/schemas.py index 191929ff..d29faf37 100644 --- a/api/chute/schemas.py +++ b/api/chute/schemas.py @@ -209,6 +209,7 @@ class ChuteArgs(BaseModel): allow_external_egress: Optional[bool] = Field(default=False) encrypted_fs: Optional[bool] = Field(default=False) tee: Optional[bool] = Field(default=False) + lock_modules: Optional[bool] = Field(default=None) class InvocationArgs(BaseModel): @@ -250,6 +251,7 @@ class Chute(Base): allow_external_egress = Column(Boolean, default=False) encrypted_fs = Column(Boolean, default=False) tee = Column(Boolean, default=False) + lock_modules = Column(Boolean, nullable=True, default=None) immutable = Column(Boolean, default=False) disabled = Column(Boolean, default=False) diff --git a/api/instance/router.py b/api/instance/router.py index b24d4cfd..181c7b2e 100644 --- a/api/instance/router.py +++ b/api/instance/router.py @@ -1051,7 +1051,7 @@ async def 
_validate_launch_config_instance( f"{log_prefix} has tampered with netnanny? {args.netnanny_hash=} {args.egress=} {chute.allow_external_egress=}" ) launch_config.failed_at = func.now() - launch_config.verification_error = "Failed netnanny validation." + launch_config.verification_error = "Failed aegis validation." await db.commit() raise HTTPException( status_code=status.HTTP_403_FORBIDDEN, @@ -1599,7 +1599,12 @@ async def get_launch_config( token = None if semcomp(chute.chutes_version or "0.0.0", "0.3.61") >= 0: token = create_launch_jwt_v2( - launch_config, egress=chute.allow_external_egress, disk_gb=disk_gb + launch_config, + egress=chute.allow_external_egress, + lock_modules=True + if chute.standard_template + else (chute.lock_modules if chute.lock_modules is not None else False), + disk_gb=disk_gb, ) else: token = create_launch_jwt(launch_config, disk_gb=disk_gb) diff --git a/api/instance/schemas.py b/api/instance/schemas.py index 34d0c04f..b9dab724 100644 --- a/api/instance/schemas.py +++ b/api/instance/schemas.py @@ -57,6 +57,7 @@ class LaunchConfigArgs(BaseModel): port_mappings: list[PortMap] fsv: Optional[str] = None egress: Optional[bool] = None + lock_modules: Optional[bool] = None netnanny_hash: Optional[str] = None run_path: Optional[str] = None py_dirs: Optional[list[str]] = None diff --git a/api/instance/util.py b/api/instance/util.py index 8afbd040..a560db4c 100644 --- a/api/instance/util.py +++ b/api/instance/util.py @@ -749,7 +749,10 @@ def _decode_chutes_jwt(token: str, *, require_exp: bool) -> dict: def create_launch_jwt_v2( - launch_config: LaunchConfig, disk_gb: int = None, egress: bool = False + launch_config: LaunchConfig, + disk_gb: int = None, + egress: bool = False, + lock_modules: bool = False, ) -> str: now = datetime.now(timezone.utc) expires_at = now + timedelta(hours=2) @@ -763,6 +766,7 @@ def create_launch_jwt_v2( "env_key": launch_config.env_key, "iss": "chutes", "egress": egress, + "lock_modules": lock_modules, "env_type": 
env_type, } if launch_config.job_id: From 9ca46e0636fa7cb10cb5151bdf997fde37e1947a Mon Sep 17 00:00:00 2001 From: Jon Durbin Date: Mon, 23 Feb 2026 13:12:08 -0500 Subject: [PATCH 58/58] inspecto logging on failure with >= 0.5.5 --- api/instance/router.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/api/instance/router.py b/api/instance/router.py index 181c7b2e..e5e810d4 100644 --- a/api/instance/router.py +++ b/api/instance/router.py @@ -839,10 +839,11 @@ async def _validate_launch_config_inspecto( detail=launch_config.verification_error, ) - enforce_inspecto = "PS_OP" in os.environ and semcomp(chute.chutes_version, "0.5.5") < 0 + check_inspecto = "PS_OP" in os.environ + enforce_inspecto = check_inspecto and semcomp(chute.chutes_version, "0.5.5") < 0 inspecto_valid = True fail_reason = None - if enforce_inspecto: + if check_inspecto: inspecto_hash = await get_inspecto_hash(chute.image_id) if not inspecto_hash: logger.info(f"INSPECTO: image_id={chute.image_id} has no inspecto hash; allowing.") @@ -877,14 +878,19 @@ async def _validate_launch_config_inspecto( inspecto_valid = False fail_reason = f"inspecto verification failed: {payload}" if not inspecto_valid: - logger.error(f"{log_prefix} has invalid inspecto verification: {fail_reason}") - launch_config.failed_at = func.now() - launch_config.verification_error = "Failed inspecto environment/lib verification." - await db.commit() - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail=launch_config.verification_error, - ) + if enforce_inspecto: + logger.error(f"{log_prefix} has invalid inspecto verification: {fail_reason}") + launch_config.failed_at = func.now() + launch_config.verification_error = "Failed inspecto environment/lib verification." 
+ await db.commit() + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail=launch_config.verification_error, + ) + else: + logger.warning( + f"{log_prefix} inspecto mismatch (not enforced, chutes_version={chute.chutes_version}): {fail_reason}" + ) async def _validate_launch_config_filesystem(