From 19f6863fec246e53167e592e264ff640a44db4d9 Mon Sep 17 00:00:00 2001
From: Mark Hartmann
Date: Mon, 5 May 2025 14:36:57 +0200
Subject: [PATCH] handle chunked requests (llama-swap compatibility)

---
 mlx_lm/server.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/mlx_lm/server.py b/mlx_lm/server.py
index f74bdf6b..7388885d 100644
--- a/mlx_lm/server.py
+++ b/mlx_lm/server.py
@@ -293,8 +293,28 @@ def do_POST(self):
             return
 
         # Fetch and parse request body
-        content_length = int(self.headers["Content-Length"])
-        raw_body = self.rfile.read(content_length)
+        content_length = self.headers.get("Content-Length")
+        if content_length:
+            raw_body = self.rfile.read(int(content_length))
+        else:
+            # No Content-Length: assume "Transfer-Encoding: chunked" (RFC 9112 §7.1)
+            chunks = []
+            while True:
+                line = self.rfile.readline()
+                if not line:
+                    break
+                # The chunk-size line may carry extensions: "size;name=value"
+                size = int(line.split(b";", 1)[0].strip(), 16)
+                if size == 0:
+                    # Drain optional trailer fields up to the terminating blank line
+                    while True:
+                        trailer = self.rfile.readline()
+                        if not trailer or trailer in (b"\r\n", b"\n"):
+                            break
+                    break
+                chunks.append(self.rfile.read(size))
+                self.rfile.read(2)  # Discard the CR+LF that terminates each chunk
+            raw_body = b"".join(chunks)
         self.body = json.loads(raw_body.decode())
         indent = "\t"  # Backslashes can't be inside of f-strings
         logging.debug(f"Incoming Request Body: {json.dumps(self.body, indent=indent)}")