Skip to content

Commit ad2f7b6

Browse files
committed
fix test_return_token_ids.py and update enable_thinking
1 parent 58616e4 commit ad2f7b6

File tree

2 files changed

+3
-33
lines changed

2 files changed

+3
-33
lines changed

fastdeploy/worker/gpu_model_runner.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -962,6 +962,9 @@ def _init_share_inputs(self, max_num_seqs: int):
962962
self.share_inputs["kv_num_blocks_x_cpu"] = None # CPU
963963

964964
# Initialize thinking related buffers
965+
self.share_inputs["enable_thinking"] = paddle.full(shape=[max_num_seqs, 1], fill_value=False, dtype="bool")
966+
self.share_inputs["need_think_end"] = paddle.full(shape=[max_num_seqs, 1], fill_value=0, dtype="int32")
967+
self.share_inputs["reasoning_index"] = paddle.full(shape=[max_num_seqs, 1], fill_value=0, dtype="int32")
965968
self.share_inputs["max_think_lens"] = paddle.full(shape=[max_num_seqs, 1], fill_value=-1, dtype="int32")
966969
self.share_inputs["limit_think_status"] = paddle.full(shape=[max_num_seqs, 1], fill_value=0, dtype="int32")
967970

tests/pooling/test_Qwen3-Embedding_serving.py

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -273,36 +273,3 @@ def test_single_text_embedding(embedding_api_url, headers):
273273
else:
274274
print(f"Comparing with baseline: {baseline_file}")
275275
check_embedding_against_baseline(embedding, baseline_file, threshold=0.01)
276-
277-
278-
def test_batch_embeddings(embedding_api_url, headers):
    """Test embedding generation when ``input`` is given as a list.

    Posts a list-valued ``input`` (currently a single sentence) to the
    embedding endpoint, then verifies that:

    * the HTTP status code is 200,
    * the JSON response contains a ``data`` field,
    * ``data`` holds exactly one entry per input text.

    Each returned embedding is then compared against a per-index baseline
    file. The baseline is looked up under the directory named by the
    ``MODEL_PATH`` environment variable, or relative to the current working
    directory when that variable is unset or empty. If the baseline file does
    not exist yet, it is created from the current embedding instead of being
    checked.

    Args:
        embedding_api_url: URL the request is posted to.
        headers: HTTP headers sent with the request.
    """
    payload = {
        "input": [
            "北京天安门在哪里?",
        ],
        "model": "Qwen3-Embedding-0.6B",
    }

    resp = requests.post(embedding_api_url, headers=headers, json=payload)
    assert resp.status_code == 200, f"Unexpected status code: {resp.status_code}"

    result = resp.json()
    assert "data" in result, "Response missing 'data' field"

    # Derive the expected count from the request itself so the check and its
    # failure message cannot drift apart when inputs are added or removed.
    expected_count = len(payload["input"])
    actual_count = len(result["data"])
    assert (
        actual_count == expected_count
    ), f"Expected {expected_count} embedding result(s), got {actual_count}"

    base_path = os.getenv("MODEL_PATH", "")

    for idx, item in enumerate(result["data"]):
        embedding = item["embedding"]

        # One baseline file per position in the batch.
        baseline_filename = f"Qwen3-Embedding-0.6B-batch-{idx}-baseline.json"
        if base_path:
            baseline_file = os.path.join(base_path, baseline_filename)
        else:
            baseline_file = baseline_filename

        if not os.path.exists(baseline_file):
            # First run: record the current output as the baseline.
            save_embedding_baseline(embedding, baseline_file)
        else:
            check_embedding_against_baseline(embedding, baseline_file, threshold=0.01)

0 commit comments

Comments
 (0)