Skip to content

Commit 6bc1ed2

Browse files
committed
delete FD_DISABLE_CHUNKED_PREFILL and FD_USE_GET_SAVE_OUTPUT_V1
1 parent 30795d2 commit 6bc1ed2

File tree

2 files changed

+9
-5
lines changed

2 files changed

+9
-5
lines changed

fastdeploy/model_executor/pre_and_post_process.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -857,8 +857,7 @@ def post_process_pooling(
857857
)
858858

859859
if not skip_save_output:
860-
if envs.FD_USE_GET_SAVE_OUTPUT_V1:
861-
if save_each_rank or model_output.mp_rank == 0:
862-
output = _build_stream_transfer_data(output_tokens=None, pooler_outputs=pooler_output.outputs)
860+
if save_each_rank or model_output.mp_rank == 0:
861+
output = _build_stream_transfer_data(output_tokens=None, pooler_outputs=pooler_output.outputs)
863862

864-
async_output_queue.put(output)
863+
async_output_queue.put(output)

fastdeploy/worker/gpu_model_runner.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -940,7 +940,7 @@ def get_supported_pooling_tasks(self) -> list[PoolingTask]:
940940
if self.cache_config.enable_chunked_prefill and "encode" in supported_tasks:
941941
supported_tasks.remove("encode")
942942

943-
logger.warning(
943+
logger.debug(
944944
"Chunked prefill is not supported with "
945945
"encode task which using ALL pooling. "
946946
"Please turn off chunked prefill by export=FD_DISABLE_CHUNKED_PREFILL=1 before using it."
@@ -1537,6 +1537,11 @@ def _dummy_pooler_run_task(
15371537

15381538
req_num_tokens = num_tokens // num_reqs
15391539

1540+
print("num_tokens", num_tokens)
1541+
print("max_num_seqs", max_num_seqs)
1542+
print("num_reqs", num_reqs)
1543+
print("min_tokens_per_req", min_tokens_per_req)
1544+
print("num_scheduled_token_list", num_scheduled_tokens_list)
15401545
dummy_prompt_lens = paddle.to_tensor(num_scheduled_tokens_list, dtype="int64")
15411546
dummy_token_ids = paddle.zeros(
15421547
[num_reqs, req_num_tokens],

0 commit comments

Comments
 (0)