File tree Expand file tree Collapse file tree 2 files changed +9
-5
lines changed Expand file tree Collapse file tree 2 files changed +9
-5
lines changed Original file line number Diff line number Diff line change @@ -857,8 +857,7 @@ def post_process_pooling(
857857 )
858858
859859 if not skip_save_output :
860- if envs .FD_USE_GET_SAVE_OUTPUT_V1 :
861- if save_each_rank or model_output .mp_rank == 0 :
862- output = _build_stream_transfer_data (output_tokens = None , pooler_outputs = pooler_output .outputs )
860+ if save_each_rank or model_output .mp_rank == 0 :
861+ output = _build_stream_transfer_data (output_tokens = None , pooler_outputs = pooler_output .outputs )
863862
864- async_output_queue .put (output )
863+ async_output_queue .put (output )
Original file line number Diff line number Diff line change @@ -940,7 +940,7 @@ def get_supported_pooling_tasks(self) -> list[PoolingTask]:
940940 if self .cache_config .enable_chunked_prefill and "encode" in supported_tasks :
941941 supported_tasks .remove ("encode" )
942942
943- logger .warning (
943+ logger .debug (
944944 "Chunked prefill is not supported with "
945945 "encode task which using ALL pooling. "
946946 "Please turn off chunked prefill by export=FD_DISABLE_CHUNKED_PREFILL=1 before using it."
@@ -1537,6 +1537,11 @@ def _dummy_pooler_run_task(
15371537
15381538 req_num_tokens = num_tokens // num_reqs
15391539
1540+ print ("num_tokens" , num_tokens )
1541+ print ("max_num_seqs" , max_num_seqs )
1542+ print ("num_reqs" , num_reqs )
1543+ print ("min_tokens_per_req" , min_tokens_per_req )
1544+ print ("num_scheduled_token_list" , num_scheduled_tokens_list )
15401545 dummy_prompt_lens = paddle .to_tensor (num_scheduled_tokens_list , dtype = "int64" )
15411546 dummy_token_ids = paddle .zeros (
15421547 [num_reqs , req_num_tokens ],
You can’t perform that action at this time.
0 commit comments