@@ -60,13 +60,12 @@ def hash_combine(seed, token_id):
6060
6161
6262class PrefixCachePlugin (Plugin ):
63- def __init__ (self , generator_backend , kvcache_settings , infer_context , output_filter , plugin_data_param , ** kwargs ):
63+ def __init__ (self , generator_backend , kvcache_settings , infer_context , plugin_data_param , ** kwargs ):
6464 super ().__init__ ()
6565 self .generator_backend = generator_backend
6666 self .model_wrapper = self .generator_backend .model_wrapper
6767 self .kvcache_settings = kvcache_settings
6868 self .infer_context = infer_context
69- self .output_filter = output_filter
7069 self .plugin_data_param = plugin_data_param
7170 self .model_name = self .generator_backend .model_name
7271 self .sp_size = self .infer_context .spcp_parallel_info .sp_size
@@ -180,10 +179,10 @@ def model_inputs_update(self, model_inputs, input_metadata, sampling_metadata, c
180179 f'#batchsize: { batch_size } , '
181180 f'#batched-tokens: { input_metadata .total_seq_num } , '
182181 f'#local cached-tokens: { local_matched_token_num } , '
183- f'#local cache hit rate: { round (local_cache_hit_rate , 3 )} %, '
182+ f'#local cached hit rate: { round (local_cache_hit_rate , 3 )} %, '
184183 f'#remote cached-tokens: { remote_matched_token_num } , '
185- f'#remote cache hit rate: { round (remote_cache_hit_rate , 3 )} %, '
186- f'#cache hit rate: { round (local_cache_hit_rate + remote_cache_hit_rate , 3 )} %' )
184+ f'#remote cached hit rate: { round (remote_cache_hit_rate , 3 )} %, '
185+ f'#cached hit rate: { round (local_cache_hit_rate + remote_cache_hit_rate , 3 )} %' )
187186 print_log (self .rank , logger .info , f'Prefix Cache Global Reporter: '
188187 f'#total prefill tokens: { self .total_token_num } , '
189188 f'#total local matched tokens: { self .total_local_matched_token_num } , '
0 commit comments