Skip to content

Commit d2ef8fe

Browse files
authored
[fix] Update patch to track mindie changes (#863)
## Purpose

Update patch to track mindie changes

## Modifications

## Test
1 parent a82a043 commit d2ef8fe

File tree

2 files changed

+4
-6
lines changed

2 files changed

+4
-6
lines changed

docs/source/index.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ Paper list:
4949
getting-started/quickstart_vllm
5050
getting-started/quickstart_vllm_ascend
5151
getting-started/quickstart_sglang
52-
getting-started/quickstart_mindie_llm
5352
getting-started/kv_cache_calculator
5453
:::
5554

ucm/integration/mindie/patch/prefix_cache_plugin.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,12 @@ def hash_combine(seed, token_id):
6060

6161

6262
class PrefixCachePlugin(Plugin):
63-
def __init__(self, generator_backend, kvcache_settings, infer_context, output_filter, plugin_data_param, **kwargs):
63+
def __init__(self, generator_backend, kvcache_settings, infer_context, plugin_data_param, **kwargs):
6464
super().__init__()
6565
self.generator_backend = generator_backend
6666
self.model_wrapper = self.generator_backend.model_wrapper
6767
self.kvcache_settings = kvcache_settings
6868
self.infer_context = infer_context
69-
self.output_filter = output_filter
7069
self.plugin_data_param = plugin_data_param
7170
self.model_name = self.generator_backend.model_name
7271
self.sp_size = self.infer_context.spcp_parallel_info.sp_size
@@ -180,10 +179,10 @@ def model_inputs_update(self, model_inputs, input_metadata, sampling_metadata, c
180179
f'#batchsize: {batch_size}, '
181180
f'#batched-tokens: {input_metadata.total_seq_num}, '
182181
f'#local cached-tokens: {local_matched_token_num}, '
183-
f'#local cache hit rate: {round(local_cache_hit_rate, 3)}%, '
182+
f'#local cached hit rate: {round(local_cache_hit_rate, 3)}%, '
184183
f'#remote cached-tokens: {remote_matched_token_num}, '
185-
f'#remote cache hit rate: {round(remote_cache_hit_rate, 3)}%, '
186-
f'#cache hit rate: {round(local_cache_hit_rate + remote_cache_hit_rate, 3)}%')
184+
f'#remote cached hit rate: {round(remote_cache_hit_rate, 3)}%, '
185+
f'#cached hit rate: {round(local_cache_hit_rate + remote_cache_hit_rate, 3)}%')
187186
print_log(self.rank, logger.info, f'Prefix Cache Global Reporter: '
188187
f'#total prefill tokens: {self.total_token_num}, '
189188
f'#total local matched tokens: {self.total_local_matched_token_num}, '

0 commit comments

Comments
 (0)