minor change on scheduler for hierarchical caching
xiezhq-hermann committed Dec 19, 2024
1 parent df95d8c commit 4fd25f3
Showing 2 changed files with 9 additions and 2 deletions.
python/sglang/srt/managers/schedule_policy.py (5 additions, 1 deletion)
@@ -55,8 +55,12 @@ def calc_priority(self, waiting_queue: List[Req]):
             for r in waiting_queue:
                 # NOTE: the prefix_indices must always be aligned with last_node
                 r.prefix_indices, r.last_node = self.tree_cache.match_prefix(
-                    rid=r.rid, key=r.adjust_max_prefix_ids(), load_cache=True
+                    rid=r.rid, key=r.adjust_max_prefix_ids()
                 )
+                # to prevent evicting nodes referenced by other requests in waiting queue
+                self.tree_cache.inc_lock_ref(r.last_node)
+            for r in waiting_queue:
+                self.tree_cache.dec_lock_ref(r.last_node)

             prefix_computed = True
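The inc_lock_ref / dec_lock_ref calls added above pin the matched prefix nodes while the whole waiting queue is prioritized, so one request's prefix match cannot evict nodes that another queued request has just resolved. Below is a minimal sketch of the lock-reference-counting idea on a radix-tree KV cache; the class and field names are illustrative, not sglang's actual RadixCache internals.

# Illustrative sketch of lock-reference counting on a radix-tree KV cache.
# TreeNode / SketchRadixCache / evictable_size_ are assumed names for this sketch.
class TreeNode:
    def __init__(self, parent=None, num_tokens=0):
        self.parent = parent          # parent node in the radix tree
        self.num_tokens = num_tokens  # KV-cache tokens held by this node
        self.lock_ref = 0             # number of in-flight requests pinning this node

class SketchRadixCache:
    def __init__(self):
        self.root = TreeNode()
        self.evictable_size_ = 0  # total tokens the evictor is allowed to reclaim

    def inc_lock_ref(self, node: TreeNode):
        # Walk from the matched node up to the root; a node that becomes
        # locked for the first time stops being evictable.
        while node is not None and node.parent is not None:
            if node.lock_ref == 0:
                self.evictable_size_ -= node.num_tokens
            node.lock_ref += 1
            node = node.parent

    def dec_lock_ref(self, node: TreeNode):
        # Reverse walk: once no request references a node anymore, its
        # tokens become evictable again.
        while node is not None and node.parent is not None:
            node.lock_ref -= 1
            if node.lock_ref == 0:
                self.evictable_size_ += node.num_tokens
            node = node.parent

Taking all the locks in the first loop and releasing them only in the second loop is what gives the whole-queue protection described by the new comment in the diff.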
python/sglang/srt/managers/scheduler.py (4 additions, 1 deletion)
@@ -719,6 +719,7 @@ def check_memory(self):
         available_size = (
             self.token_to_kv_pool.available_size() + self.tree_cache.evictable_size()
         )
+        # todo, locked memory for hierarchical cache is not leaked
         if available_size != self.max_total_num_tokens:
             msg = (
                 "KV cache pool leak detected!"
@@ -833,7 +834,9 @@ def get_new_batch_prefill(self) -> Optional[ScheduleBatch]:
             res = adder.add_one_req(req)
             if res != AddReqResult.CONTINUE:
                 if res == AddReqResult.NO_TOKEN:
-                    self.batch_is_full = True
+                    # todo: to be aware of the locked memory for hierarchical cache
+                    # self.batch_is_full = True
+                    pass
                 break

         # Update waiting queue
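Leaving batch_is_full unset on NO_TOKEN lets later scheduling rounds retry admission, presumably because pool space can reappear once nodes locked by the hierarchical cache are released; the TODO marks the handling as provisional. A simplified sketch of the admission loop shape, where only AddReqResult.CONTINUE and NO_TOKEN come from the diff and the helper itself is an assumption:

from enum import Enum, auto
from typing import List

class AddReqResult(Enum):
    CONTINUE = auto()  # request admitted, keep filling the batch
    NO_TOKEN = auto()  # no KV-pool tokens available right now
    OTHER = auto()     # any other stop condition

def try_admit(adder, waiting_queue: List) -> List:
    # Sketch only: request bookkeeping is simplified here.
    admitted = []
    for req in waiting_queue:
        res = adder.add_one_req(req)
        if res != AddReqResult.CONTINUE:
            if res == AddReqResult.NO_TOKEN:
                # The commit stops latching self.batch_is_full here so the next
                # round can try again once locked cache memory is freed.
                pass
            break
        admitted.append(req)
    return admitted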
