-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Description
Search before asking
- I had searched in the issues and found no similar issues.
Version
branch 3.0
What's Wrong?
If a cluster or workload group contains a small number of ETL tasks occupying hundreds of gigabytes of memory and a large number of small queries consuming only hundreds of kilobytes of memory each, then when the ETL tasks exhaust memory in the Backend (BE), the small queries may be killed before the large-memory ETL tasks are terminated.
The reason is that the logic that releases the top-N memory-consuming SQL queries implements no protection for small SQL queries. As a result, small SQL queries that contribute little to memory release are terminated, significantly impacting business experience.
What You Expected?
To prevent this issue, we should adopt an approach similar to killing memory-overcommitted SQL queries—only terminate those SQL operations whose memory usage exceeds a predefined threshold.
In MemTrackerLimiter::free_top_overcommit_query, small queries consuming no more than 32MB are not cancelled:
doris/be/src/runtime/memory/mem_tracker_limiter.cpp
Lines 555 to 585 in f8bd969
{ | |
SCOPED_TIMER(find_cost_time); | |
for (unsigned i = 1; i < tracker_groups.size(); ++i) { | |
if (seek_num > GC_MAX_SEEK_TRACKER) { | |
break; | |
} | |
std::lock_guard<std::mutex> l(tracker_groups[i].group_lock); | |
for (auto trackerWptr : tracker_groups[i].trackers) { | |
auto tracker = trackerWptr.lock(); | |
if (tracker != nullptr && tracker->type() == type) { | |
seek_num++; | |
// 32M small query does not cancel | |
if (tracker->consumption() <= 33554432 || | |
tracker->consumption() < tracker->limit()) { | |
small_num++; | |
continue; | |
} | |
if (tracker->is_query_cancelled()) { | |
canceling_task.push_back(fmt::format("{}:{} Bytes", tracker->label(), | |
tracker->consumption())); | |
continue; | |
} | |
auto overcommit_ratio = int64_t( | |
(static_cast<double>(tracker->consumption()) / tracker->limit()) * | |
10000); | |
max_pq.emplace(overcommit_ratio, tracker->label()); | |
query_consumption[tracker->label()] = tracker->consumption(); | |
} | |
} | |
} | |
} |
But in MemTrackerLimiter::free_top_memory_query, queries smaller than 32MB are not protected:
doris/be/src/runtime/memory/mem_tracker_limiter.cpp
Lines 437 to 476 in f8bd969
{ | |
SCOPED_TIMER(find_cost_time); | |
for (unsigned i = 1; i < tracker_groups.size(); ++i) { | |
if (seek_num > GC_MAX_SEEK_TRACKER) { | |
break; | |
} | |
std::lock_guard<std::mutex> l(tracker_groups[i].group_lock); | |
for (auto trackerWptr : tracker_groups[i].trackers) { | |
auto tracker = trackerWptr.lock(); | |
if (tracker != nullptr && tracker->type() == type) { | |
seek_num++; | |
if (tracker->is_query_cancelled()) { | |
canceling_task.push_back(fmt::format("{}:{} Bytes", tracker->label(), | |
tracker->consumption())); | |
continue; | |
} | |
if (tracker->consumption() > min_free_mem) { | |
min_pq = MemTrackerMinQueue(); | |
min_pq.emplace(tracker->consumption(), tracker->label()); | |
prepare_free_mem = tracker->consumption(); | |
break; | |
} else if (tracker->consumption() + prepare_free_mem < min_free_mem) { | |
min_pq.emplace(tracker->consumption(), tracker->label()); | |
prepare_free_mem += tracker->consumption(); | |
} else if (!min_pq.empty() && tracker->consumption() > min_pq.top().first) { | |
min_pq.emplace(tracker->consumption(), tracker->label()); | |
prepare_free_mem += tracker->consumption(); | |
while (prepare_free_mem - min_pq.top().first > min_free_mem) { | |
prepare_free_mem -= min_pq.top().first; | |
min_pq.pop(); | |
} | |
} | |
} | |
} | |
if (prepare_free_mem > min_free_mem && min_pq.size() == 1) { | |
// Found a big task, short circuit seek. | |
break; | |
} | |
} | |
} |
How to Reproduce?
No response
Anything Else?
No response
Are you willing to submit PR?
- Yes I am willing to submit a PR!
Code of Conduct
- I agree to follow this project's Code of Conduct