Skip to content

Commit f8bd969

Browse files
authored
branch-3.0: [fix](memory) Fix metadata memory tracking and profile (#47379)
Cherry-picked from #44739
1 parent 84a21bd commit f8bd969

32 files changed

+465
-321
lines changed

be/src/common/daemon.cpp

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -509,15 +509,18 @@ void Daemon::cache_adjust_capacity_thread() {
509509
void Daemon::cache_prune_stale_thread() {
510510
int32_t interval = config::cache_periodic_prune_stale_sweep_sec;
511511
while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(interval))) {
512-
if (interval <= 0) {
513-
LOG(WARNING) << "config of cache clean interval is illegal: [" << interval
514-
<< "], force set to 3600 ";
515-
interval = 3600;
512+
if (config::cache_periodic_prune_stale_sweep_sec <= 0) {
513+
LOG(WARNING) << "config of cache clean interval is: [" << interval
514+
<< "], it means the cache prune stale thread is disabled, will wait 3s "
515+
"and check again.";
516+
interval = 3;
517+
continue;
516518
}
517519
if (config::disable_memory_gc) {
518520
continue;
519521
}
520522
CacheManager::instance()->for_each_cache_prune_stale();
523+
interval = config::cache_periodic_prune_stale_sweep_sec;
521524
}
522525
}
523526

be/src/olap/delta_writer_v2.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ namespace doris {
4646

4747
class FlushToken;
4848
class MemTable;
49-
class MemTracker;
5049
class Schema;
5150
class StorageEngine;
5251
class TupleDescriptor;

be/src/olap/memtable_writer.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ namespace doris {
4545

4646
class FlushToken;
4747
class MemTable;
48-
class MemTracker;
4948
class StorageEngine;
5049
class TupleDescriptor;
5150
class SlotDescriptor;

be/src/olap/metadata_adder.h

Lines changed: 52 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,17 @@
2020
#include <bvar/bvar.h>
2121
#include <stdint.h>
2222

23+
#include "runtime/exec_env.h"
24+
#include "runtime/memory/mem_tracker_limiter.h"
2325
#include "util/runtime_profile.h"
2426

2527
namespace doris {
2628

2729
inline bvar::Adder<int64_t> g_rowset_meta_mem_bytes("doris_rowset_meta_mem_bytes");
2830
inline bvar::Adder<int64_t> g_rowset_meta_num("doris_rowset_meta_num");
2931

30-
inline bvar::Adder<int64_t> g_all_rowsets_mem_bytes("doris_all_rowsets_mem_bytes");
31-
inline bvar::Adder<int64_t> g_all_rowsets_num("doris_all_rowsets_num");
32+
inline bvar::Adder<int64_t> g_rowset_mem_bytes("doris_rowset_mem_bytes");
33+
inline bvar::Adder<int64_t> g_rowset_num("doris_rowset_num");
3234

3335
inline bvar::Adder<int64_t> g_tablet_meta_mem_bytes("doris_tablet_meta_mem_bytes");
3436
inline bvar::Adder<int64_t> g_tablet_meta_num("doris_tablet_meta_num");
@@ -42,8 +44,9 @@ inline bvar::Adder<int64_t> g_tablet_index_num("doris_tablet_index_num");
4244
inline bvar::Adder<int64_t> g_tablet_schema_mem_bytes("doris_tablet_schema_mem_bytes");
4345
inline bvar::Adder<int64_t> g_tablet_schema_num("doris_tablet_schema_num");
4446

45-
inline bvar::Adder<int64_t> g_all_segments_mem_bytes("doris_all_segments_mem_bytes");
46-
inline bvar::Adder<int64_t> g_all_segments_num("doris_all_segments_num");
47+
inline bvar::Adder<int64_t> g_segment_mem_bytes("doris_segment_mem_bytes");
48+
inline bvar::Adder<int64_t> g_segment_num("doris_segment_num");
49+
inline bvar::Adder<int64_t> g_segment_estimate_mem_bytes("doris_segment_estimate_mem_bytes");
4750

4851
inline bvar::Adder<int64_t> g_column_reader_mem_bytes("doris_column_reader_mem_bytes");
4952
inline bvar::Adder<int64_t> g_column_reader_num("doris_column_reader_num");
@@ -96,6 +99,10 @@ class ZoneMapIndexReader;
9699
When a derived Class extends MetadataAdder, then the Class's number and fixed length field's memory can be counted automatically.
97100
But if the Class has variable-length fields, then you should override get_metadata_size and call update_metadata_size when the Class's memory changes.
98101
102+
get_metadata_size counts only the memory of the metadata object itself, not that of its child objects,
103+
for example, TabletMeta::get_metadata_size does not include the memory of TabletSchema.
104+
Note, the memory allocated by Doris Allocator is not included.
105+
99106
There are some special situations that need to be noted:
100107
1. When the derived Class overrides the copy constructor, you should update the memory size (call update_metadata_size) if the derived class's
101108
memory changes in its copy constructor or if you do not call MetadataAdder's copy constructor.
@@ -111,6 +118,31 @@ class MetadataAdder {
111118

112119
static void dump_metadata_object(RuntimeProfile* object_heap_dump_snapshot);
113120

121+
static int64_t get_all_tablets_size() {
122+
return g_tablet_meta_mem_bytes.get_value() + g_tablet_column_mem_bytes.get_value() +
123+
g_tablet_index_mem_bytes.get_value() + g_tablet_schema_mem_bytes.get_value();
124+
}
125+
126+
static int64_t get_all_rowsets_size() {
127+
return g_rowset_meta_mem_bytes.get_value() + g_rowset_mem_bytes.get_value();
128+
}
129+
130+
static int64_t get_all_segments_size() {
131+
return g_segment_mem_bytes.get_value() + g_column_reader_mem_bytes.get_value() +
132+
g_bitmap_index_reader_mem_bytes.get_value() +
133+
g_bloom_filter_index_reader_mem_bytes.get_value() +
134+
g_index_page_reader_mem_bytes.get_value() +
135+
g_indexed_column_reader_mem_bytes.get_value() +
136+
g_inverted_index_reader_mem_bytes.get_value() +
137+
g_ordinal_index_reader_mem_bytes.get_value() +
138+
g_zone_map_index_reader_mem_bytes.get_value();
139+
}
140+
141+
// Doris currently uses the estimated segment memory as the basis, which may be more realistic.
142+
static int64_t get_all_segments_estimate_size() {
143+
return g_segment_estimate_mem_bytes.get_value();
144+
}
145+
114146
protected:
115147
MetadataAdder(const MetadataAdder& other);
116148

@@ -122,7 +154,6 @@ class MetadataAdder {
122154

123155
MetadataAdder<T>& operator=(const MetadataAdder<T>& other) = default;
124156

125-
private:
126157
int64_t _current_meta_size {0};
127158

128159
void add_mem_size(int64_t val);
@@ -167,7 +198,7 @@ void MetadataAdder<T>::add_mem_size(int64_t val) {
167198
if constexpr (std::is_same_v<T, RowsetMeta>) {
168199
g_rowset_meta_mem_bytes << val;
169200
} else if constexpr (std::is_same_v<T, Rowset>) {
170-
g_all_rowsets_mem_bytes << val;
201+
g_rowset_mem_bytes << val;
171202
} else if constexpr (std::is_same_v<T, TabletMeta>) {
172203
g_tablet_meta_mem_bytes << val;
173204
} else if constexpr (std::is_same_v<T, TabletColumn>) {
@@ -177,7 +208,7 @@ void MetadataAdder<T>::add_mem_size(int64_t val) {
177208
} else if constexpr (std::is_same_v<T, TabletSchema>) {
178209
g_tablet_schema_mem_bytes << val;
179210
} else if constexpr (std::is_same_v<T, segment_v2::Segment>) {
180-
g_all_segments_mem_bytes << val;
211+
g_segment_mem_bytes << val;
181212
} else if constexpr (std::is_same_v<T, segment_v2::ColumnReader>) {
182213
g_column_reader_mem_bytes << val;
183214
} else if constexpr (std::is_same_v<T, segment_v2::BitmapIndexReader>) {
@@ -208,7 +239,7 @@ void MetadataAdder<T>::add_num(int64_t val) {
208239
if constexpr (std::is_same_v<T, RowsetMeta>) {
209240
g_rowset_meta_num << val;
210241
} else if constexpr (std::is_same_v<T, Rowset>) {
211-
g_all_rowsets_num << val;
242+
g_rowset_num << val;
212243
} else if constexpr (std::is_same_v<T, TabletMeta>) {
213244
g_tablet_meta_num << val;
214245
} else if constexpr (std::is_same_v<T, TabletColumn>) {
@@ -218,7 +249,7 @@ void MetadataAdder<T>::add_num(int64_t val) {
218249
} else if constexpr (std::is_same_v<T, TabletSchema>) {
219250
g_tablet_schema_num << val;
220251
} else if constexpr (std::is_same_v<T, segment_v2::Segment>) {
221-
g_all_segments_num << val;
252+
g_segment_num << val;
222253
} else if constexpr (std::is_same_v<T, segment_v2::ColumnReader>) {
223254
g_column_reader_num << val;
224255
} else if constexpr (std::is_same_v<T, segment_v2::BitmapIndexReader>) {
@@ -250,12 +281,12 @@ void MetadataAdder<T>::dump_metadata_object(RuntimeProfile* object_heap_dump_sna
250281
COUNTER_SET(rowset_meta_mem_bytes_counter, g_rowset_meta_mem_bytes.get_value());
251282
COUNTER_SET(rowset_meta_num_counter, g_rowset_meta_num.get_value());
252283

253-
RuntimeProfile::Counter* all_rowsets_mem_bytes_counter =
254-
ADD_COUNTER(object_heap_dump_snapshot, "AllRowsetsMemBytes", TUnit::BYTES);
255-
RuntimeProfile::Counter* all_rowsets_num_counter =
256-
ADD_COUNTER(object_heap_dump_snapshot, "AllRowsetsNum", TUnit::UNIT);
257-
COUNTER_SET(all_rowsets_mem_bytes_counter, g_all_rowsets_mem_bytes.get_value());
258-
COUNTER_SET(all_rowsets_num_counter, g_all_rowsets_num.get_value());
284+
RuntimeProfile::Counter* rowset_mem_bytes_counter =
285+
ADD_COUNTER(object_heap_dump_snapshot, "RowsetMemBytes", TUnit::BYTES);
286+
RuntimeProfile::Counter* rowset_num_counter =
287+
ADD_COUNTER(object_heap_dump_snapshot, "RowsetNum", TUnit::UNIT);
288+
COUNTER_SET(rowset_mem_bytes_counter, g_rowset_mem_bytes.get_value());
289+
COUNTER_SET(rowset_num_counter, g_rowset_num.get_value());
259290

260291
RuntimeProfile::Counter* tablet_meta_mem_bytes_counter =
261292
ADD_COUNTER(object_heap_dump_snapshot, "TabletMetaMemBytes", TUnit::BYTES);
@@ -285,12 +316,12 @@ void MetadataAdder<T>::dump_metadata_object(RuntimeProfile* object_heap_dump_sna
285316
COUNTER_SET(tablet_schema_mem_bytes_counter, g_tablet_schema_mem_bytes.get_value());
286317
COUNTER_SET(tablet_schema_num_counter, g_tablet_schema_num.get_value());
287318

288-
RuntimeProfile::Counter* all_segments_mem_bytes_counter =
289-
ADD_COUNTER(object_heap_dump_snapshot, "AllSegmentsMemBytes", TUnit::BYTES);
290-
RuntimeProfile::Counter* all_segments_num_counter =
291-
ADD_COUNTER(object_heap_dump_snapshot, "AllSegmentsNum", TUnit::UNIT);
292-
COUNTER_SET(all_segments_mem_bytes_counter, g_all_segments_mem_bytes.get_value());
293-
COUNTER_SET(all_segments_num_counter, g_all_segments_num.get_value());
319+
RuntimeProfile::Counter* segment_mem_bytes_counter =
320+
ADD_COUNTER(object_heap_dump_snapshot, "SegmentMemBytes", TUnit::BYTES);
321+
RuntimeProfile::Counter* segment_num_counter =
322+
ADD_COUNTER(object_heap_dump_snapshot, "SegmentNum", TUnit::UNIT);
323+
COUNTER_SET(segment_mem_bytes_counter, g_segment_mem_bytes.get_value());
324+
COUNTER_SET(segment_num_counter, g_segment_num.get_value());
294325

295326
RuntimeProfile::Counter* column_reader_mem_bytes_counter =
296327
ADD_COUNTER(object_heap_dump_snapshot, "ColumnReaderMemBytes", TUnit::BYTES);

be/src/olap/rowset/segment_v2/indexed_column_reader.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,8 +80,8 @@ Status IndexedColumnReader::load(bool use_page_cache, bool kept_in_memory,
8080
_sole_data_page = PagePointer(_meta.ordinal_index_meta().root_page());
8181
} else {
8282
RETURN_IF_ERROR(load_index_page(_meta.ordinal_index_meta().root_page(),
83-
&_ordinal_index_page_handle, &_ordinal_index_reader,
84-
index_load_stats));
83+
&_ordinal_index_page_handle,
84+
_ordinal_index_reader.get(), index_load_stats));
8585
_has_index_page = true;
8686
}
8787
}
@@ -92,7 +92,7 @@ Status IndexedColumnReader::load(bool use_page_cache, bool kept_in_memory,
9292
_sole_data_page = PagePointer(_meta.value_index_meta().root_page());
9393
} else {
9494
RETURN_IF_ERROR(load_index_page(_meta.value_index_meta().root_page(),
95-
&_value_index_page_handle, &_value_index_reader,
95+
&_value_index_page_handle, _value_index_reader.get(),
9696
index_load_stats));
9797
_has_index_page = true;
9898
}

be/src/olap/rowset/segment_v2/indexed_column_reader.h

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,12 @@ class EncodingInfo;
5050
class IndexedColumnReader : public MetadataAdder<IndexedColumnReader> {
5151
public:
5252
explicit IndexedColumnReader(io::FileReaderSPtr file_reader, const IndexedColumnMetaPB& meta)
53-
: _file_reader(std::move(file_reader)), _meta(meta) {}
53+
: _file_reader(std::move(file_reader)), _meta(meta) {
54+
_ordinal_index_reader = std::make_unique<IndexPageReader>();
55+
_value_index_reader = std::make_unique<IndexPageReader>();
56+
}
5457

55-
~IndexedColumnReader();
58+
~IndexedColumnReader() override;
5659

5760
Status load(bool use_page_cache, bool kept_in_memory,
5861
OlapReaderStatistics* index_load_stats = nullptr);
@@ -91,8 +94,8 @@ class IndexedColumnReader : public MetadataAdder<IndexedColumnReader> {
9194
bool _has_index_page = false;
9295
// valid only when the column contains only one data page
9396
PagePointer _sole_data_page;
94-
IndexPageReader _ordinal_index_reader;
95-
IndexPageReader _value_index_reader;
97+
std::unique_ptr<IndexPageReader> _ordinal_index_reader;
98+
std::unique_ptr<IndexPageReader> _value_index_reader;
9699
PageHandle _ordinal_index_page_handle;
97100
PageHandle _value_index_page_handle;
98101

@@ -108,8 +111,8 @@ class IndexedColumnIterator {
108111
explicit IndexedColumnIterator(const IndexedColumnReader* reader,
109112
OlapReaderStatistics* stats = nullptr)
110113
: _reader(reader),
111-
_ordinal_iter(&reader->_ordinal_index_reader),
112-
_value_iter(&reader->_value_index_reader),
114+
_ordinal_iter(reader->_ordinal_index_reader.get()),
115+
_value_iter(reader->_value_index_reader.get()),
113116
_stats(stats) {}
114117

115118
// Seek to the given ordinal entry. Entry 0 is the first entry.

be/src/olap/rowset/segment_v2/page_handle.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@
2323
#include "util/slice.h" // for Slice
2424

2525
namespace doris {
26+
27+
// After the page cache is disabled, we sometimes need to know the percentage of data pages in query memory.
28+
inline bvar::Adder<int64_t> g_page_no_cache_mem_bytes("doris_page_no_cache_mem_bytes");
29+
2630
namespace segment_v2 {
2731

2832
// When a column page is read into memory, we use this to store it.
@@ -37,8 +41,7 @@ class PageHandle {
3741
// This class will take the ownership of input data's memory. It will
3842
// free it when destructed.
3943
PageHandle(DataPage* data) : _is_data_owner(true), _data(data) {
40-
_page_tracker = ExecEnv::GetInstance()->page_no_cache_mem_tracker();
41-
_page_tracker->consume(_data->capacity());
44+
g_page_no_cache_mem_bytes << _data->capacity();
4245
}
4346

4447
// This class will take the content of cache data, and will make input
@@ -51,20 +54,18 @@ class PageHandle {
5154
// we can use std::exchange if we switch c++14 on
5255
std::swap(_is_data_owner, other._is_data_owner);
5356
std::swap(_data, other._data);
54-
_page_tracker = ExecEnv::GetInstance()->page_no_cache_mem_tracker();
5557
}
5658

5759
PageHandle& operator=(PageHandle&& other) noexcept {
5860
std::swap(_is_data_owner, other._is_data_owner);
5961
std::swap(_data, other._data);
6062
_cache_data = std::move(other._cache_data);
61-
_page_tracker = ExecEnv::GetInstance()->page_no_cache_mem_tracker();
6263
return *this;
6364
}
6465

6566
~PageHandle() {
6667
if (_is_data_owner) {
67-
_page_tracker->release(_data->capacity());
68+
g_page_no_cache_mem_bytes << -_data->capacity();
6869
delete _data;
6970
} else {
7071
DCHECK(_data == nullptr);
@@ -85,7 +86,6 @@ class PageHandle {
8586
// otherwise _cache_data is valid, and the data belongs to the cache.
8687
bool _is_data_owner = false;
8788
DataPage* _data = nullptr;
88-
std::shared_ptr<MemTracker> _page_tracker;
8989
PageCacheHandle _cache_data;
9090

9191
// Don't allow copy and assign

be/src/olap/rowset/segment_v2/segment.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,11 @@ Segment::Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr table
163163
_tablet_schema(std::move(tablet_schema)),
164164
_idx_file_info(idx_file_info) {}
165165

166-
Segment::~Segment() = default;
166+
Segment::~Segment() {
167+
g_segment_estimate_mem_bytes << -_tracked_meta_mem_usage;
168+
// If this check fails, the accounting of `_tracked_meta_mem_usage` is inaccurate and needs to be fixed.
169+
DCHECK(_tracked_meta_mem_usage == meta_mem_usage());
170+
}
167171

168172
io::UInt128Wrapper Segment::file_cache_key(std::string_view rowset_id, uint32_t seg_id) {
169173
return io::BlockFileCache::hash(fmt::format("{}_{}.dat", rowset_id, seg_id));
@@ -174,6 +178,12 @@ int64_t Segment::get_metadata_size() const {
174178
(_pk_index_meta ? _pk_index_meta->ByteSizeLong() : 0);
175179
}
176180

181+
void Segment::update_metadata_size() {
182+
MetadataAdder::update_metadata_size();
183+
g_segment_estimate_mem_bytes << _meta_mem_usage - _tracked_meta_mem_usage;
184+
_tracked_meta_mem_usage = _meta_mem_usage;
185+
}
186+
177187
Status Segment::_open() {
178188
_footer_pb = std::make_unique<SegmentFooterPB>();
179189
RETURN_IF_ERROR(_parse_footer(_footer_pb.get()));
@@ -191,8 +201,6 @@ Status Segment::_open() {
191201
_meta_mem_usage += _pk_index_meta->ByteSizeLong();
192202
}
193203

194-
update_metadata_size();
195-
196204
_meta_mem_usage += sizeof(*this);
197205
_meta_mem_usage += _tablet_schema->num_columns() * config::estimated_mem_per_column_reader;
198206

@@ -201,6 +209,8 @@ Status Segment::_open() {
201209
// 0.01 comes from PrimaryKeyIndexBuilder::init
202210
_meta_mem_usage += BloomFilter::optimal_bit_num(_num_rows, 0.01) / 8;
203211

212+
update_metadata_size();
213+
204214
return Status::OK();
205215
}
206216

@@ -471,6 +481,7 @@ Status Segment::_load_pk_bloom_filter(OlapReaderStatistics* stats) {
471481
// for BE UT "segment_cache_test"
472482
return _load_pk_bf_once.call([this] {
473483
_meta_mem_usage += 100;
484+
update_metadata_size();
474485
return Status::OK();
475486
});
476487
}

be/src/olap/rowset/segment_v2/segment.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@ class IDataType;
5757
class ShortKeyIndexDecoder;
5858
class Schema;
5959
class StorageReadOptions;
60-
class MemTracker;
6160
class PrimaryKeyIndexReader;
6261
class RowwiseIterator;
6362
struct RowLocation;
@@ -93,6 +92,7 @@ class Segment : public std::enable_shared_from_this<Segment>, public MetadataAdd
9392
~Segment();
9493

9594
int64_t get_metadata_size() const override;
95+
void update_metadata_size();
9696

9797
Status new_iterator(SchemaSPtr schema, const StorageReadOptions& read_options,
9898
std::unique_ptr<RowwiseIterator>* iter);
@@ -163,6 +163,8 @@ class Segment : public std::enable_shared_from_this<Segment>, public MetadataAdd
163163

164164
io::FileReaderSPtr file_reader() { return _file_reader; }
165165

166+
// Including the column reader memory.
167+
// The other method, `get_metadata_size`, does not include the column readers, only the segment object itself.
166168
int64_t meta_mem_usage() const { return _meta_mem_usage; }
167169

168170
// Identify the column by unique id or path info
@@ -249,9 +251,8 @@ class Segment : public std::enable_shared_from_this<Segment>, public MetadataAdd
249251
// 1. Tracking memory use by segment meta data such as footer or index page.
250252
// 2. Tracking memory use by segment column reader
251253
// The memory consumed by querying is tracked in segment iterator.
252-
// TODO: Segment::_meta_mem_usage Unknown value overflow, causes the value of SegmentMeta mem tracker
253-
// is similar to `-2912341218700198079`. So, temporarily put it in experimental type tracker.
254254
int64_t _meta_mem_usage;
255+
int64_t _tracked_meta_mem_usage = 0;
255256

256257
RowsetId _rowset_id;
257258
TabletSchemaSPtr _tablet_schema;

be/src/olap/rowset_builder.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ namespace doris {
3838
class CalcDeleteBitmapToken;
3939
class FlushToken;
4040
class MemTable;
41-
class MemTracker;
4241
class StorageEngine;
4342
class TupleDescriptor;
4443
class SlotDescriptor;

0 commit comments

Comments
 (0)