Skip to content

Commit 6473258

Browse files
mgoulishmick
andauthored
Temporarily stop creation of vflow observability records if too many at (#1792)
* Temporarily stop creation of vflow observability records if too many at once. To prevent large memory growth in some test scenarios. * Respond to feedback * Make lower threshold close to upper. --------- Co-authored-by: mick <[email protected]>
1 parent a91d5a7 commit 6473258

File tree

2 files changed

+73
-1
lines changed

2 files changed

+73
-1
lines changed

include/qpid/dispatch/vanflow.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,34 @@ typedef enum vflow_record_type {
5757
VFLOW_RECORD_BIFLOW_APP = 0x11, // Bidirectional Application (L7) flow
5858
} vflow_record_type_t;
5959

60+
#define VFLOW_RECORD_MAX VFLOW_RECORD_BIFLOW_APP // Update this if you add new record types to vflow_record_type
61+
62+
// Most record-types support critical functionality and must be emitted every time the corresponding
63+
// event occurs. But a few record-types support discretionary functionality that can be interrupted,
64+
// if necessary, to prevent excessive memory growth.
65+
// This array identifies which is which.
66+
static const bool discretionary_records[VFLOW_RECORD_MAX + 1] = {
67+
false, // VFLOW_RECORD_SITE (0x00)
68+
false, // VFLOW_RECORD_ROUTER (0x01)
69+
false, // VFLOW_RECORD_LINK (0x02)
70+
false, // VFLOW_RECORD_CONTROLLER (0x03)
71+
false, // VFLOW_RECORD_LISTENER (0x04)
72+
false, // VFLOW_RECORD_CONNECTOR (0x05)
73+
true, // VFLOW_RECORD_FLOW (0x06)
74+
false, // VFLOW_RECORD_PROCESS (0x07)
75+
false, // VFLOW_RECORD_IMAGE (0x08)
76+
false, // VFLOW_RECORD_INGRESS (0x09)
77+
false, // VFLOW_RECORD_EGRESS (0x0a)
78+
false, // VFLOW_RECORD_COLLECTOR (0x0b)
79+
false, // VFLOW_RECORD_PROCESS_GROUP (0x0c)
80+
false, // VFLOW_RECORD_HOST (0x0d)
81+
false, // VFLOW_RECORD_LOG (0x0e)
82+
false, // VFLOW_RECORD_ROUTER_ACCESS (0x0f)
83+
true, // VFLOW_RECORD_BIFLOW_TPORT (0x10)
84+
true, // VFLOW_RECORD_BIFLOW_APP (0x11)
85+
86+
};
87+
6088
// clang-format off
6189
typedef enum vflow_attribute {
6290
// Note: these values are shared with the Skupper control plane - do not re-use or change them without updating the

src/vanflow.c

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,15 @@
4646
#define DEFERRED_DELETION_TICKS 25 // Five seconds
4747
#define VFLOW_ID_CUSTOM 0xffffffffffffffff
4848

49+
// If the number of discretionary records rises above
50+
// this threshold, stop production of them to avoid
51+
// excessive memory growth.
52+
#define DISCRETIONARY_RECORDS_STOP_THRESHOLD 5000
53+
54+
// If the number of discretionary records falls below
55+
// this threshold, allow their production to start again.
56+
#define DISCRETIONARY_RECORDS_START_THRESHOLD 4990
57+
4958
//
5059
// If the record_id value is VFLOW_ID_CUSTOM, use the full_id for arbitrary strings, otherwise use ${s.source_id}:${record_id}
5160
//
@@ -148,6 +157,10 @@ static const int rate_span = 10; // Ten-second rolling av
148157
static sys_atomic_t site_configured;
149158

150159
typedef struct {
160+
// How many records of types that support discretionary currently exist?
161+
// Their creation can be interrupted if necessary to avoid excessive memory growth.
162+
sys_atomic_t discretionary_record_count;
163+
sys_atomic_t emit_discretionary_records;
151164
qdr_core_t *router_core;
152165
sys_mutex_t lock;
153166
sys_mutex_t id_lock;
@@ -864,6 +877,15 @@ static void _vflow_create_router_record(void)
864877
*/
865878
static void _vflow_free_record_TH(vflow_record_t *record, bool recursive)
866879
{
880+
// If this is one of the record types that supports discretionary functionality,
881+
// count it. If the number currently existing is now below the
882+
// resumption threshold, indicate that we should resume producing them.
883+
if (discretionary_records[record->record_type]) {
884+
if (sys_atomic_dec(&state->discretionary_record_count) <= DISCRETIONARY_RECORDS_START_THRESHOLD) {
885+
sys_atomic_set(&state->emit_discretionary_records, 1);
886+
}
887+
}
888+
867889
//
868890
// If this record is a child of a parent, remove it from the parent's child list
869891
//
@@ -1830,6 +1852,19 @@ vflow_record_t *vflow_start_record_custom_id(vflow_record_type_t record_type, vf
18301852
//=====================================================================================
18311853
vflow_record_t *vflow_start_record(vflow_record_type_t record_type, vflow_record_t *parent)
18321854
{
1855+
// If this is one of the record types that supports discretionary functionality,
1856+
// and if we already have the maximum allowed number of those, just don't produce
1857+
// another one.
1858+
// Otherwise, produce and count it, and check if this one put us over the limit.
1859+
if (discretionary_records[record_type]) {
1860+
if (0 == sys_atomic_get(&state->emit_discretionary_records)) {
1861+
return 0;
1862+
}
1863+
if (sys_atomic_inc(&state->discretionary_record_count) >= DISCRETIONARY_RECORDS_STOP_THRESHOLD) {
1864+
sys_atomic_set(&state->emit_discretionary_records, 0);
1865+
}
1866+
}
1867+
18331868
vflow_record_t *record = new_vflow_record_t();
18341869
vflow_work_t *work = _vflow_work(_vflow_start_record_TH);
18351870
ZERO(record);
@@ -1862,6 +1897,12 @@ vflow_record_t *vflow_start_co_record_iter(vflow_record_type_t record_type, qd_i
18621897
// search/replacement algorithm in _vflow_process_co_record_TH will need to be re-written in a more general way.
18631898
//
18641899
assert(record_type == VFLOW_RECORD_BIFLOW_TPORT);
1900+
// The above record type is 'discretionary', which
1901+
// means we may have been told to temporarily halt
1902+
// production of them.
1903+
if (sys_atomic_get(&state->emit_discretionary_records) == 0) {
1904+
return 0;
1905+
}
18651906
vflow_record_t *record = new_vflow_record_t();
18661907
ZERO(record);
18671908
record->record_type = record_type;
@@ -1902,14 +1943,15 @@ void vflow_end_record(vflow_record_t *record)
19021943
void vflow_serialize_identity(const vflow_record_t *record, qd_composed_field_t *field)
19031944
{
19041945
char buffer[IDENTITY_MAX + 1];
1905-
assert(!!record);
19061946
if (!!record) {
19071947
if (record->identity.record_id == VFLOW_ID_CUSTOM) {
19081948
qd_compose_insert_string(field, record->identity.s.full_id);
19091949
} else {
19101950
snprintf(buffer, IDENTITY_MAX, "%s:%"PRIu64, record->identity.s.source_id, record->identity.record_id);
19111951
qd_compose_insert_string(field, buffer);
19121952
}
1953+
} else {
1954+
qd_compose_insert_null(field);
19131955
}
19141956
}
19151957

@@ -2440,6 +2482,8 @@ static void _vflow_init(qdr_core_t *core, void **adaptor_context)
24402482
state = NEW(vflow_state_t);
24412483
ZERO(state);
24422484

2485+
sys_atomic_init(&state->discretionary_record_count, 0);
2486+
sys_atomic_init(&state->emit_discretionary_records, 1);
24432487
state->router_core = core;
24442488
state->hostname = getenv("HOSTNAME");
24452489
size_t hostLength = !!state->hostname ? strlen(state->hostname) : 0;

0 commit comments

Comments
 (0)