Skip to content

Commit dbf144b

Browse files
committed
feat: add procfs process tracking
1 parent 018a806 commit dbf144b

File tree

2 files changed

+35
-14
lines changed

2 files changed

+35
-14
lines changed

Diff for: CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99
- Lading now built with edition 2024
1010
- Removed use of compromised `tj-actions/changed-files` action from project's GitHub CI configuration
1111
- Fixed devcontainer configuration to ensure the `rust-analyzer` can run successfully within IDEs
12+
- Added a new gauge `processes_found` and a new warning log for processes we skipped
1213

1314
## [0.25.6]
1415
## Fixed

Diff for: lading/src/observer/linux/procfs.rs

+34-14
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ impl Sampler {
8383
// A tally of the total RSS and PSS consumed by the parent process and
8484
// its children.
8585
let mut aggr = memory::smaps_rollup::Aggregator::default();
86+
let mut processes_found: i32 = 0;
87+
let mut pids_skipped: FxHashSet<i32> = FxHashSet::default();
8688

8789
// Every sample run we collect all the child processes rooted at the
8890
// parent. As noted by the procfs documentation, this is done by
@@ -115,25 +117,43 @@ impl Sampler {
115117
pids.insert(pid);
116118
}
117119
}
118-
}
120+
}
119121
}
120122
}
121123

124+
processes_found += 1;
122125
let pid = process.pid();
123-
if let Err(e) = self.handle_process(process, &mut aggr, include_smaps).await {
124-
warn!("Encountered uncaught error when handling `/proc/{pid}/`: {e}");
125-
}
126+
match self.handle_process(process, &mut aggr, include_smaps).await {
127+
Ok(true) => { /* Handled successfully */ }
128+
Ok(false) => {
129+
pids_skipped.insert(pid);
130+
}
131+
Err(e) => {
132+
warn!("Encountered uncaught error when handling `/proc/{pid}/`: {e}");
133+
}
126134
}
127135

128136
// Update the process_info map to only hold processes seen by the current poll call.
129137
self.process_info.retain(|pid, _| pids.contains(pid));
130138

131139
gauge!("total_rss_bytes").set(aggr.rss as f64);
132140
gauge!("total_pss_bytes").set(aggr.pss as f64);
141+
gauge!("processes_found").set(processes_found as f64);
142+
143+
// If we skipped any processes, log a warning.
144+
if !pids_skipped.is_empty() {
145+
warn!(
146+
"Skipped {} processes: {:?}",
147+
pids_skipped.len(),
148+
pids_skipped
149+
);
150+
}
133151

134-
Ok(())
152+
()
135153
}
136154

155+
/// Handle a process. Returns `Ok(true)` if the process was handled successfully,
156+
/// `Ok(false)` if it was skipped for any reason.
137157
#[allow(
138158
clippy::similar_names,
139159
clippy::too_many_lines,
@@ -146,7 +166,7 @@ impl Sampler {
146166
process: Process,
147167
aggr: &mut memory::smaps_rollup::Aggregator,
148168
include_smaps: bool,
149-
) -> Result<(), Error> {
169+
) -> Result<bool, Error> {
150170
let pid = process.pid();
151171

152172
// `/proc/{pid}/status`
@@ -156,12 +176,12 @@ impl Sampler {
156176
warn!("Couldn't read status: {:?}", e);
157177
// The pid may have exited since we scanned it or we may not
158178
// have sufficient permission.
159-
return Ok(());
179+
return Ok(false);
160180
}
161181
};
162182
if status.tgid != pid {
163183
// This is a thread, not a process and we do not wish to scan it.
164-
return Ok(());
184+
return Ok(false);
165185
}
166186

167187
// If we haven't seen this process before, initialize its ProcessInfo.
@@ -174,7 +194,7 @@ impl Sampler {
174194
warn!("Couldn't read exe for pid {}: {:?}", pid, e);
175195
// The pid may have exited since we scanned it or we may not
176196
// have sufficient permission.
177-
return Ok(());
197+
return Ok(false);
178198
}
179199
};
180200
let comm = match proc_comm(pid).await {
@@ -183,7 +203,7 @@ impl Sampler {
183203
warn!("Couldn't read comm for pid {}: {:?}", pid, e);
184204
// The pid may have exited since we scanned it or we may not
185205
// have sufficient permission.
186-
return Ok(());
206+
return Ok(false);
187207
}
188208
};
189209
let cmdline = match proc_cmdline(pid).await {
@@ -192,7 +212,7 @@ impl Sampler {
192212
warn!("Couldn't read cmdline for pid {}: {:?}", pid, e);
193213
// The pid may have exited since we scanned it or we may not
194214
// have sufficient permission.
195-
return Ok(());
215+
return Ok(false);
196216
}
197217
};
198218
let pid_s = format!("{pid}");
@@ -238,7 +258,7 @@ impl Sampler {
238258
// which will happen if we don't have permissions or, more
239259
// likely, the process has exited.
240260
warn!("Couldn't process `/proc/{pid}/stat`: {e}");
241-
return Ok(());
261+
return Ok(false);
242262
}
243263

244264
if include_smaps {
@@ -317,10 +337,10 @@ impl Sampler {
317337
// which will happen if we don't have permissions or, more
318338
// likely, the process has exited.
319339
warn!("Couldn't process `/proc/{pid}/smaps_rollup`: {err}");
320-
return Ok(());
340+
return Ok(false);
321341
}
322342

323-
Ok(())
343+
Ok(true)
324344
}
325345
}
326346

0 commit comments

Comments
 (0)