Skip to content

Commit a6cb1ce

Browse files
committed
Fix recheck on processes which are already grouped
-recheck already allows process_exporter to put processes which might change name after they start in the right group. However this only works if the initial process name is not matched. In case a matcher catches it early, the process is left in its old group after its name changes.
1 parent e52ab0a commit a6cb1ce

File tree

3 files changed

+82
-16
lines changed

3 files changed

+82
-16
lines changed

proc/read.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ type (
116116
// GetProcID() returns (pid,starttime), which can be considered a unique process id.
117117
GetProcID() (ID, error)
118118
// GetStatic() returns various details read from files under /proc/<pid>/. Technically
119-
// name may not be static, but we'll pretend it is.
119+
// name may not always be static.
120120
GetStatic() (Static, error)
121121
// GetMetrics() returns various metrics read from files under /proc/<pid>/.
122122
// It returns an error on complete failure. Otherwise, it returns metrics

proc/tracker.go

Lines changed: 46 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ func (tp *trackedProc) getUpdate() Update {
126126
States: tp.metrics.States,
127127
Wchans: make(map[string]int),
128128
}
129+
129130
if tp.metrics.Wchan != "" {
130131
u.Wchans[tp.metrics.Wchan] = 1
131132
}
@@ -177,26 +178,33 @@ func (t *Tracker) track(groupName string, idinfo IDInfo) {
177178
t.tracked[idinfo.ID] = &tproc
178179
}
179180

180-
func (t *Tracker) ignore(id ID, startTime time.Time) {
181-
// only ignore ID if we didn't set recheck to true
181+
func (t *Tracker) shouldRecheck(id ID, startTime time.Time) bool {
182182
if t.recheck {
183183
if t.recheckTimeLimit == 0 {
184184
// plain -recheck with no time limit:
185-
return
185+
return true
186186
}
187187
if startTime.Add(t.recheckTimeLimit).After(time.Now()) {
188188
// -recheckWithTimeLimit is used and the limit is not reached yet:
189-
return
189+
return true
190190
}
191191
}
192-
t.tracked[id] = nil
192+
return false
193193
}
194194

195-
func (tp *trackedProc) update(metrics Metrics, now time.Time, cerrs *CollectErrors, threads []Thread) {
195+
func (t *Tracker) ignore(id ID, startTime time.Time) {
196+
// only ignore ID if we didn't set recheck to true
197+
if !t.shouldRecheck(id, startTime) {
198+
t.tracked[id] = nil
199+
}
200+
}
201+
202+
func (tp *trackedProc) update(metrics Metrics, static Static, now time.Time, cerrs *CollectErrors, threads []Thread) {
196203
// newcounts: resource consumption since last cycle
197204
newcounts := metrics.Counts
198205
tp.lastaccum = newcounts.Sub(tp.metrics.Counts)
199206
tp.metrics = metrics
207+
tp.static = static
200208
tp.lastUpdate = now
201209
if len(threads) > 1 {
202210
if tp.threads == nil {
@@ -272,17 +280,18 @@ func (t *Tracker) handleProc(proc Proc, updateTime time.Time) (*IDInfo, CollectE
272280
}
273281
}
274282

283+
static, err := proc.GetStatic()
284+
if err != nil {
285+
if t.debug {
286+
log.Printf("error reading static details for %+v: %v", procID, err)
287+
}
288+
return nil, cerrs
289+
}
290+
275291
var newProc *IDInfo
276292
if known {
277-
last.update(metrics, updateTime, &cerrs, threads)
293+
last.update(metrics, static, updateTime, &cerrs, threads)
278294
} else {
279-
static, err := proc.GetStatic()
280-
if err != nil {
281-
if t.debug {
282-
log.Printf("error reading static details for %+v: %v", procID, err)
283-
}
284-
return nil, cerrs
285-
}
286295
newProc = &IDInfo{procID, static, metrics, threads}
287296
if t.debug {
288297
log.Printf("found new proc: %s", newProc)
@@ -459,8 +468,30 @@ func (t *Tracker) Update(iter Iter) (CollectErrors, []Update, error) {
459468
}
460469

461470
tp := []Update{}
462-
for _, tproc := range t.tracked {
471+
for procID, tproc := range t.tracked {
463472
if tproc != nil {
473+
// Reclassify any proc that may have changed names
474+
if t.shouldRecheck(procID, tproc.static.StartTime) {
475+
nacl := common.ProcAttributes{
476+
Name: tproc.static.Name,
477+
Cmdline: tproc.static.Cmdline,
478+
Cgroups: tproc.static.Cgroups,
479+
Username: t.lookupUid(tproc.static.EffectiveUID),
480+
PID: procID.Pid,
481+
StartTime: tproc.static.StartTime,
482+
}
483+
484+
shouldTrack, gname := t.namer.MatchAndName(nacl)
485+
if shouldTrack && gname != tproc.groupName {
486+
if t.debug {
487+
log.Printf("moving process %v from group %s to %s", procID.Pid, tproc.groupName, gname)
488+
}
489+
tproc.groupName = gname
490+
} else if !shouldTrack {
491+
t.ignore(procID, tproc.static.StartTime)
492+
continue
493+
}
494+
}
464495
tp = append(tp, tproc.getUpdate())
465496
}
466497
}

proc/tracker_test.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,3 +180,38 @@ func TestTrackerThreads(t *testing.T) {
180180
}
181181
}
182182
}
183+
184+
// Verify that the tracker correctly groups a process whose attributes
185+
// change after it has started when recheck is enabled and even though
186+
// the process was grouped the first time it was seen.
187+
func TestTrackerChangeName(t *testing.T) {
188+
p1 := 1
189+
n1_1, n1_2 := "n1_1", "n1_2"
190+
t1 := time.Unix(1, 0).UTC()
191+
192+
tests := []struct {
193+
procs []IDInfo
194+
want []Update
195+
}{
196+
{
197+
[]IDInfo{newProcStart(p1, n1_1, 1)},
198+
[]Update{{GroupName: n1_1, Start: t1, Wchans: msi{}}},
199+
},
200+
{
201+
// p1 is still here but it has changed its name
202+
[]IDInfo{newProcStart(p1, n1_2, 1)},
203+
[]Update{{GroupName: n1_2, Start: t1, Wchans: msi{}}},
204+
},
205+
}
206+
// recheck is enabled here
207+
tr := NewTracker(newNamer(n1_1, n1_2), false, true, 0, false)
208+
209+
opts := cmpopts.SortSlices(lessUpdateGroupName)
210+
for i, tc := range tests {
211+
_, got, err := tr.Update(procInfoIter(tc.procs...))
212+
noerr(t, err)
213+
if diff := cmp.Diff(got, tc.want, opts); diff != "" {
214+
t.Errorf("%d: update differs: (-got +want)\n%s", i, diff)
215+
}
216+
}
217+
}

0 commit comments

Comments
 (0)