Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

proc: add amdgpu support #2009

Merged
merged 1 commit into from
Jul 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions qa/1222
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ _filter()
-e '/^proc\.id\.container: Missing metric value(s)/d' \
-e '/^proc\.psinfo\.labels: No value(s) available/d' \
-e '/^proc\.psinfo\.ngid: Metric not supported by this version/d' \
-e '/^proc\.fdinfo\..*: Metric not supported/d' \
-e '/^proc\.fdinfo\..*: No value(s) available/d' \
-e '/pmdaFetch: Fetch callback error from metric PMID 3\.11\.0\[.*]: No data available/d' \
-e '/proc\.psinfo\.tty_pgrp: No value(s) available/d' \
-e '/ acct: existing pacct file did not grow /d' \
Expand Down Expand Up @@ -105,6 +107,7 @@ NF == 0 && seen == 1 { if (numval == 1) print metric ": 1 value"
-e '/^proc\.psinfo\.ngid:/d' \
-e '/^proc\.psinfo\.tty_pgrp:/d' \
-e '/^proc\.smaps\./d' \
-e '/^proc\.fdinfo\./d' \
-e '/^Command: /s/,proc_init .*/,proc_init ... metrics .../' \
-e '/ERROR SUMMARY/q' \
# end
Expand Down
2 changes: 2 additions & 0 deletions qa/364
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ _filter_linux()
{
# pcp-atop uses metrics not supported on some kernels
# proc.smaps.* metrics are not present for older kernels
# proc.fdinfo.* metrics are not present for older kernels either
#
if [ $PCP_PLATFORM != linux ]
then
Expand All @@ -310,6 +311,7 @@ _filter_linux()
-e '/^proc\.psinfo\.cgroups -12351 Missing metric value(s)/d' \
-e '/^proc\.namespaces\.envid -12350 Metric not supported/d' \
-e '/^hotproc\.namespaces\.envid -12350 Metric not supported/d' \
-e '/^proc\.fdinfo\..* Metric not supported/d' \
# linux
fi
}
Expand Down
4 changes: 3 additions & 1 deletion src/pmdas/linux_proc/clusters.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,10 @@
#define CLUSTER_PID_AUTOGROUP 74 /* /proc/<pid>/autogroup */
#define CLUSTER_HOTPROC_PID_AUTOGROUP 75 /* /proc/<pid>/autogroup */
#define CLUSTER_CGROUP2_IRQ_PRESSURE 76
#define CLUSTER_PID_FDINFO 77 /* /proc/<pid>/fdinfo */
#define CLUSTER_HOTPROC_PID_FDINFO 78 /* /proc/<pid>/fdinfo */

#define MIN_CLUSTER 8 /* first cluster number we use here */
#define MAX_CLUSTER 77 /* one more than highest cluster number used */
#define MAX_CLUSTER 79 /* one more than highest cluster number used */

#endif /* _CLUSTERS_H */
1 change: 1 addition & 0 deletions src/pmdas/linux_proc/help
Original file line number Diff line number Diff line change
Expand Up @@ -591,3 +591,4 @@ of accounting information:
0 inactive (no information available)
1 system (system level accounting from whatever file accton(8) is using)
2 private (accounting records from $PCP_VAR_DIR/pmcd/pacct)

14 changes: 14 additions & 0 deletions src/pmdas/linux_proc/help_text.h
Original file line number Diff line number Diff line change
Expand Up @@ -155,4 +155,18 @@ help_text_t help_text[] = {
{ .name = "autogroup.enabled", .shorthelp = "Scheduling autogroup feature for CFS is enabled in the kernel", .longhelp = "Contents of /proc/sys/kernel/sched_autogroup_enabled as described in sched(7)." },
{ .name = "autogroup.id", .shorthelp = "Process autogroup identifier from /proc/<pid>/autogroup", .longhelp = "Process scheduling autogroup identifier as described in sched(7)." },
{ .name = "autogroup.nice", .shorthelp = "Process autogroup nice level from /proc/<pid>/autogroup", .longhelp = "Process scheduling autogroup nice level as described in sched(7)." },

{ .name = "fdinfo.drm_memory_cpu", .shorthelp = "Accumulation of the drm-memory-cpu field from /proc/<pid>/fdinfo/* file descriptors", .longhelp = "CPU memory which can be used by the GPU to store buffer objects." },
{ .name = "fdinfo.drm_memory_gtt", .shorthelp = "Accumulation of the drm-memory-gtt field from /proc/<pid>/fdinfo/* file descriptors", .longhelp = "GTT memory which can be used by the GPU to store buffer objects." },
{ .name = "fdinfo.drm_memory_vram", .shorthelp = "Accumulation of the drm-memory-vram field from /proc/<pid>/fdinfo/* file descriptors", .longhelp = "VRAM memory which can be used by the GPU to store buffer objects." },
{ .name = "fdinfo.drm_shared_cpu", .shorthelp = "Accumulation of the drm-shared-cpu field from /proc/<pid>/fdinfo/* file descriptors", .longhelp = "CPU memory which can be used by the GPU to store buffer objects, and is shared with another file." },
{ .name = "fdinfo.drm_shared_gtt", .shorthelp = "Accumulation of the drm-shared-gtt field from /proc/<pid>/fdinfo/* file descriptors", .longhelp = "GTT memory which can be used by the GPU to store buffer objects, and is shared with another file." },
{ .name = "fdinfo.drm_shared_vram", .shorthelp = "Accumulation of the drm-shared-vram field from /proc/<pid>/fdinfo/* file descriptors", .longhelp = "VRAM memory which can be used by the GPU to store buffer objects, and is shared with another file." },

{ .name = "fdinfo.amd_evicted_visible_vram", .shorthelp = "Accumulation of the amd-evicted-visible-vram field from /proc/<pid>/fdinfo/* file descriptors", .longhelp = "Sum of evicted buffers due to CPU access." },
{ .name = "fdinfo.amd_evicted_vram", .shorthelp = "Accumulation of the amd-evicted-vram field from /proc/<pid>/fdinfo/* file descriptors", .longhelp = "Sum of evicted buffers, includes visible VRAM" },
{ .name = "fdinfo.amd_memory_visible_vram", .shorthelp = "Accumulation of the amd-memory-visible-vram field from /proc/<pid>/fdinfo/* file descriptors", .longhelp = "Current visible VRAM usage" },
{ .name = "fdinfo.amd_requested_gtt", .shorthelp = "Accumulation of the amd-requested-gtt field from /proc/<pid>/fdinfo/* file descriptors", .longhelp = "How much GTT memory userspace asked for" },
{ .name = "fdinfo.amd_requested_visible_vram", .shorthelp = "Accumulation of the amd-requested-visible-vram field from /proc/<pid>/fdinfo/* file descriptors", .longhelp = "How much visible VRAM userspace asked for" },
{ .name = "fdinfo.amd_requested_vram", .shorthelp = "Accumulation of the amd-requested-vram field from /proc/<pid>/fdinfo/* file descriptors", .longhelp = "How much VRAM userspace asked for, includes visible VRAM" },
};
104 changes: 104 additions & 0 deletions src/pmdas/linux_proc/pmda.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include "../linux/convert.h"

#include <ctype.h>
#include <sys/syslog.h>
#include <unistd.h>
#include <sys/vfs.h>
#include <sys/stat.h>
Expand Down Expand Up @@ -1348,6 +1349,52 @@ static pmdaMetric metrictab[] = {
/* acct.control.state */
{ NULL, { PMDA_PMID(CLUSTER_ACCT,CONTROL_ACCT_STATE), PM_TYPE_32, PM_INDOM_NULL,
PM_SEM_DISCRETE, PMDA_PMUNITS(0,0,0,0,0,0) }, },

/*
* Fdinfo cluster
*/

/* proc.fdinfo.drm_memory -- NOTE(review): no "drm_memory" entry exists in
 * fdinfo_metrics (dynamic item 0 is drm_memory_cpu, and proc_fetchCallBack
 * maps item 0 to drm_memory_cpu); the 13 metrictab entries here (items 0-12)
 * vs 12 dynamic metrics (items 0-11) suggest these comments are shifted by
 * one and item 12 is stale -- verify the intended item mapping */
{ NULL, { PMDA_PMID(CLUSTER_PID_FDINFO,0), PM_TYPE_U64, PROC_INDOM,
PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0)}},
/* proc.fdinfo.drm.memory_cpu */
{ NULL, { PMDA_PMID(CLUSTER_PID_FDINFO,1), PM_TYPE_U64, PROC_INDOM,
PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0)}},
/* proc.fdinfo.drm.memory_gtt */
{ NULL, { PMDA_PMID(CLUSTER_PID_FDINFO,2), PM_TYPE_U64, PROC_INDOM,
PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0)}},
/* proc.fdinfo.drm.memory_vram */
{ NULL, { PMDA_PMID(CLUSTER_PID_FDINFO,3), PM_TYPE_U64, PROC_INDOM,
PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0)}},
/* proc.fdinfo.drm.shared_cpu */
{ NULL, { PMDA_PMID(CLUSTER_PID_FDINFO,4), PM_TYPE_U64, PROC_INDOM,
PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0)}},
/* proc.fdinfo.drm.shared_gtt */
{ NULL, { PMDA_PMID(CLUSTER_PID_FDINFO,5), PM_TYPE_U64, PROC_INDOM,
PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0)}},
/* proc.fdinfo.drm.shared_vram */
{ NULL, { PMDA_PMID(CLUSTER_PID_FDINFO,6), PM_TYPE_U64, PROC_INDOM,
PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0)}},

/* proc.fdinfo.amd.evicted_visible_vram */
{ NULL, { PMDA_PMID(CLUSTER_PID_FDINFO,7), PM_TYPE_U64, PROC_INDOM,
PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0)}},
/* proc.fdinfo.amd.evicted_vram */
{ NULL, { PMDA_PMID(CLUSTER_PID_FDINFO,8), PM_TYPE_U64, PROC_INDOM,
PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0)}},
/* proc.fdinfo.amd.memory_visible_vram */
{ NULL, { PMDA_PMID(CLUSTER_PID_FDINFO,9), PM_TYPE_U64, PROC_INDOM,
PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0)}},
/* proc.fdinfo.amd.requested_gtt */
{ NULL, { PMDA_PMID(CLUSTER_PID_FDINFO,10), PM_TYPE_U64, PROC_INDOM,
PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0)}},
/* proc.fdinfo.amd.requested_visible_vram */
{ NULL, { PMDA_PMID(CLUSTER_PID_FDINFO,11), PM_TYPE_U64, PROC_INDOM,
PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0)}},
/* proc.fdinfo.amd.requested_vram */
{ NULL, { PMDA_PMID(CLUSTER_PID_FDINFO,12), PM_TYPE_U64, PROC_INDOM,
PM_SEM_INSTANT, PMDA_PMUNITS(1,0,0,PM_SPACE_KBYTE,0,0)}},

};

pmInDom
Expand Down Expand Up @@ -1444,6 +1491,7 @@ proc_refresh(pmdaExt *pmda, int *need_refresh)
need_refresh[CLUSTER_PID_CWD] ||
need_refresh[CLUSTER_PID_EXE] ||
need_refresh[CLUSTER_PID_FD] ||
need_refresh[CLUSTER_PID_FDINFO] ||
need_refresh[CLUSTER_PROC_RUNQ]) {
refresh_proc_pid(&proc_pid,
need_refresh[CLUSTER_PROC_RUNQ]? &proc_runq : NULL,
Expand All @@ -1464,6 +1512,7 @@ proc_refresh(pmdaExt *pmda, int *need_refresh)
need_refresh[CLUSTER_HOTPROC_PID_CWD] ||
need_refresh[CLUSTER_HOTPROC_PID_EXE] ||
need_refresh[CLUSTER_HOTPROC_PID_FD] ||
need_refresh[CLUSTER_HOTPROC_PID_FDINFO] ||
need_refresh[CLUSTER_HOTPROC_GLOBAL] ||
need_refresh[CLUSTER_HOTPROC_PRED]){
refresh_hotproc_pid(&hotproc_pid,
Expand Down Expand Up @@ -1495,6 +1544,7 @@ proc_instance(pmInDom indom, int inst, char *name, pmInResult **result, pmdaExt
need_refresh[CLUSTER_PID_CWD]++;
need_refresh[CLUSTER_PID_IO]++;
need_refresh[CLUSTER_PID_FD]++;
need_refresh[CLUSTER_PID_FDINFO]++;
break;
case HOTPROC_INDOM:
need_refresh[CLUSTER_HOTPROC_PID_STAT]++;
Expand All @@ -1511,6 +1561,7 @@ proc_instance(pmInDom indom, int inst, char *name, pmInResult **result, pmdaExt
need_refresh[CLUSTER_HOTPROC_PID_FD]++;
need_refresh[CLUSTER_HOTPROC_GLOBAL]++;
need_refresh[CLUSTER_HOTPROC_PRED]++;
need_refresh[CLUSTER_HOTPROC_PID_FDINFO]++;
break;

case CGROUP_CPUSET_INDOM:
Expand Down Expand Up @@ -3430,6 +3481,59 @@ proc_fetchCallBack(pmdaMetric *mdesc, unsigned int inst, pmAtomValue *atom)
}
break;

case CLUSTER_HOTPROC_PID_FDINFO:
active_proc_pid = &hotproc_pid;
/*FALLTHROUGH*/
case CLUSTER_PID_FDINFO:
if (!have_access)
return PM_ERR_PERMISSION;
if ((entry = fetch_proc_pid_fdinfo(inst, active_proc_pid, &sts)) == NULL)
return sts;
if (!(entry->success & PROC_PID_FLAG_FDINFO))
return 0;

switch (item) {
case 0: /* proc.fdinfo.drm.memory_cpu */
atom->ull = entry->fdinfo.drm_memory_cpu;
break;
case 1: /* proc.fdinfo.drm.memory_gtt */
atom->ull = entry->fdinfo.drm_memory_gtt;
break;
case 2: /* proc.fdinfo.drm.memory_vram */
atom->ull = entry->fdinfo.drm_memory_vram;
break;
case 3: /* proc.fdinfo.drm.shared_cpu */
atom->ull = entry->fdinfo.drm_shared_cpu;
break;
case 4: /* proc.fdinfo.drm.shared_gtt */
atom->ull = entry->fdinfo.drm_shared_gtt;
break;
case 5: /* proc.fdinfo.drm.shared_vram */
atom->ull = entry->fdinfo.drm_shared_vram;
break;

case 6: /* proc.fdinfo.amd.evicted_visible_vram */
atom->ull = entry->fdinfo.amd_evicted_visible_vram;
break;
case 7: /* proc.fdinfo.amd.evicted_vram */
atom->ull = entry->fdinfo.amd_evicted_vram;
break;
case 8: /* proc.fdinfo.amd.memory_visible_vram */
atom->ull = entry->fdinfo.amd_memory_visible_vram;
break;
case 9: /* proc.fdinfo.amd.requested_gtt */
atom->ull = entry->fdinfo.amd_requested_gtt;
break;
case 10: /* proc.fdinfo.amd.requested_visible_vram */
atom->ull = entry->fdinfo.amd_requested_visible_vram;
break;
case 11: /* proc.fdinfo.amd.requested_vram */
atom->ull = entry->fdinfo.amd_requested_vram;
break;
default: /* unknown item within CLUSTER_PID_FDINFO */
return PM_ERR_PMID;
}
break;
default: /* unknown cluster */
return PM_ERR_PMID;
}
Expand Down
18 changes: 18 additions & 0 deletions src/pmdas/linux_proc/proc_dynamic.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ enum {
DYNPROC_GROUP_NAMESPACE,
DYNPROC_GROUP_SMAPS,
DYNPROC_GROUP_AUTOGROUP,
DYNPROC_GROUP_FDINFO,

NUM_DYNPROC_GROUPS
};
Expand Down Expand Up @@ -69,6 +70,7 @@ static int proc_hotproc_cluster_list[][2] = {
{ CLUSTER_PID_EXE, CLUSTER_HOTPROC_PID_EXE },
{ CLUSTER_PID_CWD, CLUSTER_HOTPROC_PID_CWD },
{ CLUSTER_PID_AUTOGROUP, CLUSTER_HOTPROC_PID_AUTOGROUP },
{ CLUSTER_PID_FDINFO, CLUSTER_HOTPROC_PID_FDINFO },
};


Expand Down Expand Up @@ -257,6 +259,21 @@ static dynproc_metric_t smaps_metrics[] = {
{ .name = "pss_dirty", .cluster = CLUSTER_PID_SMAPS, .item=20 },
};

/*
 * proc.fdinfo.* dynamic metric table: per-process GPU memory counters
 * accumulated from the DRM fdinfo key/value pairs exposed under
 * /proc/<pid>/fdinfo/*.  The .item values are the item numbers within
 * CLUSTER_PID_FDINFO used by the fetch callback; the hotproc variants
 * are remapped to CLUSTER_HOTPROC_PID_FDINFO via proc_hotproc_cluster_list.
 * drm_* fields are the generic drm-memory-*/drm-shared-* keys; amd_*
 * fields are amdgpu-specific keys (amd-evicted-*, amd-requested-*).
 */
static dynproc_metric_t fdinfo_metrics[] = {
{ .name = "drm_memory_cpu", .cluster = CLUSTER_PID_FDINFO, .item=0 },
{ .name = "drm_memory_gtt", .cluster = CLUSTER_PID_FDINFO, .item=1 },
{ .name = "drm_memory_vram", .cluster = CLUSTER_PID_FDINFO, .item=2 },
{ .name = "drm_shared_cpu", .cluster = CLUSTER_PID_FDINFO, .item=3 },
{ .name = "drm_shared_gtt", .cluster = CLUSTER_PID_FDINFO, .item=4 },
{ .name = "drm_shared_vram", .cluster = CLUSTER_PID_FDINFO, .item=5 },
{ .name = "amd_evicted_visible_vram", .cluster = CLUSTER_PID_FDINFO, .item=6 },
{ .name = "amd_evicted_vram", .cluster = CLUSTER_PID_FDINFO, .item=7 },
{ .name = "amd_memory_visible_vram", .cluster = CLUSTER_PID_FDINFO, .item=8 },
{ .name = "amd_requested_gtt", .cluster = CLUSTER_PID_FDINFO, .item=9 },
{ .name = "amd_requested_visible_vram", .cluster = CLUSTER_PID_FDINFO, .item=10 },
{ .name = "amd_requested_vram", .cluster = CLUSTER_PID_FDINFO, .item=11 },
};

static dynproc_group_t dynproc_groups[] = {
[DYNPROC_GROUP_PSINFO] = { .name = "psinfo", .metrics = psinfo_metrics, .nmetrics = sizeof(psinfo_metrics)/sizeof(dynproc_metric_t)},
[DYNPROC_GROUP_ID] = { .name = "id", .metrics = id_metrics, .nmetrics = sizeof(id_metrics)/sizeof(dynproc_metric_t)},
Expand All @@ -267,6 +284,7 @@ static dynproc_group_t dynproc_groups[] = {
[DYNPROC_GROUP_NAMESPACE] = { .name = "namespaces", .metrics = namespace_metrics, .nmetrics = sizeof(namespace_metrics)/sizeof(dynproc_metric_t) },
[DYNPROC_GROUP_SMAPS] = { .name = "smaps", .metrics = smaps_metrics, .nmetrics = sizeof(smaps_metrics)/sizeof(dynproc_metric_t)},
[DYNPROC_GROUP_AUTOGROUP] = { .name = "autogroup", .metrics = autogroup_metrics, .nmetrics = sizeof(autogroup_metrics)/sizeof(dynproc_metric_t) },
[DYNPROC_GROUP_FDINFO] = { .name = "fdinfo", .metrics = fdinfo_metrics, .nmetrics = sizeof(fdinfo_metrics)/sizeof(dynproc_metric_t) },
};

/*
Expand Down
Loading
Loading