Skip to content

Commit

Permalink
Merge branches 'log-throttle' and 'vmw_balloon'
Browse files Browse the repository at this point in the history
  • Loading branch information
natoscott committed May 6, 2024
3 parents 12c9c83 + 57495c3 + e75276f commit a856de2
Show file tree
Hide file tree
Showing 16 changed files with 280 additions and 42 deletions.
64 changes: 64 additions & 0 deletions qa/1595
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/bin/sh
# PCP QA Test No. 1595
# Exercise Linux kernel virtual machine memory metrics.
#
# Copyright (c) 2024 Red Hat. All Rights Reserved.
#

# The test number is taken from the script's own file name and echoed as
# the first line of output.
seq=`basename $0`
echo "QA output created by $seq"

# get standard environment, filters and checks
# (these are sourced from the current directory, so the test must be
# started from the directory that holds the common.* files)
. ./common.product
. ./common.filter
. ./common.check

# Gate the test: _check_valgrind and _notrun come from the files sourced
# above -- presumably they end the test early as "not run" when valgrind
# is unavailable or, below, when the platform is not Linux (TODO confirm).
_check_valgrind
[ $PCP_PLATFORM = linux ] || _notrun "Tests Linux kernel memory metrics"

# Exit-time housekeeping for this test.
# Changes back to $here (set outside this script) and then removes $tmp
# and everything matching $tmp.*, with the rm command prefixed by $sudo.
_cleanup()
{
cd $here
$sudo rm -rf $tmp $tmp.*
}

# Start pessimistic: the exit status only becomes 0 once the test body
# has run to completion.
status=1 # failure is the default!
# Remove anything matching this test's temporary names, and any existing
# $seq.full file, before starting.
$sudo rm -rf $tmp $tmp.* $seq.full
# On normal exit (0) and on signals 1, 2, 3 and 15 run _cleanup and exit
# with the current value of $status; the backslash defers expansion of
# $status until the trap actually fires.
trap "_cleanup; exit \$status" 0 1 2 3 15

# Make output independent of the local install layout: every occurrence
# of the value of $PCP_PMDAS_DIR on stdin is rewritten to the literal
# text PCP_PMDAS_DIR on stdout.  All other text passes through unchanged.
_filter()
{
    sed -e "s,$PCP_PMDAS_DIR,PCP_PMDAS_DIR,g"
}

# real QA test starts here
#
# Each sample file is installed as sys/kernel/debug/vmmemctl beneath a
# private directory tree, and LINUX_STATSPATH is exported pointing at
# that tree -- presumably so the Linux PMDA reads the sample instead of
# the live system (TODO confirm).
root=$tmp.root
export LINUX_STATSPATH=$root
pmda="60,$PCP_PMDAS_DIR/linux/pmda_linux.so,linux_init"
memmetrics=`pminfo mem.vmmemctl | LC_COLLATE=POSIX sort`

# Run pminfo under valgrind against a local context that loads only the
# Linux PMDA, passing the result through _filter.
#   $1  word used in the banner line ("metadata" or "values")
#   $2  pminfo reporting flags for this pass
# Reads $base, $pmda and $memmetrics from the caller.
_check_vmmemctl()
{
    echo "== Valgrind checking vmmemctl metric $1 - $base"
    _run_valgrind pminfo -L -K clear -K add,$pmda $2 $memmetrics 2>&1 \
    | _filter
}

for sample in $here/linux/sysfs-vmmemctl-*
do
    # start from an empty tree for every sample
    rm -fr $root
    mkdir -p $root/sys/kernel/debug || _fail "root in use when processing $sample"
    cp $sample $root/sys/kernel/debug/vmmemctl
    cd $root
    base=`basename $sample`

    _check_vmmemctl metadata -dmtT
    _check_vmmemctl values -fm

    echo && echo "== done" && echo
    cd $here
done

# success, all done
status=0
exit
42 changes: 42 additions & 0 deletions qa/1595.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
QA output created by 1595
== Valgrind checking vmmemctl metric metadata - sysfs-vmmemctl-001
=== std out ===

mem.vmmemctl.current PMID: 60.35.4 [Virtual machine balloon memory actually allocated]
Data Type: 64-bit unsigned int InDom: PM_INDOM_NULL 0xffffffff
Semantics: instant Units: byte
Help:
Virtual machine balloon memory actually allocated

mem.vmmemctl.target PMID: 60.35.5 [Virtual machine balloon memory allocation target size]
Data Type: 64-bit unsigned int InDom: PM_INDOM_NULL 0xffffffff
Semantics: instant Units: byte
Help:
Virtual machine balloon memory allocation target size
=== std err ===
=== filtered valgrind report ===
Memcheck, a memory error detector
Command: pminfo -L -K clear -K add,60,PCP_PMDAS_DIR/linux/pmda_linux.so,linux_init -dmtT mem.vmmemctl.current mem.vmmemctl.target
LEAK SUMMARY:
definitely lost: 0 bytes in 0 blocks
indirectly lost: 0 bytes in 0 blocks
ERROR SUMMARY: 0 errors from 0 contexts ...
== Valgrind checking vmmemctl metric values - sysfs-vmmemctl-001
=== std out ===

mem.vmmemctl.current PMID: 60.35.4
value 4194304000

mem.vmmemctl.target PMID: 60.35.5
value 4193390592
=== std err ===
=== filtered valgrind report ===
Memcheck, a memory error detector
Command: pminfo -L -K clear -K add,60,PCP_PMDAS_DIR/linux/pmda_linux.so,linux_init -fm mem.vmmemctl.current mem.vmmemctl.target
LEAK SUMMARY:
definitely lost: 0 bytes in 0 blocks
indirectly lost: 0 bytes in 0 blocks
ERROR SUMMARY: 0 errors from 0 contexts ...

== done

5 changes: 5 additions & 0 deletions qa/193
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ _stop_auto_restart pmcd

_cleanup()
{
pmstore pmcd.control.debug 0 >>$seq.full 2>&1 # pdu==1
_restore_auto_restart pmcd
}

Expand All @@ -32,6 +33,7 @@ _wait_for_pmcd
_wait_for_pmlogger

# real QA test starts here
pmstore pmcd.control.debug 1 >>$seq.full 2>&1 # pdu==1
src/crashpmcd

# give pmcd a chance to deal with PDUs from crashpmcd
Expand All @@ -43,6 +45,9 @@ _filter_pmcd_log <$PCP_PMCDLOG_PATH \
| sed \
-e '1,/ok FD /d' \
-e '/ok FD /d' \
-e '/pmXmitPDU: ERROR/d' \
-e '/pmGetPDU: TYPE/d' \
-e '/^000:/d' \
-e 's/fd=[0-9][0-9]*/fd=N/' \
-e '/HandleClientInput/s/client\[[0-9][0-9]*]/client[N]/' \
-e 's/len=-1: Connection reset by peer.*/END-OF-FILE/' \
Expand Down
1 change: 1 addition & 0 deletions qa/group
Original file line number Diff line number Diff line change
Expand Up @@ -2003,6 +2003,7 @@ x11
1589 pcp2json pcp2xxx python local
1591 archive archive_v3 local
1592 archive archive_v3 local valgrind
1595 pmda.linux local
1598 pmda.statsd local
1599 pmda.statsd local
1600 pmseries pmcd pmproxy pmlogger local
Expand Down
9 changes: 5 additions & 4 deletions qa/linux/GNUmakefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,23 @@ include $(TOPDIR)/src/include/builddefs

TESTDIR = $(PCP_VAR_DIR)/testsuite/linux
ROOTFILES = $(shell echo *-root-*.tgz *-root.tgz)
SYSFSFILES = $(shell echo sysfs-*-*.tgz)
SYSFSFILES = $(shell echo sysfs-*-*.tgz sysfs-*-???)
ZFSFILES = $(shell echo zfs-stats.*.tgz)
CPUINFOFILES = $(shell echo cpuinfo-*)
PROCIRQFILES = $(shell echo interrupts-* softirqs-*)
PROCNETFILES = $(shell echo procnet-* proc_net_*)
PROCSERIALFILES = $(shell echo proc_serial_*)
PROCSYSFILES = $(shell echo procsys-*)
BIGPIDFILE = $(shell echo proc-big-pid-001.tgz)
PROCPIDFILE = $(shell echo proc-*-pid-???.tgz)

default setup default_pcp:

install install_pcp: $(ROOTFILES)
$(INSTALL) -m 755 -d $(TESTDIR)
$(INSTALL) -m 644 -f $(ROOTFILES) $(SYSFSFILES) $(ZFSFILES) $(TESTDIR)
$(INSTALL) -m 644 -f $(CPUINFOFILES) $(BIGPIDFILE) $(PROCSERIALFILES) $(TESTDIR)
$(INSTALL) -m 644 -f $(PROCIRQFILES) $(PROCNETFILES) $(PROCSYSFILES) $(TESTDIR)
$(INSTALL) -m 644 -f $(PROCPIDFILE) $(PROCSERIALFILES) $(TESTDIR)
$(INSTALL) -m 644 -f $(PROCIRQFILES) $(CPUINFOFILES) $(TESTDIR)
$(INSTALL) -m 644 -f $(PROCNETFILES) $(PROCSYSFILES) $(TESTDIR)
$(INSTALL) -m 644 -f GNUmakefile.install $(TESTDIR)/GNUmakefile

include $(BUILDRULES)
Expand Down
30 changes: 30 additions & 0 deletions qa/linux/sysfs-vmmemctl-001
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
balloon capabilities : 0x3e
used capabilities : 0x1e
is resetting : n
target : 1023777
current : 1024000
start : 0 (0 failed)
target : 768 (0 failed)
lock : 0 (0 failed)
unlock : 0 (0 failed)
guestType : 0 (0 failed)
batchLock : 0 (0 failed)
batchUnlock : 0 (0 failed)
2m-lock : 15 (0 failed)
2m-unlock : 2 (0 failed)
doorbellSet : 0 (0 failed)
timer : 768
doorbell : 14
reset : 0
shrink : 0
shrinkFree : 0
alloc (4k): 0
alloc (2M): 2036
allocFail (4k): 0
allocFail (2M): 0
errAlloc (4k): 0
errAlloc (2M): 0
errFree (4k): 0
errFree (2M): 0
free (4k): 0
free (2M): 36
23 changes: 23 additions & 0 deletions qa/src/test_pcp_sockets.python
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import socket
import cpmapi as api
from pcp import pmapi

# Endpoint used for the raw socket connections in the second phase.
address = 'localhost'
port = 44321
endpoint = (address, port)

# Both phases run the same number of iterations.
count = 1234

# Phase 1: create PMAPI host contexts on "local:" and keep a reference
# to every one of them in a list.
contexts = []
for n in range(count):
    print('context', n)
    context = pmapi.pmContext(api.PM_CONTEXT_HOST, "local:")
    print('created', n)
    contexts.append(context)

# Phase 2: open plain sockets to the endpoint and send a short bogus
# payload on each.  The socket object is not retained, so it is simply
# rebound on the next iteration.
sockets = []
for n in range(count):
    sock = socket.socket()
    print('socket', n)
    sock.connect(endpoint)
    print('connect', n)
    sock.send(b"abba\r")  # -- gives a too-large PDU
    print('send', n)
    # sockets.append(sock)  # -- exercise pduread: timeout
58 changes: 33 additions & 25 deletions src/libpcp/src/pdu.c
Original file line number Diff line number Diff line change
Expand Up @@ -244,10 +244,7 @@ pduread(int fd, char *buf, int len, int part, int timeout)
* Need all parts of the PDU to be received by dead_hand
* This enforces a low overall timeout for the whole PDU
* (as opposed to just a timeout for individual calls to
* recv). A more invasive alternative (better) approach
* would see all I/O performed in the main event loop,
* and I/O routines transformed to continuation-passing
* style.
* recv).
*/
gettimeofday(&dead_hand, NULL);
dead_hand.tv_sec += wait.tv_sec;
Expand Down Expand Up @@ -558,9 +555,10 @@ PM_FAULT_RETURN(PM_ERR_TIMEOUT);
if (len == -1) {
if (! __pmSocketClosed()) {
char errmsg[PM_MAXERRMSGLEN];
pmNotifyErr(LOG_ERR, "%s: fd=%d hdr read: len=%d: %s",
"__pmGetPDU", fd, len,
pmErrStr_r(-oserror(), errmsg, sizeof(errmsg)));
if (pmDebugOptions.pdu)
pmNotifyErr(LOG_ERR, "%s: fd=%d hdr read: len=%d: %s",
"__pmGetPDU", fd, len,
pmErrStr_r(-oserror(), errmsg, sizeof(errmsg)));
}
}
else if (len >= (int)sizeof(php->len)) {
Expand All @@ -579,15 +577,17 @@ PM_FAULT_RETURN(PM_ERR_TIMEOUT);
}
else if (len < 0) {
char errmsg[PM_MAXERRMSGLEN];
pmNotifyErr(LOG_ERR, "%s: fd=%d hdr read: len=%d: %s",
"__pmGetPDU", fd, len,
pmErrStr_r(len, errmsg, sizeof(errmsg)));
if (pmDebugOptions.pdu)
pmNotifyErr(LOG_ERR, "%s: fd=%d hdr read: len=%d: %s",
"__pmGetPDU", fd, len,
pmErrStr_r(len, errmsg, sizeof(errmsg)));
__pmUnpinPDUBuf(pdubuf);
return PM_ERR_IPC;
}
else if (len > 0) {
pmNotifyErr(LOG_ERR, "%s: fd=%d hdr read: bad len=%d",
"__pmGetPDU", fd, len);
if (pmDebugOptions.pdu)
pmNotifyErr(LOG_ERR, "%s: fd=%d hdr read: bad len=%d",
"__pmGetPDU", fd, len);
__pmUnpinPDUBuf(pdubuf);
return PM_ERR_IPC;
}
Expand All @@ -606,8 +606,9 @@ PM_FAULT_RETURN(PM_ERR_TIMEOUT);
* PDU length indicates insufficient bytes for a PDU header
* ... looks like DOS attack like PV 935490
*/
pmNotifyErr(LOG_ERR, "%s: fd=%d illegal PDU len=%d in hdr",
"__pmGetPDU", fd, php->len);
if (pmDebugOptions.pdu)
pmNotifyErr(LOG_ERR, "%s: fd=%d illegal PDU len=%d in hdr",
"__pmGetPDU", fd, php->len);
__pmUnpinPDUBuf(pdubuf);
return PM_ERR_IPC;
}
Expand All @@ -618,16 +619,18 @@ PM_FAULT_RETURN(PM_ERR_TIMEOUT);
* (note, pmcd and pmdas have to be able to _send_ large PDUs,
* e.g. for a pmResult or instance domain enquiry)
*/
if (len < (int)(sizeof(php->len) + sizeof(php->type)))
/* PDU too short to provide a valid type */
pmNotifyErr(LOG_ERR, "%s: fd=%d bad PDU len=%d in hdr "
"exceeds maximum client PDU size (%d)",
"__pmGetPDU", fd, php->len, ceiling);
else
pmNotifyErr(LOG_ERR, "%s: fd=%d type=0x%x bad PDU len=%d in hdr "
"exceeds maximum client PDU size (%d)",
"__pmGetPDU", fd, (unsigned)ntohl(php->type),
php->len, ceiling);
if (pmDebugOptions.pdu) {
if (len < (int)(sizeof(php->len) + sizeof(php->type)))
/* PDU too short to provide a valid type */
pmNotifyErr(LOG_ERR, "%s: fd=%d bad PDU len=%d in hdr"
" exceeds maximum client PDU size (%d)",
"__pmGetPDU", fd, php->len, ceiling);
else
pmNotifyErr(LOG_ERR, "%s: fd=%d type=0x%x bad PDU len=%d in hdr"
" exceeds maximum client PDU size (%d)",
"__pmGetPDU", fd, (unsigned)ntohl(php->type),
php->len, ceiling);
}
__pmUnpinPDUBuf(pdubuf);
return PM_ERR_TOOBIG;
}
Expand Down Expand Up @@ -667,6 +670,10 @@ PM_FAULT_RETURN(PM_ERR_TIMEOUT);
__pmUnpinPDUBuf(pdubuf);
return PM_ERR_TIMEOUT;
}
else if (!pmDebugOptions.pdu) {
__pmUnpinPDUBuf(pdubuf);
return PM_ERR_IPC;
}
else if (len < 0) {
char errmsg[PM_MAXERRMSGLEN];
pmNotifyErr(LOG_ERR, "%s: fd=%d data read: len=%d: %s",
Expand Down Expand Up @@ -700,7 +707,8 @@ PM_FAULT_RETURN(PM_ERR_TIMEOUT);
* PDU type is bad ... could be a possible mem leak attack like
* https://bugzilla.redhat.com/show_bug.cgi?id=841319
*/
pmNotifyErr(LOG_ERR, "%s: fd=%d illegal PDU type=%d in hdr",
if (pmDebugOptions.pdu)
pmNotifyErr(LOG_ERR, "%s: fd=%d illegal PDU type=%d in hdr",
"__pmGetPDU", fd, php->type);
__pmUnpinPDUBuf(pdubuf);
return PM_ERR_IPC;
Expand Down
32 changes: 21 additions & 11 deletions src/pmcd/src/client.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ NotifyEndContext(int ctx)
ClientInfo *
AcceptNewClient(int reqfd)
{
static unsigned int seq = 0;
static unsigned int seq, saved, count;
static struct timeval then;
int i, fd;
__pmSockLen addrlen;
struct timeval now;
Expand All @@ -83,21 +84,30 @@ AcceptNewClient(int reqfd)
addrlen = __pmSockAddrSize();
fd = __pmAccept(reqfd, client[i].addr, &addrlen);
if (fd == -1) {
if (neterror() == EPERM) {
pmNotifyErr(LOG_NOTICE, "AcceptNewClient(%d): "
"Permission Denied\n", reqfd);
}
else if (neterror() == ECONNABORTED) {
if (neterror() == ECONNABORTED) {
/* quietly ignore this one ... */
;
}
else {
/*
* unexpected ... ignore the client (we used to kill off pmcd
* but that seems way too extreme)
/* Permission denied or an unexpected error (e.g. EMFILE)
* - rate limit the logging and make this client go away.
*/
pmNotifyErr(LOG_ERR, "AcceptNewClient(%d): Unexpected error from __pmAccept: %d: %s\n",
reqfd, neterror(), netstrerror());
pmtimevalNow(&now);
if (neterror() != saved || now.tv_sec > then.tv_sec + 60) {
if (neterror() == EPERM)
pmNotifyErr(LOG_NOTICE, "AcceptNewClient(%d): "
"Permission Denied (%d suppressed)\n",
reqfd, count);
else
pmNotifyErr(LOG_ERR, "AcceptNewClient(%d): "
"Accept error (%d suppressed): %d: %s\n",
reqfd, count, neterror(), netstrerror());
saved = neterror();
count = 0;
} else {
count++;
}
then = now;
}
client[i].fd = -1;
DeleteClient(&client[i]);
Expand Down
2 changes: 1 addition & 1 deletion src/pmcd/src/pmcd.c
Original file line number Diff line number Diff line change
Expand Up @@ -745,7 +745,7 @@ HandleReadyAgents(__pmFdSet *readyFds)
}

static void
CheckNewClient(__pmFdSet * fdset, int rfd, int family)
CheckNewClient(__pmFdSet *fdset, int rfd, int family)
{
int s, sts, accepted = 1;
__uint32_t challenge;
Expand Down
Loading

0 comments on commit a856de2

Please sign in to comment.