Skip to content

Commit

Permalink
hv: trace: show cpu usage of vms in pcpu sharing case
Browse files Browse the repository at this point in the history
To maximize the cpu utilization, core 0 is usually shared by service
vm and guest vm. But there are no statistics to show the cpu occupation
of each vm.

This patch is to provide cpu usage statistic for users. To calculate
it, a new trace event is added and marked in scheduling context switch,
accompanying with a new python script to analyze the data from acrntrace
output.

Tracked-On: #8621
Signed-off-by: nacui <[email protected]>
Reviewed-by: Junjie Mao <[email protected]>
Reviewed-by: Haiwei Li <[email protected]>
  • Loading branch information
nacui-intel authored and acrnsi-robot committed Jun 28, 2024
1 parent 926f234 commit 512c98f
Show file tree
Hide file tree
Showing 6 changed files with 216 additions and 2 deletions.
6 changes: 6 additions & 0 deletions hypervisor/common/schedule.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include <schedule.h>
#include <sprintf.h>
#include <asm/irq.h>
#include <trace.h>

bool is_idle_thread(const struct thread_object *obj)
{
Expand Down Expand Up @@ -172,6 +173,7 @@ void schedule(void)
struct thread_object *next = &per_cpu(idle, pcpu_id);
struct thread_object *prev = ctl->curr_obj;
uint64_t rflag;
char name[16];

obtain_schedule_lock(pcpu_id, &rflag);
if (ctl->scheduler->pick_next != NULL) {
Expand All @@ -182,6 +184,10 @@ void schedule(void)
/* If we picked different sched object, switch context */
if (prev != next) {
if (prev != NULL) {
memcpy_erms(name, prev->name, 4);
memcpy_erms(name + 4, next->name, 4);
memset(name + 8, 0, sizeof(name) - 8);
TRACE_16STR(TRACE_SCHED_NEXT, name);
if (prev->switch_out != NULL) {
prev->switch_out(prev);
}
Expand Down
2 changes: 1 addition & 1 deletion hypervisor/debug/trace.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ void TRACE_6C(uint32_t evid, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uin
#define TRACE_ENTER TRACE_16STR(TRACE_FUNC_ENTER, __func__)
#define TRACE_EXIT TRACE_16STR(TRACE_FUNC_EXIT, __func__)

static inline void TRACE_16STR(uint32_t evid, const char name[])
void TRACE_16STR(uint32_t evid, const char name[])
{
struct trace_entry entry;
uint16_t cpu_id = get_pcpu_id();
Expand Down
5 changes: 5 additions & 0 deletions hypervisor/include/debug/trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@

#define TRACE_VM_EXIT 0x10U
#define TRACE_VM_ENTER 0X11U

/* event to calculate cpu usage with shared pcpu */
#define TRACE_SCHED_NEXT 0x20U

#define TRACE_VMEXIT_ENTRY 0x10000U

#define TRACE_VMEXIT_EXCEPTION_OR_NMI (TRACE_VMEXIT_ENTRY + 0x00000000U)
Expand All @@ -44,5 +48,6 @@
void TRACE_2L(uint32_t evid, uint64_t e, uint64_t f);
void TRACE_4I(uint32_t evid, uint32_t a, uint32_t b, uint32_t c, uint32_t d);
void TRACE_6C(uint32_t evid, uint8_t a1, uint8_t a2, uint8_t a3, uint8_t a4, uint8_t b1, uint8_t b2);
void TRACE_16STR(uint32_t evid, const char name[]);

#endif /* TRACE_H */
6 changes: 5 additions & 1 deletion misc/debug_tools/acrn_trace/scripts/acrnalyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import os
from vmexit_analyze import analyze_vm_exit
from irq_analyze import analyze_irq
from cpuusage_analyze import analyze_cpu_usage

def usage():
"""print the usage of the script
Expand All @@ -29,6 +30,7 @@ def usage():
-f, --frequency=[unsigned int]: TSC frequency in MHz
--vm_exit: to generate vm_exit report
--irq: to generate irq related report
--cpu_usage: to generate cpu_usage report
''')

def do_analysis(ifile, ofile, analyzer, freq):
Expand Down Expand Up @@ -62,7 +64,7 @@ def main(argv):
# Default TSC frequency of MRB in MHz
freq = 1881.6
opts_short = "hi:o:f:"
opts_long = ["ifile=", "ofile=", "frequency=", "vm_exit", "irq"]
opts_long = ["ifile=", "ofile=", "frequency=", "vm_exit", "irq", "cpu_usage"]
analyzer = []

try:
Expand All @@ -85,6 +87,8 @@ def main(argv):
analyzer.append(analyze_vm_exit)
elif opt == "--irq":
analyzer.append(analyze_irq)
elif opt == "--cpu_usage":
analyzer.append(analyze_cpu_usage)
else:
assert False, "unhandled option"

Expand Down
196 changes: 196 additions & 0 deletions misc/debug_tools/acrn_trace/scripts/cpuusage_analyze.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
#!/usr/bin/python3
# -*- coding: UTF-8 -*-

"""
This script defines the function to do cpu usage analysis
"""
import sys
import string
import struct
import csv
import os

cpu_id = 0
stat_tsc = 0

# map event TRACE_SCHED_NEXT defined in file trace.h
SCHED_NEXT = 0x20

# max number of vm is 16, another 1 is for hv
VM_NUM = 16
time_vm_running = [0] * (VM_NUM + 1)
count_all_trace = 0
count_sched_trace = 0

# Q: 64-bit tsc, Q: 64-bit event, 16c: char name[16]
TRCREC = "QQ16c"

def process_trace_data(ifile):
"""parse the trace data file
Args:
ifile: input trace data file
Return:
None
"""

global stat_tsc, cpu_id, time_vm_running, count_all_trace, count_sched_trace

# The duration of cpu running is tsc_end - tsc_begin
tsc_begin = 0
tsc_end = 0
time_ambiguous = 0

fd = open(ifile, 'rb')
while True:
try:
count_all_trace += 1
line = fd.read(struct.calcsize(TRCREC))
if not line:
break
x = struct.unpack(TRCREC, line)
if x[0] == 0:
break
tsc_end = x[0]
if count_all_trace == 1:
tsc_begin = tsc_end
tsc_last_sched = tsc_begin
event = x[1]
cpu_id = event >> 56
event = event & 0xffffffffffff
if event == SCHED_NEXT:
count_sched_trace += 1
s=''
for _ in list(x[2:]):
d = _.decode('ascii', errors='ignore')
s += d

if s[:2] == "vm":
vm_prev = int(s[2])
if s[3] != ":":
vm_prev = vm_prev*10 + int(s[3])
elif s[:4] == "idle":
vm_prev = VM_NUM
else:
print("Error: trace data is not correct!")
return

if s[4:6] == "vm":
vm_next = int(s[6])
if s[7] != ":":
vm_next = vm_next*10 + int(s[7])
elif s[4:8] == "idle":
vm_next = VM_NUM
else:
print("Error: trace data is not correct!")
return

if (count_sched_trace == 1) or (vm_prev == vm_prev_last):
time_vm_running[vm_prev] += tsc_end - tsc_last_sched
else:
print("line %d: last_next =vm%d, current_prev=vm%d" % (count_all_trace, vm_prev, vm_prev_last))
print("Warning: last schedule next is not the current task. Trace log is lost.")
time_ambiguous += tsc_end - tsc_last_sched

tsc_last_sched = tsc_end
vm_prev_last = vm_next

except (IOError, struct.error) as e:
sys.exit()

print ("Start trace %d tsc cycle" % (tsc_begin))
print ("End trace %d tsc cycle" % (tsc_end))
stat_tsc = tsc_end - tsc_begin
assert stat_tsc != 0, "total_run_time in statistic is 0,\
tsc_end %d, tsc_begin %d"\
% (tsc_end, tsc_begin)

if count_sched_trace == 0:
print ("There is no context switch in HV scheduling during this period. "
"This CPU may be exclusively owned by one vm.\n"
"The CPU usage is 100%")
return
if time_ambiguous > 0:
print("Warning: ambiguous running time: %d tsc cycle, occupying %2.2f%% cpu."
% (time_ambiguous, float(time_ambiguous)*100/stat_tsc))

# the last time
time_vm_running[vm_next] += tsc_end - tsc_last_sched


def generate_report(ofile, freq):
""" generate analysis report
Args:
ofile: output report
freq: TSC frequency of the device trace data from
Return:
None
"""
global stat_tsc, cpu_id, time_vm_running, count_all_trace, count_sched_trace

if (count_sched_trace == 0):
return

csv_name = ofile + '.csv'
try:
with open(csv_name, 'a') as filep:
f_csv = csv.writer(filep)

stat_sec = float(stat_tsc) / (float(freq) * 1000 * 1000)
print ("Total run time: %d cpu cycles" % (stat_tsc))
print ("TSC Freq: %s MHz" % (freq))
print ("Total run time: %.2f sec" % (stat_sec))
print ("Total trace items: %d" % (count_all_trace))
print ("Total scheduling trace: %d" % (count_sched_trace))

f_csv.writerow(['Total run time(tsc cycles)', 'Total run time(sec)', 'Freq(MHz)'])
f_csv.writerow(['%d' % (stat_tsc),
'%.2f' % (stat_sec),
'%s' % (freq)])

print ("%-28s\t%-12s\t%-12s\t%-24s\t%-16s" % ("PCPU ID", "VM ID",
"VM Running/sec", "VM Running(tsc cycles)", "CPU Usage"))
f_csv.writerow(['PCPU ID',
'VM_ID',
'Time Consumed/sec',
'Time Consumed(tsc cycles)',
'CPU Usage%'])

for vmid, tsc in enumerate(time_vm_running):
run_tsc = tsc
run_per = float(run_tsc) * 100 / float(stat_tsc)
run_sec = float(run_tsc) / (float(freq) * 1000 * 1000)
if vmid != VM_NUM:
print ("%-28d\t%-12d\t%-10.2f\t%-24d\t%-2.2f%%" %
(cpu_id, vmid, run_sec, run_tsc, run_per))
row = [cpu_id, vmid, '%.2f' % (run_sec), run_tsc, '%2.2f' % (run_per)]
else:
print ("%-28d\t%-12s\t%-10.2f\t%-24d\t%-2.2f%%" %
(cpu_id, 'Idle', run_sec, run_tsc, run_per))
row = [cpu_id, 'Idle', '%.2f' % (run_sec), run_tsc, '%2.2f' % (run_per)]
f_csv.writerow(row)

except IOError as err:
print ("Output File Error: " + str(err))

def analyze_cpu_usage(ifile, ofile, freq):
"""do cpu usage analysis of each vm
Args:
ifile: input trace data file
ofile: output report file
freq: TSC frequency of the host where we capture the trace data
Return:
None
"""

print("VM CPU usage analysis started... \n\tinput file: %s\n"
"\toutput file: %s.csv" % (ifile, ofile))

if os.stat(ifile).st_size == 0:
print("The input trace file is empty. The corresponding CPU may be offline.")
return
if float(freq) == 0.0:
print("The input cpu frequency cannot be zero!")
return
process_trace_data(ifile)
# print cpu usage of each vm
generate_report(ofile, freq)
3 changes: 3 additions & 0 deletions misc/debug_tools/acrn_trace/scripts/formats
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,6 @@
# For TRACE_4I
0x0001001E CPU%(cpu)d 0x%(event)016x %(tsc)d IO instruction [port = %(1)d, direction = %(2)d, sz = %(3)d, cur_context_idx = %(4)d]
0x00010000 CPU%(cpu)d 0x%(event)016x %(tsc)d exception or nmi [vector = 0x%(1)08x, err = %(2)d, d3 = %(1)d, d4 = %(2)d]

# For TRACE_16STR
0x00000005 CPU%(cpu)d 0x%(event)016x %(tsc)d sched next [data = %(1)c%(2)c%(3)c%(4)c%(5)c%(6)c%(7)c%(8)c%(9)c%(10)c%(11)c%(12)c%(13)c%(14)c%(15)c]

0 comments on commit 512c98f

Please sign in to comment.