22
33module (... ,package.seeall )
44
5- local packet = require (" core.packet" )
6- local lib = require (" core.lib" )
7- local link = require (" core.link" )
8- local config = require (" core.config" )
9- local timer = require (" core.timer" )
10- local shm = require (" core.shm" )
11- local histogram = require (' core.histogram' )
12- local counter = require (" core.counter" )
13- local jit = require (" jit" )
14- local S = require (" syscall" )
15- local ffi = require (" ffi" )
16- local C = ffi .C
17- local S = require (" syscall" )
5+ local packet = require (" core.packet" )
6+ local lib = require (" core.lib" )
7+ local link = require (" core.link" )
8+ local config = require (" core.config" )
9+ local timer = require (" core.timer" )
10+ local shm = require (" core.shm" )
11+ local histogram = require (' core.histogram' )
12+ local counter = require (" core.counter" )
13+ local timeline_mod = require (" core.timeline" ) -- avoid collision with timeline
14+ local jit = require (" jit" )
15+ local S = require (" syscall" )
16+ local ffi = require (" ffi" )
17+ local C = ffi .C
18+
1819require (" core.packet_h" )
1920
2021-- Packet per pull
@@ -33,14 +34,48 @@ local named_program_root = shm.root .. "/" .. "by-name"
3334program_name = false
3435
3536-- Auditlog state
36- auditlog_enabled = false
37+ local auditlog_enabled = false
3738function enable_auditlog ()
3839 jit .auditlog (shm .path (" audit.log" ))
3940 auditlog_enabled = true
4041end
4142
43+ -- Timeline event log
44+ local timeline_log , events -- initialized on demand
45+ function timeline ()
46+ if timeline_log == nil then
47+ timeline_log = timeline_mod .new (" engine/timeline" )
48+ timeline_mod .rate (timeline_log , 9 ) -- initially log events with rate >= 9
49+ events = timeline_mod .load_events (timeline_log , " core.engine" )
50+ end
51+ return timeline_log
52+ end
53+
54+ function randomize_log_rate ()
55+ -- Randomize the log rate. Enable each rate in 5x more breaths
56+ -- than the rate below by randomly picking from log5() distribution.
57+ -- Goal is ballpark 1000 messages per second (~15min for 1M entries.)
58+ --
59+ -- Could be better to reduce the log rate over time to "stretch"
60+ -- logs for long running processes? Improvements possible :-).
61+ --
62+ -- We use rates 0-9 where 9 means "log always", and 0 means "log never."
63+ local rate = math.max (1 , math.ceil (math.log (math.random (5 ^ 9 ))/ math.log (5 )))
64+ timeline_mod .rate (timeline_log , rate )
65+ end
66+
67+ -- Breath latency histogram
68+ local latency -- initialized on demand
69+ function enable_latency_histogram ()
70+ if latency == nil then
71+ latency = histogram .create (' engine/latency.histogram' , 1e-6 , 1e0 )
72+ end
73+ end
74+
4275-- The set of all active apps and links in the system, indexed by name.
4376app_table , link_table = {}, {}
77+ -- Timeline events specific to app instances
78+ app_events = setmetatable ({}, { __mode = ' k' })
4479
4580configuration = config .new ()
4681
@@ -311,6 +346,7 @@ function compute_config_actions (old, new)
311346 end
312347 end
313348
349+ events .config_actions_computed ()
314350 return actions
315351end
316352
@@ -336,42 +372,44 @@ function apply_config_actions (actions)
336372 local link = app .output [linkname ]
337373 app .output [linkname ] = nil
338374 remove_link_from_array (app .output , link )
339- if app .link then app :link () end
375+ if app .link then app :link () app_events [ app ]. linked () end
340376 end
341377 function ops .unlink_input (appname , linkname )
342378 local app = app_table [appname ]
343379 local link = app .input [linkname ]
344380 app .input [linkname ] = nil
345381 remove_link_from_array (app .input , link )
346- if app .link then app :link () end
382+ if app .link then app :link () app_events [ app ]. linked () end
347383 end
348384 function ops .free_link (linkspec )
349385 link .free (link_table [linkspec ], linkspec )
350386 link_table [linkspec ] = nil
351387 configuration .links [linkspec ] = nil
352388 end
353389 function ops .new_link (linkspec )
354- link_table [linkspec ] = link .new (linkspec )
390+ local link = link .new (linkspec )
391+ link_table [linkspec ] = link
355392 configuration .links [linkspec ] = true
356393 end
357394 function ops .link_output (appname , linkname , linkspec )
358395 local app = app_table [appname ]
359396 local link = assert (link_table [linkspec ])
360397 app .output [linkname ] = link
361398 table.insert (app .output , link )
362- if app .link then app :link () end
399+ if app .link then app :link () app_events [ app ]. linked () end
363400 end
364401 function ops .link_input (appname , linkname , linkspec )
365402 local app = app_table [appname ]
366403 local link = assert (link_table [linkspec ])
367404 app .input [linkname ] = link
368405 table.insert (app .input , link )
369- if app .link then app :link () end
406+ if app .link then app :link () app_events [ app ]. linked () end
370407 end
371408 function ops .stop_app (name )
372409 local app = app_table [name ]
373- if app .stop then app :stop () end
410+ if app .stop then app :stop () app_events [ app ]. stopped () end
374411 if app .shm then shm .delete_frame (app .shm ) end
412+ app_events [app ] = nil
375413 app_table [name ] = nil
376414 configuration .apps [name ] = nil
377415 end
@@ -382,6 +420,8 @@ function apply_config_actions (actions)
382420 name , tostring (app )))
383421 end
384422 local zone = app .zone or (type (class .name ) == ' string' and class .name ) or getfenv (class .new )._NAME or name
423+ app_events [app ] =
424+ timeline_mod .load_events (timeline (), " core.app" , {app = name })
385425 app .appname = name
386426 app .output = {}
387427 app .input = {}
@@ -392,21 +432,25 @@ function apply_config_actions (actions)
392432 app .shm = shm .create_frame (" apps/" .. name , app .shm )
393433 end
394434 configuration .apps [name ] = { class = class , arg = arg }
435+ app_events [app ].started ()
395436 end
396437 function ops .reconfig_app (name , class , arg )
397438 local app = app_table [name ]
398- app :reconfig (arg )
439+ app :reconfig (arg ) app_events [ app ]. reconfigured ()
399440 configuration .apps [name ].arg = arg
400441 end
401442
443+ events .configure (counter .read (configs ) + 1 )
402444 -- Dispatch actions.
403445 for _ , action in ipairs (actions ) do
404446 local name , args = unpack (action )
405447 if log then io.write (" engine: " , name , " " , args [1 ], " \n " ) end
406448 assert (ops [name ], name )(unpack (args ))
407449 end
450+ events .config_applied ()
408451
409452 compute_breathe_order ()
453+ events .breathe_order_computed ()
410454end
411455
412456-- Sort the NODES topologically according to SUCCESSORS via
@@ -510,23 +554,30 @@ function main (options)
510554 enable_auditlog ()
511555 end
512556
513- -- Setup vmprofile
514- setvmprofile ( " engine " )
557+ -- Ensure timeline is created and initialized
558+ timeline ( )
515559
560+ -- Enable latency histogram unless explicitly disabled
516561 local breathe = breathe
517562 if options .measure_latency or options .measure_latency == nil then
518- local latency = histogram . create ( ' engine/latency.histogram ' , 1e-6 , 1e0 )
563+ enable_latency_histogram ( )
519564 breathe = latency :wrap_thunk (breathe , now )
520565 end
521566
567+ -- Setup vmprofile
568+ setvmprofile (" engine" )
569+
570+ events .engine_started ()
522571 monotonic_now = C .get_monotonic_time ()
523572 repeat
524573 breathe ()
525- if not no_timers then timer .run () end
574+ if not no_timers then timer .run () events . polled_timers () end
526575 if not busywait then pace_breathing () end
576+ randomize_log_rate () -- roll random log rate
527577 until done and done ()
528578 counter .commit ()
529579 if not options .no_report then report (options .report ) end
580+ events .engine_stopped ()
530581
531582 -- Switch to catch-all profile
532583 setvmprofile (" program" )
@@ -542,14 +593,18 @@ function pace_breathing ()
542593 nextbreath = nextbreath or monotonic_now
543594 local sleep = tonumber (nextbreath - monotonic_now )
544595 if sleep > 1e-6 then
596+ events .sleep_Hz (Hz , math .round (sleep * 1e6 ))
545597 C .usleep (sleep * 1e6 )
546598 monotonic_now = C .get_monotonic_time ()
599+ events .wakeup_from_sleep ()
547600 end
548601 nextbreath = math.max (nextbreath + 1 / Hz , monotonic_now )
549602 else
550603 if lastfrees == counter .read (frees ) then
551604 sleep = math.min (sleep + 1 , maxsleep )
605+ events .sleep_on_idle (sleep )
552606 C .usleep (sleep )
607+ events .wakeup_from_sleep ()
553608 else
554609 sleep = math.floor (sleep / 2 )
555610 end
@@ -560,33 +615,73 @@ function pace_breathing ()
560615end
561616
562617function breathe ()
618+ local freed_packets0 = counter .read (frees )
619+ local freed_bytes0 = counter .read (freebytes )
620+ events .breath_start (counter .read (breaths ), freed_packets0 , freed_bytes0 ,
621+ counter .read (freebits ))
563622 running = true
564623 monotonic_now = C .get_monotonic_time ()
624+ events .got_monotonic_time (C .get_time_ns ())
565625 -- Restart: restart dead apps
566626 restart_dead_apps ()
567627 -- Inhale: pull work into the app network
568628 for i = 1 , # breathe_pull_order do
569629 local app = breathe_pull_order [i ]
570630 if app .pull and not app .dead then
571- with_restart (app , app .pull )
631+ if timeline_mod .rate (timeline_log ) <= 3 then
632+ app_events [app ].pull (linkstats (app ))
633+ with_restart (app , app .pull )
634+ app_events [app ].pulled (linkstats (app ))
635+ else
636+ with_restart (app , app .pull )
637+ end
572638 end
573639 end
640+ events .breath_pulled ()
574641 -- Exhale: push work out through the app network
575642 for i = 1 , # breathe_push_order do
576643 local app = breathe_push_order [i ]
577644 if app .push and not app .dead then
578- with_restart (app , app .push )
645+ if timeline_mod .rate (timeline_log ) <= 3 then
646+ app_events [app ].push (linkstats (app ))
647+ with_restart (app , app .push )
648+ app_events [app ].pushed (linkstats (app ))
649+ else
650+ with_restart (app , app .push )
651+ end
579652 end
580653 end
654+ events .breath_pushed ()
655+ local freed
656+ local freed_packets = counter .read (frees ) - freed_packets0
657+ local freed_bytes = (counter .read (freebytes ) - freed_bytes0 )
658+ local freed_bytes_per_packet = freed_bytes / math.max (tonumber (freed_packets ), 1 )
659+ events .breath_end (counter .read (breaths ), freed_packets , freed_bytes_per_packet )
581660 counter .add (breaths )
582661 -- Commit counters and rebalance freelists at a reasonable frequency
583662 if counter .read (breaths ) % 100 == 0 then
584663 counter .commit ()
664+ events .commited_counters ()
585665 packet .rebalance_freelists ()
586666 end
587667 running = false
588668end
589669
670+ function linkstats (app )
671+ local inp , inb , outp , outb , dropp , dropb = 0 , 0 , 0 , 0 , 0 , 0
672+ for i = 1 , # app .input do
673+ inp = inp + tonumber (counter .read (app .input [i ].stats .rxpackets ))
674+ inb = inb + tonumber (counter .read (app .input [i ].stats .rxbytes ))
675+ end
676+ for i = 1 , # app .output do
677+ outp = outp + tonumber (counter .read (app .output [i ].stats .txpackets ))
678+ outb = outb + tonumber (counter .read (app .output [i ].stats .txbytes ))
679+ dropp = dropp + tonumber (counter .read (app .output [i ].stats .txdrop ))
680+ dropb = dropb + tonumber (counter .read (app .output [i ].stats .txdropbytes ))
681+ end
682+ return inp , inb , outp , outb , dropp , dropb
683+ end
684+
590685function report (options )
591686 if not options or options .showload then
592687 report_load ()
0 commit comments