Skip to content

Commit 2c6a9dd

Browse files
committed
periodic sampling of metrics to avoid flood, fixup examples
1 parent 3c83c09 commit 2c6a9dd

File tree

6 files changed

+128
-84
lines changed

6 files changed

+128
-84
lines changed

Cargo.lock

+3-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

thread-manager/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -26,5 +26,6 @@ affinity = "0.1.2"
2626

2727
[dev-dependencies]
2828
axum = "0.7.9"
29+
env_logger = { workspace = true }
2930
serde_json = { workspace = true }
3031
toml = { workspace = true }

thread-manager/examples/core_contention_basics.rs

+21-15
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
1-
use std::{
2-
future::IntoFuture,
3-
io::{Read, Write},
4-
net::{IpAddr, Ipv4Addr, SocketAddr},
5-
path::PathBuf,
6-
time::Duration,
1+
use {
2+
agave_thread_manager::*,
3+
log::{debug, info},
4+
std::{
5+
future::IntoFuture,
6+
io::{Read, Write},
7+
net::{IpAddr, Ipv4Addr, SocketAddr},
8+
path::PathBuf,
9+
time::Duration,
10+
},
711
};
812

913
async fn axum_main(port: u16) {
@@ -31,35 +35,36 @@ async fn axum_main(port: u16) {
3135
match timeout {
3236
Ok(v) => v.unwrap(),
3337
Err(_) => {
34-
println!("Terminating server on port {port}");
38+
info!("Terminating server on port {port}");
3539
}
3640
}
3741
}
38-
use agave_thread_manager::*;
3942

4043
fn main() -> anyhow::Result<()> {
44+
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
4145
let experiments = [
42-
"examples/core_contention_dedicated_set.json",
43-
"examples/core_contention_contending_set.json",
46+
"examples/core_contention_dedicated_set.toml",
47+
"examples/core_contention_contending_set.toml",
4448
];
4549

4650
for exp in experiments {
47-
println!("===================");
48-
println!("Running {exp}");
51+
info!("===================");
52+
info!("Running {exp}");
4953
let mut conffile = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
5054
conffile.push(exp);
5155
let mut buf = String::new();
5256
std::fs::File::open(conffile)?.read_to_string(&mut buf)?;
5357
let cfg: RuntimeManagerConfig = toml::from_str(&buf)?;
54-
//println!("Loaded config {}", serde_json::to_string_pretty(&cfg)?);
5558

5659
let rtm = ThreadManager::new(cfg).unwrap();
5760
let tok1 = rtm
5861
.get_tokio("axum1")
5962
.expect("Expecting runtime named axum1");
63+
tok1.start_metrics_sampling(Duration::from_secs(1));
6064
let tok2 = rtm
6165
.get_tokio("axum2")
6266
.expect("Expecting runtime named axum2");
67+
tok2.start_metrics_sampling(Duration::from_secs(1));
6368

6469
let wrk_cores: Vec<_> = (32..64).collect();
6570
let results = std::thread::scope(|s| {
@@ -72,6 +77,7 @@ fn main() -> anyhow::Result<()> {
7277
let jh = s.spawn(|| run_wrk(&[8888, 8889], &wrk_cores, wrk_cores.len(), 1000).unwrap());
7378
jh.join().expect("WRK crashed!")
7479
});
80+
//print out the results of the bench run
7581
println!("Results are: {:?}", results);
7682
}
7783
Ok(())
@@ -112,7 +118,7 @@ fn run_wrk(
112118
let mut all_latencies = vec![];
113119
let mut all_rps = vec![];
114120
for (out, port) in outs.zip(ports.iter()) {
115-
println!("=========================");
121+
debug!("=========================");
116122
std::io::stdout().write_all(&out.stderr)?;
117123
let res = str::from_utf8(&out.stdout)?;
118124
let mut res = res.lines().last().unwrap().split(' ');
@@ -122,7 +128,7 @@ fn run_wrk(
122128

123129
let requests: usize = res.next().unwrap().parse()?;
124130
let rps = requests as f32 / 10.0;
125-
println!("WRK results for port {port}: {latency:?} {rps}");
131+
debug!("WRK results for port {port}: {latency:?} {rps}");
126132
all_latencies.push(Duration::from_micros(latency_us));
127133
all_rps.push(rps);
128134
}

thread-manager/examples/core_contention_sweep.rs

+35-32
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,18 @@
1-
use std::{
2-
collections::HashMap,
3-
future::IntoFuture,
4-
io::Write,
5-
net::{IpAddr, Ipv4Addr, SocketAddr},
6-
path::PathBuf,
7-
time::Duration,
1+
use {
2+
agave_thread_manager::*,
3+
log::{debug, info},
4+
std::{
5+
collections::HashMap,
6+
future::IntoFuture,
7+
io::Write,
8+
net::{IpAddr, Ipv4Addr, SocketAddr},
9+
path::PathBuf,
10+
time::Duration,
11+
},
812
};
913

1014
async fn axum_main(port: u16) {
1115
use axum::{routing::get, Router};
12-
1316
// basic handler that responds with a static string
1417
async fn root() -> &'static str {
1518
tokio::time::sleep(Duration::from_millis(1)).await;
@@ -24,6 +27,7 @@ async fn axum_main(port: u16) {
2427
tokio::net::TcpListener::bind(SocketAddr::new(IpAddr::V4(Ipv4Addr::UNSPECIFIED), port))
2528
.await
2629
.unwrap();
30+
info!("Server on port {port} ready");
2731
let timeout = tokio::time::timeout(
2832
Duration::from_secs(11),
2933
axum::serve(listener, app).into_future(),
@@ -32,11 +36,10 @@ async fn axum_main(port: u16) {
3236
match timeout {
3337
Ok(v) => v.unwrap(),
3438
Err(_) => {
35-
println!("Terminating server on port {port}");
39+
info!("Terminating server on port {port}");
3640
}
3741
}
3842
}
39-
use agave_thread_manager::*;
4043
fn make_config_shared(cc: usize) -> RuntimeManagerConfig {
4144
let tokio_cfg_1 = TokioConfig {
4245
core_allocation: CoreAllocation::DedicatedCoreSet { min: 0, max: cc },
@@ -46,12 +49,8 @@ fn make_config_shared(cc: usize) -> RuntimeManagerConfig {
4649
let tokio_cfg_2 = tokio_cfg_1.clone();
4750
RuntimeManagerConfig {
4851
tokio_configs: HashMap::from([
49-
("tokio1".into(), tokio_cfg_1),
50-
("tokio2".into(), tokio_cfg_2),
51-
]),
52-
tokio_runtime_mapping: HashMap::from([
53-
("axum1".into(), "tokio1".into()),
54-
("axum2".into(), "tokio2".into()),
52+
("axum1".into(), tokio_cfg_1),
53+
("axum2".into(), tokio_cfg_2),
5554
]),
5655
..Default::default()
5756
}
@@ -75,12 +74,8 @@ fn make_config_dedicated(cc: usize) -> RuntimeManagerConfig {
7574
};
7675
RuntimeManagerConfig {
7776
tokio_configs: HashMap::from([
78-
("tokio1".into(), tokio_cfg_1),
79-
("tokio2".into(), tokio_cfg_2),
80-
]),
81-
tokio_runtime_mapping: HashMap::from([
82-
("axum1".into(), "tokio1".into()),
83-
("axum2".into(), "tokio2".into()),
77+
("axum1".into(), tokio_cfg_1),
78+
("axum2".into(), tokio_cfg_2),
8479
]),
8580
..Default::default()
8681
}
@@ -93,7 +88,7 @@ enum Regime {
9388
Single,
9489
}
9590
impl Regime {
96-
const VALUES: [Self; 3] = [Self::Shared, Self::Dedicated, Self::Single];
91+
const VALUES: [Self; 3] = [Self::Dedicated, Self::Shared, Self::Single];
9792
}
9893

9994
#[derive(Debug, Default, serde::Serialize)]
@@ -103,13 +98,14 @@ struct Results {
10398
}
10499

105100
fn main() -> anyhow::Result<()> {
101+
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
106102
let mut all_results: HashMap<String, Results> = HashMap::new();
107103
for regime in Regime::VALUES {
108104
let mut res = Results::default();
109105
for core_cnt in [2, 4, 8, 16] {
110106
let rtm;
111-
println!("===================");
112-
println!("Running {core_cnt} cores under {regime:?}");
107+
info!("===================");
108+
info!("Running {core_cnt} cores under {regime:?}");
113109
let (tok1, tok2) = match regime {
114110
Regime::Shared => {
115111
rtm = ThreadManager::new(make_config_shared(core_cnt)).unwrap();
@@ -143,24 +139,26 @@ fn main() -> anyhow::Result<()> {
143139
let wrk_cores: Vec<_> = (32..64).collect();
144140
let results = std::thread::scope(|s| {
145141
s.spawn(|| {
146-
tok1.tokio.spawn(axum_main(8888));
142+
tok1.start_metrics_sampling(Duration::from_secs(1));
143+
tok1.tokio.block_on(axum_main(8888));
147144
});
148145
let jh = match regime {
149146
Regime::Single => s.spawn(|| {
150-
run_wrk(&[8888, 8888], &wrk_cores, wrk_cores.len(), 1000).unwrap()
147+
run_wrk(&[8888, 8888], &wrk_cores, wrk_cores.len(), 3000).unwrap()
151148
}),
152149
_ => {
153150
s.spawn(|| {
154-
tok2.tokio.spawn(axum_main(8889));
151+
tok2.start_metrics_sampling(Duration::from_secs(1));
152+
tok2.tokio.block_on(axum_main(8889));
155153
});
156154
s.spawn(|| {
157-
run_wrk(&[8888, 8889], &wrk_cores, wrk_cores.len(), 1000).unwrap()
155+
run_wrk(&[8888, 8889], &wrk_cores, wrk_cores.len(), 3000).unwrap()
158156
})
159157
}
160158
};
161159
jh.join().expect("WRK crashed!")
162160
});
163-
println!("Results are: {:?}", results);
161+
info!("Results are: {:?}", results);
164162
res.latencies_s.push(
165163
results.0.iter().map(|a| a.as_secs_f32()).sum::<f32>() / results.0.len() as f32,
166164
);
@@ -169,6 +167,8 @@ fn main() -> anyhow::Result<()> {
169167
all_results.insert(format!("{regime:?}"), res);
170168
std::thread::sleep(Duration::from_secs(3));
171169
}
170+
171+
// Print the resulting measurements so they can be plotted, e.g. with matplotlib
172172
println!("{}", serde_json::to_string_pretty(&all_results)?);
173173

174174
Ok(())
@@ -180,6 +180,9 @@ fn run_wrk(
180180
threads: usize,
181181
connections: usize,
182182
) -> anyhow::Result<(Vec<Duration>, Vec<f32>)> {
183+
//Sleep a bit to let axum start
184+
std::thread::sleep(Duration::from_millis(500));
185+
183186
let mut script = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
184187
script.push("examples/report.lua");
185188
let cpus: Vec<String> = cpus.iter().map(|c| c.to_string()).collect();
@@ -209,7 +212,7 @@ fn run_wrk(
209212
let mut all_latencies = vec![];
210213
let mut all_rps = vec![];
211214
for (out, port) in outs.zip(ports.iter()) {
212-
println!("=========================");
215+
debug!("=========================");
213216
std::io::stdout().write_all(&out.stderr)?;
214217
let res = str::from_utf8(&out.stdout)?;
215218
let mut res = res.lines().last().unwrap().split(' ');
@@ -219,7 +222,7 @@ fn run_wrk(
219222

220223
let requests: usize = res.next().unwrap().parse()?;
221224
let rps = requests as f32 / 10.0;
222-
println!("WRK results for port {port}: {latency:?} {rps}");
225+
debug!("WRK results for port {port}: {latency:?} {rps}");
223226
all_latencies.push(Duration::from_micros(latency_us));
224227
all_rps.push(rps);
225228
}

thread-manager/src/policy.rs

+19-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
use {
22
serde::{Deserialize, Serialize},
3+
std::sync::OnceLock,
34
thread_priority::ThreadExt,
45
};
56

7+
static CORE_COUNT: OnceLock<usize> = OnceLock::new();
8+
69
#[derive(Default, Debug, Clone, Serialize, Deserialize)]
710
pub enum CoreAllocation {
811
///Use OS default allocation (i.e. do not alter core affinity)
@@ -17,17 +20,31 @@ pub enum CoreAllocation {
1720
impl CoreAllocation {
1821
/// Converts into a vector of core IDs. OsDefault is converted to a vector of all core IDs (0..core count).
1922
pub fn as_core_mask_vector(&self) -> Vec<usize> {
23+
let core_count = CORE_COUNT.get_or_init(num_cpus::get);
2024
match *self {
2125
CoreAllocation::PinnedCores { min, max } => (min..max).collect(),
2226
CoreAllocation::DedicatedCoreSet { min, max } => (min..max).collect(),
23-
CoreAllocation::OsDefault => vec![],
27+
CoreAllocation::OsDefault => Vec::from_iter(0..*core_count),
2428
}
2529
}
2630
}
2731

2832
#[cfg(target_os = "linux")]
2933
pub fn set_thread_affinity(cores: &[usize]) {
30-
affinity::set_thread_affinity(cores).expect("Can not set thread affinity for runtime worker");
34+
assert!(
35+
!cores.is_empty(),
36+
"Can not call setaffinity with empty cores mask"
37+
);
38+
if let Err(e) = affinity::set_thread_affinity(cores) {
39+
let thread = std::thread::current();
40+
let msg = format!(
41+
"Can not set core affinity {:?} for thread {:?} named {:?}, error {e}",
42+
cores,
43+
thread.id(),
44+
thread.name()
45+
);
46+
panic!("{}", msg);
47+
}
3148
}
3249

3350
#[cfg(not(target_os = "linux"))]

0 commit comments

Comments
 (0)