Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions rayon-demo/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,14 @@ publish = false
[dependencies]
rayon = { path = "../" }
cgmath = "0.18"
docopt = "1"
clap = { version = "3.2.3", features = ["derive"] }
fixedbitset = "0.4"
glium = "0.31"
lazy_static = "1"
rand = "0.8"
rand_xorshift = "0.3"
regex = "1"

[dependencies.serde]
version = "1.0.85"
features = ["derive"]

[target.'cfg(unix)'.dependencies]
libc = "0.2"

Expand Down
63 changes: 28 additions & 35 deletions rayon-demo/examples/cpu_monitor.rs
Original file line number Diff line number Diff line change
@@ -1,47 +1,40 @@
use docopt::Docopt;
use clap::{Parser, Subcommand};
use std::io;
use std::process;

const USAGE: &str = "
Usage: cpu_monitor [options] <scenario>
cpu_monitor --help

const ABOUT: &str = "
A test for monitoring how much CPU usage Rayon consumes under various
scenarios. This test is intended to be executed interactively, like so:

cargo run --example cpu_monitor -- tasks_ended

The list of scenarios you can try are as follows:

- tasks_ended: after all tasks have finished, go to sleep
- task_stall_root: a root task stalls for a very long time
- task_stall_scope: a task in a scope stalls for a very long time

Options:
-h, --help Show this message.
-d N, --depth N Control how hard the dummy task works [default: 27]
cargo run --example cpu_monitor -- tasks-ended
";

#[derive(serde::Deserialize)]
// Scenario selector for the `cpu_monitor` example: one subcommand per variant.
// `main` matches on the parsed value and calls the handler with the matching
// name (`tasks_ended`, `task_stall_root`, `task_stall_scope`).
//
// On the command line the variants are spelled in kebab-case (for example
// `tasks-ended`). The `///` line above each variant is consumed by clap's
// derive and shown as that subcommand's help text, so treat those lines as
// user-facing strings rather than ordinary comments.
#[derive(Subcommand, Debug)]
pub enum Commands {
/// After all tasks have finished, go to sleep
TasksEnded,
/// A root task stalls for a very long time
TaskStallRoot,
/// A task in a scope stalls for a very long time
TaskStallScope,
}

#[derive(Parser, Debug)]
#[clap(about = ABOUT)]
pub struct Args {
arg_scenario: String,
flag_depth: usize,
#[clap(subcommand)]
command: Commands,

/// Control how hard the dummy task works
#[clap(short = 'd', long, default_value_t = 27)]
depth: usize,
}

fn main() {
let args: &Args = &Docopt::new(USAGE)
.and_then(|d| d.deserialize())
.unwrap_or_else(|e| e.exit());

match &args.arg_scenario[..] {
"tasks_ended" => tasks_ended(args),
"task_stall_root" => task_stall_root(args),
"task_stall_scope" => task_stall_scope(args),
_ => {
println!("unknown scenario: `{}`", args.arg_scenario);
println!("try --help");
process::exit(1);
}
let args: Args = Args::from_args();
match args.command {
Commands::TasksEnded => tasks_ended(&args),
Commands::TaskStallRoot => task_stall_root(&args),
Commands::TaskStallScope => task_stall_scope(&args),
}
}

Expand All @@ -58,8 +51,8 @@ fn task(args: &Args) {
rayon::join(|| join_recursively(n - 1), || join_recursively(n - 1));
}

println!("Starting heavy work at depth {}...wait.", args.flag_depth);
join_recursively(args.flag_depth);
println!("Starting heavy work at depth {}...wait.", args.depth);
join_recursively(args.depth);
println!("Heavy work done; check top. You should see CPU usage drop to zero soon.");
println!("Press <enter> to quit...");
}
Expand Down
130 changes: 66 additions & 64 deletions rayon-demo/src/life/mod.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,5 @@
const USAGE: &str = "
Usage: life bench [--size N] [--gens N] [--skip-bridge]
life play [--size N] [--gens N] [--fps N] [--skip-bridge]
life --help
Conway's Game of Life.

Commands:
bench Run the benchmark in different modes and print the timings.
play Run with a max frame rate and monitor CPU resources.
Options:
--size N Size of the game board (N x N) [default: 200]
--gens N Simulate N generations [default: 100]
--fps N Maximum frame rate [default: 60]
--skip-bridge Skips the tests with par-bridge, as it is much slower.
-h, --help Show this message.
";

use crate::cpu_time::{self, CpuMeasure};
use clap::{Parser, Subcommand};
use rand::distributions::Standard;
use rand::{thread_rng, Rng};
use std::iter::repeat;
Expand All @@ -24,21 +8,41 @@ use std::sync::Arc;
use std::thread;
use std::time::{Duration, Instant};

use docopt::Docopt;
use rayon::iter::ParallelBridge;
use rayon::prelude::*;

#[cfg(test)]
mod bench;

#[derive(serde::Deserialize)]
// Subcommands of the `life` demo; `main` matches on the selected variant.
//
// - `Bench` times the serial and parallel generation loops with `measure`
//   (plus the par_bridge variant unless `--skip-bridge` is given) and prints
//   nanosecond timings and speedups.
// - `Play` runs the frame-rate-limited variants with `measure_cpu` and prints
//   the achieved fps and, when available, CPU usage.
//
// The `///` line above each variant is consumed by clap's derive and shown as
// that subcommand's help text, so treat those lines as user-facing strings.
#[derive(Subcommand)]
enum Commands {
/// Run the benchmark in different modes and print the timings
Bench,
/// Run with a max frame rate and monitor CPU resources
Play,
}

#[derive(Parser)]
#[clap(about = "Conway's Game of Life")]
pub struct Args {
cmd_bench: bool,
cmd_play: bool,
flag_size: usize,
flag_gens: usize,
flag_fps: usize,
flag_skip_bridge: bool,
#[clap(subcommand)]
command: Commands,

/// Size of the game board (N x N)
#[clap(long, default_value_t = 200)]
size: usize,

/// Simulate N generations
#[clap(long, default_value_t = 100)]
gens: usize,

/// Maximum frame rate
#[clap(long, default_value_t = 60)]
fps: usize,

/// Skips the tests with par-bridge, as it is much slower
#[clap(long)]
skip_bridge: bool,
}

#[derive(PartialEq, Eq, Clone, Debug)]
Expand Down Expand Up @@ -233,7 +237,7 @@ fn par_bridge_generations_limited(board: Board, gens: usize, min_interval: Durat
}

fn measure(f: fn(Board, usize) -> (), args: &Args) -> Duration {
let (n, gens) = (args.flag_size, args.flag_gens);
let (n, gens) = (args.size, args.gens);
let brd = Board::new(n, n).random();
let start = Instant::now();

Expand All @@ -248,7 +252,7 @@ struct CpuResult {
}

fn measure_cpu(f: fn(Board, usize, Duration) -> (), args: &Args) -> CpuResult {
let (n, gens, rate) = (args.flag_size, args.flag_gens, args.flag_fps);
let (n, gens, rate) = (args.size, args.gens, args.fps);
let interval = Duration::from_secs_f64(1.0 / rate as f64);
let brd = Board::new(n, n).random();

Expand All @@ -264,50 +268,48 @@ fn measure_cpu(f: fn(Board, usize, Duration) -> (), args: &Args) -> CpuResult {
}

pub fn main(args: &[String]) {
let args: Args = Docopt::new(USAGE)
.and_then(|d| d.argv(args).deserialize())
.unwrap_or_else(|e| e.exit());

if args.cmd_bench {
let serial = measure(generations, &args).as_nanos();
println!(" serial: {:10} ns", serial);

let parallel = measure(parallel_generations, &args).as_nanos();
println!(
"parallel: {:10} ns -> {:.2}x speedup",
parallel,
serial as f64 / parallel as f64
);

if !args.flag_skip_bridge {
let par_bridge = measure(par_bridge_generations, &args).as_nanos();
let args: Args = Parser::parse_from(args);
match args.command {
Commands::Bench => {
let serial = measure(generations, &args).as_nanos();
println!(" serial: {:10} ns", serial);

let parallel = measure(parallel_generations, &args).as_nanos();
println!(
"par_bridge: {:10} ns -> {:.2}x speedup",
par_bridge,
serial as f64 / par_bridge as f64
"parallel: {:10} ns -> {:.2}x speedup",
parallel,
serial as f64 / parallel as f64
);
}
}

if args.cmd_play {
let serial = measure_cpu(generations_limited, &args);
println!(" serial: {:.2} fps", serial.actual_fps);
if let Some(cpu_usage) = serial.cpu_usage_percent {
println!(" cpu usage: {:.1}%", cpu_usage);
}

let parallel = measure_cpu(parallel_generations_limited, &args);
println!("parallel: {:.2} fps", parallel.actual_fps);
if let Some(cpu_usage) = parallel.cpu_usage_percent {
println!(" cpu usage: {:.1}%", cpu_usage);
if !args.skip_bridge {
let par_bridge = measure(par_bridge_generations, &args).as_nanos();
println!(
"par_bridge: {:10} ns -> {:.2}x speedup",
par_bridge,
serial as f64 / par_bridge as f64
);
}
}
Commands::Play => {
let serial = measure_cpu(generations_limited, &args);
println!(" serial: {:.2} fps", serial.actual_fps);
if let Some(cpu_usage) = serial.cpu_usage_percent {
println!(" cpu usage: {:.1}%", cpu_usage);
}

if !args.flag_skip_bridge {
let par_bridge = measure_cpu(par_bridge_generations_limited, &args);
println!("par_bridge: {:.2} fps", par_bridge.actual_fps);
if let Some(cpu_usage) = par_bridge.cpu_usage_percent {
let parallel = measure_cpu(parallel_generations_limited, &args);
println!("parallel: {:.2} fps", parallel.actual_fps);
if let Some(cpu_usage) = parallel.cpu_usage_percent {
println!(" cpu usage: {:.1}%", cpu_usage);
}

if !args.skip_bridge {
let par_bridge = measure_cpu(par_bridge_generations_limited, &args);
println!("par_bridge: {:.2} fps", par_bridge.actual_fps);
if let Some(cpu_usage) = par_bridge.cpu_usage_percent {
println!(" cpu usage: {:.1}%", cpu_usage);
}
}
}
}
}
73 changes: 36 additions & 37 deletions rayon-demo/src/matmul/mod.rs
Original file line number Diff line number Diff line change
@@ -1,26 +1,8 @@
const USAGE: &str = "
Usage: matmul bench [--size N]
matmul --help
Parallel matrix multiplication.

Commands:
bench Run the benchmark in different modes and print the timings.
Options:
--size N Row-size of matrices (rounded up to power of 2) [default: 1024]
-h, --help Show this message.
";

#[derive(serde::Deserialize)]
pub struct Args {
cmd_bench: bool,
flag_size: usize,
}
use clap::{Parser, Subcommand};
use std::time::Instant;

use docopt::Docopt;
use rayon::prelude::*;

use std::time::Instant;

// TODO: Investigate other cache patterns for row-major order that may be more
// parallelizable.
// https://tavianator.com/a-quick-trick-for-faster-naive-matrix-multiplication/
Expand Down Expand Up @@ -398,25 +380,42 @@ fn timed_matmul<F: FnOnce(&[f32], &[f32], &mut [f32])>(size: usize, f: F, name:
nanos
}

// Subcommands of the `matmul` demo. `Bench` is currently the only one; `main`
// matches on it and runs the timed matrix multiplications.
//
// The `///` line above the variant is consumed by clap's derive and shown as
// the subcommand's help text, so treat it as a user-facing string.
#[derive(Subcommand)]
enum Commands {
/// Run the benchmark in different modes and print the timings
Bench,
}

// Command-line arguments for the `matmul` demo, parsed with clap's derive API.
//
// The `///` lines on the fields are consumed by clap and shown as help text,
// so they are user-facing strings; explanatory notes use plain `//` comments.
#[derive(Parser)]
#[clap(about = "Parallel matrix multiplication")]
pub struct Args {
    // Which subcommand was selected on the command line.
    #[clap(subcommand)]
    command: Commands,

    /// Row-size of matrices (rounded up to power of 2)
    // `global = true` makes the option valid after the subcommand as well, so
    // both `matmul bench --size N` and `matmul --size N bench` are accepted.
    // Without it clap rejects `--size` once the subcommand name has been seen.
    #[clap(long, global = true, default_value_t = 1024)]
    size: usize,
}

pub fn main(args: &[String]) {
let args: Args = Docopt::new(USAGE)
.and_then(|d| d.argv(args).deserialize())
.unwrap_or_else(|e| e.exit());

if args.cmd_bench {
if args.flag_size <= 1024 {
// Crappy algorithm takes several minutes on larger inputs.
timed_matmul(args.flag_size, seq_matmul, "seq row-major");
let args: Args = Parser::parse_from(args);

match args.command {
Commands::Bench => {
if args.size <= 1024 {
// Crappy algorithm takes several minutes on larger inputs.
timed_matmul(args.size, seq_matmul, "seq row-major");
}
let seq = if args.size <= 2048 {
timed_matmul(args.size, seq_matmulz, "seq z-order")
} else {
0
};
let par = timed_matmul(args.size, matmulz, "par z-order");
timed_matmul(args.size, matmul_strassen, "par strassen");
let speedup = seq as f64 / par as f64;
println!("speedup: {:.2}x", speedup);
}
let seq = if args.flag_size <= 2048 {
timed_matmul(args.flag_size, seq_matmulz, "seq z-order")
} else {
0
};
let par = timed_matmul(args.flag_size, matmulz, "par z-order");
timed_matmul(args.flag_size, matmul_strassen, "par strassen");
let speedup = seq as f64 / par as f64;
println!("speedup: {:.2}x", speedup);
}
}

Expand Down
Loading