Skip to content

Commit 5eb7b68

Browse files
committed
add (provisional) support for injecting coman into jobs
1 parent 811aaaa commit 5eb7b68

File tree

9 files changed

+100
-14
lines changed

9 files changed

+100
-14
lines changed

.github/workflows/release.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ jobs:
2323
cargo_command: cargo
2424

2525
- os_name: Linux-aarch64
26-
os: ubuntu-latest
26+
os: ubuntu-24.04-arm
2727
target: aarch64-unknown-linux-musl
2828
bin: coman
2929
name: coman-Linux-aarch64-musl.tar.gz

Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,8 @@ resolver = "3"
55

66
[workspace.dependencies]
77
firecrest_client = { path = "./firecrest_client", version = "2.4.0" }
8+
9+
[profile.release]
10+
strip = true
11+
opt-level = "z"
12+
lto = true

coman/.config/config.toml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ sbatch_script_template = """
2424
#SBATCH --job-name={{name}}
2525
#SBATCH --ntasks=1
2626
#SBATCH --time=1:00:00
27-
srun {% if environment_file %}--environment={{environment_file}}{% endif %} {{command}}
27+
srun {% if environment_file %}--environment={{environment_file}}{% endif %} {% if coman_squash %}/coman/coman exec {% endif %}{{command}}
2828
"""
2929

3030
# the edf environment toml file template
@@ -37,7 +37,10 @@ srun {% if environment_file %}--environment={{environment_file}}{% endif %} {{co
3737
# mount: a dictionary of key/value pairs for folders to mount to the container, with key being the path in the cluster and value being the path in the container
3838
edf_file_template = """
3939
{% if edf_image %}image = "{{edf_image}}"{% endif %}
40-
mounts = [{% for source, target in mount %}"{{source}}:{{target}}",{% endfor %}]
40+
mounts = [
41+
{% for source, target in mount %}"{{source}}:{{target}}",{% endfor %}
42+
{% if coman_squash %}"{{coman_squash}}:/coman:sqsh"{% endif%}
43+
]
4144
workdir = "{{container_workdir}}"
4245
4346
[env]
@@ -49,7 +52,7 @@ workdir = "{{container_workdir}}"
4952
{% if ssh_public_key %}
5053
com.hooks.ssh.enabled = "true"
5154
com.hooks.ssh.authorize_ssh_key = "{{ ssh_public_key }}"
52-
com.hooks.ssh.port = 15263
55+
com.hooks.ssh.port = "15263"
5356
{% endif %}
5457
"""
5558

coman/src/cli.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,8 @@ pub enum CscsJobCommands {
247247
no_ssh: bool,
248248
#[clap(short, long, help="ssh public key to use", value_hint=ValueHint::FilePath)]
249249
ssh_key: Option<PathBuf>,
250+
#[clap(long, action, help = "don't upload and inject coman into the container")]
251+
no_coman: bool,
250252
#[clap(trailing_var_arg = true, help = "The command to run in the container", value_hint=ValueHint::Other)]
251253
command: Option<Vec<String>>,
252254
},

coman/src/config.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ pub struct ComanConfig {
7373
#[serde(default)]
7474
pub name: Option<String>,
7575
#[serde(default)]
76+
pub coman_squash_path: Option<PathBuf>,
77+
#[serde(default)]
7678
pub cscs: CscsConfig,
7779
}
7880

coman/src/cscs/api_client/client.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ pub struct JobStartOptions {
5252
pub script_spec: ScriptSpec,
5353
pub no_ssh: bool,
5454
pub ssh_key: Option<PathBuf>,
55+
pub no_coman: bool,
5556
}
5657

5758
pub struct CscsApi {
@@ -160,8 +161,8 @@ impl CscsApi {
160161
.wrap_err("couldn't change directory permission")?;
161162
Ok(())
162163
}
163-
pub async fn upload(&self, system_name: &str, path: PathBuf, file: Vec<u8>) -> Result<()> {
164-
post_filesystem_ops_upload(&self.client, system_name, path, file)
164+
pub async fn upload(&self, system_name: &str, target: PathBuf, file: Vec<u8>) -> Result<()> {
165+
post_filesystem_ops_upload(&self.client, system_name, target, file)
165166
.await
166167
.wrap_err("couldn't upload file")?;
167168
Ok(())
@@ -170,10 +171,10 @@ impl CscsApi {
170171
&self,
171172
system_name: &str,
172173
account: Option<String>,
173-
path: PathBuf,
174+
target: PathBuf,
174175
size: i64,
175176
) -> Result<(i64, S3Upload)> {
176-
let job = post_filesystem_transfer_upload(&self.client, system_name, account, path, size)
177+
let job = post_filesystem_transfer_upload(&self.client, system_name, account, target, size)
177178
.await
178179
.wrap_err("couldn't upload file")?;
179180
if let DownloadFileResponseTransferDirectives::S3(directives) = job.transfer_directives {

coman/src/cscs/cli.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ pub(crate) async fn cli_cscs_file_upload(
272272
}
273273
}
274274

275-
async fn upload_chunk(path: PathBuf, offset: u64, size: u64, url: Url) -> Result<String> {
275+
pub(crate) async fn upload_chunk(path: PathBuf, offset: u64, size: u64, url: Url) -> Result<String> {
276276
let client = reqwest::Client::new();
277277

278278
let source_file = File::open(path).await?;

coman/src/cscs/handlers.rs

Lines changed: 76 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ use std::{
99

1010
use color_eyre::{Result, eyre::eyre};
1111
use eyre::Context;
12+
use itertools::Itertools;
1213
use reqwest::Url;
1314

1415
use super::api_client::client::{EdfSpec, ScriptSpec};
@@ -19,6 +20,7 @@ use crate::{
1920
client::{CscsApi, JobStartOptions},
2021
types::{FileStat, FileSystemType, Job, JobDetail, PathEntry, PathType, S3Upload, System, UserInfo},
2122
},
23+
cli::upload_chunk,
2224
oauth2::{
2325
CLIENT_ID_SECRET_NAME, CLIENT_SECRET_SECRET_NAME, client_credentials_login, finish_cscs_device_login,
2426
start_cscs_device_login,
@@ -201,14 +203,74 @@ async fn setup_ssh(
201203
}
202204
}
203205

206+
async fn inject_coman_squash(
207+
api_client: &CscsApi,
208+
base_path: &Path,
209+
current_system: &str,
210+
options: &JobStartOptions,
211+
) -> Result<Option<PathBuf>> {
212+
if options.no_coman {
213+
return Ok(None);
214+
}
215+
let config = Config::new().unwrap();
216+
let local_squash_path = match config.values.coman_squash_path.clone() {
217+
Some(path) => path,
218+
None => todo!(),
219+
};
220+
let target = base_path.join("coman.sqsh");
221+
let file_meta = std::fs::metadata(local_squash_path.clone())?;
222+
223+
#[cfg(target_family = "unix")]
224+
let size = file_meta.size() as usize;
225+
226+
#[cfg(target_family = "windows")]
227+
let size = file_meta.file_size() as usize;
228+
229+
//upload squash file
230+
let transfer_data = api_client
231+
.transfer_upload(current_system, config.values.cscs.account, target.clone(), size as i64)
232+
.await?;
233+
let mut etags: Vec<String> = Vec::new();
234+
let client = reqwest::Client::new();
235+
let num_parts = transfer_data.1.num_parts;
236+
for (chunk_id, transfer_url) in transfer_data.1.parts_upload_urls.into_iter().enumerate() {
237+
println!(
238+
"Uploading part {}/{} ({}Mb)",
239+
chunk_id + 1,
240+
num_parts,
241+
transfer_data.1.part_size / 1024 / 1024
242+
);
243+
let etag = upload_chunk(
244+
local_squash_path.clone(),
245+
(chunk_id as u64) * transfer_data.1.part_size,
246+
transfer_data.1.part_size,
247+
transfer_url,
248+
)
249+
.await?;
250+
etags.push(etag);
251+
}
252+
253+
let body = etags
254+
.into_iter()
255+
.enumerate()
256+
.map(|(i, etag)| (i + 1, etag))
257+
.map(|(i, etag)| format!("<Part><PartNumber>{}</PartNumber><ETag>{}</ETag></Part>", i, etag))
258+
.join("");
259+
let body = format!("<CompleteMultipartUpload>{}</CompleteMultipartUpload>", body);
260+
let req = client.post(transfer_data.1.complete_upload_url).body(body).build()?;
261+
let resp = client.execute(req).await?;
262+
resp.error_for_status()?;
263+
Ok(Some(target))
264+
}
265+
204266
async fn handle_edf(
205267
api_client: &CscsApi,
206268
base_path: &Path,
207269
current_system: &str,
208270
envvars: &HashMap<String, String>,
209271
workdir: &str,
210272
options: &JobStartOptions,
211-
) -> Result<PathBuf> {
273+
) -> Result<(PathBuf, Option<PathBuf>)> {
212274
let config = Config::new().unwrap();
213275
let environment_path = base_path.join("environment.toml");
214276
match options.edf_spec.clone() {
@@ -248,21 +310,23 @@ async fn handle_edf(
248310
}
249311

250312
let ssh_path = setup_ssh(api_client, base_path, current_system, options).await?;
313+
let coman_squash = inject_coman_squash(api_client, base_path, current_system, options).await?;
251314

252315
let mut context = tera::Context::new();
253316
context.insert("edf_image", &docker_image.to_edf());
254317
context.insert("container_workdir", &workdir);
255318
context.insert("env", &envvars);
256319
context.insert("mount", &mount);
257320
context.insert("ssh_public_key", &ssh_path);
321+
context.insert("coman_squash", &coman_squash);
258322

259323
let environment_file = tera.render("environment.toml", &context)?;
260324
api_client.mkdir(current_system, base_path.to_path_buf()).await?;
261325
api_client.chmod(current_system, base_path.to_path_buf(), "700").await?;
262326
api_client
263327
.upload(current_system, environment_path.clone(), environment_file.into_bytes())
264328
.await?;
265-
Ok(environment_path)
329+
Ok((environment_path, coman_squash))
266330
}
267331
EdfSpec::Local(local_path) => {
268332
let environment_file = std::fs::read_to_string(local_path.clone())?;
@@ -271,17 +335,20 @@ async fn handle_edf(
271335
api_client
272336
.upload(current_system, environment_path.clone(), environment_file.into_bytes())
273337
.await?;
274-
Ok(environment_path)
338+
Ok((environment_path, None))
275339
}
276-
EdfSpec::Remote(path) => Ok(path),
340+
EdfSpec::Remote(path) => Ok((path, None)),
277341
}
278342
}
343+
344+
#[allow(clippy::too_many_arguments)]
279345
async fn handle_script(
280346
api_client: &CscsApi,
281347
job_name: &str,
282348
base_path: &Path,
283349
current_system: &str,
284350
environment_path: &Path,
351+
coman_squash: Option<PathBuf>,
285352
workdir: &str,
286353
options: &JobStartOptions,
287354
) -> Result<PathBuf> {
@@ -300,6 +367,9 @@ async fn handle_script(
300367
);
301368
context.insert("environment_file", &environment_path.to_path_buf());
302369
context.insert("container_workdir", &workdir);
370+
if let Some(path) = coman_squash {
371+
context.insert("coman_squash", &path);
372+
}
303373
let script = tera.render("script.sh", &context)?;
304374
api_client
305375
.upload(current_system, script_path.clone(), script.into_bytes())
@@ -360,7 +430,7 @@ pub async fn cscs_job_start(
360430
let mut envvars = config.values.cscs.env.clone();
361431
envvars.extend(options.env.clone());
362432

363-
let environment_path = handle_edf(
433+
let (environment_path, coman_squash) = handle_edf(
364434
&api_client,
365435
&base_path,
366436
current_system,
@@ -376,6 +446,7 @@ pub async fn cscs_job_start(
376446
&base_path,
377447
current_system,
378448
&environment_path,
449+
coman_squash,
379450
&container_workdir,
380451
&options,
381452
)

coman/src/main.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ async fn main() -> Result<()> {
9898
script_spec,
9999
no_ssh,
100100
ssh_key,
101+
no_coman,
101102
} => {
102103
cli_cscs_job_start(
103104
name,
@@ -113,6 +114,7 @@ async fn main() -> Result<()> {
113114
script_spec: script_spec.unwrap_or_default().into(),
114115
no_ssh,
115116
ssh_key,
117+
no_coman,
116118
},
117119
system,
118120
platform,

0 commit comments

Comments
 (0)