From 9fa82c3c597dfe038005195d949d75d76216368f Mon Sep 17 00:00:00 2001 From: konstin Date: Sun, 12 Nov 2023 13:40:43 +0100 Subject: [PATCH] Move more features to monotrail-utils --- Cargo.lock | 16 +++++-- Cargo.toml | 5 ++ crates/install-wheel-rs/src/wheel.rs | 4 +- crates/monotrail-utils/Cargo.toml | 9 ++++ crates/monotrail-utils/src/lib.rs | 2 + .../monotrail-utils/src/parse_cpython_args.rs | 41 +++++++++++++++++ .../src/standalone_python.rs | 46 +++++++++---------- crates/monotrail/Cargo.toml | 13 +++--- crates/monotrail/src/inject_and_run.rs | 46 ++----------------- crates/monotrail/src/lib.rs | 1 - crates/monotrail/src/monotrail.rs | 42 ++++++++++++----- .../monotrail/src/poetry_integration/run.rs | 4 +- crates/monotrail/src/ppipx.rs | 4 +- 13 files changed, 137 insertions(+), 96 deletions(-) create mode 100644 crates/monotrail-utils/src/parse_cpython_args.rs rename crates/{monotrail => monotrail-utils}/src/standalone_python.rs (92%) diff --git a/Cargo.lock b/Cargo.lock index b614b3d..1ffac3a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -287,9 +287,9 @@ dependencies = [ [[package]] name = "cpufeatures" -version = "0.2.9" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" +checksum = "ce420fe07aecd3e67c5f910618fe65e94158f6dcc0adf44e00d69ce2bdfe0fd0" dependencies = [ "libc", ] @@ -921,9 +921,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.148" +version = "0.2.150" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b" +checksum = "89d92a4743f9a61002fae18374ed11e7973f530cb3a3255fb354818118b2203c" [[package]] name = "libgit2-sys" @@ -1134,16 +1134,24 @@ name = "monotrail-utils" version = "0.0.1" dependencies = [ "anyhow", + "cpufeatures", "fs-err", + "fs2", "indoc 2.0.4", "logtest", + "mockito", "pep508_rs", + "regex", "serde", "serde_json", + "tar", + "target-lexicon", "tempfile", "toml", "tracing", "unscanny", + "ureq", + "zstd", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 57ba203..61d699e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,7 @@ resolver = "2" [workspace.dependencies] anyhow = "1.0.75" +cpufeatures = "0.2.11" fs-err = "2.9.0" fs2 = "0.4.3" indoc = "2.0.4" @@ -15,6 +16,7 @@ regex = "1.9.5" serde = { version = "1.0.188", features = ["derive"] } serde_json = "1.0.107" sha2 = "0.10.7" +tar = "0.4.40" target-lexicon = "0.12.11" tempfile = "3.8.0" thiserror = "1.0.48" @@ -22,8 +24,11 @@ toml = "0.8.0" tracing = "0.1.37" tracing-subscriber = "0.3.17" unscanny = "0.1.0" +ureq = { version = "2.7.1", features = ["json"] } walkdir = "2.4.0" which = "4.4.2" +widestring = "1.0.2" +zstd = "0.12.4" # Config for 'cargo dist' [workspace.metadata.dist] diff --git a/crates/install-wheel-rs/src/wheel.rs b/crates/install-wheel-rs/src/wheel.rs index f4274d6..78a4c38 100644 --- a/crates/install-wheel-rs/src/wheel.rs +++ b/crates/install-wheel-rs/src/wheel.rs @@ -776,8 +776,7 @@ fn install_script( // (#!/usr/bin/env python) for injection monotrail as python into PATH later let placeholder_python = b"#!python"; // scripts might be binaries, so we read an exact number of bytes instead of the first line as string - let mut start = Vec::new(); - start.resize(placeholder_python.len(), 0); + let mut start = vec![0; placeholder_python.len()]; script.read_exact(&mut start)?; let size_and_encoded_hash = if start == placeholder_python { let start = get_shebang(&location).as_bytes().to_vec(); @@ -1011,6 +1010,7 @@ pub fn parse_key_value_file( /// /// /// Wheel 1.0: +#[allow(clippy::too_many_arguments)] pub fn install_wheel( location: &InstallLocation, reader: impl Read + Seek, diff --git a/crates/monotrail-utils/Cargo.toml b/crates/monotrail-utils/Cargo.toml index 5121d12..02d953f 100644 --- a/crates/monotrail-utils/Cargo.toml +++ b/crates/monotrail-utils/Cargo.toml @@ -5,15 +5,24 @@ edition = "2021" [dependencies] anyhow = { workspace = true } +cpufeatures = { workspace = true } fs-err = { workspace = true } +fs2 = { workspace = true } pep508_rs = { workspace = true } +regex = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } +tar = { workspace = true } +target-lexicon = { workspace = true } +tempfile = { workspace = true } toml = { workspace = true } tracing = { workspace = true } unscanny = { workspace = true } +ureq = { workspace = true } +zstd = { workspace = true } [dev-dependencies] indoc = { workspace = true } logtest = { workspace = true } +mockito = { workspace = true } tempfile = { workspace = true } diff --git a/crates/monotrail-utils/src/lib.rs b/crates/monotrail-utils/src/lib.rs index 1dab22e..30c23df 100644 --- a/crates/monotrail-utils/src/lib.rs +++ b/crates/monotrail-utils/src/lib.rs @@ -3,3 +3,5 @@ pub use requirements_txt::RequirementsTxt; mod requirements_txt; +pub mod parse_cpython_args; +pub mod standalone_python; diff --git a/crates/monotrail-utils/src/parse_cpython_args.rs b/crates/monotrail-utils/src/parse_cpython_args.rs new file mode 100644 index 0000000..080d044 --- /dev/null +++ b/crates/monotrail-utils/src/parse_cpython_args.rs @@ -0,0 +1,41 @@ +/// python has idiosyncratic cli options that are hard to replicate with clap, so we roll our own. +/// Takes args without the first-is-current-program (i.e. python) convention. +/// +/// `usage: python [option] ... [-c cmd | -m mod | file | -] [arg] ...` +/// +/// Returns the script, if any +pub fn naive_python_arg_parser>(args: &[T]) -> Result, String> { + // These are hand collected from `python --help` + // See also https://docs.python.org/3/using/cmdline.html#command-line + let no_value_opts = [ + "-b", "-B", "-d", "-E", "-h", "-i", "-I", "-O", "-OO", "-q", "-s", "-S", "-u", "-v", "-V", + "-x", "-?", + ]; + let value_opts = ["--check-hash-based-pycs", "-W", "-X"]; + let mut arg_iter = args.iter(); + loop { + if let Some(arg) = arg_iter.next() { + if no_value_opts.contains(&arg.as_ref()) { + continue; + } else if value_opts.contains(&arg.as_ref()) { + // consume the value belonging to the options + let value = arg_iter.next(); + if value.is_none() { + return Err(format!("Missing argument for {}", arg.as_ref())); + } + continue; + } else if arg.as_ref() == "-c" || arg.as_ref() == "-m" { + let value = arg_iter.next(); + if value.is_none() { + return Err(format!("Missing argument for {}", arg.as_ref())); + } + return Ok(None); + } else { + return Ok(Some(arg.as_ref().to_string())); + } + } else { + // interactive python shell + return Ok(None); + } + } +} diff --git a/crates/monotrail/src/standalone_python.rs b/crates/monotrail-utils/src/standalone_python.rs similarity index 92% rename from crates/monotrail/src/standalone_python.rs rename to crates/monotrail-utils/src/standalone_python.rs index 4c6c0c2..804fc02 100644 --- a/crates/monotrail/src/standalone_python.rs +++ b/crates/monotrail-utils/src/standalone_python.rs @@ -1,9 +1,6 @@ //! Download and install standalone python builds (PyOxy) from //! -use crate::markers::marker_environment_from_python; -use crate::monotrail::{LaunchType, PythonContext}; -use crate::utils::cache_dir; use anyhow::{bail, Context}; use fs2::FileExt; use fs_err as fs; @@ -117,7 +114,7 @@ fn download_and_unpack_python(url: &str, target_dir: &Path) -> anyhow::Result<() .into_reader(); let tar = zstd::Decoder::new(tar_zstd)?; let mut archive = tar::Archive::new(tar); - fs::create_dir_all(&target_dir)?; + fs::create_dir_all(target_dir)?; archive.unpack(target_dir)?; Ok(()) } @@ -165,7 +162,7 @@ fn provision_python_inner( ) })?; // atomic installation by tempdir & rename - let temp_dir = tempdir_in(&python_parent_dir) + let temp_dir = tempdir_in(python_parent_dir) .context("Failed to create temporary directory for unpacking")?; match download_and_unpack_python(&url, temp_dir.path()) { Ok(()) => {} @@ -187,15 +184,13 @@ fn provision_python_inner( } } // we can use fs::rename here because we stay in the same directory - fs::rename(temp_dir, &unpack_dir).context("Failed to move installed python into place")?; + fs::rename(temp_dir, unpack_dir).context("Failed to move installed python into place")?; debug!("Installed python {}.{}", python_version.0, python_version.1); Ok(()) } -/// If a downloaded python version exists, return this, otherwise download and unpack a matching one -/// from indygreg/python-build-standalone -pub fn provision_python(python_version: (u8, u8)) -> anyhow::Result<(PythonContext, PathBuf)> { - let python_parent_dir = cache_dir()?.join("python-build-standalone"); +pub fn provision_python(python_version: (u8, u8), cache_dir: &Path) -> anyhow::Result<(PathBuf, PathBuf)> { + let python_parent_dir = cache_dir.join("python-build-standalone"); // We need this here for the locking logic fs::create_dir_all(&python_parent_dir).context("Failed to create cache dir")?; let unpack_dir = @@ -243,17 +238,8 @@ pub fn provision_python(python_version: (u8, u8)) -> anyhow::Result<(PythonConte .join("bin") .join("python3") }; - // TODO: Already init and use libpython here - let pep508_env = marker_environment_from_python(&python_binary); - let python_context = PythonContext { - sys_executable: python_binary, - version: python_version, - pep508_env, - launch_type: LaunchType::Binary, - }; - let python_home = unpack_dir.join("python").join("install"); - Ok((python_context, python_home)) + Ok((python_binary, python_home)) } /// Returns a regex matching a compatible optimized build from the indygreg/python-build-standalone @@ -300,14 +286,27 @@ pub fn filename_regex(major: u8, minor: u8) -> Regex { .unwrap() } + #[cfg(test)] mod test { + use std::path::PathBuf; + use tempfile::tempdir; use mockito::{Mock, ServerGuard}; - #[cfg(all(target_os = "linux", target_arch = "x86_64"))] use crate::standalone_python::provision_python; use crate::standalone_python::{find_python, PYTHON_STANDALONE_LATEST_RELEASE}; - use crate::utils::zstd_json_mock; + + pub fn zstd_json_mock(url: &str, fixture: impl Into) -> (ServerGuard, Mock) { + use fs_err::File; + + let mut server = mockito::Server::new(); + let mock = server + .mock("GET", url) + .with_header("content-type", "application/json") + .with_body(zstd::stream::decode_all(File::open(fixture).unwrap()).unwrap()) + .create(); + (server, mock) + } fn mock() -> (ServerGuard, Mock) { zstd_json_mock( @@ -336,7 +335,8 @@ mod test { #[cfg(all(target_os = "linux", target_arch = "x86_64"))] fn test_provision_nonexistent_version() { let _mocks = mock(); - let err = provision_python((3, 0)).unwrap_err(); + let tempdir = tempdir().unwrap(); + let err = provision_python((3, 0), tempdir.path()).unwrap_err(); let expected = vec![ r"Couldn't find a matching python 3.0 to download", r"Failed to find a matching python-build-standalone download: /^cpython-3\.0\.(\d+)\+(\d+)-x86_64\-unknown\-linux\-gnu-pgo\+lto-full\.tar\.zst$/. Searched in https://github.com/indygreg/python-build-standalone/releases/latest and https://github.com/indygreg/python-build-standalone/releases/tag/20220502", diff --git a/crates/monotrail/Cargo.toml b/crates/monotrail/Cargo.toml index a94ead0..338e8b3 100644 --- a/crates/monotrail/Cargo.toml +++ b/crates/monotrail/Cargo.toml @@ -11,7 +11,7 @@ name = "monotrail" [dependencies] anyhow = { workspace = true } clap = { version = "4.4.4", features = ["derive"] } -cpufeatures = "0.2.9" +cpufeatures = { workspace = true } data-encoding = "2.4.0" dirs = "5.0.1" fs-err = { workspace = true } @@ -21,8 +21,7 @@ indicatif = "0.17.7" install-wheel-rs = { version = "0.0.1", path = "../install-wheel-rs" } libc = "0.2.148" libloading = "0.8.0" -# For the zig build -libz-sys = { version = "1.1.12", features = ["static"] } +libz-sys = { version = "1.1.12", features = ["static"] } # For the zig build monotrail-utils = { version = "0.0.1", path = "../monotrail-utils" } nix = { version = "0.27.1", features = ["process"] } pep440_rs = "0.3.11" @@ -33,7 +32,7 @@ regex = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } sha2 = { workspace = true } -tar = "0.4.40" +tar = { workspace = true } target-lexicon = { workspace = true } tempfile = { workspace = true } thiserror = { workspace = true } @@ -41,10 +40,10 @@ toml = { workspace = true } tracing = { workspace = true } tracing-subscriber = { workspace = true } unscanny = { workspace = true } -ureq = { version = "2.7.1", features = ["json"] } +ureq = { workspace = true } walkdir = { workspace = true } -widestring = "1.0.2" -zstd = "0.12.4" +widestring = { workspace = true } +zstd = { workspace = true } [dev-dependencies] indoc = { workspace = true } diff --git a/crates/monotrail/src/inject_and_run.rs b/crates/monotrail/src/inject_and_run.rs index 4e4f505..9aef3ae 100644 --- a/crates/monotrail/src/inject_and_run.rs +++ b/crates/monotrail/src/inject_and_run.rs @@ -1,7 +1,8 @@ //! Communication with libpython +use monotrail_utils::parse_cpython_args::naive_python_arg_parser; use crate::monotrail::{find_scripts, install, load_specs, FinderData, InjectData, PythonContext}; -use crate::standalone_python::provision_python; +use crate::monotrail::provision_python_env; use crate::DEFAULT_PYTHON_VERSION; use anyhow::{bail, format_err, Context}; use fs_err as fs; @@ -97,47 +98,6 @@ unsafe fn pre_init(lib: &Library) -> anyhow::Result<()> { Ok(()) } -/// python has idiosyncratic cli options that are hard to replicate with clap, so we roll our own. -/// Takes args without the first-is-current-program (i.e. python) convention. -/// -/// `usage: python [option] ... [-c cmd | -m mod | file | -] [arg] ...` -/// -/// Returns the script, if any -pub fn naive_python_arg_parser>(args: &[T]) -> Result, String> { - // These are hand collected from `python --help` - // See also https://docs.python.org/3/using/cmdline.html#command-line - let no_value_opts = [ - "-b", "-B", "-d", "-E", "-h", "-i", "-I", "-O", "-OO", "-q", "-s", "-S", "-u", "-v", "-V", - "-x", "-?", - ]; - let value_opts = ["--check-hash-based-pycs", "-W", "-X"]; - let mut arg_iter = args.iter(); - loop { - if let Some(arg) = arg_iter.next() { - if no_value_opts.contains(&arg.as_ref()) { - continue; - } else if value_opts.contains(&arg.as_ref()) { - // consume the value belonging to the options - let value = arg_iter.next(); - if value.is_none() { - return Err(format!("Missing argument for {}", arg.as_ref())); - } - continue; - } else if arg.as_ref() == "-c" || arg.as_ref() == "-m" { - let value = arg_iter.next(); - if value.is_none() { - return Err(format!("Missing argument for {}", arg.as_ref())); - } - return Ok(None); - } else { - return Ok(Some(arg.as_ref().to_string())); - } - } else { - // interactive python shell - return Ok(None); - } - } -} /// The way we're using to load symbol by symbol with the type generic is really ugly and cumbersome /// If you know how to do this with `extern` or even pyo3-ffi directly please tell me. @@ -355,7 +315,7 @@ pub fn run_python_args( extras: &[String], ) -> anyhow::Result { let (args, python_version) = determine_python_version(args, python_version)?; - let (python_context, python_home) = provision_python(python_version)?; + let (python_context, python_home) = provision_python_env(python_version)?; let script = if let Some(root) = root { Some(root.to_path_buf()) diff --git a/crates/monotrail/src/lib.rs b/crates/monotrail/src/lib.rs index 05b7573..cbc7316 100644 --- a/crates/monotrail/src/lib.rs +++ b/crates/monotrail/src/lib.rs @@ -33,7 +33,6 @@ mod ppipx; mod python_bindings; mod source_distribution; mod spec; -mod standalone_python; mod utils; mod venv_parser; mod verify_installation; diff --git a/crates/monotrail/src/monotrail.rs b/crates/monotrail/src/monotrail.rs index 272dbe3..b1506c7 100644 --- a/crates/monotrail/src/monotrail.rs +++ b/crates/monotrail/src/monotrail.rs @@ -9,14 +9,12 @@ use crate::poetry_integration::read_dependencies::{ }; use crate::read_poetry_specs; use crate::spec::RequestedSpec; -use crate::standalone_python::provision_python; use crate::utils::{cache_dir, get_dir_content}; use anyhow::{bail, Context}; use fs_err as fs; use fs_err::{DirEntry, File}; use install_wheel_rs::{CompatibleTags, InstallLocation, Script, SHEBANG_PYTHON}; use pep508_rs::MarkerEnvironment; -use serde::Serialize; use std::collections::{BTreeMap, HashMap}; use std::env::{current_dir, current_exe}; #[cfg(unix)] @@ -27,6 +25,9 @@ use std::process::Command; use std::{env, io}; use tempfile::TempDir; use tracing::{debug, info, trace, warn}; +use monotrail_utils::standalone_python::provision_python; +use crate::markers::marker_environment_from_python; +use serde::Serialize; #[derive(Debug, Clone, Copy, Eq, PartialEq)] enum LockfileType { @@ -657,8 +658,7 @@ pub fn is_python_script(executable: &Path) -> anyhow::Result { let mut executable_file = File::open(&executable) .context("the executable file was right there and is now unreadable ಠ_ಠ")?; // scripts might be binaries, so we read an exact number of bytes instead of the first line as string - let mut start = Vec::new(); - start.resize(SHEBANG_PYTHON.as_bytes().len(), 0); + let mut start = vec![0; SHEBANG_PYTHON.as_bytes().len()]; executable_file.read_exact(&mut start)?; let is_python_script = start == SHEBANG_PYTHON.as_bytes(); Ok(is_python_script) @@ -673,7 +673,7 @@ pub fn run_command( args: &[String], ) -> anyhow::Result { let (args, python_version) = determine_python_version(args, python_version)?; - let (python_context, python_home) = provision_python(python_version)?; + let (python_context, python_home) = provision_python_env(python_version)?; let (specs, root_scripts, lockfile, root) = load_specs(root, extras, &python_context)?; let finder_data = install(&specs, root_scripts, lockfile, Some(root), &python_context)?; @@ -760,20 +760,21 @@ pub fn run_command_finder_data( { // Sorry for the to_string_lossy all over the place // https://stackoverflow.com/a/38948854/3549270 - let executable_c_str = CString::new(script_path.to_string_lossy().as_bytes()) - .context("Failed to convert executable path")?; + // unwrap safety: Strings can never contain internal null bytes + let executable_c_str = CString::new(script_path.to_string_lossy().as_bytes()).unwrap(); let args_c_string = args .iter() .map(|arg| { - CString::new(arg.as_bytes()).context("Failed to convert executable argument") + // unwrap safety: Strings can never contain internal null bytes + CString::new(arg.as_bytes()).unwrap() }) - .collect::>>()?; + .collect::>(); // We replace the current process with the new process is it's like actually just running // the real thing. // Note the that this may launch a python script, a native binary or anything else - nix::unistd::execv(&executable_c_str, &args_c_string) - .context("Failed to launch process")?; + // unwrap safety: Infallible (that's the actual type) + nix::unistd::execv(&executable_c_str, &args_c_string).unwrap(); unreachable!() } #[cfg(windows)] @@ -808,7 +809,7 @@ pub fn cli_from_git( let trail_args = args[1..].to_vec(); let (trail_args, python_version) = determine_python_version(&trail_args, python_version.as_deref())?; - let (python_context, python_home) = provision_python(python_version)?; + let (python_context, python_home) = provision_python_env(python_version)?; let (specs, repo_dir, lockfile) = specs_from_git(git_url, revision, &extras, None, &python_context)?; @@ -841,3 +842,20 @@ pub fn cli_from_git( }; Ok(Some(exit_code)) } + +/// If a downloaded python version exists, return this, otherwise download and unpack a matching one +/// from indygreg/python-build-standalone +pub fn provision_python_env(python_version: (u8, u8)) -> anyhow::Result<(PythonContext, PathBuf)> { + let (python_binary, python_home) = provision_python(python_version, cache_dir()?.as_path())?; + + // TODO: Already init and use libpython here + let pep508_env = marker_environment_from_python(&python_binary); + let python_context = PythonContext { + sys_executable: python_binary, + version: python_version, + pep508_env, + launch_type: LaunchType::Binary, + }; + + Ok((python_context, python_home)) +} diff --git a/crates/monotrail/src/poetry_integration/run.rs b/crates/monotrail/src/poetry_integration/run.rs index 55b9f45..e53656e 100644 --- a/crates/monotrail/src/poetry_integration/run.rs +++ b/crates/monotrail/src/poetry_integration/run.rs @@ -5,7 +5,7 @@ use crate::monotrail::install; use crate::poetry_integration::poetry_lock::PoetryLock; use crate::poetry_integration::poetry_toml::PoetryPyprojectToml; use crate::read_poetry_specs; -use crate::standalone_python::provision_python; +use crate::monotrail::provision_python_env; use anyhow::Context; use std::collections::BTreeMap; use std::path::PathBuf; @@ -14,7 +14,7 @@ use std::path::PathBuf; /// argument pub fn poetry_run(args: &[String], python_version: Option<&str>) -> anyhow::Result { let (args, python_version) = determine_python_version(&args, python_version)?; - let (python_context, python_home) = provision_python(python_version)?; + let (python_context, python_home) = provision_python_env(python_version)?; let pyproject_toml = include_str!("../../../../resources/poetry_boostrap_lock/pyproject.toml"); let poetry_toml: PoetryPyprojectToml = toml::from_str(pyproject_toml).unwrap(); diff --git a/crates/monotrail/src/ppipx.rs b/crates/monotrail/src/ppipx.rs index 19c37cf..bb97097 100644 --- a/crates/monotrail/src/ppipx.rs +++ b/crates/monotrail/src/ppipx.rs @@ -3,7 +3,7 @@ use crate::poetry_integration::lock::poetry_resolve_from_dir; use crate::poetry_integration::poetry_toml; use crate::poetry_integration::poetry_toml::PoetryPyprojectToml; use crate::poetry_integration::read_dependencies::read_toml_files; -use crate::standalone_python::provision_python; +use crate::monotrail::provision_python_env; use crate::utils::data_local_dir; use crate::{parse_major_minor, read_poetry_specs, DEFAULT_PYTHON_VERSION}; use anyhow::Context; @@ -29,7 +29,7 @@ pub fn ppipx( .transpose()? .unwrap_or(DEFAULT_PYTHON_VERSION); - let (python_context, python_home) = provision_python(python_version)?; + let (python_context, python_home) = provision_python_env(python_version)?; let package = package.unwrap_or(command); let package_extras = if extras.is_empty() { package.to_string()