From aae0398129a214e9704504db0a54da7f819151c3 Mon Sep 17 00:00:00 2001 From: Jamie Strandboge Date: Tue, 4 Feb 2025 16:23:01 -0600 Subject: [PATCH 01/19] feat(ci): fetch and configure for python-build-standalone binaries --- .circleci/config.yml | 26 +++ .../scripts/fetch-python-standalone.bash | 170 ++++++++++++++++++ 2 files changed, 196 insertions(+) create mode 100755 .circleci/scripts/fetch-python-standalone.bash diff --git a/.circleci/config.yml b/.circleci/config.yml index 0e8681d2275..695dd4c745c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -213,6 +213,30 @@ jobs: name: cargo nextest command: TEST_LOG= RUST_LOG=info RUST_LOG_SPAN_EVENTS=full RUST_BACKTRACE=1 cargo nextest run --workspace --failure-output immediate-final --no-fail-fast + # Fetch python-build-standalone for official builds + fetch-python: + machine: + image: ubuntu-2204:current + resource_class: medium + environment: + PBS_DATE: "20250106" + PBS_VERSION: "3.11.11" + steps: + - checkout + - run: + name: pull Python Build Standalone + command: | + .circleci/scripts/fetch-python-standalone.bash \ + "python-artifacts" \ + "${PBS_DATE}" \ + "${PBS_VERSION}" + - store_artifacts: + path: python-artifacts + - persist_to_workspace: + root: . + paths: + - python-artifacts + # Build a dev binary. # # Compiles a binary with the default ("dev") cargo profile from the influxdb3 source @@ -561,6 +585,8 @@ workflows: <<: *any_filter - test: <<: *any_filter + - fetch-python: + <<: *any_filter - build-dev: # This workflow requires secrets stored in the environment. 
# These are not passed to workflows executed on forked diff --git a/.circleci/scripts/fetch-python-standalone.bash b/.circleci/scripts/fetch-python-standalone.bash new file mode 100755 index 00000000000..0910f8eba1b --- /dev/null +++ b/.circleci/scripts/fetch-python-standalone.bash @@ -0,0 +1,170 @@ +#!/bin/bash +set -euo pipefail + +# See https://github.com/astral-sh/python-build-standalone/releases +# USAGE: +# fetch-python-standalone.bash DIR PBS_DATE PBS_VERSION +# +# Eg: +# $ fetch-python-standalone.bash ./python-artifacts 20250106 3.11.11 +# +# This script is meant to be called by CircleCI such that the specified +# DIR is persisted to a workspace that is later attached at /tmp/workspace/DIR. +# In this manner, a build script can do something like: +# PYO3_CONFIG_FILE=/tmp/workspace/DIR/TARGET/pyo3_config_file.txt cargo build... + +readonly DOWNLOAD_DIR="$1" + +# URLs are constructed from this. Eg: +# https://github.com/astral-sh/.../PBS_DATE/cpython-PBS_VERSION+PBS_DATE-... +readonly PBS_DATE="$2" +readonly PBS_VERSION="$3" +readonly PBS_MAJ_MIN=${PBS_VERSION%.*} +readonly PBS_TOP_DIR="/tmp/workspace" + +# Official influxdb3 builds use python-build-standalone since it: +# - is built to run well as an embedded interpreter +# - has a good upstream maintenance story (https://github.com/astral-sh) with +# lots of users and a corporate sponsor +# - should deliver a consistent experience across OSes and architectures +# +# python-build-standalone provides many different builds. 
Official influxdb3 +# build targets: +# - aarch64-apple-darwin +# - aarch64-unknown-linux-gnu +# - x86_64-unknown-linux-gnu +# - x86_64-pc-windows-msvc-shared +# +# Note: musl builds of python-build-standalone currently (as of 2025-02-04) +# have limitations: +# - don't support importing pre-built python wheels (must compile and link 3rd +# party extensions into the binary (influxdb3)) +# - historical performance issues with python and musl +# - availability limited to x86_64 (no aarch64) +# +# References +# - https://github.com/astral-sh/python-build-standalone/blob/main/docs/distributions.rst +# - https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst +# - https://edu.chainguard.dev/chainguard/chainguard-images/about/images-compiled-programs/glibc-vs-musl/#python-builds +# - https://pythonspeed.com/articles/alpine-docker-python/ +readonly TARGETS="aarch64-apple-darwin aarch64-unknown-linux-gnu x86_64-unknown-linux-gnu x86_64-pc-windows-msvc-shared" + +fetch() { + target="$1" + suffix="${2}" + if [ "${suffix}" = "full.tar.zst" ]; then + if [ "${target}" = "x86_64-pc-windows-msvc-shared" ]; then + suffix="pgo-${2}" + else + suffix="debug-${2}" + fi + fi + binary="cpython-${PBS_VERSION}+${PBS_DATE}-${target}-${suffix}" + url="https://github.com/astral-sh/python-build-standalone/releases/download/${PBS_DATE}/${binary}" + + echo "Downloading ${binary}" + curl --proto '=https' --tlsv1.2 -sS -L "$url" -o "${DOWNLOAD_DIR}/${binary}" + + echo "Downloading ${binary}.sha256" + curl --proto '=https' --tlsv1.2 -sS -L "${url}.sha256" -o "${DOWNLOAD_DIR}/${binary}.sha256" + dl_sha=$(cut -d ' ' -f 1 "${DOWNLOAD_DIR}/${binary}.sha256") + if [ -z "$dl_sha" ]; then + echo "Could not find properly formatted SHA256 in '${DOWNLOAD_DIR}/${binary}.sha256'" + exit 1 + fi + + printf "Verifying %s: " "${binary}" + ch_sha=$(sha256sum "${DOWNLOAD_DIR}/${binary}" | cut -d ' ' -f 1) + if [ "$ch_sha" = "$dl_sha" ]; then + echo "OK" + else + echo "ERROR (${ch_sha} != 
${dl_sha})" + exit 1 + fi + + echo "Unpacking ${binary} to '${DOWNLOAD_DIR}'" + UNPACK_DIR="${DOWNLOAD_DIR}/${target}" + if [ "${target}" = "x86_64-pc-windows-msvc-shared" ]; then + UNPACK_DIR="${DOWNLOAD_DIR}/x86_64-pc-windows-gnu" + fi + mkdir "${UNPACK_DIR}" 2>/dev/null || true + if [[ "${suffix}" = *full.tar.zst ]]; then + # we only need the licensing from the full distribution + tar -C "${UNPACK_DIR}" --zstd -xf "${DOWNLOAD_DIR}/${binary}" python/PYTHON.json python/licenses + mv "${UNPACK_DIR}/python/PYTHON.json" "${UNPACK_DIR}/python/licenses" + else + tar -C "${UNPACK_DIR}" -zxf "${DOWNLOAD_DIR}/${binary}" + fi + + echo "Removing ${binary}" + rm -f "${DOWNLOAD_DIR}/${binary}" "${DOWNLOAD_DIR}/${binary}.sha256" + + if [[ "${suffix}" = *install_only_stripped.tar.gz ]]; then + echo "Creating ${UNPACK_DIR}/pyo3_config_file.txt" + PYO3_CONFIG_FILE="${UNPACK_DIR}/pyo3_config_file.txt" + PBS_DIR="${PBS_TOP_DIR}"/$(basename "${DOWNLOAD_DIR}")/$(basename "${UNPACK_DIR}") + if [ "${target}" = "x86_64-pc-windows-msvc-shared" ]; then + cat > "${PYO3_CONFIG_FILE}" < "${PYO3_CONFIG_FILE}" < "${UNPACK_DIR}/python/LICENSE.md" <> "${UNPACK_DIR}/python/LICENSE.md" < Date: Wed, 5 Feb 2025 15:15:57 -0600 Subject: [PATCH 02/19] fix: make the process engine usable on windows --- influxdb3_processing_engine/src/virtualenv.rs | 39 ++++++++++++++----- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/influxdb3_processing_engine/src/virtualenv.rs b/influxdb3_processing_engine/src/virtualenv.rs index f16df2719d1..56ed71b83ea 100644 --- a/influxdb3_processing_engine/src/virtualenv.rs +++ b/influxdb3_processing_engine/src/virtualenv.rs @@ -17,7 +17,14 @@ pub enum VenvError { } fn get_python_version() -> Result<(u8, u8), std::io::Error> { - let output = Command::new("python3") + // linux/osx have python3, but windows only has python. 
Use python since it is in all of them + let python_exe = if cfg!(target_os = "windows") { + "python" + } else { + "python3" + }; + + let output = Command::new(python_exe) .args([ "-c", "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')", @@ -63,11 +70,17 @@ pub fn init_pyo3() { }); } -#[cfg(unix)] +// FIXME: this still doesn't work right on windows (sys.path isn't adding the +// venv's site-packages). Perhaps look at /path/to/venv/pyvenv.cfg? pub(crate) fn initialize_venv(venv_path: &Path) -> Result<(), VenvError> { use std::process::Command; - let activate_script = venv_path.join("bin").join("activate"); + let activate_script = if cfg!(target_os = "windows") { + venv_path.join("Scripts").join("activate") + } else { + venv_path.join("bin").join("activate") + }; + if !activate_script.exists() { return Err(VenvError::InitError(format!( "Activation script not found at {:?}", @@ -75,13 +88,19 @@ pub(crate) fn initialize_venv(venv_path: &Path) -> Result<(), VenvError> { ))); } - let output = Command::new("bash") - .arg("-c") - .arg(format!( - "source {} && env", - activate_script.to_str().unwrap() - )) - .output()?; + let output = if cfg!(target_os = "windows") { + Command::new("cmd") + .args(["/C", activate_script.to_str().unwrap()]) + .output()? + } else { + Command::new("bash") + .arg("-c") + .arg(format!( + "source {} && env", + activate_script.to_str().unwrap() + )) + .output()? 
+ }; if !output.status.success() { return Err(VenvError::InitError( From 4c43985eecb41027cecc35f81848da4d4ad060c1 Mon Sep 17 00:00:00 2001 From: Jamie Strandboge Date: Wed, 5 Feb 2025 14:26:55 -0600 Subject: [PATCH 03/19] feat(ci): build with python-build-standalone (and drop musl) --- .circleci/config.yml | 48 ++++++++++++++----- .circleci/packages/config.yaml | 9 ++-- .../influxdb3/fs/usr/lib/influxdb3/.keepdir | 0 3 files changed, 43 insertions(+), 14 deletions(-) create mode 100644 .circleci/packages/influxdb3/fs/usr/lib/influxdb3/.keepdir diff --git a/.circleci/config.yml b/.circleci/config.yml index 695dd4c745c..03db7d1804d 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -266,12 +266,23 @@ jobs: type: string steps: - checkout + - attach_workspace: + at: /tmp/workspace + - run: + name: Extract python for this target + command: | + tar -C /tmp/workspace/python-artifacts -zxvf /tmp/workspace/python-artifacts/all.tar.gz ./<< parameters.target >> + - run: + name: Show PYO3_CONFIG_FILE + command: cat /tmp/workspace/python-artifacts/<< parameters.target >>/pyo3_config_file.txt - run: name: Install Target command: rustup target add << parameters.target >> - run: name: Cargo build - command: target-env cargo build --target=<< parameters.target >> --workspace + command: | + export PYO3_CONFIG_FILE=/tmp/workspace/python-artifacts/<< parameters.target >>/pyo3_config_file.txt + target-env cargo build --target=<< parameters.target >> --features="system-py" --workspace - when: condition: not: @@ -319,19 +330,32 @@ jobs: default: release steps: - checkout + - attach_workspace: + at: /tmp/workspace + - run: + name: Extract python for this target + command: | + tar -C /tmp/workspace/python-artifacts -zxvf /tmp/workspace/python-artifacts/all.tar.gz ./<< parameters.target >> + - run: + name: Show PYO3_CONFIG_FILE + command: cat /tmp/workspace/python-artifacts/<< parameters.target >>/pyo3_config_file.txt - run: name: Install Target command: rustup target add << 
parameters.target >> - run: name: Cargo release build - command: target-env cargo build --target=<< parameters.target >> --profile=<< parameters.profile >> --workspace + command: | + export PYO3_CONFIG_FILE=/tmp/workspace/python-artifacts/<< parameters.target >>/pyo3_config_file.txt + target-env cargo build --target=<< parameters.target >> --features="system-py" --profile=<< parameters.profile >> --workspace # linking might take a while and doesn't produce CLI output no_output_timeout: 30m - run: name: tar and gzip build artifacts command: | mkdir -p artifacts - tar --ignore-failed-read -czvf "${PWD}/artifacts/influxdb3-core_<< parameters.target >>.tar.gz" -C "${PWD}/target/<< parameters.target >>/<< parameters.profile >>" influxdb3{,.exe} + tar --ignore-failed-read -cvf "${PWD}/artifacts/influxdb3-core_<< parameters.target >>.tar" -C "${PWD}/target/<< parameters.target >>/<< parameters.profile >>" influxdb3{,.exe} + tar --ignore-failed-read -rvf "${PWD}/artifacts/influxdb3-core_<< parameters.target >>.tar" -C "/tmp/workspace/python-artifacts/<< parameters.target >>" python + gzip "${PWD}/artifacts/influxdb3-core_<< parameters.target >>.tar" - store_artifacts: path: artifacts - persist_to_workspace: @@ -543,6 +567,8 @@ workflows: version: 2 snapshot: jobs: + - fetch-python: + <<: *main_filter - build-release: <<: *main_filter name: build-snapshot-<< matrix.target >> @@ -553,10 +579,10 @@ workflows: target: - aarch64-apple-darwin - aarch64-unknown-linux-gnu - - aarch64-unknown-linux-musl - x86_64-pc-windows-gnu - x86_64-unknown-linux-gnu - - x86_64-unknown-linux-musl + requires: + - fetch-python - build-packages: <<: *main_filter requires: @@ -583,10 +609,10 @@ workflows: <<: *any_filter - cargo-audit: <<: *any_filter - - test: - <<: *any_filter - fetch-python: <<: *any_filter + - test: + <<: *any_filter - build-dev: # This workflow requires secrets stored in the environment. 
# These are not passed to workflows executed on forked @@ -599,10 +625,10 @@ workflows: target: - aarch64-apple-darwin - aarch64-unknown-linux-gnu - - aarch64-unknown-linux-musl - x86_64-pc-windows-gnu - x86_64-unknown-linux-gnu - - x86_64-unknown-linux-musl + requires: + - fetch-python - doc: <<: *any_filter - build-release: @@ -613,10 +639,10 @@ workflows: target: - aarch64-apple-darwin - aarch64-unknown-linux-gnu - - aarch64-unknown-linux-musl - x86_64-pc-windows-gnu - x86_64-unknown-linux-gnu - - x86_64-unknown-linux-musl + requires: + - fetch-python - build-packages: <<: *release_filter requires: diff --git a/.circleci/packages/config.yaml b/.circleci/packages/config.yaml index 5162612bf5b..71c34f54044 100644 --- a/.circleci/packages/config.yaml +++ b/.circleci/packages/config.yaml @@ -5,19 +5,19 @@ version: value: '3.0.0+snapshot-{{env.CIRCLE_SHA1[:8]}}' sources: - - binary: /tmp/workspace/artifacts/influxdb3-core_x86_64-unknown-linux-musl.tar.gz + - binary: /tmp/workspace/artifacts/influxdb3-core_x86_64-unknown-linux-gnu.tar.gz target: artifacts/ arch: amd64 plat: linux - - binary: /tmp/workspace/artifacts/influxdb3-core_aarch64-unknown-linux-musl.tar.gz + - binary: /tmp/workspace/artifacts/influxdb3-core_aarch64-unknown-linux-gnu.tar.gz target: artifacts/ arch: arm64 plat: linux - binary: /tmp/workspace/artifacts/influxdb3-core_aarch64-apple-darwin.tar.gz target: artifacts/ - arch: amd64 + arch: arm64 plat: darwin - binary: /tmp/workspace/artifacts/influxdb3-core_x86_64-pc-windows-gnu.tar.gz @@ -37,6 +37,9 @@ packages: binaries: - influxdb3 - influxdb3.exe + python-runtimes: + - source: python + target: usr/lib/influxdb3 extras: - source: LICENSE-APACHE target: usr/share/influxdb3/LICENSE-APACHE diff --git a/.circleci/packages/influxdb3/fs/usr/lib/influxdb3/.keepdir b/.circleci/packages/influxdb3/fs/usr/lib/influxdb3/.keepdir new file mode 100644 index 00000000000..e69de29bb2d From 90d75faad2ef1bf1e2aea7aa025b945752b837b4 Mon Sep 17 00:00:00 2001 From: 
Jamie Strandboge Date: Mon, 10 Feb 2025 12:09:46 -0600 Subject: [PATCH 04/19] fix(ci): set rpath on Linux and libpath on OSX in ci --- .circleci/config.yml | 34 ++++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 03db7d1804d..2d3b1cece46 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -244,7 +244,7 @@ jobs: # Build a dev binary. build-dev: docker: - - image: us-east1-docker.pkg.dev/influxdata-team-edge/ci-support/ci-cross-influxdb3:latest + - image: us-east1-docker.pkg.dev/influxdata-team-edge/ci-support/ci-cross-influxdb3@sha256:63726f571865bfb13232006bbca7aac42d2178f4c19a3526a7e5ee02ada836f8 auth: username: _json_key password: $CISUPPORT_GCS_AUTHORIZATION @@ -305,7 +305,7 @@ jobs: # Compile cargo "release" profile binaries for influxdb3 edge releases build-release: docker: - - image: us-east1-docker.pkg.dev/influxdata-team-edge/ci-support/ci-cross-influxdb3:latest + - image: us-east1-docker.pkg.dev/influxdata-team-edge/ci-support/ci-cross-influxdb3@sha256:63726f571865bfb13232006bbca7aac42d2178f4c19a3526a7e5ee02ada836f8 auth: username: _json_key password: $CISUPPORT_GCS_AUTHORIZATION @@ -349,6 +349,36 @@ jobs: target-env cargo build --target=<< parameters.target >> --features="system-py" --profile=<< parameters.profile >> --workspace # linking might take a while and doesn't produce CLI output no_output_timeout: 30m + - when: + condition: + or: + - equal: [ << parameters.target >>, aarch64-unknown-linux-gnu ] + - equal: [ << parameters.target >>, x86_64-unknown-linux-gnu ] + steps: + - run: + # XXX: better to use 'cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN/python/lib' + name: adjust RPATH for linux + command: | + # for tarballs + echo "Running: patchelf --add-rpath '$ORIGIN/python/lib' '${PWD}/target/<< parameters.target >>/<< parameters.profile >>/influxdb3'" + patchelf --add-rpath '$ORIGIN/python/lib' "${PWD}/target/<< parameters.target >>/<< 
parameters.profile >>/influxdb3" + # for deb/rpm installs + echo "Running: patchelf --add-rpath '$ORIGIN/../lib/influxdb3/python/lib' '${PWD}/target/<< parameters.target >>/<< parameters.profile >>/influxdb3'" + patchelf --add-rpath '$ORIGIN/../lib/influxdb3/python/lib' "${PWD}/target/<< parameters.target >>/<< parameters.profile >>/influxdb3" + - when: + condition: + equal: [ << parameters.target >>, aarch64-apple-darwin ] + steps: + - run: + # XXX: better to use 'cargo:rustc-link-arg=-Wl,-rpath,@executable_path/python/lib' + name: adjust LC_LOAD_DYLIB path for darwin + command: | + export PBS_LIBPYTHON=$(grep '^lib_name=' /tmp/workspace/python-artifacts/<< parameters.target >>/pyo3_config_file.txt | cut -d = -f 2) + echo "Running: /osxcross/bin/aarch64-apple-darwin22.2-install_name_tool -change '/install/lib/lib${PBS_LIBPYTHON}.dylib' '@executable_path/python/lib/lib${PBS_LIBPYTHON}.dylib' '${PWD}/target/<< parameters.target >>/<< parameters.profile >>/influxdb3'" + /osxcross/bin/aarch64-apple-darwin22.2-install_name_tool -change "/install/lib/lib${PBS_LIBPYTHON}.dylib" "@executable_path/python/lib/lib${PBS_LIBPYTHON}.dylib" "${PWD}/target/<< parameters.target >>/<< parameters.profile >>/influxdb3" + # re-sign after install_name_tool since osxcross won't do it + echo "Running: /usr/local/bin/rcodesign sign '${PWD}/target/<< parameters.target >>/<< parameters.profile >>/influxdb3'" + /usr/local/bin/rcodesign sign "${PWD}/target/<< parameters.target >>/<< parameters.profile >>/influxdb3" - run: name: tar and gzip build artifacts command: | From 896f844fb382bfd23b999998bbac933375af2a38 Mon Sep 17 00:00:00 2001 From: Jamie Strandboge Date: Mon, 10 Feb 2025 13:59:22 -0600 Subject: [PATCH 05/19] fix: set PYTHONHOME everywhere and PYTHONPATH on Windows --- influxdb3/src/main.rs | 67 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/influxdb3/src/main.rs b/influxdb3/src/main.rs index 3c291b297a4..cb6b420ffb2 100644 --- 
a/influxdb3/src/main.rs +++ b/influxdb3/src/main.rs @@ -14,6 +14,8 @@ use dotenvy::dotenv; use influxdb3_clap_blocks::tokio::TokioIoConfig; use influxdb3_process::VERSION_STRING; use observability_deps::tracing::warn; +use std::env; +use std::path::{Path, PathBuf}; use trogging::{ cli::LoggingConfigBuilderExt, tracing_subscriber::{prelude::*, Registry}, @@ -120,6 +122,12 @@ fn main() -> Result<(), std::io::Error> { #[cfg(unix)] install_crash_handler(); // attempt to render a useful stacktrace to stderr + #[cfg(feature = "system-py")] + set_pythonhome(); + + #[cfg(all(target_os = "windows", feature = "system-py"))] + set_pythonpath(); + // load all environment variables from .env before doing anything load_dotenv(); @@ -301,3 +309,62 @@ fn init_logs_and_tracing( let subscriber = Registry::default().with(layers); trogging::install_global(subscriber) } + +// XXX: this should be somewhere more appropriate +#[cfg(feature = "system-py")] +fn set_pythonhome() { + // This would ideally be detected by pyo3, but it isn't + match env::var("PYTHONHOME") { + Ok(_) => {} + Err(env::VarError::NotPresent) => { + let exe_path = env::current_exe().unwrap(); + let exe_dir = exe_path.parent().unwrap(); + + let pythonhome: PathBuf = if cfg!(target_os = "linux") + && (exe_dir == Path::new("/usr/bin") || exe_dir == Path::new("/usr/local/bin")) + { + // Official Linux builds may be in /usr or /usr/local + // XXX: handle this for local build and install (eg DESTDIR) + let parent_dir = exe_dir.parent().unwrap(); + parent_dir.join("lib/influxdb3/python") + } else { + exe_dir.join("python") + }; + + if pythonhome.is_dir() { + unsafe { env::set_var("PYTHONHOME", pythonhome.to_str().unwrap()) }; + //println!("Set PYTHONHOME to '{}'", env::var("PYTHONHOME").unwrap()); + } else { + // TODO: use logger + eprintln!("Could not find python installation. 
May need to set PYTHONHOME"); + } + } + Err(e) => { + eprintln!("Failed to retrieve PYTHONHOME: {e}"); + } + }; +} + +// XXX: this should be somewhere more appropriate +#[cfg(target_os = "windows")] +fn set_pythonpath() { + let exe_path = env::current_exe().unwrap(); + let exe_dir = exe_path.parent().unwrap(); + let pythonpath = exe_dir.join("python/Lib"); + + // This shouldn't be needed, but it is on Windows + match env::var("PYTHONPATH") { + Ok(v) => { + let new_path = format!("{};{}", pythonpath.display(), v); + unsafe { env::set_var("PYTHONPATH", &new_path) }; + //println!("Updated PYTHONPATH to: {}", env::var("PYTHONPATH").unwrap()); + } + Err(env::VarError::NotPresent) => { + unsafe { env::set_var("PYTHONPATH", &pythonpath) }; + //println!("Updated PYTHONPATH to: {}", env::var("PYTHONPATH").unwrap()); + } + Err(e) => { + eprintln!("Failed to retrieve PYTHONPATH: {e}"); + } + } +} From 444cf8dc981022fe1a8334cc274151ec79260d94 Mon Sep 17 00:00:00 2001 From: Jamie Strandboge Date: Tue, 11 Feb 2025 13:44:48 -0600 Subject: [PATCH 06/19] chore(ci): update to use more recent ci-packager-next --- .circleci/config.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 2d3b1cece46..c684ee6a053 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -394,7 +394,7 @@ jobs: - artifacts build-packages: docker: - - image: us-east1-docker.pkg.dev/influxdata-team-edge/ci-support/ci-packager-next:latest + - image: us-east1-docker.pkg.dev/influxdata-team-edge/ci-support/ci-packager-next@sha256:db0cd91a5445c4287154cea1d4d5566735cb0d3b7b9e2a95724a83f9d979d497 auth: username: _json_key password: $CISUPPORT_GCS_AUTHORIZATION From e708d4a7fa842dd063b09b0abaf00d02aedaab93 Mon Sep 17 00:00:00 2001 From: Jamie Strandboge Date: Tue, 11 Feb 2025 15:02:08 -0600 Subject: [PATCH 07/19] fix(ci): adjust validate to allow certain dynamically linked libraries --- .circleci/scripts/package-validation/validate | 17 
+++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/.circleci/scripts/package-validation/validate b/.circleci/scripts/package-validation/validate index f6ca77b0830..f76e5555479 100755 --- a/.circleci/scripts/package-validation/validate +++ b/.circleci/scripts/package-validation/validate @@ -79,12 +79,25 @@ fi if [[ "${NEEDED:-}" ]] then - cat <<'EOF' + if echo "$NEEDED" | grep -Eq "Shared library: \[libpython" ; then + # if have libpython, ensure we are only linking things we expect + if echo "$NEEDED" | grep -Ev "Shared library: \[(ld-linux.*|libc|libdl|libgcc_s|libm|libpthread|libpython3.[0-9]{2})\.so" ; then + cat <<'EOF' +ERROR: found unexpected dynamically linked libraries! This may + prevent all platforms from running influxdb3 without + installing these dependencies. +EOF + exit 2 + fi + else + # if no libpython, then complain if any are NEEDED + cat <<'EOF' ERROR: influxdb3 not statically linked! This may prevent all platforms from running influxdb3 without installing separate dependencies. 
EOF - exit 2 + exit 2 + fi fi printf 'Finished validating influxdb3!\n' From 361e70618f5ef3573c90ea082dd41fbf68110ab9 Mon Sep 17 00:00:00 2001 From: Jamie Strandboge Date: Tue, 11 Feb 2025 15:09:23 -0600 Subject: [PATCH 08/19] chore: remove install_influxdb.sh (using install_influxdb3.sh instead) --- install_influxdb.sh | 394 -------------------------------------------- 1 file changed, 394 deletions(-) delete mode 100644 install_influxdb.sh diff --git a/install_influxdb.sh b/install_influxdb.sh deleted file mode 100644 index 6acbd09f043..00000000000 --- a/install_influxdb.sh +++ /dev/null @@ -1,394 +0,0 @@ -#!/bin/sh -e - -readonly GREEN='\033[0;32m' -readonly BOLD='\033[1m' -readonly BOLDGREEN='\033[1;32m' -readonly DIM='\033[2m' -readonly NC='\033[0m' # No Color - -ARCHITECTURE=$(uname -m) -ARTIFACT="" -IS_MUSL="" -OS="" -INSTALL_LOC=~/.influxdb -BINARY_NAME="influxdb3" -PORT=8181 - -EDITION="Core" -EDITION_TAG="core" -if [ "$1" = "enterprise" ]; then - EDITION="Enterprise" - EDITION_TAG="enterprise" - shift 1 -fi - -### OS AND ARCHITECTURE DETECTION ### -case "$(uname -s)" in - Linux*) OS="Linux";; - Darwin*) OS="Darwin";; - *) OS="UNKNOWN";; -esac - -if [ "${OS}" = "Linux" ]; then - # ldd is a shell script but on some systems (eg Ubuntu) security hardening - # prevents it from running when invoked directly. Since we only want to - # use '--verbose', find the path to ldd, then invoke under sh to bypass ldd - # hardening. - # XXX: use 'uname -o | grep GNU' instead? 
- ldd_exec=$(command -v ldd) - if [ "${ARCHITECTURE}" = "x86_64" ] || [ "${ARCHITECTURE}" = "amd64" ]; then - # Check if we're on a GNU/Linux system, otherwise default to musl - if [ -n "$ldd_exec" ] && sh -c "$ldd_exec --version" 2>&1 | grep -Eq "(GNU|GLIBC)"; then - ARTIFACT="x86_64-unknown-linux-gnu" - else - ARTIFACT="x86_64-unknown-linux-musl" - IS_MUSL="yes" - fi - elif [ "${ARCHITECTURE}" = "aarch64" ] || [ "${ARCHITECTURE}" = "arm64" ]; then - if [ -n "$ldd_exec" ] && sh -c "$ldd_exec --version" 2>&1 | grep -Eq "(GNU|GLIBC)"; then - ARTIFACT="aarch64-unknown-linux-gnu" - else - ARTIFACT="aarch64-unknown-linux-musl" - IS_MUSL="yes" - fi - fi -elif [ "${OS}" = "Darwin" ]; then - if [ "${ARCHITECTURE}" = "x86_64" ]; then - printf "Intel Mac support is coming soon!\n" - printf "Visit our public Discord at \033[4;94mhttps://discord.gg/az4jPm8x${NC} for additional guidance.\n" - printf "View alternative binaries on our Getting Started guide at \033[4;94mhttps://docs.influxdata.com/influxdb3/${EDITION_TAG}/${NC}.\n" - exit 1 - else - ARTIFACT="aarch64-apple-darwin" - fi -fi - -# Exit if unsupported system -[ -n "${ARTIFACT}" ] || { - printf "Unfortunately this script doesn't support your '${OS}' | '${ARCHITECTURE}' setup, or was unable to identify it correctly.\n" - printf "Visit our public Discord at \033[4;94mhttps://discord.gg/az4jPm8x${NC} for additional guidance.\n" - printf "View alternative binaries on our Getting Started guide at \033[4;94mhttps://docs.influxdata.com/influxdb3/${EDITION_TAG}/${NC}.\n" - exit 1 -} - -URL="https://dl.influxdata.com/influxdb/snapshots/influxdb3-${EDITION_TAG}_${ARTIFACT}.tar.gz" - -START_TIME=$(date +%s) - -# Attempt to clear screen and show welcome message -clear 2>/dev/null || true # clear isn't available everywhere -printf "┌───────────────────────────────────────────────────┐\n" -printf "│ ${BOLD}Welcome to InfluxDB!${NC} We'll make this quick. 
│\n" -printf "└───────────────────────────────────────────────────┘\n" - -echo -printf "${BOLD}Select Installation Type${NC}\n" -echo -printf "1) ${GREEN}Docker Image${NC} ${DIM}(More Powerful, More Complex)${NC}\n" -printf " ├─ Requires knowledge of Docker and Docker management\n" -printf " └─ Includes the Processing Engine for real-time data transformation,\n" -printf " enrichment, and general custom Python code execution.\n\n" -printf "2) ${GREEN}Simple Download${NC} ${DIM}(Automated Install, Quick Setup)${NC}\n" -printf " ├─ No external dependencies required\n" -printf " └─ The Processing Engine will be available soon for binary installations,\n" -printf " bringing the same powerful processing capabilities to local deployments.\n" -echo -printf "Enter your choice (1-2): " -read -r INSTALL_TYPE - -case "$INSTALL_TYPE" in - 1) - printf "\n\n${BOLD}Download and Tag Docker Image${NC}\n" - printf "├─ ${DIM}docker pull quay.io/influxdb/influxdb3-${EDITION_TAG}:latest${NC}\n" - printf "└─ ${DIM}docker tag quay.io/influxdb/influxdb3-${EDITION_TAG}:latest influxdb3-${EDITION_TAG}${NC}\n\n" - if ! docker pull "quay.io/influxdb/influxdb3-${EDITION_TAG}:latest"; then - printf "└─ Error: Failed to download Docker image.\n" - exit 1 - fi - docker tag quay.io/influxdb/influxdb3-${EDITION_TAG}:latest influxdb3-${EDITION_TAG} - # Exit script after Docker installation - echo - printf "${BOLD}NEXT STEPS${NC}\n" - printf "1) Run the Docker image:\n" - printf " ├─ ${BOLD}mkdir plugins${NC} ${DIM}(To store and access plugins)${NC}\n" - printf " └─ ${BOLD}docker run -it -p ${PORT}:${PORT} -v ./plugins:/plugins influxdb3-${EDITION_TAG} serve --object-store memory --node-id node0 --plugin-dir /plugins${NC} ${DIM}(To start)${NC}\n" - printf "2) View documentation at \033[4;94mhttps://docs.influxdata.com/influxdb3/${EDITION_TAG}/${NC}\n\n" - - END_TIME=$(date +%s) - DURATION=$((END_TIME - START_TIME)) - - out=" Time is everything. This process took $DURATION seconds. 
" - mid="" - for _ in $(seq 1 ${#out}); do - mid="${mid}─" - done - printf "┌%s┐\n" "$mid" - printf "│%s│\n" "$out" - printf "└%s┘\n" "$mid" - exit 0 - ;; - 2) - printf "\n\n" - ;; - *) - printf "Invalid choice. Defaulting to binary installation.\n\n" - ;; -esac - -# attempt to find the user's shell config -shellrc= -if [ -n "$SHELL" ]; then - tmp=~/.$(basename "$SHELL")rc - if [ -e "$tmp" ]; then - shellrc="$tmp" - fi -fi - -printf "${BOLD}Downloading InfluxDB 3 %s to %s${NC}\n" "$EDITION" "$INSTALL_LOC" -printf "├─${DIM} mkdir -p '%s'${NC}\n" "$INSTALL_LOC" -mkdir -p "$INSTALL_LOC" -printf "└─${DIM} curl -sS '%s' -o '%s/influxdb3.tar.gz'${NC}\n" "${URL}" "$INSTALL_LOC" -curl -sS "${URL}" -o "$INSTALL_LOC/influxdb3.tar.gz" - -echo -printf "${BOLD}Verifying '%s/influxdb3.tar.gz'${NC}\n" "$INSTALL_LOC" -printf "└─${DIM} curl -sS '%s.sha256' -o '%s/influxdb3.tar.gz.sha256'${NC}\n" "${URL}" "$INSTALL_LOC" -curl -sS "${URL}.sha256" -o "$INSTALL_LOC/influxdb3.tar.gz.sha256" -dl_sha=$(cut -d ' ' -f 1 "$INSTALL_LOC/influxdb3.tar.gz.sha256" | grep -E '^[0-9a-f]{64}$') -if [ -z "$dl_sha" ]; then - printf "Could not find properly formatted SHA256 in '%s/influxdb3.tar.gz.sha256'. Aborting.\n" "$INSTALL_LOC" - exit 1 -fi -printf "└─${DIM} sha256sum '%s/influxdb3.tar.gz'" "$INSTALL_LOC" -ch_sha=$(sha256sum "$INSTALL_LOC/influxdb3.tar.gz" | cut -d ' ' -f 1) -if [ "$ch_sha" = "$dl_sha" ]; then - printf " (OK: %s = %s)${NC}\n" "$ch_sha" "$dl_sha" -else - printf " (ERROR: %s != %s). Aborting.${NC}\n" "$ch_sha" "$dl_sha" - exit 1 -fi -printf "└─${DIM} rm '%s/influxdb3.tar.gz.sha256'${NC}\n" "$INSTALL_LOC" -rm "$INSTALL_LOC/influxdb3.tar.gz.sha256" - -echo -printf "${BOLD}Extracting and Processing${NC}\n" -printf "├─${DIM} tar -xf '%s/influxdb3.tar.gz' -C '%s'${NC}\n" "$INSTALL_LOC" "$INSTALL_LOC" -tar -xf "$INSTALL_LOC/influxdb3.tar.gz" -C "$INSTALL_LOC" -printf "└─${DIM} rm '%s/influxdb3.tar.gz'${NC}\n" "$INSTALL_LOC" -rm "$INSTALL_LOC/influxdb3.tar.gz" - -if [ -n "$shellrc" ] && ! 
grep -q "export PATH=.*$INSTALL_LOC" "$shellrc"; then - echo - printf "${BOLD}Adding InfluxDB to '%s'${NC}\n" "$shellrc" - printf "└─${DIM} export PATH=\"\$PATH:%s/\" >> '%s'${NC}\n" "$INSTALL_LOC" "$shellrc" - echo "export PATH=\"\$PATH:$INSTALL_LOC/\"" >> "$shellrc" -fi - -if [ "${EDITION}" = "Core" ]; then - # Prompt user to start the service - echo - printf "${BOLD}Configuration Options${NC}\n" - - - printf "└─ Start InfluxDB Now? (y/n): " - read -r START_SERVICE - if echo "$START_SERVICE" | grep -q "^[Yy]$" ; then - # Prompt for Node ID - echo - printf "${BOLD}Enter Your Node ID${NC}\n" - printf "├─ A Node ID is a unique, uneditable identifier for a service.\n" - printf "└─ Enter a Node ID (default: node0): " - read -r NODE_ID - NODE_ID=${NODE_ID:-node0} - - # Prompt for storage solution - echo - printf "${BOLD}Select Your Storage Solution${NC}\n" - printf "├─ 1) In-memory storage (Fastest, data cleared on restart)\n" - printf "├─ 2) File storage (Persistent local storage)\n" - printf "├─ 3) Object storage (Cloud-compatible storage)\n" - printf "└─ Enter your choice (1-3): " - read -r STORAGE_CHOICE - - case "$STORAGE_CHOICE" in - 1) - STORAGE_TYPE="memory" - STORAGE_FLAGS="--object-store=memory" - STORAGE_FLAGS_ECHO="$STORAGE_FLAGS" - ;; - 2) - STORAGE_TYPE="File Storage" - echo - printf "Enter storage path (default: %s/data): " "${INSTALL_LOC}" - read -r STORAGE_PATH - STORAGE_PATH=${STORAGE_PATH:-"${INSTALL_LOC}/data"} - STORAGE_FLAGS="--object-store=file --data-dir ${STORAGE_PATH}" - STORAGE_FLAGS_ECHO="$STORAGE_FLAGS" - ;; - 3) - STORAGE_TYPE="Object Storage" - echo - printf "${BOLD}Select Cloud Provider${NC}\n" - printf "├─ 1) Amazon S3\n" - printf "├─ 2) Azure Storage\n" - printf "├─ 3) Google Cloud Storage\n" - printf "└─ Enter your choice (1-3): " - read -r CLOUD_CHOICE - - case $CLOUD_CHOICE in - 1) # AWS S3 - echo - printf "${BOLD}AWS S3 Configuration${NC}\n" - printf "├─ Enter AWS Access Key ID: " - read -r AWS_KEY - - printf "├─ Enter AWS Secret 
Access Key: " - stty -echo - read -r AWS_SECRET - stty echo - - echo - printf "├─ Enter S3 Bucket: " - read -r AWS_BUCKET - - printf "└─ Enter AWS Region (default: us-east-1): " - read -r AWS_REGION - AWS_REGION=${AWS_REGION:-"us-east-1"} - - STORAGE_FLAGS="--object-store=s3 --bucket=${AWS_BUCKET}" - if [ -n "$AWS_REGION" ]; then - STORAGE_FLAGS="$STORAGE_FLAGS --aws-default-region=${AWS_REGION}" - fi - STORAGE_FLAGS="$STORAGE_FLAGS --aws-access-key-id=${AWS_KEY}" - STORAGE_FLAGS_ECHO="$STORAGE_FLAGS --aws-secret-access-key=..." - STORAGE_FLAGS="$STORAGE_FLAGS --aws-secret-access-key=${AWS_SECRET}" - ;; - - 2) # Azure Storage - echo - printf "${BOLD}Azure Storage Configuration${NC}\n" - printf "├─ Enter Storage Account Name: " - read -r AZURE_ACCOUNT - - printf "└─ Enter Storage Access Key: " - stty -echo - read -r AZURE_KEY - stty echo - - echo - STORAGE_FLAGS="--object-store=azure --azure-storage-account=${AZURE_ACCOUNT}" - STORAGE_FLAGS_ECHO="$STORAGE_FLAGS --azure-storage-access-key=..." - STORAGE_FLAGS="$STORAGE_FLAGS --azure-storage-access-key=${AZURE_KEY}" - ;; - - 3) # Google Cloud Storage - echo - printf "${BOLD}Google Cloud Storage Configuration${NC}\n" - printf "└─ Enter path to service account JSON file: " - read -r GOOGLE_SA - STORAGE_FLAGS="--object-store=google --google-service-account=${GOOGLE_SA}" - STORAGE_FLAGS_ECHO="$STORAGE_FLAGS" - ;; - - *) - printf "Invalid cloud provider choice. Defaulting to file storage.\n" - STORAGE_TYPE="File Storage" - STORAGE_FLAGS="--object-store=file --data-dir ${INSTALL_LOC}/data" - STORAGE_FLAGS_ECHO="$STORAGE_FLAGS" - ;; - esac - ;; - - *) - printf "Invalid choice. Defaulting to in-memory.\n" - STORAGE_TYPE="Memory" - STORAGE_FLAGS="--object-store=memory" - STORAGE_FLAGS_ECHO="$STORAGE_FLAGS" - ;; - esac - - # Ensure port is available; if not, find a new one. 
If IS_MUSL is set, - # assume we are on a busybox-like system whose lsof doesn't support the - # args we need - lsof_exec=$(command -v lsof) && { - while [ -n "$lsof_exec" ] && [ "$IS_MUSL" != "yes" ] && lsof -i:"$PORT" -t >/dev/null 2>&1; do - printf "├─${DIM} Port %s is in use. Finding new port.${NC}\n" "$PORT" - PORT=$((PORT + 1)) - if [ "$PORT" -gt 32767 ]; then - printf "└─${DIM} Could not find an available port. Aborting.${NC}\n" - exit 1 - fi - if ! "$lsof_exec" -i:"$PORT" -t >/dev/null 2>&1; then - printf "└─${DIM} Found an available port: %s${NC}\n" "$PORT" - break - fi - done - } - - # Start and give up to 30 seconds to respond - echo - printf "${BOLD}Starting InfluxDB${NC}\n" - printf "├─${DIM} Node ID: %s${NC}\n" "$NODE_ID" - printf "├─${DIM} Storage: %s${NC}\n" "$STORAGE_TYPE" - printf "├─${DIM} '%s' serve --node-id='%s' --http-bind='0.0.0.0:%s' %s${NC}\n" "$INSTALL_LOC/$BINARY_NAME" "$NODE_ID" "$PORT" "$STORAGE_FLAGS_ECHO" - "$INSTALL_LOC/$BINARY_NAME" serve --node-id="$NODE_ID" --http-bind="0.0.0.0:$PORT" $STORAGE_FLAGS > /dev/null & - PID="$!" - - SUCCESS=0 - for _ in $(seq 1 30); do - # on systems without a usable lsof, sleep a second to see if the pid is - # still there to give influxdb a chance to error out in case an already - # running influxdb is running on this port - if [ -z "$lsof_exec" ] || [ "$IS_MUSL" = "yes" ]; then - sleep 1 - fi - - if ! kill -0 "$PID" 2>/dev/null ; then - break - fi - - if curl --max-time 3 -s "http://localhost:$PORT/health" >/dev/null 2>&1; then - printf "└─${BOLDGREEN} ✓ InfluxDB 3 ${EDITION} is now installed and running on port %s. Nice!${NC}\n" "$PORT" - SUCCESS=1 - break - fi - sleep 1 - done - - if [ $SUCCESS -eq 0 ]; then - printf "└─${BOLD} ERROR: InfluxDB failed to start; check permissions or other potential issues.${NC}\n" "$PORT" - exit 1 - fi - - else - echo - printf "${BOLDGREEN}✓ InfluxDB 3 ${EDITION} is now installed. 
Nice!${NC}\n" - fi -else - echo - printf "${BOLDGREEN}✓ InfluxDB 3 ${EDITION} is now installed. Nice!${NC}\n" -fi - -### SUCCESS INFORMATION ### -echo -printf "${BOLD}Further Info${NC}\n" -if [ -n "$shellrc" ]; then - printf "├─ Run ${BOLD}source '%s'${NC}, then access InfluxDB with ${BOLD}influxdb3${NC} command.\n" "$shellrc" -else - printf "├─ Access InfluxDB with the ${BOLD}%s${NC} command.\n" "$INSTALL_LOC/$BINARY_NAME" -fi -printf "├─ View the Getting Started guide at \033[4;94mhttps://docs.influxdata.com/influxdb3/${EDITION_TAG}/${NC}.\n" -printf "└─ Visit our public Discord at \033[4;94mhttps://discord.gg/az4jPm8x${NC} for additional guidance.\n" -echo - -END_TIME=$(date +%s) -DURATION=$((END_TIME - START_TIME)) - -out=" Time is everything. This process took $DURATION seconds. " -mid="" -for _ in $(seq 1 ${#out}); do - mid="${mid}─" -done -printf "┌%s┐\n" "$mid" -printf "│%s│\n" "$out" -printf "└%s┘\n" "$mid" From 0bf083b1f294211e3feceaaa3f3ac5613bad246f Mon Sep 17 00:00:00 2001 From: Jamie Strandboge Date: Tue, 11 Feb 2025 15:56:07 -0600 Subject: [PATCH 09/19] chore(install_influxdb3.sh): update for processing engine and release builds --- install_influxdb3.sh | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/install_influxdb3.sh b/install_influxdb3.sh index 6acbd09f043..72b983a08e6 100644 --- a/install_influxdb3.sh +++ b/install_influxdb3.sh @@ -85,13 +85,9 @@ echo printf "${BOLD}Select Installation Type${NC}\n" echo printf "1) ${GREEN}Docker Image${NC} ${DIM}(More Powerful, More Complex)${NC}\n" -printf " ├─ Requires knowledge of Docker and Docker management\n" -printf " └─ Includes the Processing Engine for real-time data transformation,\n" -printf " enrichment, and general custom Python code execution.\n\n" +printf " └─ Requires knowledge of Docker and Docker management\n" printf "2) ${GREEN}Simple Download${NC} ${DIM}(Automated Install, Quick Setup)${NC}\n" -printf " ├─ No external dependencies 
required\n" -printf " └─ The Processing Engine will be available soon for binary installations,\n" -printf " bringing the same powerful processing capabilities to local deployments.\n" +printf " └─ No external dependencies required\n" echo printf "Enter your choice (1-2): " read -r INSTALL_TYPE @@ -147,13 +143,13 @@ fi printf "${BOLD}Downloading InfluxDB 3 %s to %s${NC}\n" "$EDITION" "$INSTALL_LOC" printf "├─${DIM} mkdir -p '%s'${NC}\n" "$INSTALL_LOC" mkdir -p "$INSTALL_LOC" -printf "└─${DIM} curl -sS '%s' -o '%s/influxdb3.tar.gz'${NC}\n" "${URL}" "$INSTALL_LOC" -curl -sS "${URL}" -o "$INSTALL_LOC/influxdb3.tar.gz" +printf "└─${DIM} curl -sSL '%s' -o '%s/influxdb3.tar.gz'${NC}\n" "${URL}" "$INSTALL_LOC" +curl -sSL "${URL}" -o "$INSTALL_LOC/influxdb3.tar.gz" echo printf "${BOLD}Verifying '%s/influxdb3.tar.gz'${NC}\n" "$INSTALL_LOC" -printf "└─${DIM} curl -sS '%s.sha256' -o '%s/influxdb3.tar.gz.sha256'${NC}\n" "${URL}" "$INSTALL_LOC" -curl -sS "${URL}.sha256" -o "$INSTALL_LOC/influxdb3.tar.gz.sha256" +printf "└─${DIM} curl -sSL '%s.sha256' -o '%s/influxdb3.tar.gz.sha256'${NC}\n" "${URL}" "$INSTALL_LOC" +curl -sSL "${URL}.sha256" -o "$INSTALL_LOC/influxdb3.tar.gz.sha256" dl_sha=$(cut -d ' ' -f 1 "$INSTALL_LOC/influxdb3.tar.gz.sha256" | grep -E '^[0-9a-f]{64}$') if [ -z "$dl_sha" ]; then printf "Could not find properly formatted SHA256 in '%s/influxdb3.tar.gz.sha256'. 
Aborting.\n" "$INSTALL_LOC" @@ -172,8 +168,15 @@ rm "$INSTALL_LOC/influxdb3.tar.gz.sha256" echo printf "${BOLD}Extracting and Processing${NC}\n" -printf "├─${DIM} tar -xf '%s/influxdb3.tar.gz' -C '%s'${NC}\n" "$INSTALL_LOC" "$INSTALL_LOC" -tar -xf "$INSTALL_LOC/influxdb3.tar.gz" -C "$INSTALL_LOC" + +# some tarballs have a leading component, check for that +TAR_LEVEL=0 +if tar -tf "$INSTALL_LOC/influxdb3.tar.gz" | grep -q '[a-zA-Z0-9]/influxdb3$' ; then + TAR_LEVEL=1 +fi +printf "├─${DIM} tar -xf '%s/influxdb3.tar.gz' --strip-components=${TAR_LEVEL} -C '%s'${NC}\n" "$INSTALL_LOC" "$INSTALL_LOC" +tar -xf "$INSTALL_LOC/influxdb3.tar.gz" --strip-components="${TAR_LEVEL}" -C "$INSTALL_LOC" + printf "└─${DIM} rm '%s/influxdb3.tar.gz'${NC}\n" "$INSTALL_LOC" rm "$INSTALL_LOC/influxdb3.tar.gz" @@ -378,7 +381,9 @@ else printf "├─ Access InfluxDB with the ${BOLD}%s${NC} command.\n" "$INSTALL_LOC/$BINARY_NAME" fi printf "├─ View the Getting Started guide at \033[4;94mhttps://docs.influxdata.com/influxdb3/${EDITION_TAG}/${NC}.\n" -printf "└─ Visit our public Discord at \033[4;94mhttps://discord.gg/az4jPm8x${NC} for additional guidance.\n" +printf "├─ Visit our public Discord at \033[4;94mhttps://discord.gg/az4jPm8x${NC} for additional guidance.\n" +printf "└─ The Processing Engine is now included for real-time data transformation,\n" +printf " enrichment, and general custom Python code execution.\n\n" echo END_TIME=$(date +%s) From 277252eb5c0227782f2687e784dd1b3b5c258373 Mon Sep 17 00:00:00 2001 From: Jamie Strandboge Date: Wed, 12 Feb 2025 08:56:32 -0600 Subject: [PATCH 10/19] fix: temporarily use rpm --nodeps until compile with old GLIBC --- .circleci/scripts/package-validation/validate | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.circleci/scripts/package-validation/validate b/.circleci/scripts/package-validation/validate index f76e5555479..d8bfbe08d23 100755 --- a/.circleci/scripts/package-validation/validate +++ 
b/.circleci/scripts/package-validation/validate @@ -45,7 +45,11 @@ install_rpm() { # see "install_deb" for "update" yum update -y yum install -y binutils - yum localinstall -y "$(realpath "${PACKAGE_PATH}")" + # temporary install with rpm --nodeps until we compile with older glibc + #yum localinstall -y "$(realpath "${PACKAGE_PATH}")" + yum install -y shadow-utils # for useradd + yum install -y libxcrypt-compat # for libcrypt.so.1 + rpm -ivh --nodeps "$(realpath "${PACKAGE_PATH}")" } case ${PACKAGE_TYPE} From f4a58623c45ff0fcbad96f621a746ff39fe06b8f Mon Sep 17 00:00:00 2001 From: Jamie Strandboge Date: Wed, 12 Feb 2025 11:25:39 -0600 Subject: [PATCH 11/19] feat(ci): build docker with python-build-standalone --- .circleci/config.yml | 18 +++++++++++-- .circleci/scripts/docker_build_release.bash | 10 ++++--- Dockerfile | 30 +++++++++++++++++---- Dockerfile.dockerignore | 1 + 4 files changed, 49 insertions(+), 10 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index c684ee6a053..f49a19e67d2 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -218,6 +218,10 @@ jobs: machine: image: ubuntu-2204:current resource_class: medium + # environment variables for python-build-standalone. Should correspond to + # https://github.com/astral-sh/python-build-standalone/releases. See + # scripts/fetch-python-standalone.bash for details. This should match the + # 'build-docker' job, below. environment: PBS_DATE: "20250106" PBS_VERSION: "3.11.11" @@ -226,10 +230,11 @@ jobs: - run: name: pull Python Build Standalone command: | + echo "PBS_DATE=$PBS_DATE" .circleci/scripts/fetch-python-standalone.bash \ "python-artifacts" \ - "${PBS_DATE}" \ - "${PBS_VERSION}" + "$PBS_DATE" \ + "$PBS_VERSION" - store_artifacts: path: python-artifacts - persist_to_workspace: @@ -535,6 +540,13 @@ jobs: image_name: type: string default: influxdb3-core + # environment variables for python-build-standalone. 
Should correspond to + # https://github.com/astral-sh/python-build-standalone/releases. See + # scripts/fetch-python-standalone.bash for details. This should match + # the 'fetch-python' job, above. + environment: + PBS_DATE: "20250106" + PBS_VERSION: "3.11.11" machine: image: default resource_class: << parameters.resource_class >> @@ -555,6 +567,8 @@ jobs: "influxdb3" \ "aws,gcp,azure,jemalloc_replacing_malloc,tokio_console,system-py" \ "<< parameters.image_name >>:latest-<< parameters.platform >>" \ + "$PBS_DATE" \ + "$PBS_VERSION" \ "<< parameters.platform >>" \ "$DOCKER_PROFILE" diff --git a/.circleci/scripts/docker_build_release.bash b/.circleci/scripts/docker_build_release.bash index abe7c1eb69c..045a8eb0276 100755 --- a/.circleci/scripts/docker_build_release.bash +++ b/.circleci/scripts/docker_build_release.bash @@ -5,8 +5,10 @@ set -euo pipefail readonly PACKAGE="$1" readonly FEATURES="$2" readonly TAG="$3" -readonly ARCH="${4:-amd64}" # Default to amd64 if not specified -readonly PROFILE="${5:-release}" # Default to release if not specified +readonly PBS_DATE="$4" +readonly PBS_VERSION="$5" +readonly ARCH="${6:-amd64}" # Default to amd64 if not specified +readonly PROFILE="${7:-release}" # Default to release if not specified RUST_VERSION="$(sed -E -ne 's/channel = "(.*)"/\1/p' rust-toolchain.toml)" COMMIT_SHA="$(git rev-parse HEAD)" @@ -24,6 +26,8 @@ exec docker buildx build \ --build-arg RUST_VERSION="$RUST_VERSION" \ --build-arg PACKAGE="$PACKAGE" \ --build-arg PROFILE="$PROFILE" \ + --build-arg PBS_DATE="$PBS_DATE" \ + --build-arg PBS_VERSION="$PBS_VERSION" \ --platform "$PLATFORM" \ --label org.opencontainers.image.created="$NOW" \ --label org.opencontainers.image.url="$REPO_URL" \ @@ -35,4 +39,4 @@ exec docker buildx build \ --label com.influxdata.image.package="$PACKAGE" \ --progress plain \ --tag "$TAG" \ - . \ No newline at end of file + . 
diff --git a/Dockerfile b/Dockerfile index 0bb3044a665..3ded96b702a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,9 +6,10 @@ FROM rust:${RUST_VERSION}-slim-bookworm as build USER root RUN apt update \ - && apt install --yes binutils build-essential pkg-config libssl-dev clang lld git protobuf-compiler python3 python3-dev python3-pip \ + && apt install --yes binutils build-essential curl pkg-config libssl-dev clang lld git patchelf protobuf-compiler zstd \ && rm -rf /var/lib/{apt,dpkg,cache,log} + # Build influxdb3 COPY . /influxdb3 WORKDIR /influxdb3 @@ -18,11 +19,24 @@ ARG CARGO_NET_GIT_FETCH_WITH_CLI=false ARG PROFILE=release ARG FEATURES=aws,gcp,azure,jemalloc_replacing_malloc,system-py ARG PACKAGE=influxdb3 +ARG PBS_DATE=unset +ARG PBS_VERSION=unset ENV CARGO_INCREMENTAL=$CARGO_INCREMENTAL \ CARGO_NET_GIT_FETCH_WITH_CLI=$CARGO_NET_GIT_FETCH_WITH_CLI \ PROFILE=$PROFILE \ FEATURES=$FEATURES \ - PACKAGE=$PACKAGE + PACKAGE=$PACKAGE \ + PBS_TARGET=x86_64-unknown-linux-gnu \ + PBS_DATE=$PBS_DATE \ + PBS_VERSION=$PBS_VERSION + +# obtain python-build-standalone and configure PYO3_CONFIG_FILE +RUN \ + sed -i "s/^readonly TARGETS=.*/readonly TARGETS=${PBS_TARGET}/" ./.circleci/scripts/fetch-python-standalone.bash && \ + ./.circleci/scripts/fetch-python-standalone.bash /influxdb3/python-artifacts "${PBS_DATE}" "${PBS_VERSION}" && \ + tar -C /influxdb3/python-artifacts -zxf /influxdb3/python-artifacts/all.tar.gz "./${PBS_TARGET}" && \ + sed -i 's#tmp/workspace#influxdb3#' "/influxdb3/python-artifacts/${PBS_TARGET}/pyo3_config_file.txt" && \ + cat "/influxdb3/python-artifacts/${PBS_TARGET}/pyo3_config_file.txt" RUN \ --mount=type=cache,id=influxdb3_rustup,sharing=locked,target=/usr/local/rustup \ @@ -30,16 +44,18 @@ RUN \ --mount=type=cache,id=influxdb3_git,sharing=locked,target=/usr/local/cargo/git \ --mount=type=cache,id=influxdb3_target,sharing=locked,target=/influxdb3/target \ du -cshx /usr/local/rustup /usr/local/cargo/registry /usr/local/cargo/git /influxdb3/target 
&& \ - cargo build --target-dir /influxdb3/target --package="$PACKAGE" --profile="$PROFILE" --no-default-features --features="$FEATURES" && \ + PYO3_CONFIG_FILE="/influxdb3/python-artifacts/$PBS_TARGET/pyo3_config_file.txt" cargo build --target-dir /influxdb3/target --package="$PACKAGE" --profile="$PROFILE" --no-default-features --features="$FEATURES" && \ objcopy --compress-debug-sections "target/$PROFILE/$PACKAGE" && \ - cp "/influxdb3/target/$PROFILE/$PACKAGE" /root/$PACKAGE && \ + cp "/influxdb3/target/$PROFILE/$PACKAGE" "/root/$PACKAGE" && \ + patchelf --add-rpath '$ORIGIN/../lib/influxdb3/python/lib' "/root/$PACKAGE" && \ + cp -a "/influxdb3/python-artifacts/$PBS_TARGET/python" /root/python && \ du -cshx /usr/local/rustup /usr/local/cargo/registry /usr/local/cargo/git /influxdb3/target FROM debian:bookworm-slim RUN apt update \ - && apt install --yes ca-certificates gettext-base libssl3 python3 python3-dev python3-pip python3-venv wget curl --no-install-recommends \ + && apt install --yes ca-certificates gettext-base libssl3 wget curl --no-install-recommends \ && rm -rf /var/lib/{apt,dpkg,cache,log} \ && groupadd --gid 1500 influxdb3 \ && useradd --uid 1500 --gid influxdb3 --shell /bin/bash --create-home influxdb3 @@ -47,6 +63,10 @@ RUN apt update \ RUN mkdir /var/lib/influxdb3 && \ chown influxdb3:influxdb3 /var/lib/influxdb3 +RUN mkdir -p /usr/lib/influxdb3 +COPY --from=build /root/python /usr/lib/influxdb3/python +RUN chown -R root:root /usr/lib/influxdb3 + USER influxdb3 RUN mkdir ~/.influxdb3 diff --git a/Dockerfile.dockerignore b/Dockerfile.dockerignore index 34d63e928ee..e4d57a509bd 100644 --- a/Dockerfile.dockerignore +++ b/Dockerfile.dockerignore @@ -3,5 +3,6 @@ target/ tests/ docker/ !.cargo/ +!.circleci/scripts/fetch-python-standalone.bash !.git/ !docker/entrypoint.sh From a321f4946e3b67454445845cea9a64cc325eb19f Mon Sep 17 00:00:00 2001 From: Jamie Strandboge Date: Wed, 12 Feb 2025 16:41:00 -0600 Subject: [PATCH 12/19] chore: add 
README_processing_engine.md --- README_processing_engine.md | 366 ++++++++++++++++++++++++++++++++++++ 1 file changed, 366 insertions(+) create mode 100644 README_processing_engine.md diff --git a/README_processing_engine.md b/README_processing_engine.md new file mode 100644 index 00000000000..d11c3bba810 --- /dev/null +++ b/README_processing_engine.md @@ -0,0 +1,366 @@ +The InfluxDB 3 Processing engine is an embedded Python VM for running code +inside the database to process and transform data. This document discusses how +the processing engine is built within InfluxDB. For usage instructions, see: +https://docs.influxdata.com/influxdb3/core/ + +See the 'Discussion' section for more information on why the processing engine +is implemented the way it is. + + +## Implementation + +InfluxDB uses the [PYO3 crate](https://crates.io/crates/pyo3) to build InfluxDB +with an embedded python and the processing engine is enabled during the build +by specifying the `system-py` feature to `cargo build`. Eg: + +``` +$ cargo build --features system-py +``` + +PYO3 will then inspect the system to find a python runtime to build and link +against. The resulting `influxdb3` binary will be dynamically linked to the +`libpython` that PYO3 found during the build. Eg, on a typical Debian or Ubuntu +system, if you install the following, then InfluxDB can be built against the +system's python: + +```sh +# build dependencies +$ sudo apt-get install build-essential pkg-config libssl-dev clang lld \ + git protobuf-compiler python3 python3-dev python3-pip + +# runtime dependencies +$ sudo apt-get install python3 python3-pip python3-venv + +# build +$ cargo build --features system-py +``` + +The choice of python can be influenced by setting the `PYTHONHOME` environment +variable for `cargo build` or creating a `PYO3_CONFIG_FILE` file for more +specialized setups (such as 'Official builds', below). 
For details, see +https://pyo3.rs/main/building-and-distribution.html + +In order for InfluxDB to successfully use the python it was built against, the +same `libpython` version as well as the full runtime environment of the python +install (ie, its standard library) must be available to InfluxDB in a location +that it can find it. Building against the system python can be a reasonable +choice for users who target their builds to a specific release of an OS as +InfluxDB will simply use the installed python from the system. + + +## Official builds + +To provide a consistent, robust and maintained python environment for InfluxDB +that is portable across a range of operating systems, InfluxData's official +InfluxDB is built against a pre-built release of +[python-build-standalone](https://astral.sh/blog/python-build-standalone) (a +CPython standalone python distribution). For a given release of InfluxDB, +official builds will use the same version of python for all install methods and +operating systems. + +The following operating systems and architectures are currently supported: + + * Linux amd64/arm64 (`tar.gz`, `deb` and `rpm`) + * Darwin arm64 (`tar.gz`) + * Windows amd64 (`zip`) + * Docker (Linux amd64) + +Due to constraints with `python-build-standalone` and statically linking, all +builds are dynamically linked to `python-build-standalone`'s `libpython` as +well as a few OS-specific libraries. 
Specifically: + + * Linux (seen with `ldd` and `strings` on the binary): + * `python-build-standalone` is linked against `glibc` and is compatible with + `glibc` [2.17+](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst#linux) + * `influxdb3` is linked against `libpython` from `python-build-standalone` as + well as `glibc` (currently compatible with `glibc` 2.36+ (though 2.35 is + known to work; future releases will be built against an earlier `glibc` + release to improve compatibility)) + * Darwin (seen with `otool -L`; cross-compiled with `osxcross`): + * `python-build-standalone` is linked against: + * `CoreFoundation.framework/Versions/A/CoreFoundation` compatibility + version 150.0.0 + * `libSystem.B.dylib` compatibility version 1.0.0 + * `influxdb3` is linked against: + * `CoreFoundation.framework/Versions/A/CoreFoundation` compatibility + version 150.0.0 + * `IOKit.framework/Versions/A/IOKit` compatibility version 1.0.0 + * `libiconv.2.dylib` compatibility version 7.0.0 + * `libobjc.A.dylib` compatibility version 1.0.0 + * `libSystem.B.dylib` compatibility version 1.0.0 + * `Security.framework/Versions/A/Security` compatibility version 1.0.0 + * `SystemConfiguration.framework/Versions/A/SystemConfiguration` + compatibility version 1.0.0 + * Windows (seen with `dumpbin /HEADERS ...` and `dumpbin /DEPENDENTS ...`): + * `python-build-standalone` claims [Windows 8/Windows Server 2012](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst#windows) or newer. Specifically, it has: + * 14.42 linker version + * 6.00 operating system version + * 6.00 subsystem version + * `influxdb3` has: + * 2.40 linker version + * 4.00 operating system version + * 5.02 subsystem version + +At a high level, the build process for Official builds consists of: + 1. 
downloading an appropriate build of `python-build-standalone` for the + target OS and architecture from https://github.com/astral-sh/python-build-standalone/releases + 2. unpacking the `python-build-standalone` build on disk + 3. creating a `pyo3` build configuration file to point to the unpacked + directory and setting the `PYO3_CONFIG_FILE` environment variable to point + to it. Eg (on Linux): + + ``` + implementation=CPython + version=3.11 + shared=true + abi3=false + lib_name=python3.11 + lib_dir=/path/to/python-standalone/python/lib + executable=/path/to/python-standalone/python/bin/python3.11 + pointer_width=64 + build_flags= + suppress_build_script_link_lines=false + ``` + + PYO3 will try to auto-detect the location which can work well with a system + python, but not with an unpacked `python-build-standalone`. While the + `PYO3_PYTHON` environment variable can be used to point to the unpacked + directory (eg, `PYO3_PYTHON=/path/to/python-standalone/python/bin/python3`), + this was not sufficient. Defining the build configuration in the + `PYO3_CONFIG_FILE` correctly worked for all supported environments with our + current build process + 4. run `PYO3_CONFIG_FILE=/path/to/pyo3_config_file.txt cargo build --features=system-py` + to build InfluxDB + 5. adjust the library search paths for Linux and Darwin so `libpython` can be + found (see 'Discussion', below) + 6. create official build artifacts: + + * Linux/Darwin `tar.gz` contain `influxdb3` and `python/...` + * Linux `deb` and `rpm` contain `/usr/bin/influxdb3` and + `/usr/lib/influxdb3/python` + * Windows `zip` contains `influxdb3`, `*.dll` files from `python/...` and + `python/...` (see 'Discussion', below) + +Licensing information for `python-build-standalone` as distributed by official +builds of InfluxDB can be found in the `python/LICENSE.md`. + +With the above, `influxdb3` can be run in the normal way.
Eg, on Linux: + +``` +# unpack tarball to /here +$ tar -C /here --strip-components=1 -zxvf /path/to/build/influxdb3-_linux_amd64.tar.gz + +# without processing engine +$ /here/influxdb3 serve ... +$ /here/influxdb3 query ... + +# with the processing engine without an activated venv +$ mkdir /path/to/plugins +$ /here/influxdb3 serve --plugin-dir /path/to/plugins ... # server +$ /here/influxdb3 create database foo # client +$ /here/influxdb3 test schedule_plugin -d foo testme.py # client + +# create a venv +$ /here/python/bin/python3 -m venv /path/to/venv +$ source /path/to/venv/bin/activate +(venv)$ pip install requests +... +(venv)$ deactivate + +# start server in the venv +$ source /path/to/venv/bin/activate # server +(venv)$ /here/influxdb3 serve --plugin-dir /path/to/plugins ... # server +... ... + +$ /here/influxdb3 test schedule_plugin -d foo test-requests.py # client +``` + +## Discussion + +### Why python-build-standalone? + +`python-build-standalone` is designed to be +[portable](https://astral.sh/blog/python-build-standalone#whats-a-standalone-python-distribution), +[maintained](https://astral.sh/blog/python-build-standalone#the-future-of-standalone-python-distributions) +and [permissively licensed](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst#licensing). +It is purpose-built for embedding and being redistributable and has a good +upstream maintenance story (https://github.com/astral-sh) with lots of users +and a corporate sponsor. + +An alternative to using a standalone python distribution is to use the system +python. While this can be a reasonable choice on systems where the python +version and installation locations can be relied upon, it is not a good choice +for official builds since users would have to ensure they had a python +installation that met InfluxDB's requirements and because the myriad of +operating systems, architectures and installed python versions would be a +problem to support. 
+ +By choosing `python-build-standalone`, InfluxDB should deliver a consistent +experience across OSes and architectures for all users as well as providing a +reasonable maintenance story. + + +### Which builds of python-build-standalone are used? + +`python-build-standalone` provides [many different builds](https://github.com/astral-sh/python-build-standalone/blob/main/docs/distributions.rst). +Official InfluxDB builds use the following `python-build-standalone` +[recommended](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst) +builds: + + * `aarch64-apple-darwin-install_only_stripped.tar.gz` + * `aarch64-unknown-linux-gnu-install_only_stripped.tar.gz` + * `x86_64-unknown-linux-gnu-install_only_stripped.tar.gz` + * `x86_64-pc-windows-msvc-shared-install_only_stripped.tar.gz` + + +### How will InfluxData maintain the embedded interpreter? + +The https://github.com/astral-sh project performs timely builds of CPython +micro-releases for `python-build-standalone` based on the release cadence of +upstream Python. InfluxData need only update the build to pull in the new +micro-release for security and maintenance releases. This is done by updating +the `PBS_DATE` and `PBS_VERSION` environment variables in +`.circleci/config.yml`. See that file and +`.circleci/scripts/fetch-python-standalone.bash` for details. + +astral-sh creates new builds for CPython minor releases as they become +available from upstream Python. Updating the official builds to pull in a new +minor release is straightforward, but processes for verifying builds of +InfluxDB with the new `python-build-standalone` minor release are TBD. + + +### How is python-build-standalone licensed? + +Release builds of `python-build-standalone` are +[permissively licensed](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst#licensing) +and contain no copyleft code.
+ +The licensing information from release builds of `python-build-standalone` are +obtained by extracting the `python/PYTHON.json` and `python/licenses/*` files +from the `-debug-full.tar.zst` (Linux/Darwin) and +`-pgo-full.tar.zst` release tarballs, placing them in the +`python/licenses` directory of the InfluxDB build and generating a +`python/LICENSE.md` file with provenance information. + +Linux builds are dynamically linked against [`glibc`](https://www.gnu.org/software/libc/) +(which is permitted by the LGPL without copyleft attachment). InfluxDB does not +statically link against `glibc` nor does it redistribute `libc` (et al) in +official builds. + + +### Why not just statically link with, eg, MUSL? + +In an ideal world, InfluxDB would build against a version of +`python-build-standalone` and statically link against it and not have to worry +about dynamic library compatibility. Unfortunately, this is not possible for +many reasons: + + * static `python-build-standalone` builds for Darwin are [not available](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst) + * static `python-build-standalone` builds for Windows are [not stable](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst) + * static `python-build-standalone` builds for Linux/arm64 (aarch64) are [not available](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst) + * static `python-build-standalone` builds for Linux/amd64 (x86_64) are + available using MUSL libc, but: + * because they are static, they [cannot load compiled Python extensions](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst) + (aka, 'wheels' that have compiled C, Rust, etc code instead of pure python) + outside of the Python standard library, greatly diminishing the utility of + the processing engine + * there are historical [performance 
issues](https://edu.chainguard.dev/chainguard/chainguard-images/about/images-compiled-programs/glibc-vs-musl/#python-builds) with python and MUSL + +It is theoretically possible to statically link `glibc`, but in practice this +is technically very problematic and statically linking `glibc` has copyleft +attachment. + + +### What about alpine? + +Because MUSL can't be used with `python-build-standalone` without crippling the +InfluxDB processing engine, MUSL builds that are compatible with Alpine are not +available at this time. Alpine users can choose one of: + + * build InfluxDB on Alpine against Alpine's python + * run InfluxDB within a chroot that contains `glibc` + * run InfluxDB with [gcompat](https://git.adelielinux.org/adelie/gcompat) (untested) + +See https://wiki.alpinelinux.org/wiki/Running_glibc_programs for details. + +InfluxData may provide Alpine builds at a future date. + + +### GLIBC portability is a problem. How will you address that? + +`glibc` is designed with portability and uses 'compat symbols' to achieve +[backward compatibility](https://developers.redhat.com/blog/2019/08/01/how-the-gnu-c-library-handles-backward-compatibility). +Most 3rd party applications for Linux use the system's `glibc` in some fashion +and this is possible because of 'compat symbols' and this has worked very well +for many, many years. + +In essence, 'compat symbols' let `glibc` and the linker choose a particular +implementation of the function. All symbols in `glibc` are versioned and when a +library function changes in an incompatible way, `glibc` keeps the old +implementation in place (with the old symbol version) while adding the new +implementation with a new symbol version. In this manner, if an application is +compiled and linked against `glibc` 2.27, it will only ever lookup symbols that +are 2.27 or earlier. When 2.28 comes out, it updates any symbols it needs to to +2.28, leaving the rest as they are. 
When the application linked against 2.27 +runs on a system with 2.28, everything is ok since 2.28 will resolve all the +2.27 symbols in the expected way the application needs. + +Where portability becomes a problem is when the application is linked against a +newer version of `glibc` than is on the system. If the aforementioned +application compiled and linked against 2.27 was run on a system with 2.19, it +would fail to run because the symbol versions it is looking up (ie, anything +from 2.20 and later) are not available. + +Unfortunately for developers seeking portability, compiling and linking against +the system's `glibc` means the application will reference the latest available +symbols in that `glibc`. There is no facility for telling the linker to only +use symbols from a particular `glibc` version and earlier. It's also difficult +to tell the linker to use an alternate `glibc` separate from the system's. As a +result, `glibc`-using software seeking wide Linux portability typically needs +to be compiled on an older system with a `glibc` with the desired version. + +`python-build-standalone` and `rust` both support systems with `glibc` 2.17+, +which covers distributions going back to 2014 (CentOS/RHEL 7 (EOL), Debian 8 +(Jessie; EOL), Ubuntu 14.04 LTS (EOL), Fedora 21, etc). + +Certain InfluxDB alpha releases are compiled against a too new `glibc` (2.36). +This will be addressed before release. + + +### How does InfluxDB find the correct libpython and the python runtime? + +For the best user experience, users should not have to perform any extra setup +to use the InfluxDB processing engine. This is achieved by: + + * Using an appropriate `PYO3_CONFIG_FILE` file during the build (see 'Official + builds', above) + * Build artifacts putting the runtime in an expected location (see 'Official + builds', above) + * At runtime, ensuring that Linux and Darwin binaries look for the runtime in + the expected location.
Ideally this would be done with linker arguments at + build time, but current (alpha) builds adjust the library search paths like + so: + + ```sh + # linux + $ patchelf --add-rpath '$ORIGIN/python/lib' target/.../influxdb3 + $ patchelf --add-rpath '$ORIGIN/../lib/influxdb3/python/lib' target/.../influxdb3 + + # osx + $ install_name_tool -change '/install/lib/libpython3.NN.dylib' \ + '@executable_path/python/lib/libpythonNN.dylib' target/.../influxdb3 + ``` + + * The Windows `zip` file for the current (alpha) builds has copies of the + top-level DLL files from the 'python/' directory alongside `influxdb3`. + Windows requires that the dynamically linked DLLs needed by the application + are either in the same directory as the binary or found somewhere in `PATH` + (and open source tooling doesn't seem to support modifying this). For user + convenience, the `*.dll` files are shipped alongside the binary on Windows + to avoid having to setup the `PATH`. This may be addressed in a future + release + + +### What limitations are there? + +See https://github.com/influxdata/influxdb/issues?q=is%3Aissue%20state%3Aopen%20label%3Av3 From 0cdcf96932f92361b9430905dada662cc2eaa384 Mon Sep 17 00:00:00 2001 From: Jamie Strandboge Date: Thu, 13 Feb 2025 09:10:18 -0600 Subject: [PATCH 13/19] chore: add a few more details to README_processing_engine.md --- README_processing_engine.md | 35 +++++++++++++++++++++++++++++------ 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/README_processing_engine.md b/README_processing_engine.md index d11c3bba810..3c33006e4e5 100644 --- a/README_processing_engine.md +++ b/README_processing_engine.md @@ -76,7 +76,7 @@ well as a few OS-specific libraries.
Specifically: well as `glibc` (currently compatible with `glibc` 2.36+ (though 2.35 is known to work; future releases will be built against an earlier `glibc` release to improve compatibility)) - * Darwin (seen with `otool -L`; cross-compiled with `osxcross`): + * Darwin (seen with `otool -L`; cross-compiled with [osxcross](https://github.com/tpoechtrager/osxcross)): * `python-build-standalone` is linked against: * `CoreFoundation.framework/Versions/A/CoreFoundation` compatibility version 150.0.0 @@ -255,12 +255,12 @@ In an ideal world, InfluxDB would build against a version of about dynamic library compatibility. Unfortunately, this is not possible for many reasons: - * static `python-build-standalone` builds for Darwin are [not available](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst) - * static `python-build-standalone` builds for Windows are [not stable](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst) + * static `python-build-standalone` builds for Darwin are [not available](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst) and doing so may have [license implications](https://github.com/astral-sh/python-build-standalone/blob/main/docs/quirks.rst#linking-static-library-on-macos) + * static `python-build-standalone` builds for Windows are [not stable](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst) and considered [brittle](https://github.com/astral-sh/python-build-standalone/blob/main/docs/quirks.rst#windows-static-distributions-are-extremely-brittle) * static `python-build-standalone` builds for Linux/arm64 (aarch64) are [not available](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst) * static `python-build-standalone` builds for Linux/amd64 (x86_64) are available using MUSL libc, but: - * because they are static, they [cannot load compiled Python 
extensions](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst) + * because they are static, they [cannot load compiled Python extensions](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst) which is a limitation of [ELF](https://github.com/astral-sh/python-build-standalone/blob/main/docs/quirks.rst#static-linking-of-musl-libc-prevents-extension-module-library-loading) (aka, 'wheels' that have compiled C, Rust, etc code instead of pure python) outside of the Python standard library, greatly diminishing the utility of the processing engine @@ -349,16 +349,39 @@ to use the InfluxDB processing engine. This is achieved by: # osx $ install_name_tool -change '/install/lib/libpython3.NN.dylib' \ '@executable_path/python/lib/libpythonNN.dylib' target/.../influxdb3 + $ rcodesign sign target/.../influxdb3 # only with osxcross' install_name_tool ``` + This is required, in part, due to how `python-build-standalone` is + [built](https://github.com/astral-sh/python-build-standalone/blob/main/docs/quirks.rst#references-to-build-time-paths). + When using `osxcross`'s version of `install_name_tool`, you must also use + `rcodesign` from [apple-codesign](https://crates.io/crates/apple-codesign) + to re-sign the binaries (Apple's `install_name_tool` does this + automatically). Rust may gain [support](https://github.com/rust-lang/cargo/issues/5077) + for setting arbitrary rpaths at some point. + * The Windows `zip` file for the current (alpha) builds has copies of the top-level DLL files from the 'python/' directory alongside `influxdb3`. Windows requires that the dynamically linked DLLs needed by the application are either in the same directory as the binary or found somewhere in `PATH` (and open source tooling doesn't seem to support modifying this). For user convenience, the `*.dll` files are shipped alongside the binary on Windows - to avoid having to setup the `PATH`.
This may be addressed in a future - release + to avoid having to setup the `PATH`. Rust believes this shouldn't be handled + by [rustc](https://github.com/rust-lang/cargo/issues/1500). This may be + addressed in a future release. + + +### There is no `pip.exe` on Windows. Why? + +From [upstream](https://github.com/astral-sh/python-build-standalone/blob/main/docs/quirks.rst#no-pipexe-on-windows): +"The Windows distributions have pip installed however no `Scripts/pip.exe`, +`Scripts/pip3.exe`, and `Scripts/pipX.Y.exe` files are provided because the way +these executables are built isn't portable. (It might be possible to change how +these are built to make them portable.) + +To use pip, run `python.exe -m pip`. (It is generally a best practice to invoke +pip via `python -m pip` on all platforms so you can be explicit about the +python executable that pip uses.)" ### What limitations are there? From 1b4cbd1bfc3c0482101ff63452340422c4ad50cb Mon Sep 17 00:00:00 2001 From: Jamie Strandboge Date: Thu, 13 Feb 2025 12:42:05 -0600 Subject: [PATCH 14/19] fix(ci): use patchelf --set-rpath Not all patchelf versions support --add-rpath for appending to the RPATH, but --set-rpath can be used with a colon-separated list. Use --set-rpath first for maximum compatibility.
--- .circleci/config.yml | 9 +++------ README_processing_engine.md | 3 +-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index f49a19e67d2..20c6dbfa296 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -364,12 +364,9 @@ jobs: # XXX: better to use 'cargo:rustc-link-arg=-Wl,-rpath,$ORIGIN/python/lib' name: adjust RPATH for linux command: | - # for tarballs - echo "Running: patchelf --add-rpath '$ORIGIN/python/lib' '${PWD}/target/<< parameters.target >>/<< parameters.profile >>/influxdb3'" - patchelf --add-rpath '$ORIGIN/python/lib' "${PWD}/target/<< parameters.target >>/<< parameters.profile >>/influxdb3" - # for deb/rpm installs - echo "Running: patchelf --add-rpath '$ORIGIN/../lib/influxdb3/python/lib' '${PWD}/target/<< parameters.target >>/<< parameters.profile >>/influxdb3'" - patchelf --add-rpath '$ORIGIN/../lib/influxdb3/python/lib' "${PWD}/target/<< parameters.target >>/<< parameters.profile >>/influxdb3" + # tarballs need $ORIGIN/python/lib, deb/rpm need $ORIGIN/../lib/influxdb3/python/lib + echo "Running: patchelf --set-rpath '$ORIGIN/python/lib:$ORIGIN/../lib/influxdb3/python/lib' '${PWD}/target/<< parameters.target >>/<< parameters.profile >>/influxdb3'" + patchelf --set-rpath '$ORIGIN/python/lib:$ORIGIN/../lib/influxdb3/python/lib' "${PWD}/target/<< parameters.target >>/<< parameters.profile >>/influxdb3" - when: condition: equal: [ << parameters.target >>, aarch64-apple-darwin ] diff --git a/README_processing_engine.md b/README_processing_engine.md index 3c33006e4e5..8eacc8c9b78 100644 --- a/README_processing_engine.md +++ b/README_processing_engine.md @@ -343,8 +343,7 @@ to use the InfluxDB processing engine. 
This is achieved by: ```sh # linux - $ patchelf --add-rpath '$ORIGIN/python/lib' target/.../influxdb3 - $ patchelf --add-rpath '$ORIGIN/../lib/influxdb3/python/lib' target/.../influxdb3 + $ patchelf --set-rpath '$ORIGIN/python/lib:$ORIGIN/../lib/influxdb3/python/lib' target/.../influxdb3 # osx $ install_name_tool -change '/install/lib/libpython3.NN.dylib' \ From a7fbacc2c2af04f2890c1daeedd6378d770f1d76 Mon Sep 17 00:00:00 2001 From: Jamie Strandboge Date: Thu, 13 Feb 2025 13:19:30 -0600 Subject: [PATCH 15/19] chore: update README_processing_engine.md for standalone local builds --- README_processing_engine.md | 64 +++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/README_processing_engine.md b/README_processing_engine.md index 8eacc8c9b78..772872f7e1b 100644 --- a/README_processing_engine.md +++ b/README_processing_engine.md @@ -175,6 +175,70 @@ $ source /path/to/venv/bin/activate # server $ /here/influxdb3 test schedule_plugin -d foo test-requests.py # client ``` +### Local development with python-build-standalone + +Local development with python-build-standalone currently consists of: + +1. download python-build-standalone and unpack it somewhere + * get from https://github.com/astral-sh/python-build-standalone/releases + * based on your host OS, choose one of `aarch64-apple-darwin-install_only_stripped.tar.gz`, `aarch64-unknown-linux-gnu-install_only_stripped.tar.gz`, `x86_64-pc-windows-msvc-shared-install_only_stripped.tar.gz`, `x86_64-unknown-linux-gnu-install_only_stripped.tar.gz` +2. create `pyo3_config_file.txt` to match the unpacked dir and downloaded python version. Eg, if downloaded and unpacked a 3.11.x version to `/tmp/python`: + + ``` + $ cat ./pyo3_config_file.txt + implementation=CPython + version=3.11 + shared=true + abi3=false + lib_name=python3.11 + lib_dir=/tmp/python/lib + executable=/tmp/python/bin/python3.11 + pointer_width=64 + build_flags= + suppress_build_script_link_lines=false + ``` + +3. 
build with: + + ``` + # note: PYO3_CONFIG_FILE must be an absolute path + $ PYO3_CONFIG_FILE=${PWD}/pyo3_config_file.txt cargo build --features "aws,gcp,azure,jemalloc_replacing_malloc,system-py" + ``` + +4. Linux/OSX: patch up the binary to find libpython: + + ``` + # linux + $ patchelf --set-rpath '$ORIGIN/python/lib' ./target/<profile>/influxdb3 + + # osx (be sure to match the libpython version with what you downloaded) + $ install_name_tool -change '/install/lib/libpython3.11.dylib' '@executable_path/python/lib/libpython3.11.dylib' ./target/<profile>/influxdb3 + ``` + +5. Linux/OSX: put the python runtime in the expected location (XXX: may be + possible at run time to see where the libpython we are using is and adjust + the code to base the location of the runtime on that). Eg, if unpacked to + `/tmp/python`: + + ``` + $ test -e ./target/<profile>/python || ln -s /tmp/python ./target/<profile>/python + ``` + +6. run with: + + ``` + $ mkdir -p /path/to/plugin/dir + + # linux and osx (if can't find libpython or the runtime, check previous steps) + $ ./target/<profile>/influxdb3 ... --plugin-dir /path/to/plugin/dir + + # windows requires moving the binary into the python-build-standalone unpack directory + $ cp ./target/<profile>/influxdb3 \path\to\python-standalone\python + # run influxdb with + $ \path\to\python-standalone\python\influxdb3.exe ... --plugin-dir \path\to\plugin\dir + ``` + + ## Discussion ### Why python-build-standalone?
From a99dd8ec0c672bb5313066cca0468f257978d103 Mon Sep 17 00:00:00 2001 From: Jamie Strandboge Date: Thu, 13 Feb 2025 14:46:17 -0600 Subject: [PATCH 16/19] fix(Dockerfile): also use patchelf --set-rpath --- Dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3ded96b702a..b5018dff2e6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -9,7 +9,6 @@ RUN apt update \ && apt install --yes binutils build-essential curl pkg-config libssl-dev clang lld git patchelf protobuf-compiler zstd \ && rm -rf /var/lib/{apt,dpkg,cache,log} - # Build influxdb3 COPY . /influxdb3 WORKDIR /influxdb3 @@ -47,7 +46,7 @@ RUN \ PYO3_CONFIG_FILE="/influxdb3/python-artifacts/$PBS_TARGET/pyo3_config_file.txt" cargo build --target-dir /influxdb3/target --package="$PACKAGE" --profile="$PROFILE" --no-default-features --features="$FEATURES" && \ objcopy --compress-debug-sections "target/$PROFILE/$PACKAGE" && \ cp "/influxdb3/target/$PROFILE/$PACKAGE" "/root/$PACKAGE" && \ - patchelf --add-rpath '$ORIGIN/../lib/influxdb3/python/lib' "/root/$PACKAGE" && \ + patchelf --set-rpath '$ORIGIN/python/lib:$ORIGIN/../lib/influxdb3/python/lib' "/root/$PACKAGE" && \ cp -a "/influxdb3/python-artifacts/$PBS_TARGET/python" /root/python && \ du -cshx /usr/local/rustup /usr/local/cargo/registry /usr/local/cargo/git /influxdb3/target From 08bc73988df2b5687da50db2d7dc3dba2e134c06 Mon Sep 17 00:00:00 2001 From: Jamie Strandboge Date: Thu, 13 Feb 2025 14:47:27 -0600 Subject: [PATCH 17/19] chore: update code comment for accuracy --- influxdb3_processing_engine/src/virtualenv.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/influxdb3_processing_engine/src/virtualenv.rs b/influxdb3_processing_engine/src/virtualenv.rs index 56ed71b83ea..fb233aa83c8 100644 --- a/influxdb3_processing_engine/src/virtualenv.rs +++ b/influxdb3_processing_engine/src/virtualenv.rs @@ -17,7 +17,7 @@ pub enum VenvError { } fn get_python_version() -> Result<(u8, u8), std::io::Error> { 
- // linux/osx have python3, but windows only has python. Use python since it is in all of them + // linux/osx have python3, but windows only has python let python_exe = if cfg!(target_os = "windows") { "python" } else { From 38f4f8342c34645e6cf05f3ff5b3158578eab876 Mon Sep 17 00:00:00 2001 From: Jamie Strandboge Date: Thu, 13 Feb 2025 14:52:57 -0600 Subject: [PATCH 18/19] chore: typos, grammar and formatting change in README_processing_engine.md --- README_processing_engine.md | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/README_processing_engine.md b/README_processing_engine.md index 772872f7e1b..41799efc400 100644 --- a/README_processing_engine.md +++ b/README_processing_engine.md @@ -122,15 +122,15 @@ At a high level, the build process for Official builds consists of: suppress_build_script_link_lines=false ``` - PYO3 will try to auto-detect the location which can work well with a system - python, but not with an unpacked `python-build-standalone`. While the - `PYO3_PYTHON` environment variable can be used to point to the unpacked - directory (eg, `PYO3_PYTHON=/path/to/python-standalone/python/bin/python3`), - this was not sufficient. Defining the build configuration in the - `PYO3_CONFIG_FILE` correctly worked for all supported environments with our - current build process - 4. run `PYO3_CONFIG_FILE=/path/to/pyo3_config_file.txt cargo build --features=system-py` - to build InfluxDB + PYO3 will try to auto-detect the location which can work well with a system + python, but not with an unpacked `python-build-standalone`. While the + `PYO3_PYTHON` environment variable can be used to point to the unpacked + directory (eg, + `PYO3_PYTHON=/path/to/python-standalone/python/bin/python3`), this was not + sufficient. Defining the build configuration in the `PYO3_CONFIG_FILE` + correctly worked for all supported environments with our current build + process + 4. 
run `PYO3_CONFIG_FILE=/path/to/pyo3_config_file.txt cargo build --features=system-py` to build InfluxDB 5. adjust the library search paths for Linux and Darwin so `libpython` can found (see 'Discussion', below) 6. create official build artifacts: @@ -180,8 +180,8 @@ $ /here/influxdb3 test schedule_plugin -d foo test-requests.py # client Local development with python-build-standalone currently consists of: 1. download python-build-standalone and unpack it somewhere - * get from https://github.com/astral-sh/python-build-standalone/releases - * based on your host OS, choose one of `aarch64-apple-darwin-install_only_stripped.tar.gz`, `aarch64-unknown-linux-gnu-install_only_stripped.tar.gz`, `x86_64-pc-windows-msvc-shared-install_only_stripped.tar.gz`, `x86_64-unknown-linux-gnu-install_only_stripped.tar.gz` + * get from https://github.com/astral-sh/python-build-standalone/releases + * based on your host OS, choose one of `aarch64-apple-darwin-install_only_stripped.tar.gz`, `aarch64-unknown-linux-gnu-install_only_stripped.tar.gz`, `x86_64-pc-windows-msvc-shared-install_only_stripped.tar.gz`, `x86_64-unknown-linux-gnu-install_only_stripped.tar.gz` 2. create `pyo3_config_file.txt` to match the unpacked dir and downloaded python version. 
Eg, if downloaded and unpacked a 3.11.x version to `/tmp/python`: ``` @@ -324,10 +324,10 @@ many reasons: * static `python-build-standalone` builds for Linux/arm64 (aarch64) are [not available](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst) * static `python-build-standalone` builds for Linux/amd64 (x86_64) are available using MUSL libc, but: - * because they are static, they [cannot load compiled Python extensions](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst) which is a limitation of [ELF](https://github.com/astral-sh/python-build-standalone/blob/main/docs/quirks.rst#static-linking-of-musl-libc-prevents-extension-module-library-loading) + * because they are static, they [cannot load compiled Python extensions](https://github.com/astral-sh/python-build-standalone/blob/main/docs/running.rst) (aka, 'wheels' that have compiled C, Rust, etc code instead of pure python) outside of the Python standard library, greatly diminishing the utility of - the processing engine + the processing engine. This is a limitation of [ELF](https://github.com/astral-sh/python-build-standalone/blob/main/docs/quirks.rst#static-linking-of-musl-libc-prevents-extension-module-library-loading) * there are historical [performance issues](https://edu.chainguard.dev/chainguard/chainguard-images/about/images-compiled-programs/glibc-vs-musl/#python-builds) with python and MUSL It is theoretically possible to statically link `glibc`, but in practice this @@ -341,9 +341,9 @@ Because MUSL can't be used with `python-build-standalone` without crippling the InfluxDB processing engine, MUSL builds that are compatible with Alpine are not available at this time. 
Alpine users can choose one of: - * build InfluxDB on Alpine against Alpine's python - * run InfluxDB within a chroot that contains `glibc` - * run InfluxDB with [gcompat](https://git.adelielinux.org/adelie/gcompat) (untested) + * build InfluxDB locally on Alpine against Alpine's system python + * run official InfluxDB within a chroot that contains `glibc` + * run official InfluxDB with [gcompat](https://git.adelielinux.org/adelie/gcompat) (untested) See https://wiki.alpinelinux.org/wiki/Running_glibc_programs for details. @@ -364,7 +364,7 @@ library function changes in an incompatible way, `glibc` keeps the old implementation in place (with the old symbol version) while adding the new implementation with a new symbol version. In this manner, if an application is compiled and linked against `glibc` 2.27, it will only ever lookup symbols that -are 2.27 or earlier. When 2.28 comes out, it updates any symbols it needs to to +are 2.27 or earlier. When 2.28 comes out, it updates any symbols it needs to 2.28, leaving the rest as they are. When the application linked against 2.27 runs on a system with 2.28, everything is ok since 2.28 will resolve all the 2.27 symbols in the expected way the application needs. 
From 21a6b6c5f5798273f0f57c2bb28de9935f9bab54 Mon Sep 17 00:00:00 2001 From: Jamie Strandboge Date: Thu, 13 Feb 2025 15:58:57 -0600 Subject: [PATCH 19/19] chore: update README_processing_engine.md for Docker arm64 (thanks Jackson) --- README_processing_engine.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README_processing_engine.md b/README_processing_engine.md index 41799efc400..843cef5c61e 100644 --- a/README_processing_engine.md +++ b/README_processing_engine.md @@ -63,7 +63,7 @@ The following operating systems and architectures are currently supported: * Linux amd64/arm64 (`tar.gz`, `deb` and `rpm`) * Darwin arm64 (`tar.gz`) * Windows amd64 (`zip`) - * Docker (Linux amd64) + * Docker (Linux amd64/arm64) Due to constraints with `python-build-standalone` and statically linking, all builds are dynamically linked to `python-build-standalone`'s `libpython` as