From 4e0d94264e7db3e3b8abc8b56c1558b54fc020de Mon Sep 17 00:00:00 2001 From: Benjamin Pannell Date: Mon, 9 Dec 2024 00:04:26 +0000 Subject: [PATCH] feat: Add support for backing up single repositories using from: repos// --- README.md | 15 +++ examples/config.yaml | 9 ++ src/helpers/github.rs | 60 ++++++++++- src/sources/github_releases.rs | 176 +++++++++++++++++++-------------- src/sources/github_repo.rs | 77 ++++++++++----- 5 files changed, 237 insertions(+), 100 deletions(-) diff --git a/README.md b/README.md index 589a877..acfe48e 100644 --- a/README.md +++ b/README.md @@ -47,18 +47,33 @@ backups: password: "" properties: query: "affiliation=owner" # Additional query parameters to pass to GitHub when fetching repositories + - kind: github/repo from: "users/another-user" to: /backups/friend credentials: !Token "your_github_token" + - kind: github/repo from: "orgs/my-org" to: /backups/work filter: '!repo.fork && repo.name contains "awesome"' + - kind: github/release from: "orgs/my-org" to: /backups/releases filter: '!release.prerelease && !asset.source-code' + + # You can also backup single repositories directly if you wish + - kind: github/repo + from: "repos/my-org/repo" + to: /backups/work + + # This is particularly useful for backing up release artifacts for + # specific projects. + - kind: github/release + from: "repos/my-org/repo" + to: /backups/releases + filter: '!release.prerelease' ``` ### OpenTelemetry Reporting diff --git a/examples/config.yaml b/examples/config.yaml index f5fea37..09a1934 100644 --- a/examples/config.yaml +++ b/examples/config.yaml @@ -29,3 +29,12 @@ backups: - kind: github/star from: users/notheotherben to: /backup/github + + - kind: github/repo + from: repos/SierraSoftworks/github-backup + to: /backup/github + + - kind: github/release + from: repos/SierraSoftworks/github-backup + to: /backup/github-releases + filter: '!release.prerelease' diff --git a/src/helpers/github.rs b/src/helpers/github.rs index 0e27300..cf9f21e 100644 --- a/src/helpers/github.rs +++ b/src/helpers/github.rs @@ -571,8 +571,53 @@ impl MetadataSource for GitHubReleaseAsset { } } +#[derive(Clone, Debug, PartialEq)] +pub enum GitHubRepoSourceKind { + CurrentUser, + User(String), + Org(String), + Repo(String), +} + +impl GitHubRepoSourceKind { + pub fn api_endpoint(&self, artifact_kind: GitHubArtifactKind) -> String { + match self { + GitHubRepoSourceKind::CurrentUser => format!("user/{}", artifact_kind.api_endpoint()), + GitHubRepoSourceKind::User(u) => { + format!("users/{}/{}", u, artifact_kind.api_endpoint()) + } + GitHubRepoSourceKind::Org(o) => format!("orgs/{}/{}", o, artifact_kind.api_endpoint()), + GitHubRepoSourceKind::Repo(r) => format!("repos/{}", r), + } + } +} + +impl std::str::FromStr for GitHubRepoSourceKind { + type Err = crate::Error; + + fn from_str(s: &str) -> Result { + let num_of_slashes = s.chars().filter(|c| *c == '/').count(); + + match s { + "user" => Ok(GitHubRepoSourceKind::CurrentUser), + s if s.starts_with("users/") && num_of_slashes == 1 => { + Ok(GitHubRepoSourceKind::User(s[6..].to_string())) + } + s if s.starts_with("orgs/") && num_of_slashes == 1 => { + Ok(GitHubRepoSourceKind::Org(s[5..].to_string())) + } + s if s.starts_with("repos/") && num_of_slashes == 2 => { + Ok(GitHubRepoSourceKind::Repo(s[6..].to_string())) + } + _ => Err(errors::user( + &format!("The 'from' declaration '{}' was not valid for a GitHub repository source.", s), + "Make sure you provide either 'user', 'users/', 'orgs/', or 'repos//'")), + } + } +} + #[allow(dead_code)] -#[derive(PartialEq, Debug, Clone, serde::Serialize, serde::Deserialize)] +#[derive(PartialEq, Debug, Copy, Clone, serde::Serialize, serde::Deserialize)] pub enum GitHubArtifactKind { #[serde(rename = "github/repo")] Repo, @@ -734,4 +779,17 @@ mod tests { assert_eq!(kind.as_str(), kind_str); assert_eq!(kind.api_endpoint(), url); } + + #[rstest] + #[case("user", GitHubRepoSourceKind::CurrentUser)] + #[case("users/notheotherben", GitHubRepoSourceKind::User("notheotherben".into()))] + #[case("orgs/sierrasoftworks", GitHubRepoSourceKind::Org("sierrasoftworks".into()))] + #[case("repos/sierrasoftworks/github-backup", GitHubRepoSourceKind::Repo("sierrasoftworks/github-backup".into()))] + fn test_deserialize_gh_repo_source_kind( + #[case] kind_str: &str, + #[case] expected_kind: GitHubRepoSourceKind, + ) { + let kind: GitHubRepoSourceKind = kind_str.parse().unwrap(); + assert_eq!(kind, expected_kind); + } } diff --git a/src/sources/github_releases.rs b/src/sources/github_releases.rs index 64c7c87..6a451bf 100644 --- a/src/sources/github_releases.rs +++ b/src/sources/github_releases.rs @@ -6,7 +6,7 @@ use crate::{ entities::{Credentials, HttpFile}, errors::{self}, helpers::{ - github::{GitHubArtifactKind, GitHubRelease, GitHubRepo}, + github::{GitHubArtifactKind, GitHubRelease, GitHubRepo, GitHubRepoSourceKind}, GitHubClient, }, policy::BackupPolicy, @@ -25,29 +25,103 @@ impl GitHubReleasesSource { } } +impl GitHubReleasesSource { + fn load_releases<'a>( + &'a self, + policy: &'a BackupPolicy, + repo: &'a GitHubRepo, + cancel: &'a AtomicBool, + ) -> impl Stream> + 'a { + async_stream::stream! { + if !repo.has_downloads { + return; + } + + let releases_url = format!("{}/releases", repo.url); + + for await release in self.client.get_paginated::(releases_url, &policy.credentials, cancel) { + if let Err(e) = release { + yield Err(e); + continue; + } + + let release: GitHubRelease = release.unwrap(); + + if let Some(tarball_url) = &release.tarball_url { + yield Ok(HttpFile::new(format!("{}/{}/source.tar.gz", &repo.full_name, &release.tag_name), tarball_url) + .with_metadata_source(repo) + .with_metadata_source(&release) + .with_metadata("asset.source-code", true) + .with_credentials(match &policy.credentials { + Credentials::Token(token) => Credentials::UsernamePassword { + username: token.clone(), + password: "".to_string(), + }, + creds => creds.clone(), + }) + .with_last_modified(release.published_at)); + } + + for asset in release.assets.iter() { + if cancel.load(std::sync::atomic::Ordering::Relaxed) { + return; + } + + if asset.state != "uploaded" { + continue; + } + + let asset_url = format!("{}/releases/assets/{}", repo.url, asset.id); + + yield Ok(HttpFile::new(format!("{}/{}/{}", &repo.full_name, &release.tag_name, &asset.name), asset_url) + .with_content_type(Some("application/octet-stream".to_string())) + .with_credentials(match &policy.credentials { + Credentials::Token(token) => Credentials::UsernamePassword { + username: token.clone(), + password: "".to_string(), + }, + creds => creds.clone(), + }) + .with_last_modified(Some(asset.updated_at)) + .with_metadata_source(repo) + .with_metadata_source(&release) + .with_metadata_source(asset)); + } + } + } + } +} + impl BackupSource for GitHubReleasesSource { fn kind(&self) -> &str { GitHubArtifactKind::Release.as_str() } fn validate(&self, policy: &BackupPolicy) -> Result<(), crate::Error> { - let target = policy.from.as_str().trim_matches('/'); + let target: GitHubRepoSourceKind = policy.from.as_str().parse()?; + match target { - "" => Err(errors::user( - "The target field is required for GitHub repository backups.", - "Please provide a target field in the policy using the format 'users/' or 'orgs/'.", + GitHubRepoSourceKind::User(u) if u.is_empty() => Err(errors::user( + &format!( + "Your 'from' target '{}' is not a valid GitHub username.", + policy.from.as_str() + ), + "Make sure you provide a valid GitHub username in the 'from' field of your policy.", )), - - t if t.chars().filter(|c| *c == '/').count() > 1 => Err(errors::user( - &format!("The target field '{target}' contains too many segments."), - "Please provide a target field in the policy using the format 'users/' or 'orgs/'.", + GitHubRepoSourceKind::Org(org) if org.is_empty() => Err(errors::user( + &format!( + "Your 'from' target '{}' is not a valid GitHub organization name.", + policy.from.as_str() + ), + "Make sure you provide a valid GitHub organization name in the 'from' field of your policy.", )), - - t if !t.starts_with("users/") && !t.starts_with("orgs/") => Err(errors::user( - &format!("The target field '{target}' does not include a valid user or org specifier."), - "Please specify either 'users/' or 'orgs/' as your target.", + GitHubRepoSourceKind::Repo(repo) if repo.is_empty() => Err(errors::user( + &format!( + "Your 'from' target '{}' is not a fully qualified GitHub repository name.", + policy.from.as_str() + ), + "Make sure you provide a fully qualified GitHub repository name in the 'from' field of your policy.", )), - _ => Ok(()), } } @@ -57,80 +131,38 @@ impl BackupSource for GitHubReleasesSource { policy: &'a BackupPolicy, cancel: &'a AtomicBool, ) -> impl Stream> + 'a { + let target: GitHubRepoSourceKind = policy.from.as_str().parse().unwrap(); let url = format!( - "{}/{}/{}?{}", + "{}/{}?{}", policy .properties .get("api_url") .unwrap_or(&"https://api.github.com".to_string()) .trim_end_matches('/'), - &policy.from.trim_matches('/'), - GitHubArtifactKind::Release.api_endpoint(), + target.api_endpoint(GitHubArtifactKind::Release), policy.properties.get("query").unwrap_or(&"".to_string()) - ); + ) + .trim_end_matches('?') + .to_string(); async_stream::stream! { - for await repo in self.client.get_paginated::(url, &policy.credentials, cancel) { - if let Err(e) = repo { - yield Err(e); - continue; - } - - let repo: GitHubRepo = repo.unwrap(); + if matches!(target, GitHubRepoSourceKind::Repo(_)) { + let repo: GitHubRepo = self.client.get(url, &policy.credentials, cancel).await?; - if !repo.has_downloads { - continue; + for await file in self.load_releases(policy, &repo, cancel) { + yield file; } - - let releases_url = format!("{}/releases", repo.url); - - for await release in self.client.get_paginated::(releases_url, &policy.credentials, cancel) { - if let Err(e) = release { + } else { + for await repo in self.client.get_paginated::(url, &policy.credentials, cancel) { + if let Err(e) = repo { yield Err(e); continue; } - let release: GitHubRelease = release.unwrap(); - - if let Some(tarball_url) = &release.tarball_url { - yield Ok(HttpFile::new(format!("{}/{}/source.tar.gz", &repo.full_name, &release.tag_name), tarball_url) - .with_metadata_source(&repo) - .with_metadata_source(&release) - .with_metadata("asset.source-code", true) - .with_credentials(match &policy.credentials { - Credentials::Token(token) => Credentials::UsernamePassword { - username: token.clone(), - password: "".to_string(), - }, - creds => creds.clone(), - }) - .with_last_modified(release.published_at)); - } + let repo: GitHubRepo = repo.unwrap(); - for asset in release.assets.iter() { - if cancel.load(std::sync::atomic::Ordering::Relaxed) { - return; - } - - if asset.state != "uploaded" { - continue; - } - - let asset_url = format!("{}/releases/assets/{}", repo.url, asset.id); - - yield Ok(HttpFile::new(format!("{}/{}/{}", &repo.full_name, &release.tag_name, &asset.name), asset_url) - .with_content_type(Some("application/octet-stream".to_string())) - .with_credentials(match &policy.credentials { - Credentials::Token(token) => Credentials::UsernamePassword { - username: token.clone(), - password: "".to_string(), - }, - creds => creds.clone(), - }) - .with_last_modified(Some(asset.updated_at)) - .with_metadata_source(&repo) - .with_metadata_source(&release) - .with_metadata_source(asset)); + for await file in self.load_releases(policy, &repo, cancel) { + yield file; } } } diff --git a/src/sources/github_repo.rs b/src/sources/github_repo.rs index 5baa6f2..047ce40 100644 --- a/src/sources/github_repo.rs +++ b/src/sources/github_repo.rs @@ -5,7 +5,11 @@ use tokio_stream::Stream; use crate::{ entities::GitRepo, errors::{self}, - helpers::{github::GitHubArtifactKind, github::GitHubRepo, GitHubClient}, + helpers::{ + github::GitHubRepo, + github::{GitHubArtifactKind, GitHubRepoSourceKind}, + GitHubClient, + }, policy::BackupPolicy, BackupSource, }; @@ -22,31 +26,39 @@ impl BackupSource for GitHubRepoSource { } fn validate(&self, policy: &BackupPolicy) -> Result<(), crate::Error> { - let target = policy.from.as_str().trim_matches('/'); + let target: GitHubRepoSourceKind = policy.from.as_str().parse()?; + match target { - "" => Err(errors::user( - "The target field is required for GitHub repository backups.", - "Please provide a target field in the policy using the format 'users/' or 'orgs/'.", + GitHubRepoSourceKind::Org(_) if self.artifact_kind == GitHubArtifactKind::Star => return Err(errors::user( + "You cannot use an organization as the source for a starred repository backup.", + "Either use `from: user` or `from: users/` when using a github/stars source kind.", )), - - t if t.chars().filter(|c| *c == '/').count() > 1 => Err(errors::user( - &format!("The target field '{target}' contains too many segments."), - "Please provide a target field in the policy using the format 'users/' or 'orgs/'.", + GitHubRepoSourceKind::Repo(_) if self.artifact_kind == GitHubArtifactKind::Star => return Err(errors::user( + "You cannot use a repository as the source for a starred repository backup.", + "Either use `from: user` or `from: users/` when using a github/stars source kind.", )), - - t if t == "user" => Ok(()), - t if t.starts_with("users/") => Ok(()), - - t if t.starts_with("orgs/") && self.artifact_kind == GitHubArtifactKind::Star => Err(errors::user( - &format!("The target field '{target}' specifies an org which is not support for kind 'github/star'."), - "Please specify either 'users/' as your target when using 'github/star' as kind.", + GitHubRepoSourceKind::User(u) if u.is_empty() => Err(errors::user( + &format!( + "Your 'from' target '{}' is not a valid GitHub username.", + policy.from.as_str() + ), + "Make sure you provide a valid GitHub username in the 'from' field of your policy.", )), - t if t.starts_with("orgs/") => Ok(()), - - _ => Err(errors::user( - &format!("The target field '{target}' does not include a valid user or org specifier."), - "Please specify either 'user', 'users/' or 'orgs/' as your target.", + GitHubRepoSourceKind::Org(org) if org.is_empty() => Err(errors::user( + &format!( + "Your 'from' target '{}' is not a valid GitHub organization name.", + policy.from.as_str() + ), + "Make sure you provide a valid GitHub organization name in the 'from' field of your policy.", + )), + GitHubRepoSourceKind::Repo(repo) if repo.is_empty() => Err(errors::user( + &format!( + "Your 'from' target '{}' is not a fully qualified GitHub repository name.", + policy.from.as_str() + ), + "Make sure you provide a fully qualified GitHub repository name in the 'from' field of your policy.", )), + _ => Ok(()), } } @@ -55,24 +67,35 @@ impl BackupSource for GitHubRepoSource { policy: &'a BackupPolicy, cancel: &'a AtomicBool, ) -> impl Stream> + 'a { + let target: GitHubRepoSourceKind = policy.from.as_str().parse().unwrap(); let url = format!( - "{}/{}/{}?{}", + "{}/{}?{}", policy .properties .get("api_url") .unwrap_or(&"https://api.github.com".to_string()) .trim_end_matches('/'), - &policy.from.trim_matches('/'), - self.artifact_kind.api_endpoint(), + target.api_endpoint(self.artifact_kind), policy.properties.get("query").unwrap_or(&"".to_string()) - ); + ) + .trim_end_matches('?') + .to_string(); + + tracing_batteries::prelude::debug!("Calling {} to fetch repos", &url); async_stream::try_stream! { - for await repo in self.client.get_paginated::(url, &policy.credentials, cancel) { - let repo = repo?; + if matches!(target, GitHubRepoSourceKind::Repo(_)) { + let repo = self.client.get::(url, &policy.credentials, cancel).await?; yield GitRepo::new(repo.full_name.as_str(), repo.clone_url.as_str()) .with_credentials(policy.credentials.clone()) .with_metadata_source(&repo); + } else { + for await repo in self.client.get_paginated::(url, &policy.credentials, cancel) { + let repo = repo?; + yield GitRepo::new(repo.full_name.as_str(), repo.clone_url.as_str()) + .with_credentials(policy.credentials.clone()) + .with_metadata_source(&repo); + } } } }