[WIP]
NULLx76 committed Oct 21, 2024
1 parent c27c706 commit dd071a8
Showing 9 changed files with 847 additions and 638 deletions.
293 changes: 282 additions & 11 deletions Cargo.lock

Large diffs are not rendered by default.

12 changes: 10 additions & 2 deletions Cargo.toml
@@ -8,10 +8,18 @@ authors = ["Pietro Albini <[email protected]>"]
 failure = "0.1.8"
 reqwest = { version = "0.12.8", features = ["blocking", "json", "rustls-tls"], default-features = false }
 serde = "1.0.210"
-serde_derive = "1.0.210"
-serde_json = "1.0.128"
+serde_derive = "1"
+serde_json = "1"
 log = "0.4.22"
 env_logger = "0.11.5"
 csv = "1.3.0"
 ctrlc = "3.4.5"
 crossbeam-utils = "0.8.20"
+
+tokio = { version = "1.40", features = ["full", "tracing"] }
+dotenvy = "0.15.7"
+color-eyre = "0.6.3"
+tracing = "0.1.40"
+tracing-subscriber = "0.3"
+enum-map = { version = "2.7.3", features = ["serde"] }
+thiserror = "1"
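The new dependencies point at an async rewrite: tokio for the runtime, color-eyre for error reports, tracing/tracing-subscriber for logging, and dotenvy for .env loading. A minimal sketch of how a main might wire these together; this wiring is an assumption, not part of the commit:

// Hypothetical src/main.rs setup for the new dependencies; not from this commit.
use color_eyre::Result;

#[tokio::main]
async fn main() -> Result<()> {
    // Load variables from a .env file, if one exists.
    dotenvy::dotenv().ok();
    // Install pretty, backtrace-aware error reports.
    color_eyre::install()?;
    // Install a formatting `tracing` subscriber with default settings.
    tracing_subscriber::fmt::init();

    tracing::info!("scraper starting");
    Ok(())
}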
3 changes: 2 additions & 1 deletion src/config.rs
@@ -20,8 +20,9 @@
 
 use std::path::PathBuf;
 
+#[derive(Debug, Clone)]
 pub struct Config {
-    pub github_token: String,
+    pub github_token: Vec<String>,
     pub data_dir: PathBuf,
     pub timeout: Option<u64>,
 }
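Widening github_token from String to Vec<String> suggests the scraper will rotate across several API tokens. A sketch of how such a config might be loaded via dotenvy; the GITHUB_TOKENS variable name and comma separator are assumptions:

// Hypothetical loader for the widened config; GITHUB_TOKENS is an assumed name.
use std::path::PathBuf;

use crate::config::Config;

fn load_config() -> color_eyre::Result<Config> {
    // e.g. GITHUB_TOKENS="ghp_aaa,ghp_bbb" becomes vec!["ghp_aaa", "ghp_bbb"]
    let github_token = std::env::var("GITHUB_TOKENS")?
        .split(',')
        .map(|t| t.trim().to_string())
        .collect();

    Ok(Config {
        github_token,
        data_dir: PathBuf::from("data"),
        timeout: None,
    })
}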
163 changes: 113 additions & 50 deletions src/data.rs
@@ -18,99 +18,162 @@
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 // SOFTWARE.
 
+use enum_map::{Enum, EnumMap};
 use serde_derive::{Deserialize, Serialize};
+use tokio::sync::Mutex;
+use tokio::task::{spawn_blocking, JoinSet};
 
 use crate::config::Config;
 use crate::prelude::*;
-use std::collections::HashMap;
+use std::collections::BTreeMap;
+use std::fs::OpenOptions;
 use std::path::PathBuf;
-use std::sync::{Arc, Mutex};
+use std::sync::atomic::AtomicUsize;
+use std::sync::Arc;
 use std::{
-    fs::{self, File, OpenOptions},
+    fs::{self, File},
     io::{prelude::*, BufWriter},
 };
 
-#[derive(Default, Serialize, Deserialize)]
-struct State {
-    last_id: HashMap<String, usize>,
+#[derive(Debug, Enum, Serialize, Deserialize, Copy, Clone)]
+pub enum Forge {
+    Github,
 }
 
-#[derive(Serialize, Deserialize)]
+#[derive(Debug, Default, Serialize, Deserialize)]
+struct State(EnumMap<Forge, AtomicUsize>);
+
+#[derive(Debug, Serialize, Deserialize, Clone)]
 pub struct Repo {
     pub id: String,
     pub name: String,
     pub has_cargo_toml: bool,
     pub has_cargo_lock: bool,
 }
 
+#[derive(Debug, Clone)]
 pub struct Data {
-    base_dir: PathBuf,
+    data_dir: PathBuf,
 
-    csv_write_lock: Arc<Mutex<()>>,
+    state_lock: Arc<Mutex<()>>,
 
-    state_path: PathBuf,
-    state_cache: Arc<Mutex<Option<State>>>,
+    state_cache: Arc<State>,
+
+    repos_state: Arc<Mutex<EnumMap<Forge, BTreeMap<String, Repo>>>>,
 }
 
 impl Data {
-    pub fn new(config: &Config) -> Self {
-        Data {
-            base_dir: config.data_dir.clone(),
+    pub fn new(config: &Config) -> color_eyre::Result<Self> {
+        let mut data = Data {
+            data_dir: config.data_dir.clone(),
 
-            csv_write_lock: Arc::new(Mutex::new(())),
-
-            state_path: config.data_dir.join("state.json"),
-            state_cache: Arc::new(Mutex::new(None)),
-        }
-    }
+            state_lock: Arc::new(Mutex::new(())),
+            state_cache: Arc::new(State::default()),
+            repos_state: Arc::new(Mutex::new(EnumMap::default())),
+        };
 
+        // TODO: create CSV files if not exist
 
-    fn edit_state<T, F: Fn(&mut State) -> Fallible<T>>(&self, f: F) -> Fallible<T> {
-        let mut state_cache = self.state_cache.lock().unwrap();
+        let state_path = data.state_path();
+        if state_path.exists() {
+            let state_cache: State = serde_json::from_slice(&fs::read(&state_path)?)?;
 
-        if state_cache.is_none() {
-            if self.state_path.exists() {
-                *state_cache = Some(serde_json::from_slice(&fs::read(&self.state_path)?)?);
-            } else {
-                *state_cache = Some(Default::default());
-            }
+            data.state_cache = Arc::new(state_cache);
         }
 
-        let state = state_cache.as_mut().unwrap();
-        let result = f(state)?;
+        Ok(data)
+    }
 
-        let mut file = BufWriter::new(File::create(&self.state_path)?);
-        serde_json::to_writer_pretty(&mut file, &state)?;
-        file.write_all(&[b'\n'])?;
+    pub fn state_path(&self) -> PathBuf {
+        self.data_dir.join("state.json")
+    }
 
-        Ok(result)
+    pub fn csv_path(&self, forge: Forge) -> PathBuf {
+        match forge {
+            Forge::Github => self.data_dir.join("github"),
+        }
     }
 
-    pub fn get_last_id(&self, platform: &str) -> Fallible<Option<usize>> {
-        self.edit_state(|state| Ok(state.last_id.get(platform).cloned()))
+    pub fn get_last_id(&self, forge: Forge) -> usize {
+        self.state_cache.0[forge].load(std::sync::atomic::Ordering::SeqCst)
     }
 
-    pub fn set_last_id(&self, platform: &str, id: usize) -> Fallible<()> {
-        self.edit_state(|state| {
-            state.last_id.insert(platform.to_string(), id);
+    /// Store the state cache to disk, i.e. last fetched ids
+    async fn store_state_cache(&self) -> color_eyre::Result<()> {
+        let state = self.state_cache.clone();
+        let lock = self.state_lock.clone();
+        let state_path = self.state_path();
+        spawn_blocking(move || -> color_eyre::Result<()> {
+            let guard = lock.blocking_lock();
+
+            let file = File::create(state_path)?;
+            let mut file = BufWriter::new(file);
+            serde_json::to_writer_pretty(&mut file, state.as_ref())?;
+            file.write_all(b"\n")?;
+
+            drop(guard);
+
             Ok(())
         })
+        .await
+        .unwrap()
     }
 
-    pub fn store_repo(&self, platform: &str, repo: Repo) -> Fallible<()> {
-        // Ensure only one thread can write to CSV files at once
-        let _lock = self.csv_write_lock.lock().unwrap();
+    /// Stores the repos found to disk in a CSV
+    async fn store_csv(&self) -> color_eyre::Result<()> {
+        let mut repos = self.repos_state.lock().await;
 
-        let file = self.base_dir.join(format!("{}.csv", platform));
+        let mut js = JoinSet::new();
 
-        // Create the new file or append to it
-        let mut csv = if file.exists() {
-            csv::WriterBuilder::new()
-                .has_headers(false)
-                .from_writer(OpenOptions::new().append(true).open(&file)?)
-        } else {
-            csv::WriterBuilder::new().from_path(&file)?
-        };
+        for (forge, repos) in repos.iter() {
+            let path = self.csv_path(forge);
+            let repos = repos.clone(); // needed: the 'static closure below cannot borrow from the guard
+            js.spawn_blocking(move || -> color_eyre::Result<()> {
+                let mut write_headers = false;
+                if !path.exists() {
+                    File::create(&path)?;
+                    write_headers = true;
+                }
 
-        csv.serialize(repo)?;
+                let file = OpenOptions::new()
+                    .append(true)
+                    .open(path)?;
+
+                let mut writer = csv::WriterBuilder::new()
+                    .has_headers(write_headers)
+                    .from_writer(file);
+
+                for (_, repo) in repos {
+                    writer.serialize(repo)?;
+                }
+
+                Ok(())
+            });
+        }
+
+        js.join_all().await.into_iter().collect::<Result<(), _>>()?;
+
+        // Clear the map
+        repos.iter_mut().for_each(|(_, m)| m.clear());
 
         Ok(())
     }
+
+    pub async fn set_last_id(&self, forge: Forge, n: usize) -> color_eyre::Result<()> {
+        self.state_cache.0[forge].store(n, std::sync::atomic::Ordering::SeqCst);
+
+        self.store_csv().await?;
+        self.store_state_cache().await?;
+
+        Ok(())
+    }
+
+    pub async fn store_repo(&self, forge: Forge, repo: Repo) {
+        let mut repos_state = self.repos_state.lock().await;
+        repos_state[forge].insert(repo.name.clone(), repo);
+    }
 }
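Taken together, the new API buffers repos in memory per forge and only touches disk when the cursor advances. A hedged usage sketch; the caller, loop bounds, and field values are invented for illustration:

// Hypothetical caller of the new Data API; everything here is illustrative.
use crate::config::Config;
use crate::data::{Data, Forge, Repo};

async fn scrape(config: &Config) -> color_eyre::Result<()> {
    let data = Data::new(config)?;

    // Resume from the last id persisted in state.json (0 on first run).
    let start = data.get_last_id(Forge::Github);

    for id in start..start + 100 {
        // Buffered in the in-memory BTreeMap, keyed by repo name.
        data.store_repo(
            Forge::Github,
            Repo {
                id: id.to_string(),
                name: format!("example/repo-{id}"),
                has_cargo_toml: true,
                has_cargo_lock: false,
            },
        )
        .await;
    }

    // Flushes the buffered repos to CSV, then persists the new cursor.
    data.set_last_id(Forge::Github, start + 100).await?;
    Ok(())
}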
