refactor: set bootstrap data locally instead of downloading #153

Merged 1 commit on Sep 28, 2023
37 changes: 37 additions & 0 deletions Cargo.lock

(Generated file; diff not rendered by default.)

1 change: 1 addition & 0 deletions Cargo.toml
@@ -57,6 +57,7 @@ pest_derive = "2.6.0"
bytes = "1.4.0"
itertools = "0.10.5"
console = "0.15.7"
brotli = "3.3.4"

[dev-dependencies]
assert_cmd = "2.0.2" # contains helpers make executing the main binary on integration tests easier.
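
For context, the .br assets added below are committed pre-compressed; the PR itself does not include the script that produced them. A minimal sketch of that compression step, assuming the same brotli crate added above (CompressorWriter is its streaming encoder; quality 11 and window size 22 are Brotli's defaults, chosen here as plausible settings rather than taken from this PR):

```rust
use std::fs::File;
use std::io::{self, Write};

use brotli::CompressorWriter;

/// Compress one sample-data JSON file into the .br form that
/// bootstrap.rs embeds with include_bytes!.
fn compress_asset(src: &str, dst: &str) -> io::Result<()> {
    let data = std::fs::read(src)?;
    let out = File::create(dst)?;
    // 4096-byte internal buffer, quality 11, lgwin 22 (Brotli defaults).
    let mut encoder = CompressorWriter::new(out, 4096, 11, 22);
    encoder.write_all(&data)?;
    Ok(()) // the stream is finalized when `encoder` is dropped
}

fn main() -> io::Result<()> {
    compress_asset("moviedata.json", "src/resources/bootstrap/moviedata.json.br")
}
```
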
139 changes: 27 additions & 112 deletions src/bootstrap.rs
@@ -17,9 +17,7 @@
use std::{
collections::HashMap,
error, fmt,
fs::{create_dir_all, read_to_string, File},
io::{copy, Error as IOError, Write},
path::PathBuf,
io::{Cursor, Error as IOError, Read},
thread, time,
};

@@ -30,8 +28,8 @@ use rusoto_dynamodb::{
AttributeValue, BatchWriteItemError, CreateTableError, PutRequest, WriteRequest,
};
use rusoto_signature::Region;
use tempfile::Builder;

use brotli::Decompressor;
use serde_json::Value as JsonValue;

use super::app;
@@ -43,8 +41,6 @@ use super::data;
struct / enum / const
================================================= */

const USER_AGENT: &str = concat!("dynein/", env!("CARGO_PKG_VERSION"));

#[derive(Debug)]
pub enum DyneinBootstrapError {
LoadData(IOError),
@@ -152,32 +148,20 @@ e.g. https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/GettingSta
"
);

// Step 1. Create tables
// Step 1. create tables
prepare_table(&cx, "Movie", vec!["year,N", "title,S"].as_ref()).await;

// Step 2. Download & unzip data. The sampledata.zip contains 4 files.
let url =
"https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/samples/moviedata.zip";
let download_dir: tempfile::TempDir = download_and_extract_zip(url).await?;
let content = read_to_string(download_dir.path().join("moviedata.json"))?;
/*
moviedata.json (103494 lines, 3.5M bytes)
The JSON file is not a DynamoDB style JSON, but standard JSON format like below:
[
{
"year": 2013,
"title": "Rush",
"info": {
"directors": ["Ron Howard"],
"release_date": "2013-09-02T00:00:00Z",
"rating": 8.3,
"genres": [
"Action",
*/

// Step 3. wait tables to be created and in ACTIVE status
// Step 2. wait tables to be created and in ACTIVE status
wait_table_creation(&cx, vec!["Movie"]).await;

// Step 3. decompress data
let compressed_data = include_bytes!("./resources/bootstrap/moviedata.json.br");
let cursor = Cursor::new(compressed_data);
let mut decompressor = Decompressor::new(cursor, 4096);
Review comment (Contributor Author): As in this example, set the buffer size to 4096
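(Context: Decompressor::new takes the wrapped reader plus an internal buffer size in bytes; 4096 matches the buffer size used in the brotli crate's own documentation examples, a small fixed allocation that suffices for streaming decompression here.)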

let mut content = String::new();
decompressor.read_to_string(&mut content)?;

// Step 4. load data into tables
/*
Array([
Object({
@@ -268,36 +252,26 @@ https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/AppendixSampleT
prepare_table(&cx, table_name, keys).await
}

/* Step 2. Download & unzip data. The sampledata.zip contains 4 files.
https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/samples/sampledata.zip
- Forum.json (23 lines)
- ProductCatalog.json (306 lines)
- Reply.json (75 lines)
- Thread.json (129 lines)

These JSON files are already BatchWriteItem format. e.g. Forum.json
{ "Forum": [
{ "PutRequest":
{ "Item": {
"Name": {"S":"Amazon DynamoDB"},
"Category": {"S":"Amazon Web Services"},
"Threads": {"N":"2"},
"Messages": {"N":"4"}, ...
*/
let url =
"https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/samples/sampledata.zip";
let download_dir: tempfile::TempDir = download_and_extract_zip(url).await?;

// Step 3. wait tables to be created and in ACTIVE status
// Step 2. wait tables to be created and in ACTIVE status
let creating_table_names: Vec<&str> = tables.clone().iter().map(|pair| pair.0).collect();
wait_table_creation(&cx, creating_table_names).await;

// Step 4. load data into tables
println!("Tables are ready and retrieved sample data locally. Now start writing data into samle tables...");
for (table_name, _) in &tables {
let content: String =
read_to_string(download_dir.path().join(format!("{}.json", table_name)))?;
let request_items = batch::build_batch_request_items(content)?;
// Step 3. decompress data
let compressed_data = match *table_name {
"ProductCatalog" => &include_bytes!("./resources/bootstrap/ProductCatalog.json.br")[..],
"Forum" => &include_bytes!("./resources/bootstrap/Forum.json.br")[..],
"Thread" => &include_bytes!("./resources/bootstrap/Thread.json.br")[..],
"Reply" => &include_bytes!("./resources/bootstrap/Reply.json.br")[..],
_ => panic!("No such table name: {}", table_name),
};
let cursor = Cursor::new(compressed_data);
let mut decompressor = Decompressor::new(cursor, 4096);
let mut content = String::new();
decompressor.read_to_string(&mut content)?;
// Step 4. load data into tables
let request_items = batch::build_batch_request_items(content.to_string())?;
batch::batch_write_untill_processed(cx.clone(), request_items).await?;
}
println!(
@@ -360,65 +334,6 @@ async fn prepare_table(cx: &app::Context, table_name: &str, keys: &[&str]) {
}
}

async fn download_and_extract_zip(target: &str) -> Result<tempfile::TempDir, DyneinBootstrapError> {
let tmpdir: tempfile::TempDir = Builder::new().tempdir()?;
debug!("temporary download & unzip directory: {:?}", &tmpdir);

println!("Temporarily downloading sample data from {}", target);

let client = reqwest::ClientBuilder::new()
.user_agent(USER_AGENT)
.build()?;
let res_bytes = client
.get(target)
.send()
.await?
.error_for_status()?
.bytes()
.await?;
let fpath: PathBuf = tmpdir.path().join("downloaded_sampledata.zip");
debug!("Downloading the file at: {}", &fpath.display());
let mut zfile: File = File::create(fpath.clone())?;
zfile.write_all(&res_bytes)?;
debug!(
"Finished writing content of the downloaded data into '{}'",
&fpath.display()
);

let mut zarchive = zip::ZipArchive::new(File::open(fpath)?)?;
debug!("Opened the zip archive File just written: {:?}", zarchive);

for i in 0..zarchive.len() {
let mut f: zip::read::ZipFile<'_> = zarchive.by_index(i)?;
debug!("target ZipFile name: {}", f.name());
let unzipped_fpath = tmpdir.path().join(f.name());
debug!(
"[file #{}] file in the archive is: {}",
&i,
unzipped_fpath.display()
);

// create a directory if target file is a directory (ends with '/').
if (*f.name()).ends_with('/') {
create_dir_all(&unzipped_fpath)?
} else {
// create missing parent directory before diving into actual file
if let Some(p) = unzipped_fpath.parent() {
if !p.exists() {
create_dir_all(p)?;
}
}

// create unzipped file
let mut out = File::create(&unzipped_fpath)?;
copy(&mut f, &mut out)?;
debug!("[file #{}] done extracting file.", &i);
}
}

Ok(tmpdir)
}

async fn wait_table_creation(cx: &app::Context, mut processing_tables: Vec<&str>) {
debug!("tables in progress: {:?}", processing_tables);
loop {
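
Taken together, the new loading path replaces the download-and-unzip flow removed above with compile-time embedding. A self-contained sketch of that pattern (the asset path is illustrative; include_bytes! resolves it relative to the source file at compile time, so the file must exist for this to build):

```rust
use std::io::{Cursor, Read};

use brotli::Decompressor;

fn load_embedded_json() -> std::io::Result<String> {
    // Baked into the binary at compile time; the path is resolved
    // relative to this source file.
    let compressed: &[u8] = include_bytes!("./resources/bootstrap/moviedata.json.br");

    // Decompressor wraps any Read; 4096 is the internal buffer size
    // used throughout this PR.
    let mut decompressor = Decompressor::new(Cursor::new(compressed), 4096);
    let mut content = String::new();
    decompressor.read_to_string(&mut content)?;
    Ok(content)
}
```

The tradeoff is binary size for reliability: bootstrap no longer depends on docs.aws.amazon.com being reachable, which is the point of setting the data locally instead of downloading it.
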
Binary file added src/resources/bootstrap/Forum.json.br
Binary file added src/resources/bootstrap/ProductCatalog.json.br
Binary file added src/resources/bootstrap/Reply.json.br
Binary file added src/resources/bootstrap/Thread.json.br
Binary file added src/resources/bootstrap/moviedata.json.br