diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 0000000..0036339 --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,11 @@ +on: push + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Lint + run: cargo fmt --all -- --check + - name: Build + run: cargo build diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..2959236 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,55 @@ +run-name: Release ${{ github.ref_name }} + +permissions: + contents: write + +on: + push: + tags: + - "v*" + +jobs: + build-linux: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Build + run: cargo build --release + - name: Upload binary + uses: actions/upload-artifact@v3 + with: + name: bin-linux + path: ./target/release/jsonfilter + + build-windows: + runs-on: windows-latest + steps: + - uses: actions/checkout@v4 + - name: Build + run: cargo build --release + - name: Upload binary + uses: actions/upload-artifact@v3 + with: + name: bin-windows + path: ./target/release/jsonfilter.exe + + create-release: + needs: + - build-linux + - build-windows + runs-on: ubuntu-latest + steps: + - name: Download Linux binary + uses: actions/download-artifact@v3 + with: + name: bin-linux + - name: Download Windows binary + uses: actions/download-artifact@v3 + with: + name: bin-windows + - name: Create release + uses: softprops/action-gh-release@v1 + with: + files: | + jsonfilter + jsonfilter.exe diff --git a/README.md b/README.md index 959e542..56c824c 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ filters a big file with JSON lines by a key/value and writes to another file usage: ```text -Usage: jsonfilter --archive --input --output --key --filter +Usage: jsonfilter [OPTIONS] --archive --input --output --key --filter Options: -a, --archive Optionally provide a zst archive to unpack. If provided, input is the path within the archive @@ -13,6 +13,7 @@ Options: -o, --output Output file - newline-separated json will be written to this path -k, --key JSON key to filter on -f, --filter JSON value to filter on + -q, --quiet If set, do not log progress -h, --help Print help -V, --version Print version ``` diff --git a/src/main.rs b/src/main.rs index 24f7598..abde246 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,8 +11,8 @@ mod progress; struct Args { /// Optionally provide a zst archive to unpack. /// If provided, input is the path within the archive. - // #[arg(short, long)] - // archive: String, + #[arg(short, long)] + archive: String, /// Input file - must be newline-separated json. #[arg(short, long)] @@ -45,19 +45,28 @@ fn main() -> Result<(), Box> { let total_size = io::get_size(&args.input)?; let progress = create_progress_bar(total_size)?; let mut total_read: u64 = 0; + let mut total_wrote: u64 = 0; let mut writer = io::write_lines(&args.output)?; let lines = io::read_lines_buf(&args.input)?; let key = args.key.as_str(); let filter = args.filter.as_str(); for line_result in lines { let line = line_result?; + let line_len = line.len() as u64 + 1; let mut line_clone = line.clone(); let row: simd_json::BorrowedValue = to_borrowed_value(line_clone.as_mut_slice())?; if is_filtered(&row, key, filter).unwrap_or(false) { writer.write_all(&line)?; + total_wrote += line_len; } - total_read += (line.len() + 1) as u64; + total_read += line_len; progress.set_position(total_read); } + println!( + "Finished filtering {} GB into {} MB in {} seconds", + total_size, + total_wrote, + progress.elapsed().as_secs() + ); Ok(()) }