Skip to content

Commit

Permalink
Add Github workflows
Browse files Browse the repository at this point in the history
  • Loading branch information
aldahick committed Sep 23, 2023
1 parent 89136b0 commit af877f2
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 4 deletions.
11 changes: 11 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Continuous-integration workflow: triggered by every push event.
on: [push]

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      # Check out the repository so cargo can find the crate.
      - uses: actions/checkout@v4

      # Fail the job if any source file is not formatted per rustfmt.
      - name: Lint
        run: cargo fmt --all -- --check

      # Compile the crate with cargo's default profile.
      - name: Build
        run: cargo build
55 changes: 55 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Release workflow: for every pushed tag matching "v*", build the release
# binary on Linux and Windows, then publish both as assets of a GitHub release.
run-name: Release ${{ github.ref_name }}

# Creating a release and uploading assets requires write access to contents.
permissions:
  contents: write

on:
  push:
    tags:
      - "v*"

jobs:
  build-linux:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Build
        run: cargo build --release
      - name: Upload binary
        # v3 of the artifact actions has been retired by GitHub; use v4.
        uses: actions/upload-artifact@v4
        with:
          name: bin-linux
          path: ./target/release/jsonfilter
          # Fail here rather than publishing a release without the binary.
          if-no-files-found: error

  build-windows:
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v4
      - name: Build
        run: cargo build --release
      - name: Upload binary
        # v3 of the artifact actions has been retired by GitHub; use v4.
        uses: actions/upload-artifact@v4
        with:
          name: bin-windows
          path: ./target/release/jsonfilter.exe
          # Fail here rather than publishing a release without the binary.
          if-no-files-found: error

  create-release:
    # Both platform builds must succeed before anything is published.
    needs:
      - build-linux
      - build-windows
    runs-on: ubuntu-latest
    steps:
      # Each named artifact is extracted into the workspace root, so the
      # binaries end up as ./jsonfilter and ./jsonfilter.exe.
      - name: Download Linux binary
        uses: actions/download-artifact@v4
        with:
          name: bin-linux
      - name: Download Windows binary
        uses: actions/download-artifact@v4
        with:
          name: bin-windows
      - name: Create release
        # v2 runs on a supported Node runtime; inputs are compatible with v1.
        uses: softprops/action-gh-release@v2
        with:
          # Abort instead of silently creating a release with missing assets.
          fail_on_unmatched_files: true
          files: |
            jsonfilter
            jsonfilter.exe
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,15 @@ filters a big file with JSON lines by a key/value and writes to another file
usage:

```text
Usage: jsonfilter --archive <ARCHIVE> --input <INPUT> --output <OUTPUT> --key <KEY> --filter <FILTER>
Usage: jsonfilter [OPTIONS] --archive <ARCHIVE> --input <INPUT> --output <OUTPUT> --key <KEY> --filter <FILTER>
Options:
-a, --archive <ARCHIVE> Optionally provide a zst archive to unpack. If provided, input is the path within the archive
-i, --input <INPUT> Input file - must be newline-separated json
-o, --output <OUTPUT> Output file - newline-separated json will be written to this path
-k, --key <KEY> JSON key to filter on
-f, --filter <FILTER> JSON value to filter on
-q, --quiet If set, do not log progress
-h, --help Print help
-V, --version Print version
```
15 changes: 12 additions & 3 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ mod progress;
struct Args {
/// Optionally provide a zst archive to unpack.
/// If provided, input is the path within the archive.
// #[arg(short, long)]
// archive: String,
#[arg(short, long)]
archive: String,

/// Input file - must be newline-separated json.
#[arg(short, long)]
Expand Down Expand Up @@ -45,19 +45,28 @@ fn main() -> Result<(), Box<dyn Error>> {
let total_size = io::get_size(&args.input)?;
let progress = create_progress_bar(total_size)?;
let mut total_read: u64 = 0;
let mut total_wrote: u64 = 0;
let mut writer = io::write_lines(&args.output)?;
let lines = io::read_lines_buf(&args.input)?;
let key = args.key.as_str();
let filter = args.filter.as_str();
for line_result in lines {
let line = line_result?;
let line_len = line.len() as u64 + 1;
let mut line_clone = line.clone();
let row: simd_json::BorrowedValue = to_borrowed_value(line_clone.as_mut_slice())?;
if is_filtered(&row, key, filter).unwrap_or(false) {
writer.write_all(&line)?;
total_wrote += line_len;
}
total_read += (line.len() + 1) as u64;
total_read += line_len;
progress.set_position(total_read);
}
println!(
"Finished filtering {} GB into {} MB in {} seconds",
total_size,
total_wrote,
progress.elapsed().as_secs()
);
Ok(())
}

0 comments on commit af877f2

Please sign in to comment.