From b83ad3aae1cdf9a1750201cbf9b3ccd4ac3a4192 Mon Sep 17 00:00:00 2001 From: Joel Natividad <1980690+jqnatividad@users.noreply.github.com> Date: Thu, 25 Apr 2024 05:10:32 -0400 Subject: [PATCH] `benchmarks`: v4.2.0 addl benchmarks for `excel` & `frequency` renamed sqlp_vs_duckdb benchmarks so they're next to each other for comparison [skip ci] --- scripts/benchmarks.sh | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/scripts/benchmarks.sh b/scripts/benchmarks.sh index 31ac96b30..cf764357c 100755 --- a/scripts/benchmarks.sh +++ b/scripts/benchmarks.sh @@ -42,14 +42,14 @@ arg_pat="$1" # the version of this script -bm_version=4.1.0 +bm_version=4.2.0 # CONFIGURABLE VARIABLES --------------------------------------- # change as needed to reflect your environment/workloads # the path to the qsv binary, change this if you're not using the prebuilt binaries # e.g. you compiled a tuned version of qsv with different features and/or CPU optimizations enabled -# qsv_bin=../target/release/qsv +# qsv_bin=../target/release/qsvlite # qsv_bin=../target/debug/qsv qsv_bin=qsv # the path to the qsv binary that we dogfood to run the benchmarks @@ -471,6 +471,7 @@ run enum_uuid "$qsv_bin" enum --uuid "$data" run enum_constant "$qsv_bin" enum --constant "NYC" "$data" run enum_copy "$qsv_bin" enum --copy Agency "$data" run excel "$qsv_bin" excel benchmark_data.xlsx +run excel_error_format_formula "$qsv_bin" excel --error-format both benchmark_data.xlsx run excel_metadata "$qsv_bin" excel --metadata c benchmark_data.xlsx run excel_metadata_short "$qsv_bin" excel --metadata s benchmark_data.xlsx run exclude "$qsv_bin" exclude \'Incident Zip\' "$data" \'Incident Zip\' data_to_exclude.csv @@ -494,6 +495,10 @@ run fmt_no_final_newline "$qsv_bin" fmt --no-final-newline "$data" run foreach "$qsv_bin" foreach City "echo {}" "$data" run frequency "$qsv_bin" frequency "$data" run --index frequency_index "$qsv_bin" frequency "$data" +run frequency_no_limit "$qsv_bin" 
frequency --limit 0 "$data" +run --index frequency_no_limit_index "$qsv_bin" frequency --limit 0 "$data" +run frequency_other_sorted "$qsv_bin" frequency --other-sorted "$data" +run --index frequency_other_sorted_index "$qsv_bin" frequency --other-sorted "$data" run frequency_selregex "$qsv_bin" frequency -s /^R/ "$data" run frequency_j1 "$qsv_bin" frequency -j 1 "$data" run frequency_ignorecase "$qsv_bin" frequency -i "$data" @@ -579,9 +584,9 @@ run --index split_chunks_index "$qsv_bin" split --chunks 20 split_tempdir_chunks run --index split_chunks_index_j1 "$qsv_bin" split --chunks 20 -j 1 split_tempdir_chunks_idx_j1 run sqlp "$qsv_bin" sqlp "$data" -Q '"select * from _t_1 where \"Complaint Type\"='\''Noise'\'' and Borough='\''BROOKLYN'\''"' run sqlp_aggregations "$qsv_bin" sqlp "$data" -Q '"select Borough, count(*) from _t_1 where \"Complaint Type\"='\''Noise'\'' group by Borough"' -run sqlp_vs_duckdb_aggregations duckdb :memory: '"select Borough, count(*) from read_csv_auto('\'''$data''\'') where \"Complaint Type\"='\''Noise'\'' group by Borough"' +run sqlp_aggregations_vs_duckdb duckdb :memory: '"select Borough, count(*) from read_csv_auto('\'''$data''\'') where \"Complaint Type\"='\''Noise'\'' group by Borough"' run sqlp_aggregations_expensive "$qsv_bin" sqlp SKIP_INPUT -Q expensive.sql -run sqlp_vs_duckdb_aggregations_expensive duckdb :memory: -c \".read expensiveduckdb.sql\" +run sqlp_aggregations_expensive_vs_duckdb duckdb :memory: -c \".read expensiveduckdb.sql\" run sqlp_format_arrow "$qsv_bin" sqlp --format arrow "$data" -Q '"select * from _t_1 where \"Complaint Type\"='\''Noise'\'' and Borough='\''BROOKLYN'\''"' run sqlp_format_avro "$qsv_bin" sqlp --format avro "$data" -Q '"select * from _t_1 where \"Complaint Type\"='\''Noise'\'' and Borough='\''BROOKLYN'\''"' run sqlp_format_json "$qsv_bin" sqlp --format json "$data" -Q '"select * from _t_1 where \"Complaint Type\"='\''Noise'\'' and Borough='\''BROOKLYN'\''"'