diff --git a/python/kvikio/kvikio/benchmarks/http_io.py b/python/kvikio/kvikio/benchmarks/http_io.py index 68d4643004..446fb80665 100644 --- a/python/kvikio/kvikio/benchmarks/http_io.py +++ b/python/kvikio/kvikio/benchmarks/http_io.py @@ -48,7 +48,7 @@ def main(args): cupy.arange(10) # Make sure CUDA is initialized kvikio.defaults.num_threads_reset(args.nthreads) - print("Roundtrip benchmark") + print("Http IO benchmark") print("--------------------------------------") print(f"nelem | {args.nelem} ({format_bytes(args.nbytes)})") print(f"dtype | {args.dtype}") diff --git a/python/kvikio/kvikio/benchmarks/s3_io.py b/python/kvikio/kvikio/benchmarks/s3_io.py index 4311f5f012..acca5ba55b 100644 --- a/python/kvikio/kvikio/benchmarks/s3_io.py +++ b/python/kvikio/kvikio/benchmarks/s3_io.py @@ -102,32 +102,9 @@ def run() -> float: yield run() -def run_cudf(args, libcudf_s3_io: bool): - import cudf - - cudf.set_option("libcudf_s3_io", libcudf_s3_io) - - # Upload data to S3 server - create_client_and_bucket() - data = cupy.random.rand(args.nelem).astype(args.dtype) - df = cudf.DataFrame({"a": data}) - df.to_parquet(f"s3://{args.bucket}/data1") - - def run() -> float: - t0 = time.perf_counter() - cudf.read_parquet(f"s3://{args.bucket}/data1") - t1 = time.perf_counter() - return t1 - t0 - - for _ in range(args.nruns): - yield run() - - API = { - "cupy-kvikio": partial(run_numpy_like, xp=cupy), - "numpy-kvikio": partial(run_numpy_like, xp=numpy), - "cudf-kvikio": partial(run_cudf, libcudf_s3_io=True), - "cudf-fsspec": partial(run_cudf, libcudf_s3_io=False), + "cupy": partial(run_numpy_like, xp=cupy), + "numpy": partial(run_numpy_like, xp=numpy), } @@ -138,7 +115,7 @@ def main(args): os.environ["KVIKIO_NTHREADS"] = str(args.nthreads) kvikio.defaults.num_threads_reset(args.nthreads) - print("Roundtrip benchmark") + print("S3 IO benchmark") print("--------------------------------------") print(f"nelem | {args.nelem} ({format_bytes(args.nbytes)})") print(f"dtype | {args.dtype}") 
diff --git a/python/kvikio/tests/test_benchmarks.py b/python/kvikio/tests/test_benchmarks.py
index 5b5602e53a..307b0b258d 100644
--- a/python/kvikio/tests/test_benchmarks.py
+++ b/python/kvikio/tests/test_benchmarks.py
@@ -109,3 +109,34 @@ def test_http_io(run_cmd, api):
         cwd=benchmarks_path,
     )
     assert retcode == 0
+
+
+@pytest.mark.parametrize(
+    "api",
+    [
+        "cupy",
+        "numpy",
+    ],
+)
+def test_s3_io(run_cmd, api):
+    """Run benchmarks/s3_io.py for each `--api` value and expect exit code 0."""
+
+    if not kvikio.is_remote_file_available():
+        pytest.skip(
+            "RemoteFile not available, please build KvikIO "
+            "with libcurl (-DKvikIO_REMOTE_SUPPORT=ON)"
+        )
+    # NOTE(review): s3_io.py presumably needs an S3 endpoint and its client
+    # dependencies at runtime -- confirm they are available where this runs.
+    retcode = run_cmd(
+        cmd=[
+            sys.executable,
+            "s3_io.py",
+            "-n",
+            "1000",
+            "--api",
+            api,
+        ],
+        cwd=benchmarks_path,
+    )
+    assert retcode == 0