|
| 1 | +import os |
| 2 | + |
| 3 | +import pytest |
| 4 | +from smart_open import open as smart_open |
| 5 | + |
| 6 | +import src.util.file_util as file_util |
| 7 | + |
| 8 | + |
def create_file(root_path, file_path):
    """Create a small text file under ``root_path`` and return its full path.

    The file is written through ``smart_open`` with the fixed content
    ``"hello"``. When ``file_util.is_s3_path`` does not report the joined
    path as an S3 path, the parent directory is created first.

    Args:
        root_path: Base directory or URL prefix, joined via ``os.path.join``.
        file_path: Location of the file relative to ``root_path``.

    Returns:
        The joined path produced by ``os.path.join(root_path, file_path)``.
    """
    full_path = os.path.join(root_path, file_path)

    if not file_util.is_s3_path(str(full_path)):
        parent_dir = os.path.dirname(full_path)
        # A bare file name has no directory component, and os.makedirs("")
        # raises FileNotFoundError, so only create the parent when one exists.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    with smart_open(full_path, mode="w") as outfile:
        outfile.write("hello")

    return full_path
| 19 | + |
| 20 | + |
@pytest.mark.parametrize(
    "path,is_s3",
    [
        # Of these samples, only the s3:// URL is expected to be detected.
        ("s3://bucket/folder/test.txt", True),
        ("./relative/folder/test.txt", False),
        ("http://example.com/test.txt", False),
    ],
)
def test_is_s3_path(path, is_s3):
    """Check that ``is_s3_path`` returns the expected boolean for each path."""
    detected = file_util.is_s3_path(path)
    # Identity comparison: the result must be the bool itself, not merely
    # a truthy or falsy value.
    assert detected is is_s3
| 31 | + |
| 32 | + |
@pytest.mark.parametrize(
    "path,bucket,prefix",
    [
        # Covers a single key, a directory-style key with a trailing slash,
        # and a nested file key.
        ("s3://my_bucket/my_key", "my_bucket", "my_key"),
        ("s3://my_bucket/path/to/directory/", "my_bucket", "path/to/directory/"),
        ("s3://my_bucket/path/to/file.txt", "my_bucket", "path/to/file.txt"),
    ],
)
def test_split_s3_url(path, bucket, prefix):
    """Check that ``split_s3_url`` yields the ``(bucket, prefix)`` pair."""
    expected = (bucket, prefix)
    actual = file_util.split_s3_url(path)
    assert actual == expected
| 43 | + |
| 44 | + |
@pytest.mark.parametrize(
    "path,bucket",
    [
        # Bucket names with underscores and hyphens, plus a URL that has
        # no key at all after the bucket.
        ("s3://bucket/folder/test.txt", "bucket"),
        ("s3://bucket_x/folder", "bucket_x"),
        ("s3://bucket-y/folder/", "bucket-y"),
        ("s3://bucketz", "bucketz"),
    ],
)
def test_get_s3_bucket(path, bucket):
    """Check that ``get_s3_bucket`` extracts the bucket name from the URL."""
    extracted = file_util.get_s3_bucket(path)
    assert extracted == bucket
| 56 | + |
| 57 | + |
@pytest.mark.parametrize(
    "path,file_key",
    [
        ("s3://bucket/folder/test.txt", "folder/test.txt"),
        ("s3://bucket_x/file.csv", "file.csv"),
        ("s3://bucket-y/folder/path/to/abc.zip", "folder/path/to/abc.zip"),
        # The two non-S3 inputs below pin the function's current output for
        # such paths; the expected values are kept exactly as recorded.
        ("./folder/path", "/folder/path"),
        ("sftp://folder/filename", "filename"),
    ],
)
def test_get_s3_file_key(path, file_key):
    """Check that ``get_s3_file_key`` returns the expected key for each path."""
    extracted = file_util.get_s3_file_key(path)
    assert extracted == file_key
| 70 | + |
| 71 | + |
@pytest.mark.parametrize(
    "path,file_name",
    [
        # S3 URLs, a relative local path, and a non-S3 URL: in every case
        # the expected result is the final path component.
        ("s3://bucket/folder/test.txt", "test.txt"),
        ("s3://bucket_x/file.csv", "file.csv"),
        ("s3://bucket-y/folder/path/to/abc.zip", "abc.zip"),
        ("./folder/path", "path"),
        ("sftp://filename", "filename"),
    ],
)
def test_get_s3_file_name(path, file_name):
    """Check that ``get_file_name`` returns the last component of the path.

    Note: despite the ``s3`` in this test's name, the function under test
    is ``file_util.get_file_name``.
    """
    extracted = file_util.get_file_name(path)
    assert extracted == file_name
0 commit comments