Skip to content

Commit 0e7132c

Browse files
committed
Python/Smart_Open improve read/write API.
Signed-off-by: Pascal Spörri <[email protected]>
1 parent 29c57ef commit 0e7132c

File tree

3 files changed

+37
-31
lines changed

3 files changed

+37
-31
lines changed

src/python/geds_smart_open/src/geds_smart_open/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@
88

99
from . import geds
1010
from .geds import register_object_store
11+
from .geds import relocate
1112

1213
register_transport(geds)
1314

14-
__all__ = ["GEDS", "register_object_store"]
15+
__all__ = ["GEDS", "register_object_store", "relocate"]

src/python/geds_smart_open/src/geds_smart_open/geds.py

+19-16
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ def __init__(
4949
self.bucket = bucket
5050
self.key = key
5151
self.position = 0
52+
self._size = file.size
5253
self.file = file
5354
self.raw = None
5455
self.line_terminator = line_terminator
@@ -66,7 +67,7 @@ def close(self) -> None:
6667

6768
@property
6869
def size(self) -> int:
69-
return self.file.size
70+
return self._size
7071

7172
@property
7273
def closed(self) -> bool:
@@ -102,7 +103,7 @@ def seek(self, offset: int, whence: int = io.SEEK_SET) -> int:
102103
elif whence == io.SEEK_CUR:
103104
self.position += offset
104105
elif whence == io.SEEK_END:
105-
self.position = self.file.size + offset
106+
self.position = self.size + offset
106107
return self.position
107108

108109
def tell(self) -> int:
@@ -119,24 +120,22 @@ def read(self, limit: int = -1):
119120
"""
120121
self.checkClosed()
121122
self.checkReadable()
122-
maxcount = self.file.size - self.position
123+
maxcount = self.size - self.position
123124
assert maxcount >= 0
124125
count = limit
125126
if limit == 0:
126127
return b""
127128
if limit < 0 or limit > maxcount:
128129
count = maxcount
129-
buffer = bytearray(count)
130-
count = self.readinto(buffer)
131-
if count < len(buffer):
132-
return buffer[0:count]
133-
return buffer
130+
131+
count = maxcount - self.position
132+
return self.file.read1(self.position, count)
134133

135134
def readinto(self, buffer):
136135
self.checkReadable()
137136
if self.closed:
138137
return -1
139-
count = self.file.read(buffer, self.position, len(buffer))
138+
count = self.file.readinto1(buffer, self.position, len(buffer))
140139
self.position += count
141140
return count
142141

@@ -148,13 +147,13 @@ def readline(self, limit: int = -1) -> bytes:
148147
print("readline " + limit)
149148
if limit != -1:
150149
raise NotImplementedError("limits other than -1 not implemented yet")
151-
buffer = bytearray(self.buffer_size)
150+
# buffer = bytearray(self.buffer_size)
152151
line = io.BytesIO()
153152

154153
while True:
155154
previous_position = self.position
156-
count = self.readinto(buffer)
157-
if count == 0:
155+
buffer = self.file.read(self.position, self.buffer_size)
156+
if len(buffer) == 0:
158157
break
159158
index = buffer.find(self.line_terminator, 0)
160159
if index > 0:
@@ -167,7 +166,7 @@ def readline(self, limit: int = -1) -> bytes:
167166
def readall(self) -> bytes:
168167
self.checkClosed()
169168

170-
length = self.file.size - self.position
169+
length = self.size - self.position
171170
buffer = bytearray(length)
172171
count = self.readinto(buffer)
173172
return buffer[0:count]
@@ -290,6 +289,8 @@ def register_object_store(
290289
):
291290
GEDSInstance.register_object_store(bucket, endpoint_url, access_key, secret_key)
292291

292+
def relocate(force: bool = False):
293+
GEDSInstance.get().relocate(force)
293294

294295
def parse_uri(uri: str):
295296
path = uri.removeprefix("geds://")
@@ -328,10 +329,12 @@ def open(bucket: str, key: str, mode: str, client=None):
328329
if mode == constants.READ_BINARY:
329330
f = client.open(bucket, key)
330331
elif mode == constants.WRITE_BINARY:
331-
try:
332+
f = client.create(bucket, key, True)
333+
elif mode == 'ab':
334+
f = client.open(bucket, key)
335+
if not f.writable():
336+
client.copy(bucket, key, bucket, key)
332337
f = client.open(bucket, key)
333-
except:
334-
f = client.create(bucket, key)
335338
else:
336339
raise ValueError(f"Invalid argument for mode: {mode}")
337340
return GEDSRawInputBase(

src/python/wrapper.cpp

+16-14
Original file line numberDiff line numberDiff line change
@@ -188,25 +188,27 @@ PYBIND11_MODULE(pygeds, m) {
188188
return self.setMetadata(reinterpret_cast<const uint8_t *>(buffer), length, seal);
189189
},
190190
py::arg("buffer"), py::arg("length") = std::nullopt, py::arg("seal") = true)
191-
.def("read",
192-
[](GEDSFile &self, py::buffer buffer, size_t position,
193-
size_t length) -> absl::StatusOr<size_t> {
191+
.def("read1",
192+
[](GEDSFile &self, size_t position,
193+
size_t length) -> absl::StatusOr<py::array_t<uint8_t>> {
194+
auto result = py::array_t<uint8_t>(length);
195+
py::buffer_info buffer = result.request(true);
196+
py::gil_scoped_release release;
197+
auto status = self.read(static_cast<uint8_t *>(buffer.ptr), position, length);
198+
if (!status.ok()) {
199+
return status.status();
200+
}
201+
result.resize({*status});
202+
return result;
203+
})
204+
.def("readinto1",
205+
[](GEDSFile &self, py::buffer buffer, size_t position) -> absl::StatusOr<size_t> {
194206
py::buffer_info info = buffer.request(true);
195207
if (info.ndim != 1) {
196208
return absl::FailedPreconditionError("Buffer has wrong dimensions!");
197209
}
198-
if ((size_t)info.size < length) {
199-
return absl::FailedPreconditionError("The buffer does not have sufficient space!");
200-
}
201-
length = std::min<size_t>(info.size, length);
202-
py::gil_scoped_release release;
203-
return self.read(static_cast<uint8_t *>(info.ptr), position, length);
204-
})
205-
.def("read",
206-
[](GEDSFile &self, char *array, size_t position,
207-
size_t length) -> absl::StatusOr<size_t> {
208210
py::gil_scoped_release release;
209-
return self.read(reinterpret_cast<uint8_t *>(array), position, length);
211+
return self.read(static_cast<uint8_t *>(info.ptr), position, info.size);
210212
})
211213
.def("write",
212214
[](GEDSFile &self, const py::buffer buffer, size_t position,

0 commit comments

Comments
 (0)