Skip to content

Commit

Permalink
Switch to writing segmented profiles as v1 format. (#1359)
Browse files Browse the repository at this point in the history
## Description

The WhyLabs platform can now process segmented profiles in v1 format,
so the WhyLabs writer should stop automatically converting them to v0
on upload.

Fixes #1360

## Changes

* Switch default behavior to writing segmented profiles as v1 format in
whylabs writer.
* Minor simplification.
* Log a warning when the `use_v0` parameter is passed.
Also published a dev build of these changes here:
```
!pip install whylogs[all]==1.3.4.dev0
```

- [x] I have reviewed the [Guidelines for Contributing](CONTRIBUTING.md)
and the [Code of Conduct](CODE_OF_CONDUCT.md).
  • Loading branch information
jamie256 authored Sep 7, 2023
1 parent 5a2122c commit 0f96daf
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 11 deletions.
13 changes: 2 additions & 11 deletions python/whylogs/api/writer/whylabs.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,10 +526,7 @@ def _write_segmented_reference_result_set(self, file: SegmentedResultSet, **kwar
upload_statuses = list()
for view, url in zip(files, upload_urls):
with tempfile.NamedTemporaryFile() as tmp_file:
if kwargs.get("use_v0") is None or kwargs.get("use_v0"):
view.write(file=tmp_file, use_v0=True)
else:
view.write(file=tmp_file)
view.write(file=tmp_file)
tmp_file.flush()
tmp_file.seek(0)

Expand Down Expand Up @@ -587,13 +584,7 @@ def write(self, file: Writable, **kwargs: Any) -> Tuple[bool, str]:
self._dataset_id = kwargs.get("dataset_id")

with tempfile.NamedTemporaryFile() as tmp_file:
# currently whylabs is not ingesting the v1 format of segmented profiles as segmented
# so we default to sending them as v0 profiles if the override `use_v0` is not defined,
# if `use_v0` is defined then pass that through to control the serialization format.
if has_segments and (kwargs.get("use_v0") is None or kwargs.get("use_v0")):
view.write(file=tmp_file, use_v0=True)
else:
view.write(file=tmp_file)
view.write(file=tmp_file)
tmp_file.flush()
tmp_file.seek(0)
utc_now = datetime.datetime.now(datetime.timezone.utc)
Expand Down
1 change: 1 addition & 0 deletions python/whylogs/core/view/segmented_dataset_profile_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ def _write_v1(self, path: Optional[str] = None, **kwargs: Any) -> Tuple[bool, st

def write(self, path: Optional[str] = None, **kwargs: Any) -> Tuple[bool, str]:
if kwargs.get("use_v0"):
logger.warning("writing segmented profile as v0 format, some info may be converted")
return self._write_as_v0_message(path, **kwargs)
else:
return self._write_v1(path, **kwargs)
5 changes: 5 additions & 0 deletions python/whylogs/migration/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,11 @@ def v1_to_dataset_profile_message_v0(
raise NotImplementedError(
f"Conversion of custom v1 segmented profiles to v0 not supported! please use column value segmentation, found tags: {segment_tags}"
)
if profile_view.metadata:
if segment_metadata is not None:
segment_metadata.update(profile_view.metadata)
else:
segment_metadata = profile_view.metadata

properties_v0 = DatasetPropertiesV0(
schema_major_version=1, # https://github.com/whylabs/whylogs/blob/maintenance/0.7.x/src/whylogs/core/datasetprofile.py#L37-L38
Expand Down

0 comments on commit 0f96daf

Please sign in to comment.