diff --git a/pyproject.toml b/pyproject.toml index 344efde25..ce53f35c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = ["setuptools", "wheel"] [project] name = "kili" -version = "2.156.2" +version = "2.157.4" description = "Python client for Kili Technology labeling tool" readme = "README.md" authors = [{ name = "Kili Technology", email = "contact@kili-technology.com" }] diff --git a/src/kili/__init__.py b/src/kili/__init__.py index 13d75e2a0..f99cb465a 100644 --- a/src/kili/__init__.py +++ b/src/kili/__init__.py @@ -1,3 +1,3 @@ """Kili Python SDK.""" -__version__ = "2.156.2" +__version__ = "2.157.4" diff --git a/src/kili/entrypoints/mutations/asset/__init__.py b/src/kili/entrypoints/mutations/asset/__init__.py index f5b8f182d..a3019df4b 100644 --- a/src/kili/entrypoints/mutations/asset/__init__.py +++ b/src/kili/entrypoints/mutations/asset/__init__.py @@ -40,7 +40,7 @@ class MutationsAsset(BaseOperationEntrypointMixin): def append_many_to_dataset( self, project_id: str, - content_array: Optional[Union[List[str], List[dict]]] = None, + content_array: Optional[Union[List[str], List[dict], List[List[dict]]]] = None, multi_layer_content_array: Optional[List[List[dict]]] = None, external_id_array: Optional[List[str]] = None, id_array: Optional[List[str]] = None, diff --git a/src/kili/presentation/client/label.py b/src/kili/presentation/client/label.py index e9eb82733..6c4bc3663 100644 --- a/src/kili/presentation/client/label.py +++ b/src/kili/presentation/client/label.py @@ -1229,7 +1229,7 @@ def is_rectangle(coco_annotation, coco_image, kili_annotation): self, # pyright: ignore[reportGeneralTypeIssues] asset_ids=resolved_asset_ids, project_id=ProjectId(project_id), - export_type="latest", + export_type="normal" if fmt == "llm_v1" else "latest", label_format=fmt, split_option=layout, single_file=single_file, diff --git a/src/kili/services/asset_import/helpers.py b/src/kili/services/asset_import/helpers.py index 3de100de0..cc0415888 100644 --- a/src/kili/services/asset_import/helpers.py +++ b/src/kili/services/asset_import/helpers.py @@ -2,6 +2,8 @@ import warnings +SEPARATOR = "___" + def is_chat_format(data, required_keys): """Checks if llm file data is in chat format.""" @@ -43,8 +45,8 @@ def process_json(data): if item["content"] is None: raise ValueError("Chat item content cannot be null.") - # Check if the model is null (indicating a prompt) - if item["model"] is None: + # Check if the prompt comes from a user or a model + if item["role"] in ["user", "system"]: # If there's an existing prompt being processed, add it to the prompts list if current_prompt is not None: transformed_data["prompts"].append( @@ -68,8 +70,9 @@ def process_json(data): "title": item["role"], } ) - # Collect model for this item - models.append(item["model"]) + # Add model if not None + if item["model"] is not None: + models.append(item["model"]) if current_prompt is None: raise ValueError( @@ -85,12 +88,12 @@ def process_json(data): } ) - chat_item_ids = "_".join(item_ids) + chat_item_ids = SEPARATOR.join(item_ids) # Prepare additional_json_metadata additional_json_metadata = { "chat_id": chat_id, - "models": "_".join(models[-2:]), # Join the last two models + "models": SEPARATOR.join(models[-len(completions) :]), # Join the evaluated models "chat_item_ids": chat_item_ids, # Concatenate all item IDs "text": f"Chat_id: {chat_id}\n\nChat_item_ids: {chat_item_ids}", } diff --git a/src/kili/services/export/format/llm/__init__.py b/src/kili/services/export/format/llm/__init__.py index f719b2422..6390f548b 100644 --- a/src/kili/services/export/format/llm/__init__.py +++ b/src/kili/services/export/format/llm/__init__.py @@ -5,6 +5,7 @@ from pathlib import Path from typing import Dict, List, Optional, Union +from kili.services.asset_import.helpers import SEPARATOR from kili.services.export.exceptions import NotCompatibleInputType from kili.services.export.format.base import AbstractExporter from kili.services.types import Job @@ -53,8 +54,6 @@ def process(self, assets: List[Dict]) -> List[Dict[str, Union[List[str], str]]]: def _process(self, assets: List[Dict]) -> List[Dict[str, Union[List[str], str]]]: result = [] for asset in assets: - jobs_config = self.project["jsonInterface"]["jobs"] - latest_label = asset["latestLabel"] result.append( { "raw_data": _format_raw_data(asset), @@ -62,14 +61,19 @@ def _process(self, assets: List[Dict]) -> List[Dict[str, Union[List[str], str]]] "external_id": asset["externalId"], "metadata": asset["jsonMetadata"], "labels": [ - { - "author": latest_label["author"]["email"], - "created_at": latest_label["createdAt"], - "label_type": latest_label["labelType"], - "label": _format_json_response( - jobs_config, latest_label["jsonResponse"] - ), - } + list( + map( + lambda label: { + "author": label["author"]["email"], + "created_at": label["createdAt"], + "label_type": label["labelType"], + "label": _format_json_response( + self.project["jsonInterface"]["jobs"], label["jsonResponse"] + ), + }, + asset["labels"], + ) + ) ], } ) @@ -110,7 +114,7 @@ def _format_raw_data(asset) -> List[Dict]: and isinstance(asset["jsonMetadata"]["chat_item_ids"], str) and len(asset["jsonMetadata"]["chat_item_ids"]) > 0 ): - chat_items_ids = str.split(asset["jsonMetadata"]["chat_item_ids"], "_") + chat_items_ids = str.split(asset["jsonMetadata"]["chat_item_ids"], SEPARATOR) else: chat_items_ids = [] @@ -119,7 +123,7 @@ def _format_raw_data(asset) -> List[Dict]: and isinstance(asset["jsonMetadata"]["models"], str) and len(asset["jsonMetadata"]["models"]) > 0 ): - models = str.split(asset["jsonMetadata"]["models"], "_") + models = str.split(asset["jsonMetadata"]["models"], SEPARATOR) else: models = [] diff --git a/tests/unit/services/asset_import/test_import_llm.py b/tests/unit/services/asset_import/test_import_llm.py index 096c0427c..ac3ed5a40 100644 --- a/tests/unit/services/asset_import/test_import_llm.py +++ b/tests/unit/services/asset_import/test_import_llm.py @@ -45,7 +45,7 @@ def test_upload_from_one_local_file_in_chat_format(self, *_): [False], [""], [ - '{"chat_id": "6e4094947af4902cd252421aba9a077e8e4402dd", "models": "model-large_model-medium", "chat_item_ids": "0455df65c2d6bb821a9dc9108ac1d79964a0f571_4c28c86c4b22b3397691ce5cf27197fcf7e8fb2d_7326ff17cbfe7e3cb91b008cf0c496fcd17a1074_375b55d44af2c8c801992089c797df8e12605dfb_9231d8819ac96cc8a6c4b7780c301b796c3f8bf2", "text": "Chat_id: 6e4094947af4902cd252421aba9a077e8e4402dd\\n\\nChat_item_ids: 0455df65c2d6bb821a9dc9108ac1d79964a0f571_4c28c86c4b22b3397691ce5cf27197fcf7e8fb2d_7326ff17cbfe7e3cb91b008cf0c496fcd17a1074_375b55d44af2c8c801992089c797df8e12605dfb_9231d8819ac96cc8a6c4b7780c301b796c3f8bf2"}' + '{"chat_id": "6e4094947af4902cd252421aba9a077e8e4402dd", "models": "model-large___model-medium", "chat_item_ids": "0455df65c2d6bb821a9dc9108ac1d79964a0f571___4c28c86c4b22b3397691ce5cf27197fcf7e8fb2d___7326ff17cbfe7e3cb91b008cf0c496fcd17a1074___375b55d44af2c8c801992089c797df8e12605dfb___9231d8819ac96cc8a6c4b7780c301b796c3f8bf2", "text": "Chat_id: 6e4094947af4902cd252421aba9a077e8e4402dd\\n\\nChat_item_ids: 0455df65c2d6bb821a9dc9108ac1d79964a0f571___4c28c86c4b22b3397691ce5cf27197fcf7e8fb2d___7326ff17cbfe7e3cb91b008cf0c496fcd17a1074___375b55d44af2c8c801992089c797df8e12605dfb___9231d8819ac96cc8a6c4b7780c301b796c3f8bf2"}' ], ) self.kili.graphql_client.execute.assert_called_with(*expected_parameters) @@ -69,7 +69,7 @@ def test_upload_from_one_local_file_in_chat_format_with_given_json_metadata(self [False], [""], [ - '{"customKey": "customValue", "chat_id": "6e4094947af4902cd252421aba9a077e8e4402dd", "models": "model-large_model-medium", "chat_item_ids": "0455df65c2d6bb821a9dc9108ac1d79964a0f571_4c28c86c4b22b3397691ce5cf27197fcf7e8fb2d_7326ff17cbfe7e3cb91b008cf0c496fcd17a1074_375b55d44af2c8c801992089c797df8e12605dfb_9231d8819ac96cc8a6c4b7780c301b796c3f8bf2", "text": "Chat_id: 6e4094947af4902cd252421aba9a077e8e4402dd\\n\\nChat_item_ids: 0455df65c2d6bb821a9dc9108ac1d79964a0f571_4c28c86c4b22b3397691ce5cf27197fcf7e8fb2d_7326ff17cbfe7e3cb91b008cf0c496fcd17a1074_375b55d44af2c8c801992089c797df8e12605dfb_9231d8819ac96cc8a6c4b7780c301b796c3f8bf2"}' + '{"customKey": "customValue", "chat_id": "6e4094947af4902cd252421aba9a077e8e4402dd", "models": "model-large___model-medium", "chat_item_ids": "0455df65c2d6bb821a9dc9108ac1d79964a0f571___4c28c86c4b22b3397691ce5cf27197fcf7e8fb2d___7326ff17cbfe7e3cb91b008cf0c496fcd17a1074___375b55d44af2c8c801992089c797df8e12605dfb___9231d8819ac96cc8a6c4b7780c301b796c3f8bf2", "text": "Chat_id: 6e4094947af4902cd252421aba9a077e8e4402dd\\n\\nChat_item_ids: 0455df65c2d6bb821a9dc9108ac1d79964a0f571___4c28c86c4b22b3397691ce5cf27197fcf7e8fb2d___7326ff17cbfe7e3cb91b008cf0c496fcd17a1074___375b55d44af2c8c801992089c797df8e12605dfb___9231d8819ac96cc8a6c4b7780c301b796c3f8bf2"}' ], ) self.kili.graphql_client.execute.assert_called_with(*expected_parameters) @@ -155,7 +155,7 @@ def test_upload_from_dict_in_chat_format(self, *_): [False], [""], [ - '{"chat_id": "6e4094947af4902cd252421aba9a077e8e4402dd", "models": "model-large_model-medium", "chat_item_ids": "0455df65c2d6bb821a9dc9108ac1d79964a0f571_4c28c86c4b22b3397691ce5cf27197fcf7e8fb2d_7326ff17cbfe7e3cb91b008cf0c496fcd17a1074_375b55d44af2c8c801992089c797df8e12605dfb_9231d8819ac96cc8a6c4b7780c301b796c3f8bf2", "text": "Chat_id: 6e4094947af4902cd252421aba9a077e8e4402dd\\n\\nChat_item_ids: 0455df65c2d6bb821a9dc9108ac1d79964a0f571_4c28c86c4b22b3397691ce5cf27197fcf7e8fb2d_7326ff17cbfe7e3cb91b008cf0c496fcd17a1074_375b55d44af2c8c801992089c797df8e12605dfb_9231d8819ac96cc8a6c4b7780c301b796c3f8bf2"}' + '{"chat_id": "6e4094947af4902cd252421aba9a077e8e4402dd", "models": "model-large___model-medium", "chat_item_ids": "0455df65c2d6bb821a9dc9108ac1d79964a0f571___4c28c86c4b22b3397691ce5cf27197fcf7e8fb2d___7326ff17cbfe7e3cb91b008cf0c496fcd17a1074___375b55d44af2c8c801992089c797df8e12605dfb___9231d8819ac96cc8a6c4b7780c301b796c3f8bf2", "text": "Chat_id: 6e4094947af4902cd252421aba9a077e8e4402dd\\n\\nChat_item_ids: 0455df65c2d6bb821a9dc9108ac1d79964a0f571___4c28c86c4b22b3397691ce5cf27197fcf7e8fb2d___7326ff17cbfe7e3cb91b008cf0c496fcd17a1074___375b55d44af2c8c801992089c797df8e12605dfb___9231d8819ac96cc8a6c4b7780c301b796c3f8bf2"}' ], ) self.kili.graphql_client.execute.assert_called_with(*expected_parameters) @@ -213,7 +213,7 @@ def test_upload_from_dict_in_chat_format_with_json_metadata(self, *_): [False], [""], [ - '{"customKey": "customValue", "chat_id": "6e4094947af4902cd252421aba9a077e8e4402dd", "models": "model-large_model-medium", "chat_item_ids": "0455df65c2d6bb821a9dc9108ac1d79964a0f571_4c28c86c4b22b3397691ce5cf27197fcf7e8fb2d_7326ff17cbfe7e3cb91b008cf0c496fcd17a1074_375b55d44af2c8c801992089c797df8e12605dfb_9231d8819ac96cc8a6c4b7780c301b796c3f8bf2", "text": "Chat_id: 6e4094947af4902cd252421aba9a077e8e4402dd\\n\\nChat_item_ids: 0455df65c2d6bb821a9dc9108ac1d79964a0f571_4c28c86c4b22b3397691ce5cf27197fcf7e8fb2d_7326ff17cbfe7e3cb91b008cf0c496fcd17a1074_375b55d44af2c8c801992089c797df8e12605dfb_9231d8819ac96cc8a6c4b7780c301b796c3f8bf2"}' + '{"customKey": "customValue", "chat_id": "6e4094947af4902cd252421aba9a077e8e4402dd", "models": "model-large___model-medium", "chat_item_ids": "0455df65c2d6bb821a9dc9108ac1d79964a0f571___4c28c86c4b22b3397691ce5cf27197fcf7e8fb2d___7326ff17cbfe7e3cb91b008cf0c496fcd17a1074___375b55d44af2c8c801992089c797df8e12605dfb___9231d8819ac96cc8a6c4b7780c301b796c3f8bf2", "text": "Chat_id: 6e4094947af4902cd252421aba9a077e8e4402dd\\n\\nChat_item_ids: 0455df65c2d6bb821a9dc9108ac1d79964a0f571___4c28c86c4b22b3397691ce5cf27197fcf7e8fb2d___7326ff17cbfe7e3cb91b008cf0c496fcd17a1074___375b55d44af2c8c801992089c797df8e12605dfb___9231d8819ac96cc8a6c4b7780c301b796c3f8bf2"}' ], ) self.kili.graphql_client.execute.assert_called_with(*expected_parameters) @@ -255,8 +255,8 @@ def test_process_json(self, *_): }, { "chat_id": "6e4094947af4902cd252421aba9a077e8e4402dd", - "models": "model-large_model-medium", - "chat_item_ids": "0455df65c2d6bb821a9dc9108ac1d79964a0f571_4c28c86c4b22b3397691ce5cf27197fcf7e8fb2d_7326ff17cbfe7e3cb91b008cf0c496fcd17a1074_375b55d44af2c8c801992089c797df8e12605dfb_9231d8819ac96cc8a6c4b7780c301b796c3f8bf2", - "text": "Chat_id: 6e4094947af4902cd252421aba9a077e8e4402dd\n\nChat_item_ids: 0455df65c2d6bb821a9dc9108ac1d79964a0f571_4c28c86c4b22b3397691ce5cf27197fcf7e8fb2d_7326ff17cbfe7e3cb91b008cf0c496fcd17a1074_375b55d44af2c8c801992089c797df8e12605dfb_9231d8819ac96cc8a6c4b7780c301b796c3f8bf2", + "models": "model-large___model-medium", + "chat_item_ids": "0455df65c2d6bb821a9dc9108ac1d79964a0f571___4c28c86c4b22b3397691ce5cf27197fcf7e8fb2d___7326ff17cbfe7e3cb91b008cf0c496fcd17a1074___375b55d44af2c8c801992089c797df8e12605dfb___9231d8819ac96cc8a6c4b7780c301b796c3f8bf2", + "text": "Chat_id: 6e4094947af4902cd252421aba9a077e8e4402dd\n\nChat_item_ids: 0455df65c2d6bb821a9dc9108ac1d79964a0f571___4c28c86c4b22b3397691ce5cf27197fcf7e8fb2d___7326ff17cbfe7e3cb91b008cf0c496fcd17a1074___375b55d44af2c8c801992089c797df8e12605dfb___9231d8819ac96cc8a6c4b7780c301b796c3f8bf2", }, )