roboflow
diff --git a/‎roboflow/__init__.py
+1-1 b/‎roboflow/__init__.py
+1-1
diff --git a/‎roboflow/roboflowpy.py
+2-1 b/‎roboflow/roboflowpy.py
+2-1
diff --git a/‎roboflow/util/folderparser.py
+32-9 b/‎roboflow/util/folderparser.py
+32-9
diff --git a/‎tests/datasets/paligemma/README.dataset.txt
+5 b/‎tests/datasets/paligemma/README.dataset.txt
+5
diff --git a/‎tests/datasets/paligemma/dataset/5964b4c268577652f171d52dc317d82d_png.rf.5bf49f8aa575f586001710b1d79968fd.jpg
39.3 KB b/‎tests/datasets/paligemma/dataset/5964b4c268577652f171d52dc317d82d_png.rf.5bf49f8aa575f586001710b1d79968fd.jpg
39.3 KB
diff --git a/‎tests/datasets/paligemma/dataset/5e2369e237c0c612d09181b63fb20480_png.rf.5be427175f28f7042e34636bd0dd89cc.jpg
33.8 KB b/‎tests/datasets/paligemma/dataset/5e2369e237c0c612d09181b63fb20480_png.rf.5be427175f28f7042e34636bd0dd89cc.jpg
33.8 KB
diff --git a/‎tests/datasets/paligemma/dataset/63a6c783083d5c7c7290bc81877a4ee9_png.rf.5c02d037f48bc3df56e6d0e3e6e053e4.jpg
34.9 KB b/‎tests/datasets/paligemma/dataset/63a6c783083d5c7c7290bc81877a4ee9_png.rf.5c02d037f48bc3df56e6d0e3e6e053e4.jpg
34.9 KB
diff --git a/‎tests/datasets/paligemma/dataset/900e8ea2a3c336686c23978e800af239_png.rf.011b80b16a622820ca92b91543f5a44d.jpg
18.7 KB b/‎tests/datasets/paligemma/dataset/900e8ea2a3c336686c23978e800af239_png.rf.011b80b16a622820ca92b91543f5a44d.jpg
18.7 KB
diff --git a/‎tests/datasets/paligemma/dataset/_annotations.test.jsonl
+9 b/‎tests/datasets/paligemma/dataset/_annotations.test.jsonl
+9
diff --git a/‎tests/datasets/paligemma/dataset/_annotations.train.jsonl
+4 b/‎tests/datasets/paligemma/dataset/_annotations.train.jsonl
+4
diff --git a/‎tests/datasets/paligemma/dataset/_annotations.valid.jsonl
+3 b/‎tests/datasets/paligemma/dataset/_annotations.valid.jsonl
+3
diff --git a/‎tests/datasets/paligemma/dataset/de48275e1ff70fab78bee31e09fc896d_png.rf.01a97b1ad053aa1e6525ac0451cee8b7.jpg
26.7 KB b/‎tests/datasets/paligemma/dataset/de48275e1ff70fab78bee31e09fc896d_png.rf.01a97b1ad053aa1e6525ac0451cee8b7.jpg
26.7 KB
diff --git a/‎tests/datasets/paligemma/dataset/de960ddd58344041754d5f984f8f82c2_png.rf.011864613b53c6b6a0c0a7086b657a71.jpg
30 KB b/‎tests/datasets/paligemma/dataset/de960ddd58344041754d5f984f8f82c2_png.rf.011864613b53c6b6a0c0a7086b657a71.jpg
30 KB
diff --git a/‎tests/datasets/paligemma/dataset/fa68474f5b30c3d647ec1f5cddf41570_png.rf.000949c9aafeb8c594a936a0ef92993f.jpg
36.7 KB b/‎tests/datasets/paligemma/dataset/fa68474f5b30c3d647ec1f5cddf41570_png.rf.000949c9aafeb8c594a936a0ef92993f.jpg
36.7 KB
diff --git a/‎tests/manual/debugme.py
+2-1 b/‎tests/manual/debugme.py
+2-1
diff --git a/‎tests/manual/uselocal
+4-1 b/‎tests/manual/uselocal
+4-1
diff --git a/‎tests/manual/useprod
+3-1 b/‎tests/manual/useprod
+3-1
diff --git a/‎tests/manual/usestaging
+3-1 b/‎tests/manual/usestaging
+3-1
diff --git a/‎tests/util/test_folderparser.py
+14 b/‎tests/util/test_folderparser.py
+14
@@ -15,7 +15,7 @@
 from roboflow.models import CLIPModel, GazeModel  # noqa: F401
 from roboflow.util.general import write_line
 
-__version__ = "1.1.44"
+__version__ = "1.1.45"
 
 
 def check_key(api_key, model, notebook, num_retries=0):
 
@@ -47,7 +47,8 @@ def download(args):
 
 
 def import_dataset(args):
-    rf = roboflow.Roboflow()
+    api_key = load_roboflow_api_key(args.workspace)
+    rf = roboflow.Roboflow(api_key)
     workspace = rf.workspace(args.workspace)
     workspace.upload_dataset(
         dataset_path=args.folder,
 
@@ -8,7 +8,7 @@
 from .image_utils import load_labelmap
 
 IMAGE_EXTENSIONS = {".jpg", ".jpeg", ".png", ".bmp"}
-ANNOTATION_EXTENSIONS = {".txt", ".json", ".xml", ".csv"}
+ANNOTATION_EXTENSIONS = {".txt", ".json", ".xml", ".csv", ".jsonl"}
 LABELMAPS_EXTENSIONS = {".labels", ".yaml", ".yml"}
 
 
@@ -107,13 +107,14 @@ def _map_annotations_to_images_1tomany(images, annotationFiles):
         dirname = image["dirname"]
         annotationsInSameDir = annotationsByDirname.get(dirname, [])
         if annotationsInSameDir:
-            if len(annotationsInSameDir) > 1:
-                print(f"warning: found multiple annotation files on dir {dirname}")
-            annotationFile = annotationsInSameDir[0]
-            format = annotationFile["parsedType"]
-            image["annotationfile"] = _filterIndividualAnnotations(
-                image, annotationFile, format, imgRefMap, annotationMap
-            )
+            for annotationFile in annotationsInSameDir:
+                format = annotationFile["parsedType"]
+                filtered_annotations = _filterIndividualAnnotations(
+                    image, annotationFile, format, imgRefMap, annotationMap
+                )
+                if filtered_annotations:
+                    image["annotationfile"] = filtered_annotations
+                    break
 
 
 def _build_image_and_annotation_maps(annotationFiles):
@@ -182,11 +183,16 @@ def _filterIndividualAnnotations(image, annotation, format, imgRefMap, annotatio
             return _annotation
         else:
             return None
+    elif format == "jsonl":
+        jsonlLines = [json.dumps(line) for line in parsed if line["image"] == image["name"]]
+        if jsonlLines:
+            _annotation = {"name": "annotation.jsonl", "rawText": "\n".join(jsonlLines)}
+            return _annotation
     return None
 
 
 def _loadAnnotations(folder, annotations):
-    valid_extensions = {".json", ".csv"}
+    valid_extensions = {".json", ".csv", ".jsonl"}
     annotations = [a for a in annotations if a["extension"] in valid_extensions]
     for ann in annotations:
         extension = ann["extension"]
@@ -197,12 +203,29 @@ def _loadAnnotations(folder, annotations):
                 if parsedType:
                     ann["parsed"] = parsed
                     ann["parsedType"] = parsedType
+        elif extension == ".jsonl":
+            ann["parsed"] = _read_jsonl(f"{folder}{ann['file']}")
+            ann["parsedType"] = "jsonl"
         elif extension == ".csv":
             ann["parsedType"] = "csv"
             ann["parsed"] = _parseAnnotationCSV(f"{folder}{ann['file']}")
     return annotations
 
 
+def _read_jsonl(path):
+    """Load a JSON Lines file into a list of parsed values; blank lines are skipped, invalid ones warned about."""
+    data = []
+    with open(path, encoding="utf-8") as file:  # JSON Lines is UTF-8; don't depend on the platform locale
+        for linenum, line in enumerate(map(str.strip, file), 1):
+            if not line:  # stripped first: a raw line still holds its "\n" and would never be falsy
+                continue
+            try:
+                data.append(json.loads(line))
+            except json.JSONDecodeError:
+                print(f"Warning: Skipping invalid JSON line in {path}:{linenum}")
+    return data
+
+
 def _parseAnnotationCSV(filename):
     # TODO: use a proper CSV library?
     with open(filename) as f:
 
@@ -0,0 +1,5 @@
+# ChartQA > 2024-08-28 7:21pm
+https://universe.roboflow.com/roboflow-jvuqo/chartqa-c9zny
+
+Provided by a Roboflow user
+License: CC BY 4.0
@@ -0,0 +1,9 @@
+{"image":"de960ddd58344041754d5f984f8f82c2_png.rf.011864613b53c6b6a0c0a7086b657a71.jpg","prefix":"What region in Italy had the highest number of mafia crimes in 2018?","suffix":"Calabria"}
+{"image":"de960ddd58344041754d5f984f8f82c2_png.rf.011864613b53c6b6a0c0a7086b657a71.jpg","prefix":"How many criminal reports were recorded in the region of Calabria in 2018?","suffix":"896"}
+{"image":"de960ddd58344041754d5f984f8f82c2_png.rf.011864613b53c6b6a0c0a7086b657a71.jpg","prefix":"What region in Italy had the highest number of mafia crimes in 2018?","suffix":"Calabria"}
+{"image":"de960ddd58344041754d5f984f8f82c2_png.rf.011864613b53c6b6a0c0a7086b657a71.jpg","prefix":"How many criminal reports were recorded in the region of Calabria in 2018?","suffix":"896"}
+{"image":"de48275e1ff70fab78bee31e09fc896d_png.rf.01a97b1ad053aa1e6525ac0451cee8b7.jpg","prefix":"Which sector had the highest ROI in 2013?","suffix":"Retail"}
+{"image":"de48275e1ff70fab78bee31e09fc896d_png.rf.01a97b1ad053aa1e6525ac0451cee8b7.jpg","prefix":"Which sector had the highest ROI in 2014?","suffix":"Electronics"}
+{"image":"e1893eee3f64bda1eac88da795ad3a00_png.rf.01248d761c27015da1fa5f3c4daea759.jpg","prefix":"How much did Hermes' national general cargo revenue add up to in 2009?","suffix":"100"}
+{"image":"e1893eee3f64bda1eac88da795ad3a00_png.rf.01248d761c27015da1fa5f3c4daea759.jpg","prefix":"How much did Hermes' national general cargo revenue add up to in 2009?","suffix":"100"}
+{"image":"eaab023f1ce380c4c9163415facc3c0d_png.rf.01c5a1f19653c056bbb3b0c8fc2d752d.jpg","prefix":"What's the percentage value of leftmost bar?","suffix":"24"}
@@ -0,0 +1,4 @@
+{"image":"63a6c783083d5c7c7290bc81877a4ee9_png.rf.5c02d037f48bc3df56e6d0e3e6e053e4.jpg","prefix":"How many research and public policy oriented organizations were there among the registered environmental and conservation organizations in the United States in 2005?","suffix":"372"}
+{"image":"63a6c783083d5c7c7290bc81877a4ee9_png.rf.5c02d037f48bc3df56e6d0e3e6e053e4.jpg","prefix":"How many research and public policy oriented organizations were there among the registered environmental and conservation organizations in the United States in 2005?","suffix":"372"}
+{"image":"5964b4c268577652f171d52dc317d82d_png.rf.5bf49f8aa575f586001710b1d79968fd.jpg","prefix":"What was the crude birth rate in Costa Rica in 2019?","suffix":"13.69"}
+{"image":"5964b4c268577652f171d52dc317d82d_png.rf.5bf49f8aa575f586001710b1d79968fd.jpg","prefix":"What was the crude birth rate in Costa Rica in 2019?","suffix":"13.69"}
@@ -0,0 +1,3 @@
+{"image":"fa68474f5b30c3d647ec1f5cddf41570_png.rf.000949c9aafeb8c594a936a0ef92993f.jpg","prefix":"How many murders and manslaughters were recorded by the Belgian police in 2020?","suffix":"874"}
+{"image":"fa68474f5b30c3d647ec1f5cddf41570_png.rf.000949c9aafeb8c594a936a0ef92993f.jpg","prefix":"How many murders and manslaughters were recorded by the Belgian police in 2020?","suffix":"874"}
+{"image":"aca6fd05e9b2830518288ba082aa6f76_png.rf.001543e209328197472f6587dfa8a6d6.jpg","prefix":"What was the unemployment rate in Chile in 2020?","suffix":"11.51"}
@@ -41,6 +41,7 @@
         # f"import {thisdir}/data/cultura-pepino-yolov8_voc -w wolfodorpythontests -p yellow-auto -c 100".split()  # noqa: E501 // docs
         # f"import {thisdir}/data/cultura-pepino-yolov5pytorch -w wolfodorpythontests -p yellow-auto -c 100 -n papaiasso".split()  # noqa: E501 // docs
         # f"import {thisdir}/../datasets/mosquitos -w wolfodorpythontests -p yellow-auto -n papaiasso".split()  # noqa: E501 // docs
-        f"deployment list".split()  # noqa: E501 // docs
+        # f"deployment list".split()  # noqa: E501 // docs
+        f"import -w tonyprivate -p meh-plvrv {thisdir}/../datasets/paligemma/".split()  # noqa: E501 // docs
     )
     args.func(args)
@@ -1,5 +1,8 @@
 #!/bin/env bash
-cp data/.config-staging data/.config
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"  # resolve paths against this script, not the caller's cwd
+cp "$SCRIPT_DIR/data/.config-staging" "$SCRIPT_DIR/data/.config"  # quoted: the checkout path may contain spaces
 export API_URL=https://localhost.roboflow.one
 export APP_URL=https://localhost.roboflow.one
+export DEDICATED_DEPLOYMENT_URL=https://staging.roboflow.cloud
+export ROBOFLOW_CONFIG_DIR="$SCRIPT_DIR/data/.config"
 # need to set it in /etc/hosts to the IP of host.docker.internal!
@@ -1,7 +1,9 @@
 #!/bin/env bash
 
-cp data/.config-prod data/.config
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"  # resolve paths against this script, not the caller's cwd
+cp "$SCRIPT_DIR/data/.config-prod" "$SCRIPT_DIR/data/.config"  # quoted: the checkout path may contain spaces
 export API_URL=https://api.roboflow.com
 export APP_URL=https://app.roboflow.com
 export OBJECT_DETECTION_URL=https://detect.roboflow.one
 export DEDICATED_DEPLOYMENT_URL=https://roboflow.cloud
+export ROBOFLOW_CONFIG_DIR="$SCRIPT_DIR/data/.config"
@@ -1,7 +1,9 @@
 #!/bin/env bash
 
-cp data/.config-staging data/.config
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"  # resolve paths against this script, not the caller's cwd
+cp "$SCRIPT_DIR/data/.config-staging" "$SCRIPT_DIR/data/.config"  # quoted: the checkout path may contain spaces
 export API_URL=https://api.roboflow.one
 export APP_URL=https://app.roboflow.one
 export OBJECT_DETECTION_URL=https://lambda-object-detection.staging.roboflow.com
 export DEDICATED_DEPLOYMENT_URL=https://staging.roboflow.cloud
+export ROBOFLOW_CONFIG_DIR="$SCRIPT_DIR/data/.config"
@@ -52,6 +52,20 @@ def test_parse_mosquitos_csv(self):
         expected += "train_10308.jpeg,1058,943,japonicus/koreicus,28,187,908,815\n"
         assert testImage["annotationfile"]["rawText"] == expected
 
+    def test_paligemma_format(self):  # each image must get only its own lines from the shared .jsonl file
+        folder = f"{thisdir}/../datasets/paligemma"
+        parsed = folderparser.parsefolder(folder)
+        testImagePath = "/dataset/de48275e1ff70fab78bee31e09fc896d_png.rf.01a97b1ad053aa1e6525ac0451cee8b7.jpg"
+        testImage = [i for i in parsed["images"] if i["file"] == testImagePath][0]
+        assert testImage["annotationfile"]["name"] == "annotation.jsonl"
+        expected = (  # ", " / ": " spacing differs from the compact fixture: lines are re-serialized with json.dumps
+            '{"image": "de48275e1ff70fab78bee31e09fc896d_png.rf.01a97b1ad053aa1e6525ac0451cee8b7.jpg",'
+            ' "prefix": "Which sector had the highest ROI in 2013?", "suffix": "Retail"}\n'
+            '{"image": "de48275e1ff70fab78bee31e09fc896d_png.rf.01a97b1ad053aa1e6525ac0451cee8b7.jpg",'
+            ' "prefix": "Which sector had the highest ROI in 2014?", "suffix": "Electronics"}'
+        )
+        assert testImage["annotationfile"]["rawText"] == expected
+
 
 def _assertJsonMatchesFile(actual, filename):
     with open(filename) as file:
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+{"image":"fa68474f5b30c3d647ec1f5cddf41570_png.rf.000949c9aafeb8c594a936a0ef92993f.jpg","prefix":"How many murders and manslaughters were recorded by the Belgian police in 2020?","suffix":"874"}`
	`2`	`+{"image":"fa68474f5b30c3d647ec1f5cddf41570_png.rf.000949c9aafeb8c594a936a0ef92993f.jpg","prefix":"How many murders and manslaughters were recorded by the Belgian police in 2020?","suffix":"874"}`
	`3`	`+{"image":"aca6fd05e9b2830518288ba082aa6f76_png.rf.001543e209328197472f6587dfa8a6d6.jpg","prefix":"What was the unemployment rate in Chile in 2020?","suffix":"11.51"}`
Original file line number	Diff line number	Diff line change
`@@ -41,6 +41,7 @@`
`41`	`41`	`# f"import {thisdir}/data/cultura-pepino-yolov8_voc -w wolfodorpythontests -p yellow-auto -c 100".split() # noqa: E501 // docs`
`42`	`42`	`# f"import {thisdir}/data/cultura-pepino-yolov5pytorch -w wolfodorpythontests -p yellow-auto -c 100 -n papaiasso".split() # noqa: E501 // docs`
`43`	`43`	`# f"import {thisdir}/../datasets/mosquitos -w wolfodorpythontests -p yellow-auto -n papaiasso".split() # noqa: E501 // docs`
`44`		`- f"deployment list".split() # noqa: E501 // docs`
	`44`	`+ # f"deployment list".split() # noqa: E501 // docs`
	`45`	`+ f"import -w tonyprivate -p meh-plvrv {thisdir}/../datasets/paligemma/".split() # noqa: E501 // docs`
`45`	`46`	`)`
`46`	`47`	`args.func(args)`