From 2b9ddd43f385d2470e0bf78bbf5810620446dbdd Mon Sep 17 00:00:00 2001 From: Rodrigo Barbosa Date: Fri, 13 Feb 2026 11:42:48 +0000 Subject: [PATCH 1/9] initial version --- roboflow/adapters/rfapi.py | 46 +++++++ roboflow/core/workspace.py | 117 +++++++++++++++++ roboflow/roboflowpy.py | 29 +++++ tests/test_search_export.py | 241 ++++++++++++++++++++++++++++++++++++ 4 files changed, 433 insertions(+) create mode 100644 tests/test_search_export.py diff --git a/roboflow/adapters/rfapi.py b/roboflow/adapters/rfapi.py index a522429e..57a86e6b 100644 --- a/roboflow/adapters/rfapi.py +++ b/roboflow/adapters/rfapi.py @@ -152,6 +152,52 @@ def get_version_export( return payload +def start_search_export( + api_key: str, + workspace_url: str, + query: str, + format: str, + dataset: Optional[str] = None, + annotation_group: Optional[str] = None, + name: Optional[str] = None, +) -> str: + """Start a search export job. + + Returns the export_id string used to poll for completion. + + Raises RoboflowError on non-202 responses. + """ + url = f"{API_URL}/{workspace_url}/search/export?api_key={api_key}" + body: Dict[str, str] = {"query": query, "format": format} + if dataset is not None: + body["dataset"] = dataset + if annotation_group is not None: + body["annotationGroup"] = annotation_group + if name is not None: + body["name"] = name + + response = requests.post(url, json=body) + if response.status_code != 202: + raise RoboflowError(response.text) + + payload = response.json() + return payload["link"] + + +def get_search_export(api_key: str, workspace_url: str, export_id: str) -> dict: + """Poll the status of a search export job. + + Returns dict with ``ready`` (bool) and ``link`` (str, present when ready). + + Raises RoboflowError on non-200 responses. + """ + url = f"{API_URL}/{workspace_url}/search/export/{export_id}?api_key={api_key}" + response = requests.get(url) + if response.status_code != 200: + raise RoboflowError(response.text) + return response.json() + + def upload_image( api_key, project_url, diff --git a/roboflow/core/workspace.py b/roboflow/core/workspace.py index de5fcca4..3d78c585 100644 --- a/roboflow/core/workspace.py +++ b/roboflow/core/workspace.py @@ -5,10 +5,14 @@ import json import os import sys +import time +import zipfile from typing import Any, Dict, List, Optional import requests from PIL import Image +from requests.exceptions import HTTPError +from tqdm import tqdm from roboflow.adapters import rfapi from roboflow.adapters.rfapi import AnnotationSaveError, ImageUploadError, RoboflowError @@ -662,6 +666,119 @@ def _upload_zip( except Exception as e: print(f"An error occured when uploading the model: {e}") + def search_export( + self, + query: str, + format: str = "coco", + location: Optional[str] = None, + dataset: Optional[str] = None, + annotation_group: Optional[str] = None, + name: Optional[str] = None, + extract_zip: bool = True, + ) -> str: + """Export search results as a downloaded dataset. + + Args: + query: Search query string (e.g. ``"tag:annotate"`` or ``"*"``). + format: Annotation format for the export (default ``"coco"``). + location: Local directory to save the exported dataset. + Defaults to ``./search-export-{format}``. + dataset: Limit export to a specific dataset (project) slug. + annotation_group: Limit export to a specific annotation group. + name: Optional name for the export. + extract_zip: If True (default), extract the zip and remove it. + If False, keep the zip file as-is. + + Returns: + Absolute path to the extracted directory or the zip file. 
+ + Raises: + ValueError: If both *dataset* and *annotation_group* are provided. + RoboflowError: On API errors or export timeout. + """ + if dataset is not None and annotation_group is not None: + raise ValueError("dataset and annotation_group are mutually exclusive; provide only one") + + if location is None: + location = f"./search-export-{format}" + location = os.path.abspath(location) + + # 1. Start the export + export_id = rfapi.start_search_export( + api_key=self.__api_key, + workspace_url=self.url, + query=query, + format=format, + dataset=dataset, + annotation_group=annotation_group, + name=name, + ) + print(f"Export started (id={export_id}). Polling for completion...") + + # 2. Poll until ready + timeout = 600 + poll_interval = 5 + elapsed = 0 + while elapsed < timeout: + status = rfapi.get_search_export( + api_key=self.__api_key, + workspace_url=self.url, + export_id=export_id, + ) + if status.get("ready"): + break + time.sleep(poll_interval) + elapsed += poll_interval + else: + raise RoboflowError(f"Search export timed out after {timeout}s") + + download_url = status["link"] + + # 3. Download zip + if not os.path.exists(location): + os.makedirs(location) + + zip_path = os.path.join(location, "roboflow.zip") + response = requests.get(download_url, stream=True) + try: + response.raise_for_status() + except HTTPError as e: + raise RoboflowError(f"Failed to download search export: {e}") + + total_length = response.headers.get("content-length") + try: + total_kib = int(total_length) // 1024 + 1 if total_length is not None else None + except (TypeError, ValueError): + total_kib = None + with open(zip_path, "wb") as f: + for chunk in tqdm( + response.iter_content(chunk_size=1024), + desc=f"Downloading search export to {location}", + total=total_kib, + ): + if chunk: + f.write(chunk) + f.flush() + + if extract_zip: + desc = f"Extracting search export to {location}" + try: + with zipfile.ZipFile(zip_path, "r") as zip_ref: + for member in tqdm(zip_ref.infolist(), desc=desc): + try: + zip_ref.extract(member, location) + except zipfile.error: + raise RoboflowError("Error unzipping search export") + except zipfile.BadZipFile: + raise RoboflowError(f"Downloaded file is not a valid zip archive: {zip_path}") + + os.remove(zip_path) + print(f"Search export extracted to {location}") + return location + else: + print(f"Search export saved to {zip_path}") + return zip_path + def __str__(self): projects = self.projects() json_value = {"name": self.name, "url": self.url, "projects": projects} diff --git a/roboflow/roboflowpy.py b/roboflow/roboflowpy.py index 70cf6db9..f68bda47 100755 --- a/roboflow/roboflowpy.py +++ b/roboflow/roboflowpy.py @@ -202,6 +202,21 @@ def infer(args): print(group) +def search_export(args): + rf = roboflow.Roboflow() + workspace = rf.workspace(args.workspace) + result = workspace.search_export( + query=args.query, + format=args.format, + location=args.location, + dataset=args.dataset, + annotation_group=args.annotation_group, + name=args.name, + extract_zip=not args.no_extract, + ) + print(result) + + def _argparser(): parser = argparse.ArgumentParser(description="Welcome to the roboflow CLI: computer vision at your fingertips 🪄") subparsers = parser.add_subparsers(title="subcommands") @@ -218,6 +233,7 @@ def _argparser(): _add_run_video_inference_api_parser(subparsers) deployment.add_deployment_parser(subparsers) _add_whoami_parser(subparsers) + _add_search_export_parser(subparsers) parser.add_argument("-v", "--version", help="show version info", action="store_true") 
parser.set_defaults(func=show_version) @@ -594,6 +610,19 @@ def _add_get_workspace_project_version_parser(subparsers): workspace_project_version_parser.set_defaults(func=get_workspace_project_version) +def _add_search_export_parser(subparsers): + p = subparsers.add_parser("search-export", help="Export search results as a dataset") + p.add_argument("query", help="Search query (e.g. 'tag:annotate' or '*')") + p.add_argument("-f", dest="format", default="coco", help="Annotation format (default: coco)") + p.add_argument("-w", dest="workspace", help="Workspace url or id (uses default workspace if not specified)") + p.add_argument("-l", dest="location", help="Local directory to save the export") + p.add_argument("-d", dest="dataset", help="Limit export to a specific dataset (project slug)") + p.add_argument("-g", dest="annotation_group", help="Limit export to a specific annotation group") + p.add_argument("-n", dest="name", help="Optional name for the export") + p.add_argument("--no-extract", dest="no_extract", action="store_true", help="Skip extraction, keep the zip file") + p.set_defaults(func=search_export) + + def _add_login_parser(subparsers): login_parser = subparsers.add_parser("login", help="Log in to Roboflow") login_parser.add_argument( diff --git a/tests/test_search_export.py b/tests/test_search_export.py new file mode 100644 index 00000000..c1a24f43 --- /dev/null +++ b/tests/test_search_export.py @@ -0,0 +1,241 @@ +import io +import os +import shutil +import unittest +import zipfile +from unittest.mock import MagicMock, patch + +import responses +import requests + +from roboflow.adapters.rfapi import RoboflowError, get_search_export, start_search_export +from roboflow.config import API_URL + + +class TestStartSearchExport(unittest.TestCase): + API_KEY = "test_key" + WORKSPACE = "my-workspace" + + @responses.activate + def test_success(self): + url = f"{API_URL}/{self.WORKSPACE}/search/export?api_key={self.API_KEY}" + responses.add(responses.POST, url, json={"success": True, "link": "export_123"}, status=202) + + export_id = start_search_export(self.API_KEY, self.WORKSPACE, query="*", format="coco") + self.assertEqual(export_id, "export_123") + + body = responses.calls[0].request.body + self.assertIn(b'"query"', body) + self.assertIn(b'"format"', body) + + @responses.activate + def test_with_dataset(self): + url = f"{API_URL}/{self.WORKSPACE}/search/export?api_key={self.API_KEY}" + responses.add(responses.POST, url, json={"success": True, "link": "export_456"}, status=202) + + export_id = start_search_export( + self.API_KEY, self.WORKSPACE, query="tag:train", format="yolov8", dataset="my-dataset" + ) + self.assertEqual(export_id, "export_456") + + body = responses.calls[0].request.body + self.assertIn(b'"dataset"', body) + + @responses.activate + def test_error_response(self): + url = f"{API_URL}/{self.WORKSPACE}/search/export?api_key={self.API_KEY}" + responses.add(responses.POST, url, body="Bad Request", status=400) + + with self.assertRaises(RoboflowError): + start_search_export(self.API_KEY, self.WORKSPACE, query="*", format="coco") + + +class TestGetSearchExport(unittest.TestCase): + API_KEY = "test_key" + WORKSPACE = "my-workspace" + + @responses.activate + def test_not_ready(self): + url = f"{API_URL}/{self.WORKSPACE}/search/export/exp1?api_key={self.API_KEY}" + responses.add(responses.GET, url, json={"ready": False}, status=200) + + result = get_search_export(self.API_KEY, self.WORKSPACE, "exp1") + self.assertFalse(result["ready"]) + + @responses.activate + def 
test_ready(self): + url = f"{API_URL}/{self.WORKSPACE}/search/export/exp1?api_key={self.API_KEY}" + responses.add(responses.GET, url, json={"ready": True, "link": "https://download.url/file.zip"}, status=200) + + result = get_search_export(self.API_KEY, self.WORKSPACE, "exp1") + self.assertTrue(result["ready"]) + self.assertEqual(result["link"], "https://download.url/file.zip") + + @responses.activate + def test_error_response(self): + url = f"{API_URL}/{self.WORKSPACE}/search/export/exp1?api_key={self.API_KEY}" + responses.add(responses.GET, url, body="Not Found", status=404) + + with self.assertRaises(RoboflowError): + get_search_export(self.API_KEY, self.WORKSPACE, "exp1") + + +class TestWorkspaceSearchExportValidation(unittest.TestCase): + def _make_workspace(self): + from roboflow.core.workspace import Workspace + + info = { + "workspace": { + "name": "Test", + "url": "test-ws", + "projects": [], + "members": [], + } + } + return Workspace(info, api_key="test_key", default_workspace="test-ws", model_format="yolov8") + + def test_mutual_exclusion(self): + ws = self._make_workspace() + with self.assertRaises(ValueError) as ctx: + ws.search_export(query="*", dataset="ds", annotation_group="ag") + self.assertIn("mutually exclusive", str(ctx.exception)) + + +class TestWorkspaceSearchExportFlow(unittest.TestCase): + @staticmethod + def _build_zip_bytes(files): + buffer = io.BytesIO() + with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as zip_file: + for filename, content in files.items(): + zip_file.writestr(filename, content) + return buffer.getvalue() + + def _make_workspace(self): + from roboflow.core.workspace import Workspace + + info = { + "workspace": { + "name": "Test", + "url": "test-ws", + "projects": [], + "members": [], + } + } + return Workspace(info, api_key="test_key", default_workspace="test-ws", model_format="yolov8") + + @patch("roboflow.core.workspace.rfapi") + @patch("roboflow.core.workspace.requests") + def test_full_flow(self, mock_requests, mock_rfapi): + ws = self._make_workspace() + + mock_rfapi.start_search_export.return_value = "exp_abc" + mock_rfapi.get_search_export.return_value = {"ready": True, "link": "https://example.com/export.zip"} + + fake_zip = self._build_zip_bytes({"images/sample.jpg": "fake-image-data"}) + mock_response = MagicMock() + mock_response.headers = {"content-length": str(len(fake_zip))} + mock_response.raise_for_status.return_value = None + mock_response.iter_content.return_value = [fake_zip[:1024], fake_zip[1024:]] + mock_requests.get.return_value = mock_response + + location = "./test_search_export_output" + try: + result = ws.search_export(query="*", format="coco", location=location) + + expected_location = os.path.abspath(location) + self.assertEqual(result, expected_location) + self.assertTrue(os.path.exists(os.path.join(expected_location, "images", "sample.jpg"))) + self.assertFalse(os.path.exists(os.path.join(expected_location, "roboflow.zip"))) + + mock_rfapi.start_search_export.assert_called_once_with( + api_key="test_key", + workspace_url="test-ws", + query="*", + format="coco", + dataset=None, + annotation_group=None, + name=None, + ) + mock_rfapi.get_search_export.assert_called_once_with( + api_key="test_key", + workspace_url="test-ws", + export_id="exp_abc", + ) + mock_response.raise_for_status.assert_called_once() + mock_response.iter_content.assert_called_once_with(chunk_size=1024) + finally: + if os.path.exists(location): + shutil.rmtree(location) + + @patch("roboflow.core.workspace.rfapi") + 
@patch("roboflow.core.workspace.requests") + def test_full_flow_without_content_length_still_streams(self, mock_requests, mock_rfapi): + ws = self._make_workspace() + + mock_rfapi.start_search_export.return_value = "exp_abc" + mock_rfapi.get_search_export.return_value = {"ready": True, "link": "https://example.com/export.zip"} + + fake_zip = self._build_zip_bytes({"annotations/instances.json": "{}"}) + mock_response = MagicMock() + mock_response.headers = {} + mock_response.raise_for_status.return_value = None + mock_response.iter_content.return_value = [fake_zip] + mock_requests.get.return_value = mock_response + + location = "./test_search_export_no_content_length" + try: + result = ws.search_export(query="*", format="coco", location=location) + expected_location = os.path.abspath(location) + self.assertEqual(result, expected_location) + self.assertTrue(os.path.exists(os.path.join(expected_location, "annotations", "instances.json"))) + mock_response.iter_content.assert_called_once_with(chunk_size=1024) + finally: + if os.path.exists(location): + shutil.rmtree(location) + + @patch("roboflow.core.workspace.rfapi") + @patch("roboflow.core.workspace.requests") + def test_download_http_error_raises_roboflow_error(self, mock_requests, mock_rfapi): + ws = self._make_workspace() + + mock_rfapi.start_search_export.return_value = "exp_abc" + mock_rfapi.get_search_export.return_value = {"ready": True, "link": "https://example.com/export.zip"} + + mock_response = MagicMock() + mock_response.raise_for_status.side_effect = requests.HTTPError("403 Client Error") + mock_requests.get.return_value = mock_response + + with self.assertRaises(RoboflowError) as context: + ws.search_export(query="*", format="coco", location="./test_search_export_http_error") + + self.assertIn("Failed to download search export", str(context.exception)) + + @patch("roboflow.core.workspace.rfapi") + @patch("roboflow.core.workspace.requests") + def test_no_extract(self, mock_requests, mock_rfapi): + ws = self._make_workspace() + + mock_rfapi.start_search_export.return_value = "exp_abc" + mock_rfapi.get_search_export.return_value = {"ready": True, "link": "https://example.com/export.zip"} + + fake_zip = self._build_zip_bytes({"images/sample.jpg": "fake-image-data"}) + mock_response = MagicMock() + mock_response.headers = {"content-length": str(len(fake_zip))} + mock_response.raise_for_status.return_value = None + mock_response.iter_content.return_value = [fake_zip] + mock_requests.get.return_value = mock_response + + location = "./test_search_export_no_extract" + try: + result = ws.search_export(query="*", format="coco", location=location, extract_zip=False) + + expected_zip = os.path.join(os.path.abspath(location), "roboflow.zip") + self.assertEqual(result, expected_zip) + self.assertTrue(os.path.exists(expected_zip)) + finally: + if os.path.exists(location): + shutil.rmtree(location) + + +if __name__ == "__main__": + unittest.main() From ecb79e4084d07daf9f78095170f0f190af23d490 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 13 Feb 2026 11:45:49 +0000 Subject: [PATCH 2/9] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto=20?= =?UTF-8?q?format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/test_search_export.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_search_export.py b/tests/test_search_export.py index c1a24f43..c8f3de1b 100644 --- 
a/tests/test_search_export.py +++ b/tests/test_search_export.py @@ -5,8 +5,8 @@ import zipfile from unittest.mock import MagicMock, patch -import responses import requests +import responses from roboflow.adapters.rfapi import RoboflowError, get_search_export, start_search_export from roboflow.config import API_URL From 15b0e24f10aa31c9ae522fb6f468573670edf659 Mon Sep 17 00:00:00 2001 From: Rodrigo Barbosa Date: Fri, 13 Feb 2026 11:55:17 +0000 Subject: [PATCH 3/9] refactoring to reuse zip and download utils --- examples/user_metadata_example.py | 183 ++++++++++++++++++++++++++++++ roboflow/core/version.py | 29 +---- roboflow/core/workspace.py | 15 +-- roboflow/util/general.py | 25 ++++ tests/test_version.py | 2 +- 5 files changed, 213 insertions(+), 41 deletions(-) create mode 100644 examples/user_metadata_example.py diff --git a/examples/user_metadata_example.py b/examples/user_metadata_example.py new file mode 100644 index 00000000..67297927 --- /dev/null +++ b/examples/user_metadata_example.py @@ -0,0 +1,183 @@ +""" +Roboflow User Metadata Example +============================== + +This example demonstrates how to access user_metadata (custom metadata fields) +attached to images in your Roboflow projects. + +User metadata allows you to store custom key-value pairs on images, such as: +- capture_location: "warehouse-A" +- camera_id: "cam-001" +- quality_score: 0.95 +- is_validated: True + +There are three ways to access user_metadata: + +1. Search API - Query images and retrieve metadata in bulk +2. Image API - Get metadata for a specific image by ID +3. Version Export - Download datasets with metadata included in annotation files + +Requirements: + pip install roboflow + +Usage: + python user_metadata_example.py +""" + +import os +import json +import glob + + +from roboflow import Roboflow + + +# ============================================================================= +# Configuration +# ============================================================================= +# change as needed +API_KEY = os.getenv("ROBOFLOW_API_KEY") +WORKSPACE = os.getenv("ROBOFLOW_WORKSPACE") +PROJECT = os.getenv("ROBOFLOW_PROJECT") +VERSION = os.getenv("ROBOFLOW_VERSION") + + +def example_search_with_metadata(project): + """ + Example 1: Search API with user_metadata + + Use the `fields` parameter to request user_metadata in search results. + This is efficient for retrieving metadata for multiple images at once. + + Available fields: id, name, created, annotations, labels, split, tags, + owner, embedding, user_metadata + """ + print("=" * 70) + print("Example 1: Search API with user_metadata") + print("=" * 70) + + # Search for images and include user_metadata in results + results = project.search(limit=5, fields=["id", "name", "tags", "user_metadata"]) + + print(f"\nFound {len(results)} images\n") + + for img in results: + print(f"Image: {img['name']}") + print(f" ID: {img['id']}") + print(f" Tags: {img.get('tags', [])}") + + metadata = img.get("user_metadata") + if metadata: + print(" User Metadata:") + for key, value in metadata.items(): + print(f" - {key}: {value}") + else: + print(" User Metadata: (none)") + print() + + return results + + +def example_image_by_id(project, image_id): + """ + Example 2: Image API - Get metadata for a specific image + + Use project.image(id) to retrieve full details including metadata. + The metadata is returned in the 'metadata' field. 
+ """ + print("=" * 70) + print("Example 2: Image API - Get metadata by image ID") + print("=" * 70) + + # Get image details by ID + image = project.image(image_id) + + print(f"\nImage: {image['name']}") + print(f" ID: {image['id']}") + print(f" Split: {image.get('split', 'N/A')}") + print(f" Tags: {image.get('tags', [])}") + + metadata = image.get("metadata", {}) + if metadata: + print(" Metadata:") + for key, value in metadata.items(): + print(f" - {key}: {value}") + else: + print(" Metadata: (none)") + print() + + +def example_version_export(version, export_path="./dataset_export"): + """ + Example 3: Version Export with user_metadata + + When exporting a dataset version in COCO format, user_metadata is included + in the annotation JSON file under each image's 'extra' field. + + Location in COCO JSON: images[].extra.user_metadata + """ + print("=" * 70) + print("Example 3: Version Export with user_metadata (COCO format)") + print("=" * 70) + + # Download the dataset in COCO format + print(f"\nDownloading dataset to: {export_path}") + dataset = version.download("coco", location=export_path, overwrite=True) + + # Read the annotation file to show user_metadata + json_files = glob.glob(f"{dataset.location}/**/_annotations.coco.json", recursive=True) + + if json_files: + print(f"\nInspecting: {json_files[0]}") + with open(json_files[0], "r") as f: + coco_data = json.load(f) + + print("\nImages with user_metadata in export:\n") + for img in coco_data.get("images", [])[:5]: # Show first 5 + extra = img.get("extra", {}) + user_metadata = extra.get("user_metadata") + + print(f" {img['file_name']}") + if user_metadata: + print(f" user_metadata: {user_metadata}") + else: + print(" user_metadata: (none)") + print() + + +def main(): + # ========================================================================= + # Initialize Roboflow + # ========================================================================= + print("\nInitializing Roboflow...\n") + rf = Roboflow(api_key=API_KEY) + + project = rf.workspace(WORKSPACE).project(PROJECT) + version = project.version(VERSION) + + # ========================================================================= + # Example 1: Search with user_metadata + # ========================================================================= + results = example_search_with_metadata(project) + + # ========================================================================= + # Example 2: Get image by ID + # ========================================================================= + # Find an image with metadata from search results + image_with_metadata = next((img for img in results if img.get("user_metadata")), results[0] if results else None) + + if image_with_metadata: + example_image_by_id(project, image_with_metadata["id"]) + + # ========================================================================= + # Example 3: Version export (COCO format) + # ========================================================================= + example_version_export(version, export_path="./dataset_with_metadata") + + print("=" * 70) + print("Examples completed!") + print("=" * 70) + + +if __name__ == "__main__": + main() diff --git a/roboflow/core/version.py b/roboflow/core/version.py index bd6e035c..88ba5740 100644 --- a/roboflow/core/version.py +++ b/roboflow/core/version.py @@ -5,7 +5,6 @@ import os import sys import time -import zipfile from typing import TYPE_CHECKING, Optional, Union import requests @@ -32,7 +31,7 @@ from roboflow.models.object_detection import ObjectDetectionModel from 
roboflow.models.semantic_segmentation import SemanticSegmentationModel from roboflow.util.annotations import amend_data_yaml -from roboflow.util.general import write_line +from roboflow.util.general import extract_zip, write_line from roboflow.util.model_processor import process from roboflow.util.versions import get_model_format, get_wrong_dependencies_versions, normalize_yolo_model_type @@ -239,7 +238,7 @@ def download(self, model_format=None, location=None, overwrite: bool = False): link = export_info["export"]["link"] self.__download_zip(link, location, model_format) - self.__extract_zip(location, model_format) + extract_zip(location, desc=f"Extracting Dataset Version Zip to {location} in {model_format}:") self.__reformat_yaml(location, model_format) # TODO: is roboflow-python a place to be munging yaml files? return Dataset(self.name, self.version, model_format, os.path.abspath(location)) @@ -577,30 +576,6 @@ def bar_progress(current, total, width=80): sys.stdout.write("\n") sys.stdout.flush() - def __extract_zip(self, location, format): - """ - Extracts the contents of a downloaded ZIP file and then deletes the zipped file. - - Args: - location (str): filepath of the data directory that contains the ZIP file - format (str): the format identifier string - - Raises: - RuntimeError: If there is an error unzipping the file - """ # noqa: E501 // docs - desc = None if TQDM_DISABLE else f"Extracting Dataset Version Zip to {location} in {format}:" - with zipfile.ZipFile(location + "/roboflow.zip", "r") as zip_ref: - for member in tqdm( - zip_ref.infolist(), - desc=desc, - ): - try: - zip_ref.extract(member, location) - except zipfile.error: - raise RuntimeError("Error unzipping download") - - os.remove(location + "/roboflow.zip") - def __get_download_location(self): """ Get the local path to save a downloaded dataset to diff --git a/roboflow/core/workspace.py b/roboflow/core/workspace.py index 3d78c585..5953dc17 100644 --- a/roboflow/core/workspace.py +++ b/roboflow/core/workspace.py @@ -6,7 +6,6 @@ import os import sys import time -import zipfile from typing import Any, Dict, List, Optional import requests @@ -20,6 +19,7 @@ from roboflow.core.project import Project from roboflow.util import folderparser from roboflow.util.active_learning_utils import check_box_size, clip_encode, count_comparisons +from roboflow.util.general import extract_zip as _extract_zip from roboflow.util.image_utils import load_labelmap from roboflow.util.model_processor import process from roboflow.util.two_stage_utils import ocr_infer @@ -761,18 +761,7 @@ def search_export( f.flush() if extract_zip: - desc = f"Extracting search export to {location}" - try: - with zipfile.ZipFile(zip_path, "r") as zip_ref: - for member in tqdm(zip_ref.infolist(), desc=desc): - try: - zip_ref.extract(member, location) - except zipfile.error: - raise RoboflowError("Error unzipping search export") - except zipfile.BadZipFile: - raise RoboflowError(f"Downloaded file is not a valid zip archive: {zip_path}") - - os.remove(zip_path) + _extract_zip(location, desc=f"Extracting search export to {location}") print(f"Search export extracted to {location}") return location else: diff --git a/roboflow/util/general.py b/roboflow/util/general.py index 9c92e552..9368d7a2 100644 --- a/roboflow/util/general.py +++ b/roboflow/util/general.py @@ -1,7 +1,13 @@ +import os import sys import time +import zipfile from random import random +from tqdm import tqdm + +from roboflow.config import TQDM_DISABLE + def write_line(line): sys.stdout.write("\r" + line) 
@@ -40,3 +46,22 @@ def __call__(self, func, *args, **kwargs): self.retries += 1 else: raise + + +def extract_zip(location: str, desc: str = "Extracting"): + """Extract ``roboflow.zip`` inside *location* and remove the archive. + + Args: + location: Directory containing ``roboflow.zip``. + desc: Description shown in the tqdm progress bar. + """ + zip_path = os.path.join(location, "roboflow.zip") + tqdm_desc = None if TQDM_DISABLE else desc + with zipfile.ZipFile(zip_path, "r") as zip_ref: + for member in tqdm(zip_ref.infolist(), desc=tqdm_desc): + try: + zip_ref.extract(member, location) + except zipfile.error: + raise RuntimeError("Error unzipping download") + + os.remove(zip_path) diff --git a/tests/test_version.py b/tests/test_version.py index 031ee674..8cd5b69c 100644 --- a/tests/test_version.py +++ b/tests/test_version.py @@ -46,7 +46,7 @@ def test_download_raises_exception_on_api_failure(self): @responses.activate @patch.object(Version, "_Version__download_zip") - @patch.object(Version, "_Version__extract_zip") + @patch("roboflow.core.version.extract_zip") @patch.object(Version, "_Version__reformat_yaml") def test_download_returns_dataset(self, *_): responses.add(responses.GET, self.api_url, json={"export": {"link": None}}) From 5f945cdfa78ba823f783f76725df16b0654d169e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 13 Feb 2026 11:56:40 +0000 Subject: [PATCH 4/9] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto=20?= =?UTF-8?q?format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- examples/user_metadata_example.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/examples/user_metadata_example.py b/examples/user_metadata_example.py index 67297927..be0c9249 100644 --- a/examples/user_metadata_example.py +++ b/examples/user_metadata_example.py @@ -24,14 +24,12 @@ python user_metadata_example.py """ -import os -import json import glob - +import json +import os from roboflow import Roboflow - # ============================================================================= # Configuration # ============================================================================= From 0358874755910605a1230571068e9d59393ae134 Mon Sep 17 00:00:00 2001 From: Rodrigo Barbosa Date: Fri, 13 Feb 2026 08:58:42 -0300 Subject: [PATCH 5/9] remove it --- examples/user_metadata_example.py | 181 ------------------------------ 1 file changed, 181 deletions(-) delete mode 100644 examples/user_metadata_example.py diff --git a/examples/user_metadata_example.py b/examples/user_metadata_example.py deleted file mode 100644 index be0c9249..00000000 --- a/examples/user_metadata_example.py +++ /dev/null @@ -1,181 +0,0 @@ -""" -Roboflow User Metadata Example -============================== - -This example demonstrates how to access user_metadata (custom metadata fields) -attached to images in your Roboflow projects. - -User metadata allows you to store custom key-value pairs on images, such as: -- capture_location: "warehouse-A" -- camera_id: "cam-001" -- quality_score: 0.95 -- is_validated: True - -There are three ways to access user_metadata: - -1. Search API - Query images and retrieve metadata in bulk -2. Image API - Get metadata for a specific image by ID -3. 
Version Export - Download datasets with metadata included in annotation files - -Requirements: - pip install roboflow - -Usage: - python user_metadata_example.py -""" - -import glob -import json -import os - -from roboflow import Roboflow - -# ============================================================================= -# Configuration -# ============================================================================= -# change as needed -API_KEY = os.getenv("ROBOFLOW_API_KEY") -WORKSPACE = os.getenv("ROBOFLOW_WORKSPACE") -PROJECT = os.getenv("ROBOFLOW_PROJECT") -VERSION = os.getenv("ROBOFLOW_VERSION") - - -def example_search_with_metadata(project): - """ - Example 1: Search API with user_metadata - - Use the `fields` parameter to request user_metadata in search results. - This is efficient for retrieving metadata for multiple images at once. - - Available fields: id, name, created, annotations, labels, split, tags, - owner, embedding, user_metadata - """ - print("=" * 70) - print("Example 1: Search API with user_metadata") - print("=" * 70) - - # Search for images and include user_metadata in results - results = project.search(limit=5, fields=["id", "name", "tags", "user_metadata"]) - - print(f"\nFound {len(results)} images\n") - - for img in results: - print(f"Image: {img['name']}") - print(f" ID: {img['id']}") - print(f" Tags: {img.get('tags', [])}") - - metadata = img.get("user_metadata") - if metadata: - print(" User Metadata:") - for key, value in metadata.items(): - print(f" - {key}: {value}") - else: - print(" User Metadata: (none)") - print() - - return results - - -def example_image_by_id(project, image_id): - """ - Example 2: Image API - Get metadata for a specific image - - Use project.image(id) to retrieve full details including metadata. - The metadata is returned in the 'metadata' field. - """ - print("=" * 70) - print("Example 2: Image API - Get metadata by image ID") - print("=" * 70) - - # Get image details by ID - image = project.image(image_id) - - print(f"\nImage: {image['name']}") - print(f" ID: {image['id']}") - print(f" Split: {image.get('split', 'N/A')}") - print(f" Tags: {image.get('tags', [])}") - - metadata = image.get("metadata", {}) - if metadata: - print(" Metadata:") - for key, value in metadata.items(): - print(f" - {key}: {value}") - else: - print(" Metadata: (none)") - print() - - -def example_version_export(version, export_path="./dataset_export"): - """ - Example 3: Version Export with user_metadata - - When exporting a dataset version in COCO format, user_metadata is included - in the annotation JSON file under each image's 'extra' field. 
- - Location in COCO JSON: images[].extra.user_metadata - """ - print("=" * 70) - print("Example 3: Version Export with user_metadata (COCO format)") - print("=" * 70) - - # Download the dataset in COCO format - print(f"\nDownloading dataset to: {export_path}") - dataset = version.download("coco", location=export_path, overwrite=True) - - # Read the annotation file to show user_metadata - json_files = glob.glob(f"{dataset.location}/**/_annotations.coco.json", recursive=True) - - if json_files: - print(f"\nInspecting: {json_files[0]}") - with open(json_files[0], "r") as f: - coco_data = json.load(f) - - print("\nImages with user_metadata in export:\n") - for img in coco_data.get("images", [])[:5]: # Show first 5 - extra = img.get("extra", {}) - user_metadata = extra.get("user_metadata") - - print(f" {img['file_name']}") - if user_metadata: - print(f" user_metadata: {user_metadata}") - else: - print(" user_metadata: (none)") - print() - - -def main(): - # ========================================================================= - # Initialize Roboflow - # ========================================================================= - print("\nInitializing Roboflow...\n") - rf = Roboflow(api_key=API_KEY) - - project = rf.workspace(WORKSPACE).project(PROJECT) - version = project.version(VERSION) - - # ========================================================================= - # Example 1: Search with user_metadata - # ========================================================================= - results = example_search_with_metadata(project) - - # ========================================================================= - # Example 2: Get image by ID - # ========================================================================= - # Find an image with metadata from search results - image_with_metadata = next((img for img in results if img.get("user_metadata")), results[0] if results else None) - - if image_with_metadata: - example_image_by_id(project, image_with_metadata["id"]) - - # ========================================================================= - # Example 3: Version export (COCO format) - # ========================================================================= - example_version_export(version, export_path="./dataset_with_metadata") - - print("=" * 70) - print("Examples completed!") - print("=" * 70) - - -if __name__ == "__main__": - main() From 930cff3b7a8fc47122243278344049de0cfb6f4a Mon Sep 17 00:00:00 2001 From: Rodrigo Barbosa Date: Fri, 13 Feb 2026 11:57:45 +0000 Subject: [PATCH 6/9] remove it --- tests/manual/test_metadata_export.py | 106 +++++++++++++++++++++++ tests/manual/test_metadata_search.py | 124 +++++++++++++++++++++++++++ 2 files changed, 230 insertions(+) create mode 100644 tests/manual/test_metadata_export.py create mode 100644 tests/manual/test_metadata_search.py diff --git a/tests/manual/test_metadata_export.py b/tests/manual/test_metadata_export.py new file mode 100644 index 00000000..ac540776 --- /dev/null +++ b/tests/manual/test_metadata_export.py @@ -0,0 +1,106 @@ +""" +Test user_metadata export in roboflow-python SDK. + +Related Linear Issues: +- DATAMAN-98: roboflow-python should export user_metadata on code +- DATAMAN-99: roboflow-python version export with user_metadata + +This test validates that user_metadata is properly exported when downloading +dataset versions via the SDK. + +Uses staging project: model-evaluation-workspace/donut-2-lcfx0/28 +This project has tags and metadata on its images. + +FINDINGS (2026-01-30): +===================== +1. 
COCO format export: ✅ WORKS + - user_metadata is included in image.extra.user_metadata + - Example: {"id": 0, ..., "extra": {"name": "...", "user_metadata": {"yummy": 0}}} + +2. YOLOv8 format export: + - Standard YOLO format (images + txt labels) + - No dedicated metadata file (expected - YOLO format doesn't have metadata concept) + +3. SDK Changes Needed: NONE + - version.download() downloads ZIP from server + - Server (roboflow-zip) already includes user_metadata in COCO JSON + - SDK extracts ZIP locally + - user_metadata is available in the downloaded files +""" +import os +import sys +import json +import glob + +thisdir = os.path.dirname(os.path.abspath(__file__)) +os.environ["ROBOFLOW_CONFIG_DIR"] = f"{thisdir}/data/.config" +# Use staging API +os.environ["API_URL"] = "https://api.roboflow.one" + +rootdir = os.path.abspath(f"{thisdir}/../..") +sys.path.append(rootdir) + +from roboflow import Roboflow + + +def _get_manual_api_key(): + api_key = os.getenv("ROBOFLOW_STAGING_API_KEY") or os.getenv("ROBOFLOW_API_KEY") + if not api_key: + raise RuntimeError("Set ROBOFLOW_STAGING_API_KEY (or ROBOFLOW_API_KEY) to run this manual test") + return api_key + + +def test_version_export_metadata(): + """ + Test that user_metadata is exported with version download. + """ + rf = Roboflow(api_key=_get_manual_api_key()) + + # Access the staging project with metadata + project = rf.workspace("model-evaluation-workspace").project("donut-2-lcfx0") + version = project.version(28) + + # Download in COCO format (metadata is in the JSON) + print("Downloading version in COCO format...") + dataset = version.download("coco", location=f"{thisdir}/metadata_test_coco", overwrite=True) + print(f"\nDataset downloaded to: {dataset.location}") + + # Look for annotation files + json_files = glob.glob(f"{dataset.location}/**/*.json", recursive=True) + print(f"\nFound JSON files: {json_files}") + + # Check each JSON file for user_metadata in the extra field + has_metadata = False + for json_file in json_files: + print(f"\n--- Inspecting: {json_file} ---") + with open(json_file, 'r') as f: + data = json.load(f) + + # COCO format has 'images' array + if 'images' in data: + print(f"Found {len(data['images'])} images in COCO format") + for i, img in enumerate(data['images'][:3]): # Check first 3 + print(f"\nImage {i}: {img.get('file_name', 'unknown')}") + extra = img.get('extra', {}) + user_metadata = extra.get('user_metadata') + if user_metadata: + print(f" ✅ user_metadata (in extra): {user_metadata}") + has_metadata = True + else: + print(f" ❌ No user_metadata in extra field") + print(f" Keys in image: {list(img.keys())}") + print(f" Keys in extra: {list(extra.keys()) if extra else 'N/A'}") + else: + print(f"Keys in JSON: {list(data.keys())[:10]}") + + print("\n" + "=" * 60) + if has_metadata: + print("✅ TEST PASSED: user_metadata is exported in COCO format") + print(" Location: image['extra']['user_metadata']") + else: + print("❌ TEST FAILED: user_metadata not found in exported files") + print("=" * 60) + + +if __name__ == "__main__": + test_version_export_metadata() diff --git a/tests/manual/test_metadata_search.py b/tests/manual/test_metadata_search.py new file mode 100644 index 00000000..73a4e2fe --- /dev/null +++ b/tests/manual/test_metadata_search.py @@ -0,0 +1,124 @@ +""" +Test user_metadata retrieval via SDK APIs. 
+ +Related Linear Issues: +- DATAMAN-98: roboflow-python should export user_metadata on code +- DATAMAN-99: roboflow-python version export with user_metadata + +This test validates that user_metadata can be retrieved via the SDK's +search() and image() methods. + +Uses staging project: model-evaluation-workspace/donut-2-lcfx0 +This project has tags and metadata on its images. + +API SUPPORT: +============ +1. project.search() - Available fields: + - id, name, created, annotations, labels, split, tags, owner, embedding, user_metadata + - Default: ['id', 'name', 'created', 'labels'] + - To get tags: fields=['id', 'name', 'tags'] + - To get user_metadata: fields=['id', 'name', 'user_metadata'] + +2. project.image(id) - Image API: + - Returns metadata in the 'metadata' field + - Example: {'material': 'aluminium', 'yummy': 3, 'penguin': 2} +""" +import os +import sys +import json + +thisdir = os.path.dirname(os.path.abspath(__file__)) +os.environ["ROBOFLOW_CONFIG_DIR"] = f"{thisdir}/data/.config" +# Use staging API +os.environ["API_URL"] = "https://api.roboflow.one" + +rootdir = os.path.abspath(f"{thisdir}/../..") +sys.path.append(rootdir) + +from roboflow import Roboflow + + +def _get_manual_api_key(): + api_key = os.getenv("ROBOFLOW_STAGING_API_KEY") or os.getenv("ROBOFLOW_API_KEY") + if not api_key: + raise RuntimeError("Set ROBOFLOW_STAGING_API_KEY (or ROBOFLOW_API_KEY) to run this manual test") + return api_key + + +def test_search_and_image_api(): + """ + Test that user_metadata can be retrieved via project.search() and project.image(). + """ + rf = Roboflow(api_key=_get_manual_api_key()) + + # Access the staging project with metadata + project = rf.workspace("model-evaluation-workspace").project("donut-2-lcfx0") + + print("=" * 60) + print("TEST 1: project.search() - Default fields") + print("=" * 60) + results = project.search(limit=3) + print(f"\nFound {len(results)} images") + for i, img in enumerate(results): + print(f"\nImage {i}: {img.get('name', 'unknown')}") + print(f" Keys: {list(img.keys())}") + + print("\n" + "=" * 60) + print("TEST 2: project.search() - With tags field") + print("=" * 60) + results_tags = project.search(limit=3, fields=["id", "name", "tags"]) + print(f"\nFound {len(results_tags)} images") + for i, img in enumerate(results_tags): + print(f"\nImage {i}: {img.get('name', 'unknown')}") + print(f" Keys: {list(img.keys())}") + if 'tags' in img: + print(f" ✅ tags: {img['tags']}") + + print("\n" + "=" * 60) + print("TEST 3: project.search() - With user_metadata field") + print("=" * 60) + try: + results_metadata = project.search(limit=3, fields=["id", "name", "user_metadata"]) + print(f"\nFound {len(results_metadata)} images") + for i, img in enumerate(results_metadata): + print(f"\nImage {i}: {img.get('name', 'unknown')}") + print(f" Keys: {list(img.keys())}") + if 'user_metadata' in img: + print(f" ✅ user_metadata: {img['user_metadata']}") + else: + print(f" ❌ No user_metadata in response") + except Exception as e: + print(f" ⚠️ Error: {e}") + print(" (user_metadata field may not be deployed yet)") + + print("\n" + "=" * 60) + print("TEST 4: project.image(id) - Image API") + print("=" * 60) + + # Get images and check their metadata via image API + images_with_metadata = 0 + for img in results[:5]: + image_id = img.get('id') + image_name = img.get('name', 'unknown') + + print(f"\nFetching details for: {image_name}") + details = project.image(image_id) + + metadata = details.get('metadata', {}) + if metadata: + print(f" ✅ metadata: {metadata}") + images_with_metadata 
+= 1 + else: + print(f" ❌ No metadata (empty dict)") + + print("\n" + "=" * 60) + if images_with_metadata > 0: + print(f"✅ TEST PASSED: {images_with_metadata}/{min(5, len(results))} images have metadata") + print(" Access via: project.image(id)['metadata']") + else: + print("❌ TEST FAILED: No images have metadata") + print("=" * 60) + + +if __name__ == "__main__": + test_search_and_image_api() From cfa44ca45481c57d231a236e90caba3d12cdd715 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 13 Feb 2026 11:59:34 +0000 Subject: [PATCH 7/9] =?UTF-8?q?fix(pre=5Fcommit):=20=F0=9F=8E=A8=20auto=20?= =?UTF-8?q?format=20pre-commit=20hooks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/manual/test_metadata_export.py | 17 +++++++++-------- tests/manual/test_metadata_search.py | 16 ++++++++-------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/tests/manual/test_metadata_export.py b/tests/manual/test_metadata_export.py index ac540776..1c20fb5d 100644 --- a/tests/manual/test_metadata_export.py +++ b/tests/manual/test_metadata_export.py @@ -27,10 +27,11 @@ - SDK extracts ZIP locally - user_metadata is available in the downloaded files """ + +import glob +import json import os import sys -import json -import glob thisdir = os.path.dirname(os.path.abspath(__file__)) os.environ["ROBOFLOW_CONFIG_DIR"] = f"{thisdir}/data/.config" @@ -73,21 +74,21 @@ def test_version_export_metadata(): has_metadata = False for json_file in json_files: print(f"\n--- Inspecting: {json_file} ---") - with open(json_file, 'r') as f: + with open(json_file, "r") as f: data = json.load(f) # COCO format has 'images' array - if 'images' in data: + if "images" in data: print(f"Found {len(data['images'])} images in COCO format") - for i, img in enumerate(data['images'][:3]): # Check first 3 + for i, img in enumerate(data["images"][:3]): # Check first 3 print(f"\nImage {i}: {img.get('file_name', 'unknown')}") - extra = img.get('extra', {}) - user_metadata = extra.get('user_metadata') + extra = img.get("extra", {}) + user_metadata = extra.get("user_metadata") if user_metadata: print(f" ✅ user_metadata (in extra): {user_metadata}") has_metadata = True else: - print(f" ❌ No user_metadata in extra field") + print(" ❌ No user_metadata in extra field") print(f" Keys in image: {list(img.keys())}") print(f" Keys in extra: {list(extra.keys()) if extra else 'N/A'}") else: diff --git a/tests/manual/test_metadata_search.py b/tests/manual/test_metadata_search.py index 73a4e2fe..e54ccc0d 100644 --- a/tests/manual/test_metadata_search.py +++ b/tests/manual/test_metadata_search.py @@ -23,9 +23,9 @@ - Returns metadata in the 'metadata' field - Example: {'material': 'aluminium', 'yummy': 3, 'penguin': 2} """ + import os import sys -import json thisdir = os.path.dirname(os.path.abspath(__file__)) os.environ["ROBOFLOW_CONFIG_DIR"] = f"{thisdir}/data/.config" @@ -71,7 +71,7 @@ def test_search_and_image_api(): for i, img in enumerate(results_tags): print(f"\nImage {i}: {img.get('name', 'unknown')}") print(f" Keys: {list(img.keys())}") - if 'tags' in img: + if "tags" in img: print(f" ✅ tags: {img['tags']}") print("\n" + "=" * 60) @@ -83,10 +83,10 @@ def test_search_and_image_api(): for i, img in enumerate(results_metadata): print(f"\nImage {i}: {img.get('name', 'unknown')}") print(f" Keys: {list(img.keys())}") - if 'user_metadata' in img: + if "user_metadata" in img: print(f" ✅ user_metadata: {img['user_metadata']}") else: - 
print(f" ❌ No user_metadata in response") + print(" ❌ No user_metadata in response") except Exception as e: print(f" ⚠️ Error: {e}") print(" (user_metadata field may not be deployed yet)") @@ -98,18 +98,18 @@ def test_search_and_image_api(): # Get images and check their metadata via image API images_with_metadata = 0 for img in results[:5]: - image_id = img.get('id') - image_name = img.get('name', 'unknown') + image_id = img.get("id") + image_name = img.get("name", "unknown") print(f"\nFetching details for: {image_name}") details = project.image(image_id) - metadata = details.get('metadata', {}) + metadata = details.get("metadata", {}) if metadata: print(f" ✅ metadata: {metadata}") images_with_metadata += 1 else: - print(f" ❌ No metadata (empty dict)") + print(" ❌ No metadata (empty dict)") print("\n" + "=" * 60) if images_with_metadata > 0: From 9dfcda11f66eac2ba44d624adcacb9cadf847bed Mon Sep 17 00:00:00 2001 From: Rodrigo Barbosa Date: Fri, 13 Feb 2026 12:00:17 +0000 Subject: [PATCH 8/9] removing unnecessary --- tests/manual/test_metadata_export.py | 107 ----------------------- tests/manual/test_metadata_search.py | 124 --------------------------- 2 files changed, 231 deletions(-) delete mode 100644 tests/manual/test_metadata_export.py delete mode 100644 tests/manual/test_metadata_search.py diff --git a/tests/manual/test_metadata_export.py b/tests/manual/test_metadata_export.py deleted file mode 100644 index 1c20fb5d..00000000 --- a/tests/manual/test_metadata_export.py +++ /dev/null @@ -1,107 +0,0 @@ -""" -Test user_metadata export in roboflow-python SDK. - -Related Linear Issues: -- DATAMAN-98: roboflow-python should export user_metadata on code -- DATAMAN-99: roboflow-python version export with user_metadata - -This test validates that user_metadata is properly exported when downloading -dataset versions via the SDK. - -Uses staging project: model-evaluation-workspace/donut-2-lcfx0/28 -This project has tags and metadata on its images. - -FINDINGS (2026-01-30): -===================== -1. COCO format export: ✅ WORKS - - user_metadata is included in image.extra.user_metadata - - Example: {"id": 0, ..., "extra": {"name": "...", "user_metadata": {"yummy": 0}}} - -2. YOLOv8 format export: - - Standard YOLO format (images + txt labels) - - No dedicated metadata file (expected - YOLO format doesn't have metadata concept) - -3. SDK Changes Needed: NONE - - version.download() downloads ZIP from server - - Server (roboflow-zip) already includes user_metadata in COCO JSON - - SDK extracts ZIP locally - - user_metadata is available in the downloaded files -""" - -import glob -import json -import os -import sys - -thisdir = os.path.dirname(os.path.abspath(__file__)) -os.environ["ROBOFLOW_CONFIG_DIR"] = f"{thisdir}/data/.config" -# Use staging API -os.environ["API_URL"] = "https://api.roboflow.one" - -rootdir = os.path.abspath(f"{thisdir}/../..") -sys.path.append(rootdir) - -from roboflow import Roboflow - - -def _get_manual_api_key(): - api_key = os.getenv("ROBOFLOW_STAGING_API_KEY") or os.getenv("ROBOFLOW_API_KEY") - if not api_key: - raise RuntimeError("Set ROBOFLOW_STAGING_API_KEY (or ROBOFLOW_API_KEY) to run this manual test") - return api_key - - -def test_version_export_metadata(): - """ - Test that user_metadata is exported with version download. 
- """ - rf = Roboflow(api_key=_get_manual_api_key()) - - # Access the staging project with metadata - project = rf.workspace("model-evaluation-workspace").project("donut-2-lcfx0") - version = project.version(28) - - # Download in COCO format (metadata is in the JSON) - print("Downloading version in COCO format...") - dataset = version.download("coco", location=f"{thisdir}/metadata_test_coco", overwrite=True) - print(f"\nDataset downloaded to: {dataset.location}") - - # Look for annotation files - json_files = glob.glob(f"{dataset.location}/**/*.json", recursive=True) - print(f"\nFound JSON files: {json_files}") - - # Check each JSON file for user_metadata in the extra field - has_metadata = False - for json_file in json_files: - print(f"\n--- Inspecting: {json_file} ---") - with open(json_file, "r") as f: - data = json.load(f) - - # COCO format has 'images' array - if "images" in data: - print(f"Found {len(data['images'])} images in COCO format") - for i, img in enumerate(data["images"][:3]): # Check first 3 - print(f"\nImage {i}: {img.get('file_name', 'unknown')}") - extra = img.get("extra", {}) - user_metadata = extra.get("user_metadata") - if user_metadata: - print(f" ✅ user_metadata (in extra): {user_metadata}") - has_metadata = True - else: - print(" ❌ No user_metadata in extra field") - print(f" Keys in image: {list(img.keys())}") - print(f" Keys in extra: {list(extra.keys()) if extra else 'N/A'}") - else: - print(f"Keys in JSON: {list(data.keys())[:10]}") - - print("\n" + "=" * 60) - if has_metadata: - print("✅ TEST PASSED: user_metadata is exported in COCO format") - print(" Location: image['extra']['user_metadata']") - else: - print("❌ TEST FAILED: user_metadata not found in exported files") - print("=" * 60) - - -if __name__ == "__main__": - test_version_export_metadata() diff --git a/tests/manual/test_metadata_search.py b/tests/manual/test_metadata_search.py deleted file mode 100644 index e54ccc0d..00000000 --- a/tests/manual/test_metadata_search.py +++ /dev/null @@ -1,124 +0,0 @@ -""" -Test user_metadata retrieval via SDK APIs. - -Related Linear Issues: -- DATAMAN-98: roboflow-python should export user_metadata on code -- DATAMAN-99: roboflow-python version export with user_metadata - -This test validates that user_metadata can be retrieved via the SDK's -search() and image() methods. - -Uses staging project: model-evaluation-workspace/donut-2-lcfx0 -This project has tags and metadata on its images. - -API SUPPORT: -============ -1. project.search() - Available fields: - - id, name, created, annotations, labels, split, tags, owner, embedding, user_metadata - - Default: ['id', 'name', 'created', 'labels'] - - To get tags: fields=['id', 'name', 'tags'] - - To get user_metadata: fields=['id', 'name', 'user_metadata'] - -2. 
project.image(id) - Image API: - - Returns metadata in the 'metadata' field - - Example: {'material': 'aluminium', 'yummy': 3, 'penguin': 2} -""" - -import os -import sys - -thisdir = os.path.dirname(os.path.abspath(__file__)) -os.environ["ROBOFLOW_CONFIG_DIR"] = f"{thisdir}/data/.config" -# Use staging API -os.environ["API_URL"] = "https://api.roboflow.one" - -rootdir = os.path.abspath(f"{thisdir}/../..") -sys.path.append(rootdir) - -from roboflow import Roboflow - - -def _get_manual_api_key(): - api_key = os.getenv("ROBOFLOW_STAGING_API_KEY") or os.getenv("ROBOFLOW_API_KEY") - if not api_key: - raise RuntimeError("Set ROBOFLOW_STAGING_API_KEY (or ROBOFLOW_API_KEY) to run this manual test") - return api_key - - -def test_search_and_image_api(): - """ - Test that user_metadata can be retrieved via project.search() and project.image(). - """ - rf = Roboflow(api_key=_get_manual_api_key()) - - # Access the staging project with metadata - project = rf.workspace("model-evaluation-workspace").project("donut-2-lcfx0") - - print("=" * 60) - print("TEST 1: project.search() - Default fields") - print("=" * 60) - results = project.search(limit=3) - print(f"\nFound {len(results)} images") - for i, img in enumerate(results): - print(f"\nImage {i}: {img.get('name', 'unknown')}") - print(f" Keys: {list(img.keys())}") - - print("\n" + "=" * 60) - print("TEST 2: project.search() - With tags field") - print("=" * 60) - results_tags = project.search(limit=3, fields=["id", "name", "tags"]) - print(f"\nFound {len(results_tags)} images") - for i, img in enumerate(results_tags): - print(f"\nImage {i}: {img.get('name', 'unknown')}") - print(f" Keys: {list(img.keys())}") - if "tags" in img: - print(f" ✅ tags: {img['tags']}") - - print("\n" + "=" * 60) - print("TEST 3: project.search() - With user_metadata field") - print("=" * 60) - try: - results_metadata = project.search(limit=3, fields=["id", "name", "user_metadata"]) - print(f"\nFound {len(results_metadata)} images") - for i, img in enumerate(results_metadata): - print(f"\nImage {i}: {img.get('name', 'unknown')}") - print(f" Keys: {list(img.keys())}") - if "user_metadata" in img: - print(f" ✅ user_metadata: {img['user_metadata']}") - else: - print(" ❌ No user_metadata in response") - except Exception as e: - print(f" ⚠️ Error: {e}") - print(" (user_metadata field may not be deployed yet)") - - print("\n" + "=" * 60) - print("TEST 4: project.image(id) - Image API") - print("=" * 60) - - # Get images and check their metadata via image API - images_with_metadata = 0 - for img in results[:5]: - image_id = img.get("id") - image_name = img.get("name", "unknown") - - print(f"\nFetching details for: {image_name}") - details = project.image(image_id) - - metadata = details.get("metadata", {}) - if metadata: - print(f" ✅ metadata: {metadata}") - images_with_metadata += 1 - else: - print(" ❌ No metadata (empty dict)") - - print("\n" + "=" * 60) - if images_with_metadata > 0: - print(f"✅ TEST PASSED: {images_with_metadata}/{min(5, len(results))} images have metadata") - print(" Access via: project.image(id)['metadata']") - else: - print("❌ TEST FAILED: No images have metadata") - print("=" * 60) - - -if __name__ == "__main__": - test_search_and_image_api() From d9a44aff725230001a7f0f41276ce2ed972cf840 Mon Sep 17 00:00:00 2001 From: Rodrigo Barbosa Date: Fri, 13 Feb 2026 17:35:52 +0000 Subject: [PATCH 9/9] fix --- roboflow/core/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roboflow/core/version.py b/roboflow/core/version.py index 
88ba5740..5e236c61 100644 --- a/roboflow/core/version.py +++ b/roboflow/core/version.py @@ -682,4 +682,4 @@ def __str__(self): def unwrap_version_id(version_id: str) -> str: - return version_id if "/" not in str(version_id) else version_id.split("/")[-1] + return version_id if "/" not in str(version_id) else version_id.rsplit("/", maxsplit=1)[-1]
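
---

Usage note (not part of the patches above): a minimal sketch of the search_export() method this series adds to Workspace. The workspace id, query, and output directory below are placeholders, and the API key is read from the environment.

    import os

    from roboflow import Roboflow

    rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY"])
    workspace = rf.workspace("my-workspace")  # placeholder workspace id

    # Start a search export, poll until the job is ready, download the zip,
    # and (by default) extract it and remove the archive.
    path = workspace.search_export(
        query="tag:annotate",   # placeholder search query
        format="coco",
        location="./search-export-coco",
    )
    print(path)  # absolute path to the extracted dataset directory

The CLI added in roboflowpy.py exposes the same flow, e.g. `roboflow search-export "tag:annotate" -w my-workspace -f coco -l ./search-export-coco` (pass `--no-extract` to keep the zip as downloaded).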