docs: Improve docs of Apify storage clients and export SQL storage client (#639)

vdusek · web-flow · commit b6353a903cf5 · 2025-10-21T16:15:19.000+02:00
- Export `SqlStorageClient` from Crawlee.
- Improve docs of `ApifyStorageClient` and `SmartApifyStorageClient`.
diff --git a/src/apify/storage_clients/_apify/_storage_client.py b/src/apify/storage_clients/_apify/_storage_client.py
@@ -21,23 +21,50 @@
 
 @docs_group('Storage clients')
 class ApifyStorageClient(StorageClient):
-    """Apify storage client."""
+    """Apify platform implementation of the storage client.
+
+    This storage client provides access to datasets, key-value stores, and request queues that persist data
+    to the Apify platform. Each storage type is implemented with its own specific Apify client that stores data
+    in the cloud, making it accessible from anywhere.
+
+    The communication with the Apify platform is handled via the Apify API client for Python, which is an HTTP API
+    wrapper. For maximum efficiency and performance of the storage clients, various caching mechanisms are used to
+    minimize the number of API calls made to the Apify platform. Data can be inspected and manipulated through
+    the Apify console web interface or via the Apify API.
+
+    The request queue client supports two access modes controlled by the `request_queue_access` parameter:
+
+    ### Single mode
+
+    The `single` mode is optimized for scenarios with only one consumer. It minimizes API calls, making it faster
+    and more cost-efficient compared to the `shared` mode. This option is ideal when a single Actor is responsible
+    for consuming the entire request queue. Using multiple consumers simultaneously may lead to inconsistencies
+    or unexpected behavior.
+
+    In this mode, multiple producers can safely add new requests, but forefront requests may not be processed
+    immediately, as the client relies on local head estimation instead of frequent forefront fetching. Requests can
+    also be added or marked as handled by other clients, but they must not be deleted or modified, since such changes
+    would not be reflected in the local cache. If a request is already fully cached locally, marking it as handled
+    by another client will be ignored by this client. This does not cause errors but can occasionally result in
+    reprocessing a request that was already handled elsewhere. If the request was not yet cached locally, marking
+    it as handled poses no issue.
+
+    ### Shared mode
+
+    The `shared` mode is designed for scenarios with multiple concurrent consumers. It ensures proper synchronization
+    and consistency across clients, at the cost of higher API usage and slightly worse performance. This mode is safe
+    for concurrent access from multiple processes, including Actors running in parallel on the Apify platform. It
+    should be used when multiple consumers need to process requests from the same queue simultaneously.
+    """
 
     def __init__(self, *, request_queue_access: Literal['single', 'shared'] = 'single') -> None:
-        """Initialize the Apify storage client.
+        """Initialize a new instance.
 
         Args:
-            request_queue_access: Controls the implementation of the request queue client based on expected scenario:
-                - 'single' is suitable for single consumer scenarios. It makes less API calls, is cheaper and faster.
-                - 'shared' is suitable for multiple consumers scenarios at the cost of higher API usage.
-                Detailed constraints for the 'single' access type:
-                - Only one client is consuming the request queue at the time.
-                - Multiple producers can put requests to the queue, but their forefront requests are not guaranteed to
-                  be handled so quickly as this client does not aggressively fetch the forefront and relies on local
-                  head estimation.
-                - Requests are only added to the queue, never deleted by other clients. (Marking as handled is ok.)
-                - Other producers can add new requests, but not modify existing ones.
-                  (Modifications would not be included in local cache)
+            request_queue_access: Defines how the request queue client behaves. Use `single` mode for a single
+                consumer. It makes fewer API calls, meaning better performance and lower costs. If you need multiple
+                concurrent consumers, use `shared` mode, but expect worse performance and higher costs due to
+                the additional overhead.
         """
         self._request_queue_access = request_queue_access
 
diff --git a/src/apify/storage_clients/_smart_apify/_storage_client.py b/src/apify/storage_clients/_smart_apify/_storage_client.py
@@ -19,10 +19,18 @@
 
 @docs_group('Storage clients')
 class SmartApifyStorageClient(StorageClient):
-    """SmartApifyStorageClient that delegates to cloud_storage_client or local_storage_client.
+    """Storage client that automatically selects cloud or local storage client based on the environment.
 
-    When running on Apify platform use cloud_storage_client, else use local_storage_client. This storage client is
-    designed to work specifically in Actor context.
+    This storage client provides access to datasets, key-value stores, and request queues by intelligently
+    delegating to either the cloud or local storage client based on the execution environment and configuration.
+
+    When running on the Apify platform (which is detected via environment variables), this client automatically
+    uses the `cloud_storage_client` to store data on the platform. When running locally, it uses the
+    `local_storage_client` instead. You can also force cloud storage usage from your local machine by using
+    the `force_cloud` argument.
+
+    This storage client is designed to work specifically in the `Actor` context and provides a seamless development
+    experience where the same code works both locally and on the Apify platform without any changes.
     """
 
     def __init__(
@@ -31,13 +39,13 @@ def __init__(
         cloud_storage_client: ApifyStorageClient | None = None,
         local_storage_client: StorageClient | None = None,
     ) -> None:
-        """Initialize the Apify storage client.
+        """Initialize a new instance.
 
         Args:
-            cloud_storage_client: Client used to communicate with the Apify platform storage. Either through
-                `force_cloud` argument when opening storages or automatically when running on the Apify platform.
-            local_storage_client: Client used to communicate with the storage when not running on the Apify
-                platform and not using `force_cloud` argument when opening storages.
+            cloud_storage_client: Storage client used when an Actor is running on the Apify platform, or when
+                explicitly enabled via the `force_cloud` argument. Defaults to `ApifyStorageClient`.
+            local_storage_client: Storage client used when an Actor is not running on the Apify platform and the
+                `force_cloud` flag is not set. Defaults to `ApifyFileSystemStorageClient`.
         """
         self._cloud_storage_client = cloud_storage_client or ApifyStorageClient(request_queue_access='single')
         self._local_storage_client = local_storage_client or ApifyFileSystemStorageClient()
diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js
@@ -239,6 +239,10 @@ module.exports = {
                         url: 'https://crawlee.dev/python/api/class/FileSystemStorageClient',
                         group: 'Storage clients',
                     },
+                    {
+                        url: 'https://crawlee.dev/python/api/class/SqlStorageClient',
+                        group: 'Storage clients',
+                    },
                     // Request loaders
                     {
                         url: 'https://crawlee.dev/python/api/class/RequestLoader',