Skip to content

Commit 3bf285d

Browse files
authored
fix: Fix type of cloud_storage_client in SmartApifyStorageClient (#642)
The type of `cloud_storage_client` in `SmartApifyStorageClient` should be `StorageClient`, not just `ApifyStorageClient`, as we want to support using other storage clients on the Apify platform as well. Other minor changes: - I've also updated the method order for consistency: constants -> public -> private. - `ApifyStorageClient` is in `single` access mode by default.
1 parent cb60a4e commit 3bf285d

File tree

3 files changed

+44
-46
lines changed

3 files changed

+44
-46
lines changed

src/apify/storage_clients/_apify/_models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ class RequestQueueStats(BaseModel):
120120
"""The number of request queue reads."""
121121

122122
storage_bytes: Annotated[int, Field(alias='storageBytes', default=0)]
123-
"""Storage size in Bytes."""
123+
"""Storage size in bytes."""
124124

125125
write_count: Annotated[int, Field(alias='writeCount', default=0)]
126126
"""The number of request queue writes."""

src/apify/storage_clients/_apify/_storage_client.py

Lines changed: 19 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,12 @@ class ApifyStorageClient(StorageClient):
5757
should be used when multiple consumers need to process requests from the same queue simultaneously.
5858
"""
5959

60+
_LSP_ERROR_MSG = 'Expected "configuration" to be an instance of "apify.Configuration", but got {} instead.'
61+
"""This class (intentionally) violates the Liskov Substitution Principle.
62+
63+
It requires a specialized `Configuration` instance compared to its parent class.
64+
"""
65+
6066
def __init__(self, *, request_queue_access: Literal['single', 'shared'] = 'single') -> None:
6167
"""Initialize a new instance.
6268
@@ -68,23 +74,6 @@ def __init__(self, *, request_queue_access: Literal['single', 'shared'] = 'singl
6874
"""
6975
self._request_queue_access = request_queue_access
7076

71-
# This class breaches Liskov Substitution Principle. It requires specialized Configuration compared to its parent.
72-
_lsp_violation_error_message_template = (
73-
'Expected "configuration" to be an instance of "apify.Configuration", but got {} instead.'
74-
)
75-
76-
@override
77-
def get_storage_client_cache_key(self, configuration: CrawleeConfiguration) -> Hashable:
78-
if isinstance(configuration, ApifyConfiguration):
79-
# It is not supported to open exactly same queue with 'single' and 'shared' client at the same time.
80-
# Whichever client variation gets used first, wins.
81-
return super().get_storage_client_cache_key(configuration), hash_api_base_url_and_token(configuration)
82-
83-
config_class = type(configuration)
84-
raise TypeError(
85-
self._lsp_violation_error_message_template.format(f'{config_class.__module__}.{config_class.__name__}')
86-
)
87-
8877
@override
8978
async def create_dataset_client(
9079
self,
@@ -98,7 +87,7 @@ async def create_dataset_client(
9887
if isinstance(configuration, ApifyConfiguration):
9988
return await ApifyDatasetClient.open(id=id, name=name, alias=alias, configuration=configuration)
10089

101-
raise TypeError(self._lsp_violation_error_message_template.format(type(configuration).__name__))
90+
raise TypeError(self._LSP_ERROR_MSG.format(type(configuration).__name__))
10291

10392
@override
10493
async def create_kvs_client(
@@ -113,7 +102,7 @@ async def create_kvs_client(
113102
if isinstance(configuration, ApifyConfiguration):
114103
return await ApifyKeyValueStoreClient.open(id=id, name=name, alias=alias, configuration=configuration)
115104

116-
raise TypeError(self._lsp_violation_error_message_template.format(type(configuration).__name__))
105+
raise TypeError(self._LSP_ERROR_MSG.format(type(configuration).__name__))
117106

118107
@override
119108
async def create_rq_client(
@@ -130,4 +119,14 @@ async def create_rq_client(
130119
id=id, name=name, alias=alias, configuration=configuration, access=self._request_queue_access
131120
)
132121

133-
raise TypeError(self._lsp_violation_error_message_template.format(type(configuration).__name__))
122+
raise TypeError(self._LSP_ERROR_MSG.format(type(configuration).__name__))
123+
124+
@override
125+
def get_storage_client_cache_key(self, configuration: CrawleeConfiguration) -> Hashable:
126+
if isinstance(configuration, ApifyConfiguration):
127+
# It is not supported to open exactly same queue with 'single' and 'shared' client at the same time.
128+
# Whichever client variation gets used first, wins.
129+
return super().get_storage_client_cache_key(configuration), hash_api_base_url_and_token(configuration)
130+
131+
config_class = type(configuration)
132+
raise TypeError(self._LSP_ERROR_MSG.format(f'{config_class.__module__}.{config_class.__name__}'))

src/apify/storage_clients/_smart_apify/_storage_client.py

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,7 @@
88

99
from apify._configuration import Configuration as ApifyConfiguration
1010
from apify._utils import docs_group
11-
from apify.storage_clients import ApifyStorageClient
12-
from apify.storage_clients._file_system import ApifyFileSystemStorageClient
11+
from apify.storage_clients import ApifyStorageClient, FileSystemStorageClient
1312

1413
if TYPE_CHECKING:
1514
from collections.abc import Hashable
@@ -36,7 +35,7 @@ class SmartApifyStorageClient(StorageClient):
3635
def __init__(
3736
self,
3837
*,
39-
cloud_storage_client: ApifyStorageClient | None = None,
38+
cloud_storage_client: StorageClient | None = None,
4039
local_storage_client: StorageClient | None = None,
4140
) -> None:
4241
"""Initialize a new instance.
@@ -47,35 +46,15 @@ def __init__(
4746
local_storage_client: Storage client used when an Actor is not running on the Apify platform and when
4847
`force_cloud` flag is not set. Defaults to `FileSystemStorageClient`.
4948
"""
50-
self._cloud_storage_client = cloud_storage_client or ApifyStorageClient(request_queue_access='single')
51-
self._local_storage_client = local_storage_client or ApifyFileSystemStorageClient()
49+
self._cloud_storage_client = cloud_storage_client or ApifyStorageClient()
50+
self._local_storage_client = local_storage_client or FileSystemStorageClient()
5251

5352
def __str__(self) -> str:
5453
return (
5554
f'{self.__class__.__name__}(cloud_storage_client={self._cloud_storage_client.__class__.__name__},'
5655
f' local_storage_client={self._local_storage_client.__class__.__name__})'
5756
)
5857

59-
def get_suitable_storage_client(self, *, force_cloud: bool = False) -> StorageClient:
60-
"""Get a suitable storage client based on the global configuration and the value of the force_cloud flag.
61-
62-
Args:
63-
force_cloud: If True, return `cloud_storage_client`.
64-
"""
65-
if ApifyConfiguration.get_global_configuration().is_at_home:
66-
return self._cloud_storage_client
67-
68-
configuration = ApifyConfiguration.get_global_configuration()
69-
if force_cloud:
70-
if configuration.token is None:
71-
raise RuntimeError(
72-
'In order to use the Apify cloud storage from your computer, '
73-
'you need to provide an Apify token using the APIFY_TOKEN environment variable.'
74-
)
75-
return self._cloud_storage_client
76-
77-
return self._local_storage_client
78-
7958
@override
8059
def get_storage_client_cache_key(self, configuration: CrawleeConfiguration) -> Hashable:
8160
if ApifyConfiguration.get_global_configuration().is_at_home:
@@ -123,3 +102,23 @@ async def create_rq_client(
123102
return await self.get_suitable_storage_client().create_rq_client(
124103
id=id, name=id, alias=alias, configuration=configuration
125104
)
105+
106+
def get_suitable_storage_client(self, *, force_cloud: bool = False) -> StorageClient:
107+
"""Get a suitable storage client based on the global configuration and the value of the force_cloud flag.
108+
109+
Args:
110+
force_cloud: If True, return `cloud_storage_client`.
111+
"""
112+
if ApifyConfiguration.get_global_configuration().is_at_home:
113+
return self._cloud_storage_client
114+
115+
configuration = ApifyConfiguration.get_global_configuration()
116+
if force_cloud:
117+
if configuration.token is None:
118+
raise RuntimeError(
119+
'In order to use the Apify cloud storage from your computer, '
120+
'you need to provide an Apify token using the APIFY_TOKEN environment variable.'
121+
)
122+
return self._cloud_storage_client
123+
124+
return self._local_storage_client

0 commit comments

Comments
 (0)