Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OCI]: Storage: support buckets from different region #4554

Merged
merged 6 commits into from
Jan 15, 2025
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/reference/storage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ Storage YAML reference
- https://<azure_storage_account>.blob.core.windows.net/<container_name>
- r2://<bucket_name>
- cos://<region_name>/<bucket_name>
- oci://<bucket_name>
- oci://<bucket_name>@<region>

If the source is local, data is uploaded to the cloud to an appropriate
bucket (s3, gcs, azure, r2, oci, or ibm). If source is bucket URI,
Expand Down
2 changes: 1 addition & 1 deletion examples/oci/dataset-mount.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ resources:
file_mounts:
# Mount an existing oci bucket
/datasets-storage:
source: oci://skybucket
source: oci://skybucket@us-sanjose-1
mode: MOUNT # Either MOUNT or COPY. Optional.

# Working directory (optional) containing the project codebase.
Expand Down
50 changes: 43 additions & 7 deletions sky/data/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -3968,7 +3968,7 @@ class OciStore(AbstractStore):

def __init__(self,
name: str,
source: str,
source: Optional[SourceType],
region: Optional[str] = None,
is_sky_managed: Optional[bool] = None,
sync_on_reconstruction: Optional[bool] = True,
Expand All @@ -3980,13 +3980,46 @@ def __init__(self,
self.compartment: str
self.namespace: str

# Bucket region should be consistent with the OCI config file
region = oci.get_oci_config()['region']
# The region may come from a name given in <bucket>@<region> format.
# The name can also be derived from the source, for example:
# /datasets-storage:
# source: oci://RAGData@us-sanjose-1
# The name in above mount will be set to RAGData@us-sanjose-1
region_in_name = None
if name is not None and '@' in name:
self._validate_bucket_expr(name)
name, region_in_name = name.split('@')

# Region is from the specified source in oci://<bucket>@<region> format
region_in_source = None
if isinstance(source,
str) and source.startswith('oci://') and '@' in source:
self._validate_bucket_expr(source)
source, region_in_source = source.split('@')

if region_in_name is not None:
region = region_in_name
elif region_in_source is not None:
region = region_in_source
HysunHe marked this conversation as resolved.
Show resolved Hide resolved

# Default the region to the one specified in the OCI config.
if region is None:
region = oci.get_oci_config()['region']

# From this point on, name and source are canonical: neither carries
# a region (@<region> suffix) anymore.

super().__init__(name, source, region, is_sky_managed,
sync_on_reconstruction, _bucket_sub_path)
# TODO(zpoint): add _bucket_sub_path to the sync/mount/delete commands

def _validate_bucket_expr(self, bucket_expr: str):
    """Check that a bucket expression ends with a well-formed region.

    The accepted shape is ``[<scheme>://]<bucket>@<region>``, for example
    ``oci://skybucket@us-sanjose-1`` or ``skybucket@us-gov-ashburn-1``.

    Args:
        bucket_expr: Bucket name or URI carrying an ``@<region>`` suffix.

    Raises:
        ValueError: If ``bucket_expr`` does not have the expected shape.
    """
    # Region: two word characters, one or more hyphen-separated word
    # segments, then a single digit. Allowing several middle segments
    # admits identifiers such as us-gov-ashburn-1 in addition to the
    # three-part form (us-sanjose-1).
    pattern = r'(\w+://)?[A-Za-z0-9._-]+@\w{2}(-\w+)+-\d'
    # fullmatch, unlike match with a trailing '$', also rejects an
    # expression that ends in a newline.
    if not re.fullmatch(pattern, bucket_expr):
        raise ValueError(
            'The format for the bucket portion is <bucket>@<region> '
            'when specify a region with a bucket.')

def _validate(self):
if self.source is not None and isinstance(self.source, str):
if self.source.startswith('oci://'):
Expand Down Expand Up @@ -4137,7 +4170,8 @@ def get_file_sync_command(base_dir_path, file_names):
sync_command = (
'oci os object bulk-upload --no-follow-symlinks --overwrite '
f'--bucket-name {self.name} --namespace-name {self.namespace} '
f'--src-dir "{base_dir_path}" {includes}')
f'--region {self.region} --src-dir "{base_dir_path}" '
HysunHe marked this conversation as resolved.
Show resolved Hide resolved
f'{includes}')

return sync_command

Expand All @@ -4157,8 +4191,8 @@ def get_dir_sync_command(src_dir_path, dest_dir_name):
sync_command = (
'oci os object bulk-upload --no-follow-symlinks --overwrite '
f'--bucket-name {self.name} --namespace-name {self.namespace} '
f'--object-prefix "{dest_dir_name}" --src-dir "{src_dir_path}" '
f'{excludes} ')
f'--region {self.region} --object-prefix "{dest_dir_name}" '
f'--src-dir "{src_dir_path}" {excludes}')

return sync_command

Expand Down Expand Up @@ -4289,7 +4323,8 @@ def _download_file(self, remote_path: str, local_path: str) -> None:
def get_file_download_command(remote_path, local_path):
download_command = (f'oci os object get --bucket-name {self.name} '
f'--namespace-name {self.namespace} '
f'--name {remote_path} --file {local_path}')
f'--region {self.region} --name {remote_path} '
f'--file {local_path}')

return download_command

Expand Down Expand Up @@ -4346,6 +4381,7 @@ def _delete_oci_bucket(self, bucket_name: str) -> bool:
@oci.with_oci_env
def get_bucket_delete_command(bucket_name):
    """Build the OCI CLI command line that deletes a bucket.

    The command passes ``--empty --force`` and targets the region held
    in ``self.region`` of the enclosing scope.

    Args:
        bucket_name: Name of the bucket to delete.

    Returns:
        The command string built here.
        NOTE(review): ``oci.with_oci_env`` wraps this function and may
        alter the returned string - confirm against its definition.
    """
    # The bucket name must directly follow --bucket-name; with --region
    # placed in between, the CLI would read '--region' as the bucket
    # name.
    remove_command = (f'oci os bucket delete --bucket-name '
                      f'{bucket_name} --region {self.region} '
                      f'--empty --force')

    return remove_command
Expand Down
Loading