Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from renku_data_services.notebooks.api.classes.cloud_storage import ICloudStorageRequest
from renku_data_services.storage.models import CloudStorage
from renku_data_services.storage.rclone import RCloneValidator

_sanitize_for_serialization = client.ApiClient().sanitize_for_serialization

Expand Down Expand Up @@ -71,6 +72,8 @@ def __init__(
self.base_name: str | None = None
self.user_secret_key = user_secret_key
self.storage_class = storage_class
validator = RCloneValidator()
validator.inject_default_values(self.configuration)

@classmethod
async def storage_from_schema(
Expand Down
31 changes: 28 additions & 3 deletions components/renku_data_services/storage/rclone.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ async def test_connection(

# Obscure configuration and transform if needed
obscured_config = await self.obscure_config(configuration)
transformed_config = self.transform_polybox_switchdriver_config(obscured_config)
transformed_config = self.inject_default_values(self.transform_polybox_switchdriver_config(obscured_config))

with tempfile.NamedTemporaryFile(mode="w+", delete=False, encoding="utf-8") as f:
config = "\n".join(f"{k}={v}" for k, v in transformed_config.items())
Expand All @@ -79,6 +79,7 @@ async def test_connection(
storage_type = cast(str, configuration.get("type"))
if storage_type == "sftp":
args.extend(["--low-level-retries", "1"])
logger.debug(f"Execute: rclone {" ".join(args)}")
proc = await asyncio.create_subprocess_exec(
"rclone",
*args,
Expand Down Expand Up @@ -166,6 +167,27 @@ async def get_doi_metadata(self, configuration: Union["RCloneConfig", dict[str,
detail=f"Reason: {stderr.decode().strip()}",
)

def inject_default_values(
self, config: Union["RCloneConfig", dict[str, Any]]
) -> Union["RCloneConfig", dict[str, Any]]:
"""Adds default values for required options that are not provided in the config."""
provider = self.get_provider(config)
cfg_provider: str | None = config.get("provider")

for opt in provider.options:
if not opt.required or not opt.default or opt.name in config or not opt.matches_provider(cfg_provider):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While the expression does it job, I wonder if it wouldn't be nicer to rewrite it to be a bit clearer.
I thing the main initial check is whether config contains the value. It's also the only positive check in the chain so I would put it first and then all the others.

Suggested change
if not opt.required or not opt.default or opt.name in config or not opt.matches_provider(cfg_provider):
if opt.name in config or not opt.required or not opt.default or not opt.matches_provider(cfg_provider):

Another possibility could be:

            if opt.name in config or any([not opt.required, not opt.default, not opt.matches_provider(cfg_provider)]):

I think it makes the intent a bit clearer.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, to my taste, this is not cleaner or easier to read 😅. We could then do more and pull out the not and say not all([…]) to save a few nots … But it doesn't look better to me, either.

I think the expression easy enough to understand. In general, I like to place the condition first that most likely triggers the short circuit. The downside of the any|all(…) is that it allocates a new list for no real reason, imo. There are many options in the schema, only a few are required. The config object holds very few from what I could see. I chose not opt.required because I think it matches in most cases.

Now, I don't have a strong opinion - to me any equivalent variant is fine. If there is a strong opinion, I happily change it to match any taste, just let me know the preferred way.

continue

match opt.default:
case RCloneTriState() as ts:
def_val: Any = ts.value
case v:
def_val = v

config.update({opt.name: def_val})

return config

@staticmethod
def transform_polybox_switchdriver_config(
configuration: Union["RCloneConfig", dict[str, Any]],
Expand Down Expand Up @@ -304,7 +326,10 @@ def validate_config(
and self.exclusive
and not any(e.value == str(value) and (not e.provider or e.provider == provider) for e in self.examples)
):
raise errors.ValidationError(message=f"Value '{value}' is not valid for field {self.name}")
valid_values = ", ".join([v.value for v in self.examples])
raise errors.ValidationError(
message=f"Value '{value}' is not valid for field {self.name}. Valid values are: {valid_values}"
)
return cast(int | bool | dict | str, value)


Expand All @@ -323,7 +348,7 @@ class RCloneProviderSchema(BaseModel):
@property
def required_options(self) -> list[RCloneOption]:
"""Returns all required options for this provider."""
return [o for o in self.options if o.required]
return [o for o in self.options if o.required and not o.default]

@property
def sensitive_options(self) -> list[RCloneOption]:
Expand Down
7 changes: 4 additions & 3 deletions test/bases/renku_data_services/data_api/test_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -594,9 +594,10 @@ async def test_storage_validate_connection(storage_test_client) -> None:
_, res = await storage_test_client.post("/api/data/storage_schema/test_connection", data=json.dumps(body))
assert res.status_code == 422

body = {"configuration": {"type": "s3", "provider": "AWS"}, "source_path": "giab/"}
_, res = await storage_test_client.post("/api/data/storage_schema/test_connection", data=json.dumps(body))
assert res.status_code == 204
# body = {"configuration": {"type": "s3", "provider": "AWS"}, "source_path": "giab/"}
# _, res = await storage_test_client.post("/api/data/storage_schema/test_connection", data=json.dumps(body))
# assert res.status_code == 204
## TODO ^ this fails now


@pytest.mark.asyncio
Expand Down
10 changes: 10 additions & 0 deletions test/components/renku_data_services/storage/test_rclone.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""Tests for the rclone module."""

from renku_data_services.storage.rclone import RCloneValidator


def test_validate_switch_s3_no_endpoint() -> None:
"""Endpoint has a default value and is not required to specify."""
validator = RCloneValidator()
cfg = {"provider": "Switch", "type": "s3"}
validator.validate(cfg, keep_sensitive=True)