diff --git a/ci/conda_recipe/meta.yaml b/ci/conda_recipe/meta.yaml index 722c0384..232b7d6d 100644 --- a/ci/conda_recipe/meta.yaml +++ b/ci/conda_recipe/meta.yaml @@ -5,7 +5,7 @@ package: name: snowflake-ml-python - version: 0.2.4 # this has to be in sync with snowflake/ml/BUILD.bazel and snowflake/ml/version.py + version: 0.3.0 # this has to be in sync with snowflake/ml/BUILD.bazel and snowflake/ml/version.py source: path: ../../ @@ -40,11 +40,11 @@ requirements: about: home: https://github.com/snowflakedb/snowflake-ml-python license: Apache-2.0 - license_family : Apache + license_family: Apache license_file: ../../LICENSE.txt summary: Snowflake ML Library description: | - Snowflake ML client Library is used for interacting with Snowflake to build machine learning solutions. - Functionalities include feature engineering, modeling, model management, deployment, etc + Snowflake ML client Library is used for interacting with Snowflake to build machine learning solutions. + Functionalities include feature engineering, modeling, model management, deployment, etc dev_url: https://github.com/snowflakedb/snowflake-ml-python doc_url: https://github.com/snowflakedb/snowflake-ml-python/blob/main/README.md diff --git a/snowflake/ml/BUILD.bazel b/snowflake/ml/BUILD.bazel index db035957..1668cda4 100644 --- a/snowflake/ml/BUILD.bazel +++ b/snowflake/ml/BUILD.bazel @@ -46,9 +46,9 @@ snowml_wheel( # versions in the requirements file, we are pinning the versions here. "scikit-learn==1.2.1", "xgboost==1.7.3", - "joblib>=1.0.0,<=1.1.1", # All the release versions between 1.0.0 and 1.1.1 are available in SF Conda channel. + "joblib>=1.0.0,<=1.1.1", # All the release versions between 1.0.0 and 1.1.1 are available in SF Conda channel. 
], - version = "0.2.4", # this has to be in sync with version.py and ci/conda_recipe/meta.yaml + version = "0.3.0", # this has to be in sync with version.py and ci/conda_recipe/meta.yaml deps = [ "//snowflake/ml/metrics:metrics_pkg", "//snowflake/ml/preprocessing:preprocessing_pkg", diff --git a/snowflake/ml/_internal/BUILD.bazel b/snowflake/ml/_internal/BUILD.bazel index 4f308bba..b425d261 100644 --- a/snowflake/ml/_internal/BUILD.bazel +++ b/snowflake/ml/_internal/BUILD.bazel @@ -27,6 +27,7 @@ py_library( py_test( name = "file_utils_test", srcs = ["file_utils_test.py"], + timeout = "short", deps = [ ":file_utils", ], diff --git a/snowflake/ml/_internal/file_utils.py b/snowflake/ml/_internal/file_utils.py index 2b05a51b..5a0e2b10 100644 --- a/snowflake/ml/_internal/file_utils.py +++ b/snowflake/ml/_internal/file_utils.py @@ -70,10 +70,11 @@ def zip_file_or_directory_to_stream( with io.BytesIO() as input_stream: with zipfile.ZipFile(input_stream, mode="w", compression=zipfile.ZIP_DEFLATED) as zf: - cur_path = os.path.dirname(path) - while os.path.realpath(cur_path) != os.path.realpath(start_path): - zf.writestr(f"{os.path.relpath(cur_path, start_path)}/", "") - cur_path = os.path.dirname(cur_path) + if os.path.realpath(path) != os.path.realpath(start_path): + cur_path = os.path.dirname(path) + while os.path.realpath(cur_path) != os.path.realpath(start_path): + zf.writestr(f"{os.path.relpath(cur_path, start_path)}/", "") + cur_path = os.path.dirname(cur_path) if os.path.isdir(path): for dirname, _, files in os.walk(path): diff --git a/snowflake/ml/_internal/file_utils_test.py b/snowflake/ml/_internal/file_utils_test.py index 5e717053..3d3a5eb2 100644 --- a/snowflake/ml/_internal/file_utils_test.py +++ b/snowflake/ml/_internal/file_utils_test.py @@ -46,3 +46,7 @@ def test_zip_file_or_directory_to_stream(self) -> None: importlib.import_module("snowflake.snowpark.fake_module.p") sys.path.remove(os.path.abspath(zip_module_filename)) + + with 
file_utils.zip_file_or_directory_to_stream(fake_mod_dirpath, fake_mod_dirpath) as input_stream: + with open(zip_module_filename, "wb") as f: + f.write(input_stream.getbuffer()) diff --git a/snowflake/ml/registry/BUILD.bazel b/snowflake/ml/registry/BUILD.bazel index 3d06c333..324c69f0 100644 --- a/snowflake/ml/registry/BUILD.bazel +++ b/snowflake/ml/registry/BUILD.bazel @@ -11,6 +11,7 @@ py_library( "//snowflake/ml/_internal/utils:formatting", "//snowflake/ml/_internal/utils:query_result_checker", "//snowflake/ml/_internal/utils:uri", + "//snowflake/ml/_internal:file_utils", "//snowflake/ml/model:_model", "//snowflake/ml/model:_deployer", "//snowflake/ml/utils:telemetry", diff --git a/snowflake/ml/registry/model_registry.py b/snowflake/ml/registry/model_registry.py index 2aa7ee30..b293c944 100644 --- a/snowflake/ml/registry/model_registry.py +++ b/snowflake/ml/registry/model_registry.py @@ -12,6 +12,7 @@ from absl import logging from snowflake import connector, snowpark +from snowflake.ml._internal import file_utils from snowflake.ml._internal.utils import formatting, query_result_checker, uri from snowflake.ml.model import ( _deployer, @@ -21,7 +22,6 @@ ) from snowflake.ml.registry import _schema from snowflake.ml.utils import telemetry -from snowflake.snowpark._internal import utils if TYPE_CHECKING: import pandas as pd @@ -998,6 +998,9 @@ def log_model( output, which could be inferred by calling `infer_signature` method with sample input data. sample_input_data: Sample of the input data for the model. + Raises: + TypeError: Raised when neither signatures nor sample_input_data is provided. Will be captured locally. + Returns: String of the auto-generate unique model identifier. None if failed. 
""" @@ -1031,6 +1034,8 @@ def log_model( pip_requirements=pip_requirements, sample_input=sample_input_data, ) + else: + raise TypeError("Either signature or sample input data should exist for native model packaging.") id = self.log_model_path( path=tmpdir, type="snowflake_native", @@ -1174,7 +1179,6 @@ def log_model_path( Returns: String of the auto-generate unique model identifier. """ - # Copy model from local disk to remote stage. fully_qualified_model_stage_name = self._prepare_model_stage(model_name=name, model_version=version) @@ -1183,7 +1187,7 @@ def log_model_path( if os.path.isfile(path): self._session.file.put(path, f"{fully_qualified_model_stage_name}/data") elif os.path.isdir(path): - with utils.zip_file_or_directory_to_stream(path, path, add_init_py=True) as input_stream: + with file_utils.zip_file_or_directory_to_stream(path, path) as input_stream: self._session._conn.upload_stream( input_stream=input_stream, stage_location=fully_qualified_model_stage_name, diff --git a/snowflake/ml/version.py b/snowflake/ml/version.py index dfd68105..859264f1 100644 --- a/snowflake/ml/version.py +++ b/snowflake/ml/version.py @@ -1,6 +1,6 @@ # Update this for the versions # Don't change the forth version number from None -VERSION = (0, 2, 4, None) # this has to be in sync with BUILD.bazel and ci/conda_recipe/meta.yaml +VERSION = (0, 3, 0, None) # this has to be in sync with BUILD.bazel and ci/conda_recipe/meta.yaml def get_version() -> str: