Project import generated by Copybara. (#95)
GitOrigin-RevId: 42f68c299b1a73edc54baccd7cc25743116f565f

Co-authored-by: Snowflake Authors <[email protected]>
sfc-gh-anavalos and Snowflake Authors authored Apr 8, 2024
1 parent fbebee7 commit b1cfe76
Showing 59 changed files with 2,158 additions and 635 deletions.
32 changes: 30 additions & 2 deletions CHANGELOG.md
@@ -1,12 +1,40 @@
# Release History

## 1.3.1
## 1.4.0

### Bug Fixes

- Registry: Fix a bug where, when multiple models are called from the same query, models other than the first one
  return incorrect results. This fix only applies to newly logged models.
- Modeling: When registering a model, only the method(s) mentioned in `save_model` are added to the model signature
  in SnowML models.
- Modeling: Fix a bug where, when `n_jobs` is not 1, the model cannot execute batch inference methods such as
  `predict` and `predict_log_proba`. `n_jobs` is now automatically set to 1 because the vectorized UDF currently
  does not support the joblib parallel backend.
- Modeling: Fix a bug where batch inference methods cannot infer the data type when the first row of data contains NULL.
- Modeling: Match Distributed HPO output column names with the Snowflake identifier.
- Modeling: Relax package versions for all Distributed HPO methods if the installed version is not available in the
  Snowflake conda channel.
- Modeling: Add sklearn as a required dependency for the LightGBM package.

### Behavior Changes

- Registry: The `apply` method is no longer logged by default when logging an XGBoost model. If it is required, it can
  be specified manually when logging the model with `log_model(..., options={"target_methods": ["apply", ...]})`, as
  shown in the sketch below.
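A minimal sketch (not part of this commit) of opting back in to `apply`; the connection parameters, training data, and model/version names below are placeholders:

```python
from snowflake.snowpark import Session
from snowflake.ml.registry import Registry
from xgboost import XGBClassifier

# Placeholder connection details and training data; substitute your own.
session = Session.builder.configs(connection_parameters).create()
xgb_model = XGBClassifier().fit(X_train, y_train)

reg = Registry(session=session)
mv = reg.log_model(
    xgb_model,
    model_name="my_xgb_model",
    version_name="v1",
    # `apply` is no longer included by default, so request it explicitly.
    options={"target_methods": ["apply", "predict"]},
)
```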

### New Features

- Registry: Add support for `sentence-transformers` model (`sentence_transformers.SentenceTransformer`).
- Registry: A version name is no longer required when logging a model. If one is not provided, a random human-readable
  ID is generated (see the sketch after this list).
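A minimal sketch (assumptions noted in comments) combining both features: logging a `sentence_transformers` model without a version name so that a human-readable ID is generated for it:

```python
from sentence_transformers import SentenceTransformer
from snowflake.ml.registry import Registry

embedder = SentenceTransformer("all-MiniLM-L6-v2")  # any pretrained sentence-transformers model
reg = Registry(session=session)                     # `session` is an existing Snowpark session

# No version_name is passed, so a random human-readable ID is generated for the version.
mv = reg.log_model(embedder, model_name="my_embedder")
print(mv.version_name)
```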

## 1.3.1 (2024-03-21)

### New Features

- FileSet: `snowflake.ml.fileset.sfcfs.SFFileSystem` can now be used in UDFs and stored procedures.

## 1.3.0
## 1.3.0 (2024-03-12)

### Bug Fixes

1 change: 1 addition & 0 deletions bazel/environments/conda-env-build-test.yml
@@ -8,5 +8,6 @@ channels:
dependencies:
- inflection==0.5.1
- psutil==5.9.0
- pytest-rerunfailures==12.0
- pytest-xdist==3.5.0
- pytest==7.4.0
1 change: 1 addition & 0 deletions bazel/environments/conda-env-snowflake.yml
@@ -34,6 +34,7 @@ dependencies:
- protobuf==3.20.3
- psutil==5.9.0
- pyarrow==10.0.1
- pytest-rerunfailures==12.0
- pytest-xdist==3.5.0
- pytest==7.4.0
- pytimeparse==1.1.8
1 change: 1 addition & 0 deletions bazel/environments/conda-env.yml
@@ -39,6 +39,7 @@ dependencies:
- protobuf==3.20.3
- psutil==5.9.0
- pyarrow==10.0.1
- pytest-rerunfailures==12.0
- pytest-xdist==3.5.0
- pytest==7.4.0
- pytimeparse==1.1.8
1 change: 1 addition & 0 deletions bazel/environments/conda-gpu-env.yml
@@ -40,6 +40,7 @@ dependencies:
- protobuf==3.20.3
- psutil==5.9.0
- pyarrow==10.0.1
- pytest-rerunfailures==12.0
- pytest-xdist==3.5.0
- pytest==7.4.0
- pytimeparse==1.1.8
2 changes: 2 additions & 0 deletions ci/RunBazelAction.sh
@@ -149,6 +149,7 @@ if [[ "${action}" = "test" ]]; then
"${bazel}" test \
"${cache_test_results}" \
--test_output=errors \
--flaky_test_attempts=2 \
"${tag_filter}" \
--target_pattern_file "${sf_only_test_targets_file}"
sf_only_bazel_exit_code=$?
@@ -158,6 +159,7 @@ if [[ "${action}" = "test" ]]; then
--config=extended \
"${cache_test_results}" \
--test_output=errors \
--flaky_test_attempts=2 \
"${tag_filter}" \
--target_pattern_file "${extended_test_targets_file}"
extended_bazel_exit_code=$?
1 change: 1 addition & 0 deletions ci/build_and_run_tests.sh
@@ -274,6 +274,7 @@ COMMON_PYTEST_FLAG+=(--strict-markers) # Strict the pytest markers to avoid typo
COMMON_PYTEST_FLAG+=(--import-mode=append)
COMMON_PYTEST_FLAG+=(--log-cli-level=INFO)
COMMON_PYTEST_FLAG+=(-n logical)
COMMON_PYTEST_FLAG+=(--reruns 1)

if [[ -n "${JUNIT_REPORT_PATH}" ]]; then
COMMON_PYTEST_FLAG+=(--junitxml "${JUNIT_REPORT_PATH}")
2 changes: 1 addition & 1 deletion ci/conda_recipe/meta.yaml
@@ -17,7 +17,7 @@ build:
noarch: python
package:
name: snowflake-ml-python
version: 1.3.1
version: 1.4.0
requirements:
build:
- python
9 changes: 6 additions & 3 deletions codegen/sklearn_wrapper_generator.py
@@ -1127,11 +1127,14 @@ def generate(self) -> "LightGBMWrapperGenerator":
super().generate()

# Populate LightGBM specific values
self.estimator_imports_list.append("import lightgbm")
self.estimator_imports_list.extend(["import sklearn", "import lightgbm"])
self.test_estimator_input_args_list.extend(["random_state=0", "n_jobs=1"])
self.score_sproc_imports = ["lightgbm"]
self.score_sproc_imports = ["lightgbm", "sklearn"]

self.deps = "f'numpy=={np.__version__}', f'lightgbm=={lightgbm.__version__}', f'cloudpickle=={cp.__version__}'"
self.deps = (
"f'numpy=={np.__version__}', f'lightgbm=={lightgbm.__version__}', f'cloudpickle=={cp.__version__}'"
", f'scikit-learn=={sklearn.__version__}'"
)
self.supported_export_method = "to_lightgbm"
self.unsupported_export_methods = ["to_sklearn", "to_xgboost"]
self._construct_string_from_lists()
1 change: 1 addition & 0 deletions requirements.txt
@@ -31,6 +31,7 @@ peft==0.5.0
protobuf==3.20.3
psutil==5.9.0
pyarrow==10.0.1
pytest-rerunfailures==12.0
pytest-xdist==3.5.0
pytest==7.4.0
pytimeparse==1.1.8
4 changes: 4 additions & 0 deletions requirements.yml
@@ -198,6 +198,10 @@
dev_version: 7.4.0
tags:
- build_test_env
- name: pytest-rerunfailures
dev_version: 12.0
tags:
- build_test_env
- name: pytest-xdist
dev_version: 3.5.0
tags:
18 changes: 18 additions & 0 deletions snowflake/ml/_internal/exceptions/BUILD.bazel
@@ -21,6 +21,24 @@ py_library(
srcs = ["error_messages.py"],
)

py_library(
name = "dataset_errors",
srcs = ["dataset_errors.py"],
visibility = [
"//bazel:snowml_public_common",
"//snowflake/ml/beta/dataset:__pkg__",
],
)

py_library(
name = "dataset_error_messages",
srcs = ["dataset_error_messages.py"],
visibility = [
"//bazel:snowml_public_common",
"//snowflake/ml/beta/dataset:__pkg__",
],
)

py_library(
name = "fileset_errors",
srcs = ["fileset_errors.py"],
1 change: 1 addition & 0 deletions snowflake/ml/_internal/exceptions/dataset_error_messages.py
@@ -0,0 +1 @@
DATASET_ALREADY_EXISTS = "Dataset {}/{} already exists."
32 changes: 32 additions & 0 deletions snowflake/ml/_internal/exceptions/dataset_errors.py
@@ -0,0 +1,32 @@
# Error code from Snowflake Python Connector.
ERRNO_FILE_EXIST_IN_STAGE = "001030"
ERRNO_DOMAIN_NOT_EXIST = "002003"
ERRNO_STAGE_NOT_EXIST = "391707"


class DatasetError(Exception):
"""Base class for other exceptions."""


class DatasetNotExistError(DatasetError):
"""Raised when the requested Dataset does not exist."""


class DatasetExistError(DatasetError):
"""Raised when there is already an existing Dataset with the same name and version in selected schema."""


class DatasetLocationError(DatasetError):
"""Raised when the given location to the Dataset is invalid."""


class DatasetCannotDeleteError(DatasetError):
"""Raised when a Dataset is unable to get deleted."""


class DatasetIntegrityError(DatasetError):
"""Raised when the Dataset contains invalid or unrecognized files."""


class DatasetInvalidSourceError(DatasetError, ValueError):
"""Raised when trying to create a Dataset from an invalid data source"""
36 changes: 36 additions & 0 deletions snowflake/ml/_internal/human_readable_id/BUILD.bazel
@@ -0,0 +1,36 @@
load("//bazel:py_rules.bzl", "py_library", "py_test")

package(default_visibility = ["//visibility:public"])

filegroup(
name = "hrid_words",
srcs = [
"adjectives.txt",
"animals.txt",
],
)

py_library(
name = "hrid_generator_base",
srcs = ["hrid_generator_base.py"],
)

py_library(
name = "hrid_generator",
srcs = ["hrid_generator.py"],
data = [
":hrid_words",
],
deps = [
":hrid_generator_base",
],
)

py_test(
name = "hrid_generator_test",
srcs = ["hrid_generator_test.py"],
deps = [
":hrid_generator",
"//snowflake/ml/_internal/utils:sql_identifier",
],
)
128 changes: 128 additions & 0 deletions snowflake/ml/_internal/human_readable_id/adjectives.txt
@@ -0,0 +1,128 @@
afraid
ancient
angry
average
bad
big
bitter
black
blue
brave
breezy
bright
brown
calm
chatty
chilly
clever
cold
cowardly
cuddly
curly
curvy
dangerous
dry
dull
empty
evil
fast
fat
fluffy
foolish
fresh
friendly
funny
gentle
giant
good
great
green
grumpy
happy
hard
heavy
helpless
honest
horrible
hot
hungry
itchy
jolly
kind
lazy
light
little
loud
lovely
lucky
massive
mean
mighty
modern
moody
nasty
neat
nervous
new
nice
odd
old
orange
ordinary
perfect
pink
plastic
polite
popular
pretty
proud
purple
quick
quiet
rare
red
rotten
rude
selfish
serious
shaggy
sharp
short
shy
silent
silly
slimy
slippery
smart
smooth
soft
sour
spicy
splendid
spotty
stale
strange
strong
stupid
sweet
swift
tall
tame
tasty
tender
terrible
thin
tidy
tiny
tough
tricky
ugly
warm
weak
wet
wicked
wise
witty
wonderful
yellow
young