Skip to content

Commit 7ee2621

Browse files
Update OSS repo (#55)
1 parent 1578599 commit 7ee2621

File tree

169 files changed

+15385
-1997
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

169 files changed

+15385
-1997
lines changed

CHANGELOG.md

+21-2
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,32 @@
11
# Release History
22

3-
## 1.0.9
3+
## 1.0.10
44

55
### Behavior Changes
66

7-
- Model Development: log_loss metric calculation is now distributed.
7+
- Model Development: precision_score, recall_score, f1_score, fbeta_score, precision_recall_fscore_support,
8+
mean_absolute_error, mean_squared_error, and mean_absolute_percentage_error metric calculations are now distributed.
9+
- Model Registry: `deploy` will now return `Deployment` for deployment information.
810

911
### New Features
1012

13+
- Model Registry: When the model signature is auto-inferred, it will be printed to the log for reference.
14+
- Model Registry: For SPCS deployment, `Deployment` details will contain `image_name`, `service_spec` and `service_function_sql`.
15+
16+
### Bug Fixes
17+
18+
- Model Development: Fix an issue that led to UTF-8 decoding errors when using modeling modules on Windows.
19+
- Model Development: Fix an issue where alias definitions cause `SnowparkSQLUnexpectedAliasException` in inference.
20+
- Model Registry: Fix an issue where signature inference could be incorrect when using a Snowpark DataFrame as sample input.
21+
- Model Registry: Fix overly strict data type validation when predicting. Now, for example, if you have an INT8
22+
type feature in the signature and provide an INT64 dataframe whose values are all within the range, it will not fail.
23+
24+
## 1.0.9 (2023-09-28)
25+
26+
### Behavior Changes
27+
28+
- Model Development: log_loss metric calculation is now distributed.
29+
1130
### Bug Fixes
1231

1332
- Model Registry: Fix an issue where building images fails with specific docker setups.

bazel/environments/conda-env-build.yml

-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@ dependencies:
1313
- lightgbm==3.3.5
1414
- numpy==1.24.3
1515
- packaging==23.0
16-
- pytimeparse==1.1.8
1716
- ruamel.yaml==0.17.21
1817
- scikit-learn==1.3.0
1918
- sphinx==5.0.2

bazel/environments/conda-env-snowflake.yml

+3-2
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ dependencies:
3030
- packaging==23.0
3131
- pandas==1.5.3
3232
- protobuf==3.20.3
33-
- pytest==7.1.2
33+
- pytest==7.4.0
3434
- pytimeparse==1.1.8
3535
- pytorch==2.0.1
3636
- pyyaml==6.0
@@ -46,8 +46,9 @@ dependencies:
4646
- sphinx==5.0.2
4747
- sqlparse==0.4.4
4848
- tensorflow==2.10.0
49+
- tokenizers==0.13.2
4950
- torchdata==0.6.1
50-
- transformers==4.29.2
51+
- transformers==4.32.1
5152
- types-protobuf==4.23.0.1
5253
- types-requests==2.30.0.0
5354
- typing-extensions==4.5.0

bazel/environments/conda-env.yml

+4-3
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ dependencies:
1313
- cachetools==4.2.2
1414
- cloudpickle==2.0.0
1515
- conda-forge::accelerate==0.22.0
16-
- conda-forge::mypy==1.4.1
16+
- conda-forge::mypy==1.5.1
1717
- conda-forge::starlette==0.27.0
1818
- conda-forge::types-PyYAML==6.0.12
1919
- conda-forge::types-cachetools==4.2.2
@@ -35,7 +35,7 @@ dependencies:
3535
- packaging==23.0
3636
- pandas==1.5.3
3737
- protobuf==3.20.3
38-
- pytest==7.1.2
38+
- pytest==7.4.0
3939
- pytimeparse==1.1.8
4040
- pytorch==2.0.1
4141
- pyyaml==6.0
@@ -51,8 +51,9 @@ dependencies:
5151
- sphinx==5.0.2
5252
- sqlparse==0.4.4
5353
- tensorflow==2.10.0
54+
- tokenizers==0.13.2
5455
- torchdata==0.6.1
55-
- transformers==4.29.2
56+
- transformers==4.32.1
5657
- types-protobuf==4.23.0.1
5758
- types-requests==2.30.0.0
5859
- typing-extensions==4.5.0

bazel/requirements/parse_and_generate_requirements.py

+3
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,8 @@ def generate_requirements(
361361
)
362362
)
363363
sys.stdout.writelines(results)
364+
elif (mode, format) == ("dev_version", "python"):
365+
sys.stdout.writelines(f"REQUIREMENTS = {repr(snowflake_only_env)}\n")
364366
elif (mode, format) == ("version_requirements", "bzl"):
365367
extras_requirements = list(filter(lambda req_info: filter_by_extras(req_info, True, False), requirements))
366368
extras_results: MutableMapping[str, Sequence[str]] = {}
@@ -479,6 +481,7 @@ def main() -> None:
479481
VALID_SETTINGS = [
480482
("validate", None, False), # Validate the environment
481483
("dev_version", "text", False), # requirements.txt
484+
("dev_version", "python", True), # sproc test dependencies list
482485
("version_requirements", "bzl", False), # wheel rule requirements
483486
("version_requirements", "python", False), # model deployment core dependencies list
484487
("dev_version", "conda_env", False), # dev conda-env.yml file

ci/build_and_run_tests.sh

+85-26
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ WITH_SNOWPARK=false
3838
MODE="continuous_run"
3939
SNOWML_DIR="snowml"
4040
SNOWPARK_DIR="snowpark-python"
41+
IS_NT=false
4142

4243
while (($#)); do
4344
case $1 in
@@ -74,26 +75,70 @@ while (($#)); do
7475
shift
7576
done
7677

78+
EXT=""
79+
BAZEL_ADDITIONAL_BUILD_FLAGS=()
80+
BAZEL_ADDITIONAL_STARTUP_FLAGS=()
81+
82+
# Computing artifact location
83+
# Detect the platform, also update some platform specific bazel settings
84+
case "$(uname)" in
85+
Linux)
86+
PLATFORM="linux" ;;
87+
Darwin)
88+
PLATFORM="darwin" ;;
89+
*NT*)
90+
PLATFORM="windows"
91+
IS_NT=true ;;
92+
esac
93+
94+
# Detect the architecture
95+
ARCH="$(uname -m)"
96+
case "$ARCH" in
97+
aarch64|ppc64le|arm64)
98+
ARCH="arm64" ;;
99+
*)
100+
ARCH="amd64" ;;
101+
esac
102+
103+
# Compute the platform-arch string used to download yq.
104+
case "${PLATFORM}_${ARCH}" in
105+
linux_arm64|linux_amd64|darwin_arm64|darwin_amd64|windows_amd64)
106+
;; # pass
107+
*)
108+
echo "Platform / Architecture is not supported by yq." >&2
109+
exit 1
110+
;;
111+
esac
112+
77113
# Check that Python 3.8 exists
78114
# TODO(SNOW-845592): ideally we should download py3.8 from conda if not exist. Currently we just fail.
79-
set +eu
80-
source /opt/rh/rh-python38/enable
81-
PYTHON38_EXIST=$?
82-
if [ $PYTHON38_EXIST -ne 0 ]; then
83-
echo "Failed to execute tests: Python3.8 is not installed."
84-
rm -rf "${TEMP_TEST_DIR}"
85-
exit ${PYTHON38_EXIST}
115+
if [ "${ENV}" = "pip" ]; then
116+
set +eu
117+
source /opt/rh/rh-python38/enable
118+
PYTHON38_EXIST=$?
119+
if [ $PYTHON38_EXIST -ne 0 ]; then
120+
echo "Failed to execute tests: Python3.8 is not installed."
121+
rm -rf "${TEMP_TEST_DIR}"
122+
exit ${PYTHON38_EXIST}
123+
fi
124+
set -eu
125+
fi
126+
127+
if [ ${IS_NT} = true ]; then
128+
EXT=".exe"
129+
BAZEL_ADDITIONAL_BUILD_FLAGS+=(--nobuild_python_zip)
130+
BAZEL_ADDITIONAL_BUILD_FLAGS+=(--enable_runfiles)
131+
BAZEL_ADDITIONAL_STARTUP_FLAGS+=(--output_user_root=D:/broot)
86132
fi
87-
set -eu
88133

89134
cd "${WORKSPACE}"
90135

91136
# Check for yq and download it if not present.
92-
_YQ_BIN="yq"
137+
_YQ_BIN="yq${EXT}"
93138
if ! command -v "${_YQ_BIN}" &>/dev/null; then
94139
TEMP_BIN=$(mktemp -d "${WORKSPACE}/tmp_bin_XXXXX")
95-
curl -Ls https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -o "${TEMP_BIN}/yq" && chmod +x "${TEMP_BIN}/yq"
96-
_YQ_BIN="${TEMP_BIN}/yq"
140+
curl -Lsv https://github.com/mikefarah/yq/releases/latest/download/yq_${PLATFORM}_${ARCH}${EXT} -o "${TEMP_BIN}/yq${EXT}" && chmod +x "${TEMP_BIN}/yq${EXT}"
141+
_YQ_BIN="${TEMP_BIN}/yq${EXT}"
97142
fi
98143

99144
# Create temp release folder
@@ -109,23 +154,39 @@ echo "Extracted Package Version from code: ${VERSION}"
109154
OPTIONAL_REQUIREMENTS=()
110155
while IFS='' read -r line; do OPTIONAL_REQUIREMENTS+=("$line"); done < <("${_YQ_BIN}" '.requirements.run_constrained.[] | ... style=""' ci/conda_recipe/meta.yaml)
111156

112-
# Generate and copy auto-gen tests.
113-
if [[ ${MODE} = "release" ]]; then
114-
"${BAZEL}" build //tests/... --build_tag_filters=autogen_build
115-
cp -r "$("${BAZEL}" info bazel-bin)/tests" "${TEMP_TEST_DIR}"
116-
fi
117-
118157
# Compare test required dependencies with wheel pkg dependencies and exclude tests if necessary
119158
EXCLUDE_TESTS=$(mktemp "${TEMP_TEST_DIR}/exclude_tests_XXXXX")
120159
if [[ ${MODE} = "continuous_run" || ${MODE} = "release" ]]; then
121160
./ci/get_excluded_tests.sh -f "${EXCLUDE_TESTS}" -m unused -b "${BAZEL}"
122161
elif [[ ${MODE} = "merge_gate" ]]; then
123162
./ci/get_excluded_tests.sh -f "${EXCLUDE_TESTS}" -m all -b "${BAZEL}"
124163
fi
164+
165+
# Generate and copy auto-gen tests.
166+
if [[ ${MODE} = "release" ]]; then
167+
# When release, we build all autogen tests
168+
"${BAZEL}" "${BAZEL_ADDITIONAL_STARTUP_FLAGS[@]+"${BAZEL_ADDITIONAL_STARTUP_FLAGS[@]}"}" build "${BAZEL_ADDITIONAL_BUILD_FLAGS[@]+"${BAZEL_ADDITIONAL_BUILD_FLAGS[@]}"}" //tests/integ/...
169+
else
170+
# In other cases, we build required utility only.
171+
"${BAZEL}" "${BAZEL_ADDITIONAL_STARTUP_FLAGS[@]+"${BAZEL_ADDITIONAL_STARTUP_FLAGS[@]}"}" build --build_tag_filters=-autogen_build,-autogen "${BAZEL_ADDITIONAL_BUILD_FLAGS[@]+"${BAZEL_ADDITIONAL_BUILD_FLAGS[@]}"}" //tests/integ/...
172+
fi
173+
174+
# Rsync does not work well with paths that contain a drive letter on Windows,
175+
# so these two rsync calls have to use relative paths instead of absolute ones.
176+
177+
rsync -av --exclude '*.runfiles_manifest' --exclude '*.runfiles/**' "bazel-bin/tests" .
178+
125179
# Copy tests into temp directory
126180
pushd "${TEMP_TEST_DIR}"
127-
rsync -av --exclude-from "${EXCLUDE_TESTS}" "${WORKSPACE}/${SNOWML_DIR}/tests" .
181+
rsync -av --exclude-from "${EXCLUDE_TESTS}" "../${SNOWML_DIR}/tests" .
128182
popd
183+
184+
# Bazel on Windows consumes a lot of memory; clean it up before proceeding to avoid OOM.
185+
if [ ${IS_NT} = true ]; then
186+
"${BAZEL}" "${BAZEL_ADDITIONAL_STARTUP_FLAGS[@]+"${BAZEL_ADDITIONAL_STARTUP_FLAGS[@]}"}" clean --expunge
187+
"${BAZEL}" "${BAZEL_ADDITIONAL_STARTUP_FLAGS[@]+"${BAZEL_ADDITIONAL_STARTUP_FLAGS[@]}"}" shutdown
188+
fi
189+
129190
popd
130191

131192
# Build snowml package
@@ -149,12 +210,10 @@ if [ "${ENV}" = "pip" ]; then
149210

150211
# Build SnowML
151212
pushd ${SNOWML_DIR}
152-
"${BAZEL}" build //snowflake/ml:wheel
213+
"${BAZEL}" "${BAZEL_ADDITIONAL_STARTUP_FLAGS[@]+"${BAZEL_ADDITIONAL_STARTUP_FLAGS[@]}"}" build "${BAZEL_ADDITIONAL_BUILD_FLAGS[@]+"${BAZEL_ADDITIONAL_BUILD_FLAGS[@]}"}" //snowflake/ml:wheel
153214
cp "$(${BAZEL} info bazel-bin)/snowflake/ml/snowflake_ml_python-${VERSION}-py3-none-any.whl" "${WORKSPACE}"
154215
popd
155216
else
156-
which conda
157-
158217
# Clean conda cache
159218
conda clean --all --force-pkgs-dirs -y
160219

@@ -183,7 +242,7 @@ pushd "${TEMP_TEST_DIR}"
183242
COMMON_PYTEST_FLAG=()
184243
COMMON_PYTEST_FLAG+=(--strict-markers) # Strict the pytest markers to avoid typo in markers
185244
COMMON_PYTEST_FLAG+=(--import-mode=append)
186-
COMMON_PYTEST_FLAG+=(-n 10)
245+
COMMON_PYTEST_FLAG+=(-n logical)
187246

188247
if [ "${ENV}" = "pip" ]; then
189248
# Copy wheel package
@@ -196,10 +255,10 @@ if [ "${ENV}" = "pip" ]; then
196255
# otherwise it will fail in dependency resolution.
197256
python3.8 -m pip install --upgrade pip
198257
python3.8 -m pip list
199-
python3.8 -m pip install "snowflake_ml_python-${VERSION}-py3-none-any.whl[all]" pytest-xdist inflection --no-cache-dir --force-reinstall
258+
python3.8 -m pip install "snowflake_ml_python-${VERSION}-py3-none-any.whl[all]" pytest-xdist[psutil] -r "${WORKSPACE}/${SNOWML_DIR}/requirements.txt" --no-cache-dir --force-reinstall
200259
if [ "${WITH_SNOWPARK}" = true ]; then
201260
cp "$(find "${WORKSPACE}" -maxdepth 1 -iname 'snowflake_snowpark_python-*.whl')" "${TEMP_TEST_DIR}"
202-
python3.8 -m pip install "$(find . -maxdepth 1 -iname 'snowflake_snowpark_python-*.whl')" --force-reinstall
261+
python3.8 -m pip install "$(find . -maxdepth 1 -iname 'snowflake_snowpark_python-*.whl')" --no-deps --force-reinstall
203262
fi
204263
python3.8 -m pip list
205264

@@ -216,12 +275,12 @@ else
216275
conda clean --all --force-pkgs-dirs -y
217276

218277
# Create testing env
219-
conda create -y -p testenv -c "file://${WORKSPACE}/conda-bld" -c "https://repo.anaconda.com/pkgs/snowflake/" --override-channels "python=3.8" snowflake-ml-python pytest-xdist inflection "${OPTIONAL_REQUIREMENTS[@]}"
278+
conda create -y -p testenv -c "${WORKSPACE}/conda-bld" -c "https://repo.anaconda.com/pkgs/snowflake/" --override-channels "python=3.8" snowflake-ml-python pytest-xdist psutil inflection "${OPTIONAL_REQUIREMENTS[@]}"
220279
conda list -p testenv
221280

222281
# Run integration tests
223282
set +e
224-
TEST_SRCDIR="${TEMP_TEST_DIR}" conda run -p testenv --no-capture-output python3.8 -m pytest "${COMMON_PYTEST_FLAG[@]}" tests/integ/
283+
TEST_SRCDIR="${TEMP_TEST_DIR}" conda run -p testenv --no-capture-output python -m pytest "${COMMON_PYTEST_FLAG[@]}" tests/integ/
225284
TEST_RETCODE=$?
226285
set -e
227286

ci/conda_recipe/meta.yaml

+4-2
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ build:
1717
noarch: python
1818
package:
1919
name: snowflake-ml-python
20-
version: 1.0.9
20+
version: 1.0.10
2121
requirements:
2222
build:
2323
- python
@@ -27,11 +27,12 @@ requirements:
2727
- aiohttp!=4.0.0a0, !=4.0.0a1
2828
- anyio>=3.5.0,<4
2929
- cachetools>=3.1.1,<5
30-
- cloudpickle
30+
- cloudpickle>=2.0.0
3131
- fsspec>=2022.11,<2024
3232
- numpy>=1.23,<2
3333
- packaging>=20.9,<24
3434
- pandas>=1.0.0,<2
35+
- pytimeparse>=1.1.8,<2
3536
- pyyaml>=6.0,<7
3637
- requests
3738
- s3fs>=2022.11,<2024
@@ -49,6 +50,7 @@ requirements:
4950
- sentencepiece>=0.1.95,<0.2
5051
- shap==0.42.1
5152
- tensorflow>=2.9,<3
53+
- tokenizers>=0.10,<1
5254
- torchdata>=0.4,<1
5355
- transformers>=4.29.2,<5
5456
source:

ci/get_excluded_tests.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ if [[ $mode = "unaffected" || $mode = "all" ]]; then
8787
# -- Begin of Query Rules Heredoc --
8888
cat >"${unaffected_test_rule_file}" <<EndOfMessage
8989
let unaffected_targets = //tests/... - rdeps(//tests/..., set($(<"${affected_targets_file}"))) in
90-
kind('source file', labels(srcs, set($(<ci/skip_merge_gate_targets)) + kind('py_test rule', \$unaffected_targets)))
90+
kind('source file', labels(srcs, set($(<ci/skip_merge_gate_targets)) + kind('py_test rule', \$unaffected_targets)) - labels(srcs, rdeps(//tests/..., set($(<"${affected_targets_file}")))))
9191
EndOfMessage
9292
# -- End of Query Rules Heredoc --
9393

codegen/codegen_rules.bzl

+1
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ def autogen_estimators(module, estimator_info_list):
9191
"//snowflake/ml/_internal/utils:identifier",
9292
"//snowflake/ml/model:model_signature",
9393
"//snowflake/ml/model/_signatures:utils",
94+
"//snowflake/ml/modeling/_internal:estimator_utils",
9495
],
9596
)
9697

codegen/sklearn_wrapper_autogen.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def _generate_src_files(
113113
List of generated files.
114114
"""
115115

116-
template = open(self.template_path).read()
116+
template = open(self.template_path, encoding="utf-8").read()
117117

118118
generated_files_list = []
119119
for generator in generators:
@@ -130,7 +130,7 @@ def _generate_src_files(
130130
# Create output src dir if it don't exist already.
131131
os.makedirs("/".join(output_file_name.split("/")[:-1]), exist_ok=True)
132132

133-
open(output_file_name, "w").write(wrapped_transform_string)
133+
open(output_file_name, "w", encoding="utf-8").write(wrapped_transform_string)
134134
logging.info("Wrote file %s", output_file_name)
135135

136136
return generated_files_list
@@ -149,7 +149,7 @@ def _generate_test_files(
149149
Returns:
150150
List of generated files.
151151
"""
152-
test_template = open(self.template_path).read()
152+
test_template = open(self.template_path, encoding="utf-8").read()
153153

154154
generated_files_list = []
155155
for generator in generators:
@@ -166,7 +166,7 @@ def _generate_test_files(
166166
# Create output test dir if it doesn't exist already.
167167
os.makedirs("/".join(test_output_file_name.split("/")[:-1]), exist_ok=True)
168168

169-
open(test_output_file_name, "w").write(wrapped_transform_string)
169+
open(test_output_file_name, "w", encoding="utf-8").write(wrapped_transform_string)
170170
logging.info("Wrote file %s", test_output_file_name)
171171

172172
return generated_files_list

codegen/sklearn_wrapper_generator.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -722,9 +722,9 @@ def _populate_function_names_and_signatures(self) -> None:
722722
for arg_to_transform in args_to_transform:
723723
if arg_to_transform in self.original_init_signature.parameters.keys():
724724
arg_transform_calls.append(
725-
f"{arg_to_transform} = _transform_snowml_obj_to_sklearn_obj({arg_to_transform})"
725+
f"{arg_to_transform} = transform_snowml_obj_to_sklearn_obj({arg_to_transform})"
726726
)
727-
deps_gathering_calls.append(f"deps = deps | _gather_dependencies({arg_to_transform})")
727+
deps_gathering_calls.append(f"deps = deps | gather_dependencies({arg_to_transform})")
728728

729729
self.estimator_init_signature = ",\n ".join(signature_lines) + ","
730730
self.sklearn_init_arguments = ",\n ".join(sklearn_init_lines) + ","

0 commit comments

Comments
 (0)