Skip to content

Commit dbe9f94

Browse files
Project import generated by Copybara. (#28)
1 parent cd17c28 commit dbe9f94

File tree

96 files changed

+6198
-3629
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

96 files changed

+6198
-3629
lines changed

.bazelrc

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Make the target platform and the host platform the same
22
build --platforms //bazel/platforms:extended_conda_env --host_platform //bazel/platforms:extended_conda_env
3-
test --platforms //bazel/platforms:extended_conda_env --host_platform //bazel/platforms:extended_conda_env
3+
test --platforms //bazel/platforms:extended_conda_env --host_platform //bazel/platforms:extended_conda_env --run_under='//bazel:test_wrapper'
44
run --platforms //bazel/platforms:extended_conda_env --host_platform //bazel/platforms:extended_conda_env
55
cquery --platforms //bazel/platforms:extended_conda_env --host_platform //bazel/platforms:extended_conda_env
66

BUILD.bazel

+8
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
load("@rules_java//java:defs.bzl", "java_binary")
2+
13
exports_files([
24
"CHANGELOG.md",
35
"README.md",
@@ -8,3 +10,9 @@ exports_files([
810
"requirements.txt",
911
"requirements.yml",
1012
])
13+
14+
java_binary(
15+
name = "bazel-diff",
16+
main_class = "com.bazel_diff.Main",
17+
runtime_deps = ["@bazel_diff//jar"],
18+
)

CHANGELOG.md

+18
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,23 @@
11
# Release History
22

3+
## 1.0.3 (2023-07-12)
4+
5+
### Behavior Changes
6+
- Model Registry: When predicting with a model whose output is a list of NumPy ndarrays, the output is no longer flattened; instead, each ndarray acts as a feature (column) in the output.
7+
8+
### New Features
9+
- Model Registry: Added support for saving, loading, and deploying PyTorch models (`torch.nn.Module` and `torch.jit.ScriptModule`).
10+
11+
### Bug Fixes
12+
13+
- Model Registry: Fix an issue where the model registry cannot be created when the database or schema name provided to `create_model_registry` contains special characters.
14+
- Model Registry: Fix an issue where `get_model_description` returns the description with additional quotes.
15+
- Model Registry: Fix an incorrect error message when attempting to remove an unset tag of a model.
16+
- Model Registry: Fix a typo in the default deployment table name.
17+
- Model Registry: A Snowpark DataFrame used as sample input, or as input to the `predict` method, that contains a column with the Snowflake `NUMBER(precision, scale)` data type where `scale = 0` no longer leads to an error, and is now correctly recognized as the `INT64` data type in the model signature.
18+
- Model Registry: Fix an issue that prevents a model logged on a system whose default encoding is not UTF-8 compatible from being deployed.
19+
- Model Registry: Added an earlier and clearer error message when any file name in the model, or the file name of the model itself, contains characters that cannot be encoded using ASCII. Deploying such a model is currently not supported.
20+
321
## 1.0.2 (2023-06-22)
422

523
### Behavior Changes

README.md

+1-3
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,7 @@ Snowpark MLOps complements the Snowpark ML Development API, and provides model m
2222

2323
During PrPr, we are iterating on the API without backward compatibility guarantees. It is better to recreate your registry every time you update the package. This means, at this time, you cannot use the registry for production use.
2424

25-
- [Documentation](http://docs.snowflake.com/developer-guide/snowpark/python/snowpark-ml-modeling)
26-
- [Issues](https://github.com/snowflakedb/snowflake-ml-python/issues)
27-
- [Source](https://github.com/snowflakedb/snowflake-ml-python/)
25+
- [Documentation](https://docs.snowflake.com/developer-guide/snowpark-ml)
2826

2927
## Getting started
3028
### Have your Snowflake account ready

WORKSPACE

+17-10
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,15 @@
11
workspace(name = "SnowML")
22

3-
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
3+
load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive", "http_jar")
4+
load("@bazel_tools//tools/build_defs/repo:git.bzl", "git_repository")
5+
6+
http_jar(
7+
name = "bazel_diff",
8+
urls = [
9+
"https://github.com/Tinder/bazel-diff/releases/download/4.3.0/bazel-diff_deploy.jar",
10+
],
11+
sha256 = "9c4546623a8b9444c06370165ea79a897fcb9881573b18fa5c9ee5c8ba0867e2",
12+
)
413

514
http_archive(
615
name = "bazel_skylib",
@@ -14,15 +23,13 @@ http_archive(
1423
load("@bazel_skylib//:workspace.bzl", "bazel_skylib_workspace")
1524

1625
bazel_skylib_workspace()
17-
# Latest @ 2022-10-18. Version 0.13.0 released 2022-09-25
18-
http_archive(
19-
name = "rules_python",
20-
sha256 = "8c8fe44ef0a9afc256d1e75ad5f448bb59b81aba149b8958f02f7b3a98f5d9b4",
21-
strip_prefix = "rules_python-0.13.0",
22-
url = "https://github.com/bazelbuild/rules_python/archive/refs/tags/0.13.0.tar.gz",
23-
# This will be unnecessary once https://github.com/bazelbuild/rules_python/pull/1274
24-
# is released.
25-
patches = ["//third_party:rules_python_description_content_type.patch"],
26+
27+
# Latest @ 2023-06-20
28+
# Replace with a released version once a newer version is released.
29+
git_repository(
30+
name="rules_python",
31+
commit="0d59fcf561f6d2c4705924bc17c151fb4b998841",
32+
remote="https://github.com/bazelbuild/rules_python.git"
2633
)
2734

2835
load("//third_party/rules_conda:defs.bzl", "conda_create", "load_conda", "register_toolchain")

bazel/BUILD.bazel

+6
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
load("@rules_python//python:defs.bzl", native_py_test = "py_test")
2+
package(default_visibility = ["//visibility:public"])
23

34
native_py_test(
45
name = "repo_paths_test",
@@ -7,3 +8,8 @@ native_py_test(
78
python_version = "PY3",
89
srcs_version = "PY3",
910
)
11+
12+
sh_binary(
13+
name = "test_wrapper",
14+
srcs = ["test_wrapper.sh"]
15+
)

bazel/filter_affected_targets.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121

2222

2323
def main(input_file: str, test_target_only: bool) -> None:
24-
with open(input_file) as f:
24+
with open(input_file, encoding="utf-8") as f:
2525
raw_targets = f.read()
2626
pattern = _AFFECTED_TESTS_QUERY_PATTERN if test_target_only else _AFFECTED_TARGETS_QUERY_PATTERN
2727
print(pattern.format(raw_targets=raw_targets))

bazel/get_affected_targets.sh

+101
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
#!/bin/bash
2+
3+
# Usage
4+
# get_affected_targets.sh [-b <bazel_path>] [-f <output_path>] [-r <revision>] [-w <workspace path>]
5+
#
6+
# Flags
7+
# -b: specify path to bazel
8+
# -f: specify output file path
9+
# -r: specify the revision to use; defaults to the current HEAD
10+
# -w: specify the workspace_path; defaults to $(pwd)
11+
#
12+
# Notes:
13+
# This script relies on bazel-diff, which is fetched by the http_jar rule named "bazel_diff" in the WORKSPACE file.
14+
#
15+
# Action
16+
# - Write the list of affected targets in our repo to the output_path file
17+
18+
set -o pipefail
19+
set -u
20+
PROG=$0
21+
22+
help() {
23+
local exit_code=$1
24+
echo "Usage: ${PROG} [-b <bazel_path>] [-f <output_path>] [-r <revision>] [-w <workspace>]"
25+
exit "${exit_code}"
26+
}
27+
28+
echo "Running ${PROG}"
29+
30+
bazel="bazel"
31+
current_revision=$(git rev-parse HEAD)
32+
pr_revision=${current_revision}
33+
output_path="/tmp/affected_targets/targets"
34+
workspace_path=$(pwd)
35+
36+
37+
38+
while getopts "b:f:r:w:h" opt; do
39+
case "${opt}" in
40+
b)
41+
bazel=${OPTARG}
42+
;;
43+
f)
44+
output_path=${OPTARG}
45+
;;
46+
r)
47+
pr_revision=${OPTARG}
48+
;;
49+
w)
50+
workspace_path=${OPTARG}
51+
;;
52+
h)
53+
help 0
54+
;;
55+
:)
56+
help 1
57+
;;
58+
?)
59+
help 1
60+
;;
61+
esac
62+
done
63+
64+
working_dir=$(mktemp -d "/tmp/tmp_XXXXX")
65+
trap 'rm -rf "${working_dir}"' EXIT
66+
67+
starting_hashes_json="${working_dir}/starting_hashes.json"
68+
final_hashes_json="${working_dir}/final_hashes.json"
69+
impacted_targets_path="${working_dir}/impacted_targets.txt"
70+
bazel_diff="${working_dir}/bazel_diff"
71+
72+
"${bazel}" run :bazel-diff --script_path="${bazel_diff}"
73+
74+
git -C "${workspace_path}" checkout "${pr_revision}" --quiet
75+
76+
echo "Generating Hashes for Revision '${pr_revision}'"
77+
78+
"${bazel_diff}" generate-hashes -w "$workspace_path" -b "${bazel}" "${starting_hashes_json}"
79+
80+
MERGE_BASE_MAIN=$(git merge-base "${pr_revision}" main)
81+
git -C "${workspace_path}" checkout "${MERGE_BASE_MAIN}" --quiet
82+
83+
echo "Generating Hashes for merge base ${MERGE_BASE_MAIN}"
84+
85+
$bazel_diff generate-hashes -w "${workspace_path}" -b "${bazel}" "${final_hashes_json}"
86+
87+
echo "Determining Impacted Targets and output to ${output_path}"
88+
$bazel_diff get-impacted-targets -sh "${starting_hashes_json}" -fh "${final_hashes_json}" -o "${impacted_targets_path}"
89+
90+
filter_query_rules_file="${working_dir}/filter_query_rules"
91+
92+
# -- Begin of Query Rules Heredoc --
93+
cat > "${filter_query_rules_file}" << EndOfMessage
94+
let raw_targets = set($(<"${impacted_targets_path}")) in
95+
\$raw_targets - kind('source file', \$raw_targets) - filter('//external[:/].*', \$raw_targets)
96+
EndOfMessage
97+
# -- End of Query Rules Heredoc --
98+
99+
"${bazel}" query --query_file="${filter_query_rules_file}" >"${output_path}"
100+
101+
git -C "${workspace_path}" checkout "${current_revision}" --quiet

bazel/py_rules.bzl

+10-4
Original file line numberDiff line numberDiff line change
@@ -144,9 +144,7 @@ def snowml_wheel(
144144
deps,
145145
description_file = None,
146146
development_status = "Alpha",
147-
compatible_with_snowpark = True,
148-
homepage = None,
149-
summary = None):
147+
compatible_with_snowpark = True):
150148
"""A SnowML customized wheel definition with lots of default values filled in.
151149
152150
Args:
@@ -156,6 +154,7 @@ def snowml_wheel(
156154
version: Version string
157155
deps: List of dependencies of type py_package
158156
development_status: String with PrPr, PuPr & GA
157+
description_file: Label of readme file.
159158
compatible_with_snowpark: adds a tag to the wheel to indicate that this
160159
wheel is compatible with the snowpark running environment.
161160
"""
@@ -164,6 +163,7 @@ def snowml_wheel(
164163
dev_status = "Development Status :: 3 - Alpha"
165164
elif development_status.lower() == "pupr":
166165
dev_status = "Development Status :: 3 - Beta"
166+
homepage = "https://github.com/snowflakedb/snowflake-ml-python"
167167
py_wheel(
168168
name = name,
169169
author = "Snowflake, Inc",
@@ -194,11 +194,17 @@ def snowml_wheel(
194194
distribution = "snowflake-ml-python",
195195
extra_requires = extra_requires,
196196
homepage = homepage,
197+
project_urls = {
198+
"Changelog": homepage + "/blob/main/CHANGELOG.md",
199+
"Documentation": "https://docs.snowflake.com/developer-guide/snowpark-ml",
200+
"Issues": homepage + "/issues",
201+
"Source": homepage,
202+
},
197203
license = "Apache License, Version 2.0",
198204
python_requires = ">=3.8,<4",
199205
python_tag = "py3",
200206
requires = requires,
201-
summary = summary,
207+
summary = "The machine learning client library that is used for interacting with Snowflake to build machine learning solutions.",
202208
version = version,
203209
deps = deps,
204210
)

bazel/requirements/parse_and_generate_requirements.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -267,9 +267,9 @@ def generate_requirements(
267267
tag_filter: Optional[str] = None,
268268
version: Optional[str] = None,
269269
) -> None:
270-
with open(schema_file_path) as f:
270+
with open(schema_file_path, encoding="utf-8") as f:
271271
schema = json.load(f)
272-
with open(req_file_path) as f:
272+
with open(req_file_path, encoding="utf-8") as f:
273273
requirements = yaml.safe_load(f)
274274

275275
jsonschema.validate(requirements, schema=schema)

bazel/test_wrapper.sh

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
#!/usr/bin/env bash
2+
3+
# Bazel test wrapper
4+
5+
# Get the first argument, which is the Bazel-generated Python entry file.
6+
# Example: snowflake/ml/model/_model_test
7+
# Part of its content (auto-generated) where the main_rel_path gets picked.
8+
#
9+
# # The main Python source file.
10+
# # The magic string percent-main-percent is replaced with the runfiles-relative
11+
# # filename of the main file of the Python binary in BazelPythonSemantics.java.
12+
# main_rel_path = 'SnowML/snowflake/ml/model/_model_test.py'
13+
#
14+
15+
ENTRY_FILE="$1"
16+
17+
# Mirror how the Bazel-generated entry file works: use the parent of the current directory as the runfiles root.
18+
RUNFILES_DIR=$(dirname $(pwd))
19+
# Get the actual main file by searching in bazel generated file.
20+
MAIN_REL_PATH=$(cat ${ENTRY_FILE} | grep -o " main_rel_path = '[^']*" | sed "s/ main_rel_path = '//g")
21+
22+
# If not a python test then this is empty
23+
if [[ "$MAIN_REL_PATH" ]]; then
24+
# Pattern for the main block
25+
TEST_STR="if[[:space:]]+__name__[[:space:]]+==[[:space:]]+[\'\"]__main__[\'\"]:[[:space:]]*"
26+
27+
# Check if the main block exists
28+
if ! grep -q -x -E "$TEST_STR" "${RUNFILES_DIR}/${MAIN_REL_PATH}" ; then
29+
echo "Missing \`if __name__ == \"__main__\":\` block in test entry file ${ENTRY_FILE}, your tests won't be run."
30+
# Follow pytest, which uses exit code 5 to indicate that no tests were collected.
31+
exit 5
32+
fi
33+
fi
34+
35+
# Execute the actual target
36+
$@

0 commit comments

Comments
 (0)