Skip to content

Commit e9aac2d

Browse files
snowflake-provisioner
and
Snowflake Authors
authored
Project import generated by Copybara. (#14)
GitOrigin-RevId: 09d233ee68cf95998a2373d91e0a91da685df8e6 Co-authored-by: Snowflake Authors <[email protected]>
1 parent 4044414 commit e9aac2d

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

63 files changed

+1441
-938
lines changed

.pre-commit-config.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ repos:
3131
# The first two lines of meta.yaml does not work with check-yaml
3232
exclude: >
3333
(?x)^(
34-
recipe/meta.yaml|
34+
ci/conda_recipe/meta.yaml|
3535
.github/repo_meta.yaml|
3636
)$
3737
- id: debug-statements

ci/conda_recipe/meta.yaml

+4-3
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,11 @@
22
# (dependencies, version number) from a common place. We also need to define that
33
# common place, as currently it's a BUILD rule.
44
# See https://docs.conda.io/projects/conda-build/en/latest/resources/define-metadata.html#templating-with-jinja
5+
{% set version_match = load_file_regex(load_file='../../snowflake/ml/version.bzl', regex_pattern='VERSION = "(\d\.\d\.\d*)"\s.*') %}
56

67
package:
78
name: snowflake-ml-python
8-
version: 0.3.0 # this has to be in sync with snowflake/ml/BUILD.bazel and snowflake/ml/version.py
9+
version: {{ version_match.group(1) }}
910

1011
source:
1112
path: ../../
@@ -26,7 +27,7 @@ requirements:
2627
- pyyaml>=6.0,<7
2728
- scipy>=1.9,<2
2829
- snowflake-connector-python
29-
- snowflake-snowpark-python>=1.3.0,<=2
30+
- snowflake-snowpark-python>=1.4.0,<=2
3031
- sqlparse>=0.4,<1
3132

3233
# TODO(snandamuri): Versions of these packages must be exactly same between user's workspace and
@@ -35,7 +36,7 @@ requirements:
3536
# versions that are available in the snowflake conda channel. Since there is no way to specify allow list of
3637
# versions in the requirements file, we are pinning the versions here.
3738
- joblib>=1.0.0,<=1.1.1
38-
- scikit-learn==1.2.1
39+
- scikit-learn>=1.2.1,<2
3940
- xgboost==1.7.3
4041
about:
4142
home: https://github.com/snowflakedb/snowflake-ml-python

codegen/codegen_rules.bzl

+1-1
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def autogen_estimators(module, estimator_info_list):
8383
deps = [
8484
":init",
8585
"//snowflake/ml/framework:framework",
86-
"//snowflake/ml/utils:telemetry",
86+
"//snowflake/ml/_internal:telemetry",
8787
"//snowflake/ml/_internal/utils:temp_file_utils",
8888
"//snowflake/ml/_internal/utils:query_result_checker",
8989
"//snowflake/ml/_internal/utils:pkg_version_utils",

codegen/sklearn_wrapper_template.py_template

+39-15
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@ import numpy as np
1212
{transform.estimator_imports}
1313
from sklearn.utils.metaestimators import available_if
1414

15-
from snowflake.ml.framework.base import BaseEstimator, BaseTransformer
16-
from snowflake.ml.utils import telemetry
15+
from snowflake.ml.framework.base import BaseTransformer
16+
from snowflake.ml._internal import telemetry
1717
from snowflake.ml._internal.utils.query_result_checker import SqlResultValidator
1818
from snowflake.ml._internal.utils import pkg_version_utils, identifier
1919
from snowflake.ml._internal.utils.temp_file_utils import cleanup_temp_files, get_temp_file_path
@@ -98,7 +98,7 @@ def _validate_sklearn_args(args: Dict[str, Any], klass: type) -> Dict[str, Any]:
9898
return result
9999

100100

101-
class {transform.original_class_name}(BaseEstimator, BaseTransformer):
101+
class {transform.original_class_name}(BaseTransformer):
102102
r"""{transform.estimator_class_docstring}
103103
"""
104104

@@ -203,9 +203,6 @@ class {transform.original_class_name}(BaseEstimator, BaseTransformer):
203203
local_result_file_name = get_temp_file_path()
204204
stage_result_file_name = os.path.join(transform_stage_name, os.path.basename(local_transform_file_name))
205205

206-
# Put locally serialized transform on stage.
207-
session.file.put(local_transform_file_name, stage_transform_file_name, auto_compress=False, overwrite=True)
208-
209206
fit_sproc_name = "SNOWML_FIT_{{safe_id}}".format(safe_id=self.id)
210207
statement_params = telemetry.get_function_usage_statement_params(
211208
project=_PROJECT,
@@ -216,6 +213,8 @@ class {transform.original_class_name}(BaseEstimator, BaseTransformer):
216213
api_calls=[sproc],
217214
custom_tags=dict([("autogen", True)]),
218215
)
216+
# Put locally serialized transform on stage.
217+
session.file.put(local_transform_file_name, stage_transform_file_name, auto_compress=False, overwrite=True, statement_params=statement_params)
219218

220219
@sproc(
221220
is_permanent=False,
@@ -244,13 +243,13 @@ class {transform.original_class_name}(BaseEstimator, BaseTransformer):
244243

245244
# Execute snowpark query and obtain the results as pandas dataframe
246245
# NB: this implies that the result data must fit into memory.
247-
df = session.sql(sql_query).to_pandas()
246+
df = session.sql(sql_query).to_pandas(statement_params=statement_params)
248247

249248
local_transform_file = tempfile.NamedTemporaryFile(delete=True)
250249
local_transform_file_name = local_transform_file.name
251250
local_transform_file.close()
252251

253-
session.file.get(stage_transform_file_name, local_transform_file_name)
252+
session.file.get(stage_transform_file_name, local_transform_file_name, statement_params=statement_params)
254253

255254
estimator = joblib.load(os.path.join(local_transform_file_name, os.listdir(local_transform_file_name)[0]))
256255

@@ -270,7 +269,7 @@ class {transform.original_class_name}(BaseEstimator, BaseTransformer):
270269
local_result_file.close()
271270

272271
joblib_dump_files = joblib.dump(estimator, local_result_file_name)
273-
session.file.put(local_result_file_name, stage_result_file_name, auto_compress = False, overwrite = True)
272+
session.file.put(local_result_file_name, stage_result_file_name, auto_compress = False, overwrite = True, statement_params=statement_params)
274273

275274
# Note: you can add something like + "|" + str(df) to the return string
276275
# to pass debug information to the caller.
@@ -303,7 +302,7 @@ class {transform.original_class_name}(BaseEstimator, BaseTransformer):
303302
if len(fields) > 1:
304303
print("\n".join(fields[1:]))
305304

306-
session.file.get(os.path.join(stage_result_file_name, sproc_export_file_name), local_result_file_name)
305+
session.file.get(os.path.join(stage_result_file_name, sproc_export_file_name), local_result_file_name, statement_params=statement_params)
307306
self._sklearn_object = joblib.load(os.path.join(local_result_file_name, sproc_export_file_name))
308307

309308
cleanup_temp_files([local_transform_file_name, local_result_file_name])
@@ -543,6 +542,11 @@ class {transform.original_class_name}(BaseEstimator, BaseTransformer):
543542
subproject=_SUBPROJECT,
544543
custom_tags=dict([("autogen", True)]),
545544
)
545+
@telemetry.add_stmt_params_to_df(
546+
project=_PROJECT,
547+
subproject=_SUBPROJECT,
548+
custom_tags=dict([("autogen", True)]),
549+
)
546550
def predict(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[DataFrame, pd.DataFrame]:
547551
"""Predict label values for each example in the input dataset.
548552

@@ -578,6 +582,11 @@ class {transform.original_class_name}(BaseEstimator, BaseTransformer):
578582
subproject=_SUBPROJECT,
579583
custom_tags=dict([("autogen", True)]),
580584
)
585+
@telemetry.add_stmt_params_to_df(
586+
project=_PROJECT,
587+
subproject=_SUBPROJECT,
588+
custom_tags=dict([("autogen", True)]),
589+
)
581590
def transform(self, dataset: Union[DataFrame, pd.DataFrame]) -> Union[DataFrame, pd.DataFrame]:
582591
"""Transform the dataset.
583592

@@ -647,6 +656,11 @@ class {transform.original_class_name}(BaseEstimator, BaseTransformer):
647656
subproject=_SUBPROJECT,
648657
custom_tags=dict([("autogen", True)]),
649658
)
659+
@telemetry.add_stmt_params_to_df(
660+
project=_PROJECT,
661+
subproject=_SUBPROJECT,
662+
custom_tags=dict([("autogen", True)]),
663+
)
650664
def predict_proba(
651665
self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "predict_proba_"
652666
) -> Union[DataFrame, pd.DataFrame]:
@@ -688,6 +702,11 @@ class {transform.original_class_name}(BaseEstimator, BaseTransformer):
688702
subproject=_SUBPROJECT,
689703
custom_tags=dict([("autogen", True)]),
690704
)
705+
@telemetry.add_stmt_params_to_df(
706+
project=_PROJECT,
707+
subproject=_SUBPROJECT,
708+
custom_tags=dict([("autogen", True)]),
709+
)
691710
def predict_log_proba(
692711
self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "predict_log_proba_"
693712
) -> Union[DataFrame, pd.DataFrame]:
@@ -729,6 +748,11 @@ class {transform.original_class_name}(BaseEstimator, BaseTransformer):
729748
subproject=_SUBPROJECT,
730749
custom_tags=dict([("autogen", True)]),
731750
)
751+
@telemetry.add_stmt_params_to_df(
752+
project=_PROJECT,
753+
subproject=_SUBPROJECT,
754+
custom_tags=dict([("autogen", True)]),
755+
)
732756
def decision_function(
733757
self, dataset: Union[DataFrame, pd.DataFrame], output_cols_prefix: str = "decision_function_"
734758
) -> Union[DataFrame, pd.DataFrame]:
@@ -837,9 +861,6 @@ class {transform.original_class_name}(BaseEstimator, BaseTransformer):
837861
).validate()
838862

839863
stage_score_file_name = os.path.join(score_stage_name, os.path.basename(local_score_file_name))
840-
# Put locally serialized score on stage.
841-
session.file.put(local_score_file_name, stage_score_file_name, auto_compress=False, overwrite=True)
842-
843864
score_sproc_name = "SNOWML_SCORE_{{safe_id}}".format(safe_id=self.id)
844865
statement_params = telemetry.get_function_usage_statement_params(
845866
project=_PROJECT,
@@ -850,6 +871,9 @@ class {transform.original_class_name}(BaseEstimator, BaseTransformer):
850871
api_calls=[sproc],
851872
custom_tags=dict([("autogen", True)]),
852873
)
874+
# Put locally serialized score on stage.
875+
session.file.put(local_score_file_name, stage_score_file_name, auto_compress=False, overwrite=True, statement_params=statement_params)
876+
853877
@sproc(
854878
is_permanent=False,
855879
name=score_sproc_name,
@@ -874,13 +898,13 @@ class {transform.original_class_name}(BaseEstimator, BaseTransformer):
874898
import inspect
875899
{transform.fit_sproc_imports}
876900

877-
df = session.sql(sql_query).to_pandas()
901+
df = session.sql(sql_query).to_pandas(statement_params=statement_params)
878902

879903
local_score_file = tempfile.NamedTemporaryFile(delete=True)
880904
local_score_file_name = local_score_file.name
881905
local_score_file.close()
882906

883-
session.file.get(stage_score_file_name, local_score_file_name)
907+
session.file.get(stage_score_file_name, local_score_file_name, statement_params=statement_params)
884908
estimator = joblib.load(os.path.join(local_score_file_name, os.listdir(local_score_file_name)[0]))
885909
argspec = inspect.getfullargspec(estimator.score)
886910
if "X" in argspec.args:

conda-env-snowflake.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,8 @@ dependencies:
3232
- ruamel.yaml==0.17.21
3333
- s3fs==2022.10.0
3434
- scipy==1.9.3
35-
- scikit-learn==1.2.1
36-
- snowflake-snowpark-python==1.3.0
35+
- scikit-learn==1.2.2
36+
- snowflake-snowpark-python==1.4.0
3737
- sqlparse==0.4.3
3838
- typing-extensions==4.3.0
3939
- xgboost==1.7.3

conda-env.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@ dependencies:
2828
- pytorch==1.12.1
2929
- ruamel.yaml==0.17.21
3030
- s3fs==2022.10.0
31-
- scikit-learn==1.2.1
31+
- scikit-learn==1.2.2
3232
- scipy==1.9.3
33-
- snowflake-snowpark-python==1.3.0
33+
- snowflake-snowpark-python==1.4.0
3434
- sqlparse==0.4.3
3535
- tensorflow==2.9.1
3636
- torchdata==0.4.1

snowflake/ml/BUILD.bazel

+10-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
load("//bazel:py_rules.bzl", "py_library", "snowml_wheel")
2+
load(":version.bzl", "VERSION")
23

34
package(default_visibility = ["//visibility:public"])
45

@@ -10,6 +11,12 @@ _LIGHTGBM_REQUIRES = ["lightgbm==3.3.5"]
1011

1112
_ALL_REQUIRES = _TENSORFLOW_REQUIRES + _PYTORCH_REQUIRES + _LIGHTGBM_REQUIRES
1213

14+
genrule(
15+
name = "generate_version",
16+
outs = ["version.py"],
17+
cmd = "echo 'VERSION=\"" + VERSION + "\"'> $@",
18+
)
19+
1320
py_library(
1421
name = "version",
1522
srcs = ["version.py"],
@@ -36,19 +43,19 @@ snowml_wheel(
3643
"pyyaml>=6.0,<7",
3744
"scipy>=1.9,<2",
3845
"snowflake-connector-python[pandas]",
39-
"snowflake-snowpark-python>=1.3.0,<2",
46+
"snowflake-snowpark-python>=1.4.0,<2",
4047
"sqlparse>=0.4,<1",
4148

4249
# TODO(snandamuri): Versions of these packages must be exactly same between user's workspace and
4350
# snowpark sandbox. Generic definitions like scikit-learn>=1.1.0,<2 wont work because snowflake conda channel
4451
# only has a few allowlisted versions of scikit-learn available, so we must force users to use scikit-learn
4552
# versions that are available in the snowflake conda channel. Since there is no way to specify allow list of
4653
# versions in the requirements file, we are pinning the versions here.
47-
"scikit-learn==1.2.1",
54+
"scikit-learn>=1.2.1,<2",
4855
"xgboost==1.7.3",
4956
"joblib>=1.0.0,<=1.1.1", # All the release versions between 1.0.0 and 1.1.1 are available in SF Conda channel.
5057
],
51-
version = "0.3.0", # this has to be in sync with version.py and ci/conda_recipe/meta.yaml
58+
version = VERSION,
5259
deps = [
5360
"//snowflake/ml/metrics:metrics_pkg",
5461
"//snowflake/ml/preprocessing:preprocessing_pkg",

snowflake/ml/_internal/BUILD.bazel

+17
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,20 @@ py_test(
5050
"//snowflake/ml/test_utils:mock_session",
5151
],
5252
)
53+
54+
py_library(
55+
name = "telemetry",
56+
srcs = ["telemetry.py"],
57+
deps = [
58+
"//snowflake/ml/_internal:env",
59+
],
60+
)
61+
62+
py_test(
63+
name = "telemetry_test",
64+
srcs = ["telemetry_test.py"],
65+
deps = [
66+
":telemetry",
67+
"//snowflake/ml/_internal:env",
68+
],
69+
)

snowflake/ml/_internal/env.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,6 @@
33
from snowflake.ml import version
44

55
SOURCE = "SnowML"
6-
VERSION = version.get_version()
6+
VERSION = version.VERSION
77
PYTHON_VERSION = platform.python_version()
88
OS = platform.system()

snowflake/ml/_internal/env_utils.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -275,7 +275,7 @@ def validate_requirements_in_snowflake_conda_channel(
275275
)
276276
sql = textwrap.dedent(
277277
f"""
278-
SELECT *
278+
SELECT PACKAGE_NAME, VERSION
279279
FROM information_schema.packages
280280
WHERE ({pkg_names_str})
281281
AND language = 'python';
@@ -289,7 +289,7 @@ def validate_requirements_in_snowflake_conda_channel(
289289
query=sql,
290290
)
291291
.has_column("VERSION")
292-
.has_dimensions(expected_rows=None, expected_cols=3)
292+
.has_dimensions(expected_rows=None, expected_cols=2)
293293
.validate()
294294
)
295295
for row in result:

0 commit comments

Comments
 (0)