Skip to content

Commit

Permalink
Project import generated by Copybara. (#73)
Browse files Browse the repository at this point in the history
GitOrigin-RevId: 6af23f3594aae1b0f36177bfe8c706ef07c9350c

Co-authored-by: Snowflake Authors <[email protected]>
  • Loading branch information
sfc-gh-kdama and Snowflake Authors authored Dec 5, 2023
1 parent abe5b67 commit 72c6c24
Show file tree
Hide file tree
Showing 50 changed files with 1,222 additions and 504 deletions.
38 changes: 38 additions & 0 deletions .github/workflows/jira_close.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
---
# Closes the linked Jira ticket when a GitHub issue is closed or deleted.
# The issue title is expected to start with the Jira key, e.g. "SNOW-123: ...".
name: Jira closure

on:
  issues:
    types:
      - closed
      - deleted

jobs:
  close-issue:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        # v2 runs on the deprecated node12 runtime; v4 is the supported release
        # with the same inputs.
        uses: actions/checkout@v4
        with:
          repository: snowflakedb/gh-actions
          ref: jira_v1
          token: ${{ secrets.SNOWFLAKE_GITHUB_TOKEN }}  # stored in GitHub secrets
          path: .
      - name: Jira login
        uses: atlassian/gajira-login@master
        env:
          JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
          JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
          JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
      - name: Extract issue from title
        id: extract
        env:
          TITLE: ${{ github.event.issue.title }}
        run: |
          # Take the first whitespace-delimited token of the title and strip a
          # trailing colon, e.g. "SNOW-123: fix the thing" -> "SNOW-123".
          # $TITLE is quoted so titles containing globs or multiple spaces are
          # not mangled by the shell.
          jira=$(echo -n "$TITLE" | awk '{print $1}' | sed -e 's/://')
          # ::set-output is deprecated and disabled by GitHub; write the step
          # output to $GITHUB_OUTPUT instead.
          echo "jira=$jira" >> "$GITHUB_OUTPUT"
      - name: Close issue
        uses: ./jira/gajira-close
        if: startsWith(steps.extract.outputs.jira, 'SNOW-')
        with:
          issue: ${{ steps.extract.outputs.jira }}
33 changes: 33 additions & 0 deletions .github/workflows/jira_comment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
---
# Mirrors GitHub issue comments onto the linked Jira ticket.
# The issue title is expected to start with the Jira key, e.g. "SNOW-123: ...".
name: Jira comment

on:
  issue_comment:
    types:
      - created

jobs:
  comment-issue:
    # issue_comment also fires for PR comments; only mirror real issues.
    if: ${{ !github.event.issue.pull_request }}
    runs-on: ubuntu-latest
    steps:
      - name: Jira login
        uses: atlassian/gajira-login@master
        env:
          JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}
          JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
          JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
      - name: Extract issue from title
        id: extract
        env:
          TITLE: ${{ github.event.issue.title }}
        run: |
          # Take the first whitespace-delimited token of the title and strip a
          # trailing colon, e.g. "SNOW-123: fix the thing" -> "SNOW-123".
          # $TITLE is quoted so titles containing globs or multiple spaces are
          # not mangled by the shell.
          jira=$(echo -n "$TITLE" | awk '{print $1}' | sed -e 's/://')
          # ::set-output is deprecated and disabled by GitHub; write the step
          # output to $GITHUB_OUTPUT instead.
          echo "jira=$jira" >> "$GITHUB_OUTPUT"
      - name: Comment on issue
        uses: atlassian/gajira-comment@master
        if: startsWith(steps.extract.outputs.jira, 'SNOW-')
        with:
          issue: ${{ steps.extract.outputs.jira }}
          comment: "${{ github.event.comment.user.login }} commented:\n\n${{ github.event.comment.body }}\n\n${{ github.event.comment.html_url\
            \ }}"
54 changes: 54 additions & 0 deletions .github/workflows/jira_issue.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
---
# Creates a Jira ticket for every newly opened GitHub issue and writes the
# Jira key back onto the GitHub issue.
name: Jira creation

on:
  issues:
    types:
      - opened
  issue_comment:
    types:
      - created

jobs:
  create-issue:
    runs-on: ubuntu-latest
    permissions:
      issues: write
    # Only create a ticket for newly opened issues, and skip issues filed by
    # the WhiteSource bot. NOTE(review): the original condition compared
    # github.event.pull_request.user.login, but `pull_request` is never
    # present in the payload of an `issues` event, so the bot filter could
    # never match; the issue author is what was intended.
    if: (github.event_name == 'issues' && github.event.issue.user.login != 'whitesource-for-github-com[bot]')
    steps:
      - name: Checkout
        # v2 runs on the deprecated node12 runtime; v4 is the supported release
        # with the same inputs.
        uses: actions/checkout@v4
        with:
          repository: snowflakedb/gh-actions
          ref: jira_v1
          token: ${{ secrets.SNOWFLAKE_GITHUB_TOKEN }}  # stored in GitHub secrets
          path: .

      - name: Login
        uses: atlassian/[email protected]
        env:
          JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }}
          JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }}
          JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }}

      - name: Create JIRA Ticket
        id: create
        uses: atlassian/[email protected]
        with:
          project: SNOW
          issuetype: Bug
          summary: ${{ github.event.issue.title }}
          description: |
            ${{ github.event.issue.body }} \\ \\ _Created from GitHub Action_ for ${{ github.event.issue.html_url }}
          # Assign triage-ml-platform-dl and set "Data Platform: ML Engineering" component.
          fields: '{"customfield_11401":{"id":"14538"}, "assignee":{"id":"639020ab3c26ca7fa0d6eb3f"},"components":[{"id":"16520"}]}'

      - name: Update GitHub Issue
        uses: ./jira/gajira-issue-update
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          issue_number: '{{ event.issue.id }}'
          owner: '{{ event.repository.owner.login }}'
          name: '{{ event.repository.name }}'
          jira: ${{ steps.create.outputs.issue }}
15 changes: 15 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,20 @@
# Release History

## 1.1.1

### Bug Fixes

- Model Registry: The `predict` target method on registered models is now compatible with unsupervised estimators.
- Model Development: Fixed `confusion_matrix` returning incorrect results when the number of rows is not divisible by the batch size.

### Behavior Changes

### New Features

- Introduced the passthrough_cols param in the Modeling API. This new param is helpful in scenarios
  requiring automatic input_cols inference where specific columns, such as index
  columns, need to be excluded from training or inference.

## 1.1.0

### Bug Fixes
Expand Down
13 changes: 10 additions & 3 deletions codegen/sklearn_wrapper_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
input_cols: Optional[Union[str, List[str]]]
A string or list of strings representing column names that contain features.
If this parameter is not specified, all columns in the input DataFrame except
the columns specified by label_cols and sample_weight_col parameters are
considered input columns.
the columns specified by label_cols, sample_weight_col, and passthrough_cols
parameters are considered input columns.
label_cols: Optional[Union[str, List[str]]]
A string or list of strings representing column names that contain labels.
Expand All @@ -44,6 +44,13 @@
A string representing the column name containing the sample weights.
This argument is only required when working with weighted datasets.
passthrough_cols: Optional[Union[str, List[str]]]
A string or a list of strings indicating column names to be excluded from any
operations (such as train, transform, or inference). These specified column(s)
will remain untouched throughout the process. This option is helpful in scenarios
requiring automatic input_cols inference where specific columns, such as index
columns, need to be excluded from training or inference.
drop_input_cols: Optional[bool], default=False
If set, the response of predict(), transform() methods will not contain input columns.
"""
Expand Down Expand Up @@ -743,7 +750,7 @@ def _populate_function_names_and_signatures(self) -> None:
signature_lines.append(v.name)
sklearn_init_args_dict_list.append(f"'{v.name}':({v.name}, None, True)")

for arg in ["input_cols", "output_cols", "label_cols"]:
for arg in ["input_cols", "output_cols", "label_cols", "passthrough_cols"]:
signature_lines.append(f"{arg}: Optional[Union[str, Iterable[str]]] = None")
init_member_args.append(f"self.set_{arg}({arg})")

Expand Down
28 changes: 10 additions & 18 deletions codegen/sklearn_wrapper_template.py_template
Original file line number Diff line number Diff line change
Expand Up @@ -83,24 +83,6 @@ class {transform.original_class_name}(BaseTransformer):
"""
return str(uuid4()).replace("-", "_").upper()

def _infer_input_output_cols(self, dataset: Union[DataFrame, pd.DataFrame]) -> None:
"""
Infer `self.input_cols` and `self.output_cols` if they are not explicitly set.

Args:
dataset: Input dataset.
"""
if not self.input_cols:
cols = [
c for c in dataset.columns
if c not in self.get_label_cols() and c != self.sample_weight_col
]
self.set_input_cols(input_cols=cols)

if not self.output_cols:
cols = [identifier.concat_names(ids=['OUTPUT_', c]) for c in self.label_cols]
self.set_output_cols(output_cols=cols)

def set_input_cols(self, input_cols: Optional[Union[str, Iterable[str]]]) -> "{transform.original_class_name}":
"""
Input columns setter.
Expand Down Expand Up @@ -737,12 +719,22 @@ class {transform.original_class_name}(BaseTransformer):
self._model_signature_dict["predict"] = ModelSignature(inputs,
([] if self._drop_input_cols else inputs)
+ outputs)
# For mixture models that use the density mixin, `predict` returns the argmax of the log prob.
# For outlier models, returns -1 for outliers and 1 for inliers.
# Clusterer returns int64 cluster labels.
elif self._sklearn_object._estimator_type in ["DensityEstimator", "clusterer", "outlier_detector"]:
outputs = [FeatureSpec(dtype=DataType.INT64, name=c) for c in self.output_cols]
self._model_signature_dict["predict"] = ModelSignature(inputs,
([] if self._drop_input_cols else inputs)
+ outputs)

# For regressor, the type of predict is float64
elif self._sklearn_object._estimator_type == 'regressor':
outputs = [FeatureSpec(dtype=DataType.DOUBLE, name=c) for c in self.output_cols]
self._model_signature_dict["predict"] = ModelSignature(inputs,
([] if self._drop_input_cols else inputs)
+ outputs)

for prob_func in PROB_FUNCTIONS:
if hasattr(self, prob_func):
output_cols_prefix: str = f"{{prob_func}}_"
Expand Down
2 changes: 2 additions & 0 deletions codegen/transformer_autogen_test_template.py_template
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ class {transform.test_class_name}(TestCase):
inference_methods = ["transform", "predict"]
for m in inference_methods:
if callable(getattr(sklearn_reg, m, None)):
if m == 'predict':
self.assertTrue(m in reg.model_signatures)

if inference_with_udf:
output_df = getattr(reg, m)(input_df)
Expand Down
Loading

0 comments on commit 72c6c24

Please sign in to comment.