Skip to content

Commit

Permalink
Add pylint and docstring checks for promptflow-evals PRs (#3547)
Browse files Browse the repository at this point in the history
# Description

This PR

- Fixes all existing pylint errors in promptflow-evals
- Adds docstrings to all public promptflow-evals methods and classes
- Adds a pylint gate to run on all PRs that make changes to the
promptflow-evals module

# All Promptflow Contribution checklist:
- [x] **The pull request does not introduce [breaking changes].**
- [x] **CHANGELOG is updated for new features, bug fixes or other
significant changes.**
- [x] **I have read the [contribution guidelines](../CONTRIBUTING.md).**
- [x] **I confirm that all new dependencies are compatible with the MIT
license.**
- [] **Create an issue and link to the pull request to get dedicated
review from promptflow team. Learn more: [suggested
workflow](../CONTRIBUTING.md#suggested-workflow).**

## General Guidelines and Best Practices
- [x] Title of the pull request is clear and informative.
- [x] There are a small number of commits, each of which have an
informative message. This means that previously merged commits do not
appear in the history of the PR. For more information on cleaning up the
commits in your PR, [see this
page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).

### Testing Guidelines
- [] Pull request includes test coverage for the included changes.
  • Loading branch information
diondrapeck authored Jul 24, 2024
1 parent 2b53d9b commit fda05ab
Show file tree
Hide file tree
Showing 66 changed files with 1,265 additions and 578 deletions.
4 changes: 3 additions & 1 deletion .cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,9 @@
"openaimodelconfiguration",
"usecwd",
"locustio",
"euap"
"euap",
"rcfile",
"pylintrc"
],
"flagWords": [
"Prompt Flow"
Expand Down
32 changes: 32 additions & 0 deletions .github/workflows/pylint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
name: Pylint

on:
pull_request:
paths:
- src/promptflow-evals/**

jobs:
run_pylint:
runs-on: ubuntu-latest

steps:
- name: checkout code
uses: actions/checkout@v2

- name: setup python
uses: actions/setup-python@v2
with:
python-version: 3.9

- uses: snok/install-poetry@v1
- name: install pylint and azure-pylint-guidelines-checker
working-directory: ${{ env.WORKING_DIRECTORY }}
run: |
set -xe
poetry install -C src/promptflow-evals --with dev
poetry show -C src/promptflow-evals
- name: run pylint
working-directory: ${{ env.WORKING_DIRECTORY }}
run: |
cd src/promptflow-evals
poetry run pylint promptflow/evals --rcfile=../../pylintrc
53 changes: 53 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,56 @@ repos:
# Use black profile for isort to avoid conflicts
# see https://github.com/PyCQA/isort/issues/1518
args: ["--profile", "black", --line-length=120]
- repo: local
hooks:
- id: pylint
name: pylint
entry: python
language: system
args: [ -m, pylint, --rcfile=pylintrc, --output-format=parseable ]
types: [python]
- repo: local
hooks:
- id: pylint-dependencies-check
name: pylint-dependencies-check
entry: python
language: system
types: [python]
args:
- "-c"
- |
import os
import sys
import pkg_resources
# These are the versions that run in our CI
dependencies = [
(
"azure-pylint-guidelines-checker",
"0.3.1",
[
"--index-url",
"https://pkgs.dev.azure.com/azure-sdk/public/_packaging/azure-sdk-for-python/pypi/simple/",
],
),
("pylint", "3.0.3", []),
]
# Make sure that correct versions are installed
for packagename, required_version, install_args in dependencies:
try:
version = pkg_resources.get_distribution(packagename).version
assert version == required_version
except AssertionError:
print(
f"Version mismatch: Installed version '{version}' of '{packagename}' does not match required version {required_version}"
)
except pkg_resources.DistributionNotFound:
print(f"Package '{packagename}' is not installed")
else:
continue
print(f"Please run the following command to install the correct version of {packagename}")
print(f"\tpython -m pip install {packagename}=={required_version} {' '.join(install_args)}")
sys.exit(1)
57 changes: 57 additions & 0 deletions pylintrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
[MASTER]
py-version=3.9
ignore-patterns=test_*,conftest,setup,.*_utils\.py
ignore-paths=src\promptflow-evals\tests,src\promptflow-evals\samples,promptflow\evals\evaluate\_telemetry,promptflow\evals\evaluate\_batch_run_client\code_client.py,promptflow\evals\evaluate\_batch_run_client\proxy_client.py
reports=no
load-plugins=pylint_guidelines_checker

[MESSAGES CONTROL]
# For all codes, run 'pylint --list-msgs' or go to 'https://pylint.pycqa.org/en/latest/technical_reference/features.html'
# locally-disabled: Warning locally suppressed using disable-msg
# cyclic-import: because of https://github.com/PyCQA/pylint/issues/850
# too-many-arguments: Due to the nature of the CLI many commands have large arguments set which reflect in large arguments set in corresponding methods.
# Let's black deal with bad-continuation

# Added disables from super-with-arguments
disable=useless-object-inheritance,missing-timeout,missing-client-constructor-parameter-kwargs,logging-fstring-interpolation,locally-disabled,fixme,cyclic-import,unnecessary-lambda-assignment,client-method-missing-type-annotations,too-many-arguments,invalid-name,duplicate-code,too-few-public-methods,consider-using-f-string,super-with-arguments,redefined-builtin,import-outside-toplevel,client-suffix-needed,unnecessary-dunder-call,unnecessary-ellipsis,client-paging-methods-use-list,docstring-keyword-should-match-keyword-only,docstring-type-do-not-use-class,client-accepts-api-version-keyword,networking-import-outside-azure-core-transport,protected-access,missing-module-docstring,missing-client-constructor-parameter-credential

[FORMAT]
max-line-length=120

[VARIABLES]
# Tells whether we should check for unused import in __init__ files.
init-import=yes

[DESIGN]
# Maximum number of locals for function / method body
max-locals=25
# Maximum number of branch for function / method body
max-branches=20
# Maximum number of instance attributes for class
max-attributes=10
# Maximum number of ancestors
max-parents=15

[SIMILARITIES]
min-similarity-lines=10

[BASIC]
# Naming hints based on PEP 8 (https://www.python.org/dev/peps/pep-0008/#naming-conventions).
# Consider these guidelines and not hard rules. Read PEP 8 for more details.

# The invalid-name checker must be **enabled** for these hints to be used.
include-naming-hint=yes

module-naming-style=snake_case
const-naming-style=UPPER_CASE
class-naming-style=PascalCase
class-attribute-naming-style=snake_case
attr-naming-style=snake_case
method-naming-style=snake_case
function-naming-style=snake_case
argument-naming-style=snake_case
variable-naming-style=snake_case
inlinevar-naming-style=snake_case

[TYPECHECK]
generated-members=js.*
13 changes: 10 additions & 3 deletions src/promptflow-evals/promptflow/evals/_constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------


class EvaluationMetrics:
"""Metrics for model evaluation."""
GPT_GROUNDEDNESS = "gpt_groundedness"
GPT_RELEVANCE = "gpt_relevance"
GPT_COHERENCE = "gpt_coherence"
Expand All @@ -14,9 +20,10 @@ class EvaluationMetrics:


class Prefixes:
_INPUTS = "inputs."
_OUTPUTS = "outputs."
_TGT_OUTPUTS = "__outputs."
"""Column prefixes for inputs and outputs."""
INPUTS = "inputs."
OUTPUTS = "outputs."
TSG_OUTPUTS = "__outputs."


DEFAULT_EVALUATION_RESULTS_FILE_NAME = "evaluation_results.json"
Expand Down
4 changes: 2 additions & 2 deletions src/promptflow-evals/promptflow/evals/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

try:
__version__ = importlib.metadata.version("promptflow-evals")
except BaseException:
__version__ = '0.0.1.dev0'
except BaseException: # pylint: disable=broad-exception-caught
__version__ = "0.0.1.dev0"

VERSION: str = __version__
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,15 @@


class BatchRunContext:
def __init__(self, client):
"""Context manager for batch run clients.
:param client: The client to run in the context.
:type client: Union[
~promptflow.evals.evaluate.code_client.CodeClient,
~promptflow.evals.evaluate.proxy_client.ProxyClient
]
"""
def __init__(self, client) -> None:
self.client = client
self._is_timeout_set_by_system = False

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def get_aggregated_metrics(self):
if self.aggregated_metrics is not None
else None
)
except Exception as ex:
except Exception as ex: # pylint: disable=broad-exception-caught
LOGGER.debug(f"Error calculating metrics for evaluator {self.evaluator_name}, failed with error {str(ex)}")
aggregated_metrics = None

Expand Down Expand Up @@ -107,7 +107,7 @@ def _calculate_aggregations(self, evaluator, run):
aggr_func = getattr(evaluator, "__aggregate__")
aggregated_output = aggr_func(aggregate_input)
return aggregated_output
except Exception as ex:
except Exception as ex: # pylint: disable=broad-exception-caught
LOGGER.warning(
f"Error calculating aggregations for evaluator {run.evaluator_name}," f" failed with error {str(ex)}"
)
Expand All @@ -118,11 +118,15 @@ def run(self, flow, data, evaluator_name=None, column_mapping=None, **kwargs):
if not isinstance(input_df, pd.DataFrame):
try:
json_data = load_jsonl(data)
except json.JSONDecodeError:
raise ValueError(f"Failed to parse data as JSON: {data}. Please provide a valid json lines data.")
except json.JSONDecodeError as exc:
raise ValueError(
f"Failed to parse data as JSON: {data}. Please provide a valid json lines data."
) from exc

input_df = pd.DataFrame(json_data)
eval_future = self._thread_pool.submit(self._calculate_metric, flow, input_df, column_mapping, evaluator_name)
eval_future = self._thread_pool.submit(
self._calculate_metric, flow, input_df, column_mapping, evaluator_name
) # pylint: disable=specify-parameter-names-in-call
run = CodeRun(run=eval_future, input_data=data, evaluator_name=evaluator_name, aggregated_metrics=None)
aggregation_future = self._thread_pool.submit(self._calculate_aggregations, evaluator=flow, run=run)
run.aggregated_metrics = aggregation_future
Expand All @@ -137,7 +141,7 @@ def get_metrics(self, run):
aggregated_metrics = run.get_aggregated_metrics()
print("Aggregated metrics")
print(aggregated_metrics)
except Exception as ex:
except Exception as ex: # pylint: disable=broad-exception-caught
LOGGER.debug(f"Error calculating metrics for evaluator {run.evaluator_name}, failed with error {str(ex)}")
return None
return aggregated_metrics
Loading

0 comments on commit fda05ab

Please sign in to comment.