Add pylint and docstring checks for promptflow-evals PRs (#3547)

# Description This PR - Fixes all existing pylint errors in promptflow-evals - Adds docstrings to all public promptflow-evals methods and classes - Adds a pylint gate to run on all PRs that make changes to the promptflow-evals module # All Promptflow Contribution checklist: - [x] **The pull request does not introduce [breaking changes].** - [x] **CHANGELOG is updated for new features, bug fixes or other significant changes.** - [x] **I have read the [contribution guidelines](../CONTRIBUTING.md).** - [x] **I confirm that all new dependencies are compatible with the MIT license.** - [] **Create an issue and link to the pull request to get dedicated review from promptflow team. Learn more: [suggested workflow](../CONTRIBUTING.md#suggested-workflow).** ## General Guidelines and Best Practices - [x] Title of the pull request is clear and informative. - [x] There are a small number of commits, each of which have an informative message. This means that previously merged commits do not appear in the history of the PR. For more information on cleaning up the commits in your PR, [see this page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md). ### Testing Guidelines - [] Pull request includes test coverage for the included changes.
microsoft · Jul 24, 2024 · fda05ab · fda05ab
1 parent 2b53d9b
commit fda05ab
Show file tree

Hide file tree

Showing 66 changed files with 1,265 additions and 578 deletions.
diff --git a/.cspell.json b/.cspell.json
@@ -243,7 +243,9 @@
     "openaimodelconfiguration",
     "usecwd",
     "locustio",
-    "euap"
+    "euap",
+    "rcfile",
+    "pylintrc"
   ],
   "flagWords": [
     "Prompt Flow"

diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
@@ -0,0 +1,32 @@
+name: Pylint
+
+on:
+  pull_request:
+    paths:
+      - src/promptflow-evals/**
+
+jobs:
+  run_pylint:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: checkout code
+        uses: actions/checkout@v2
+
+      - name: setup python
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.9
+
+      - uses: snok/install-poetry@v1
+      - name: install pylint and azure-pylint-guidelines-checker
+        working-directory: ${{ env.WORKING_DIRECTORY }}
+        run: |
+          set -xe
+          poetry install -C src/promptflow-evals --with dev
+          poetry show -C src/promptflow-evals
+      - name: run pylint
+        working-directory: ${{ env.WORKING_DIRECTORY }}
+        run: |
+          cd src/promptflow-evals
+          poetry run pylint promptflow/evals --rcfile=../../pylintrc
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -44,3 +44,56 @@ repos:
           # Use black profile for isort to avoid conflicts
           # see https://github.com/PyCQA/isort/issues/1518
           args: ["--profile", "black", --line-length=120]
+    - repo: local
+      hooks:
+        - id: pylint
+          name: pylint
+          entry: python
+          language: system
+          args: [ -m, pylint, --rcfile=pylintrc, --output-format=parseable ]
+          types: [python]
+    - repo: local
+      hooks:
+      - id: pylint-dependencies-check
+        name: pylint-dependencies-check
+        entry: python
+        language: system
+        types: [python]
+        args:
+        - "-c"
+        - |
+            import os
+            import sys
+
+            import pkg_resources
+
+            # These are the versions that run in our CI
+            dependencies = [
+                (
+                    "azure-pylint-guidelines-checker",
+                    "0.3.1",
+                    [
+                        "--index-url",
+                        "https://pkgs.dev.azure.com/azure-sdk/public/_packaging/azure-sdk-for-python/pypi/simple/",
+                    ],
+                ),
+                ("pylint", "3.0.3", []),
+            ]
+
+            # Make sure that correct versions are installed
+            for packagename, required_version, install_args in dependencies:
+                try:
+                    version = pkg_resources.get_distribution(packagename).version
+                    assert version == required_version
+                except AssertionError:
+                    print(
+                        f"Version mismatch: Installed version '{version}' of '{packagename}' does not match required version {required_version}"
+                    )
+                except pkg_resources.DistributionNotFound:
+                    print(f"Package '{packagename}' is not installed")
+                else:
+                    continue
+
+                print(f"Please run the following command to install the correct version of {packagename}")
+                print(f"\tpython -m pip install {packagename}=={required_version} {' '.join(install_args)}")
+                sys.exit(1)
diff --git a/pylintrc b/pylintrc
@@ -0,0 +1,57 @@
+[MASTER]
+py-version=3.9
+ignore-patterns=test_*,conftest,setup,.*_utils\.py
+ignore-paths=src\promptflow-evals\tests,src\promptflow-evals\samples,promptflow\evals\evaluate\_telemetry,promptflow\evals\evaluate\_batch_run_client\code_client.py,promptflow\evals\evaluate\_batch_run_client\proxy_client.py
+reports=no
+load-plugins=pylint_guidelines_checker
+
+[MESSAGES CONTROL]
+# For all codes, run 'pylint --list-msgs' or go to 'https://pylint.pycqa.org/en/latest/technical_reference/features.html'
+# locally-disabled: Warning locally suppressed using disable-msg
+# cyclic-import: because of https://github.com/PyCQA/pylint/issues/850
+# too-many-arguments: Due to the nature of the CLI many commands have large arguments set which reflect in large arguments set in corresponding methods.
+# Let's black deal with bad-continuation
+
+# Added disables from super-with-arguments
+disable=useless-object-inheritance,missing-timeout,missing-client-constructor-parameter-kwargs,logging-fstring-interpolation,locally-disabled,fixme,cyclic-import,unnecessary-lambda-assignment,client-method-missing-type-annotations,too-many-arguments,invalid-name,duplicate-code,too-few-public-methods,consider-using-f-string,super-with-arguments,redefined-builtin,import-outside-toplevel,client-suffix-needed,unnecessary-dunder-call,unnecessary-ellipsis,client-paging-methods-use-list,docstring-keyword-should-match-keyword-only,docstring-type-do-not-use-class,client-accepts-api-version-keyword,networking-import-outside-azure-core-transport,protected-access,missing-module-docstring,missing-client-constructor-parameter-credential
+
+[FORMAT]
+max-line-length=120
+
+[VARIABLES]
+# Tells whether we should check for unused import in __init__ files.
+init-import=yes
+
+[DESIGN]
+# Maximum number of locals for function / method body
+max-locals=25
+# Maximum number of branch for function / method body
+max-branches=20
+# Maximum number of instance attributes for class
+max-attributes=10
+# Maximum number of ancestors
+max-parents=15
+
+[SIMILARITIES]
+min-similarity-lines=10
+
+[BASIC]
+# Naming hints based on PEP 8 (https://www.python.org/dev/peps/pep-0008/#naming-conventions).
+# Consider these guidelines and not hard rules. Read PEP 8 for more details.
+
+# The invalid-name checker must be **enabled** for these hints to be used.
+include-naming-hint=yes
+
+module-naming-style=snake_case
+const-naming-style=UPPER_CASE
+class-naming-style=PascalCase
+class-attribute-naming-style=snake_case
+attr-naming-style=snake_case
+method-naming-style=snake_case
+function-naming-style=snake_case
+argument-naming-style=snake_case
+variable-naming-style=snake_case
+inlinevar-naming-style=snake_case
+
+[TYPECHECK]
+generated-members=js.*
diff --git a/src/promptflow-evals/promptflow/evals/_constants.py b/src/promptflow-evals/promptflow/evals/_constants.py
@@ -1,4 +1,10 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+
 class EvaluationMetrics:
+    """Metrics for model evaluation."""
     GPT_GROUNDEDNESS = "gpt_groundedness"
     GPT_RELEVANCE = "gpt_relevance"
     GPT_COHERENCE = "gpt_coherence"
@@ -14,9 +20,10 @@ class EvaluationMetrics:
 
 
 class Prefixes:
-    _INPUTS = "inputs."
-    _OUTPUTS = "outputs."
-    _TGT_OUTPUTS = "__outputs."
+    """Column prefixes for inputs and outputs."""
+    INPUTS = "inputs."
+    OUTPUTS = "outputs."
+    TSG_OUTPUTS = "__outputs."
 
 
 DEFAULT_EVALUATION_RESULTS_FILE_NAME = "evaluation_results.json"

diff --git a/src/promptflow-evals/promptflow/evals/_version.py b/src/promptflow-evals/promptflow/evals/_version.py
@@ -6,7 +6,7 @@
 
 try:
     __version__ = importlib.metadata.version("promptflow-evals")
-except BaseException:
-    __version__ = '0.0.1.dev0'
+except BaseException:  # pylint: disable=broad-exception-caught
+    __version__ = "0.0.1.dev0"
 
 VERSION: str = __version__
diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/batch_run_context.py
@@ -15,7 +15,15 @@
 
 
 class BatchRunContext:
-    def __init__(self, client):
+    """Context manager for batch run clients.
+
+    :param client: The client to run in the context.
+    :type client: Union[
+        ~promptflow.evals.evaluate.code_client.CodeClient,
+        ~promptflow.evals.evaluate.proxy_client.ProxyClient
+    ]
+    """
+    def __init__(self, client) -> None:
         self.client = client
         self._is_timeout_set_by_system = False
 

diff --git a/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/code_client.py b/src/promptflow-evals/promptflow/evals/evaluate/_batch_run_client/code_client.py
@@ -38,7 +38,7 @@ def get_aggregated_metrics(self):
                 if self.aggregated_metrics is not None
                 else None
             )
-        except Exception as ex:
+        except Exception as ex:  # pylint: disable=broad-exception-caught
             LOGGER.debug(f"Error calculating metrics for evaluator {self.evaluator_name}, failed with error {str(ex)}")
             aggregated_metrics = None
 
@@ -107,7 +107,7 @@ def _calculate_aggregations(self, evaluator, run):
                 aggr_func = getattr(evaluator, "__aggregate__")
                 aggregated_output = aggr_func(aggregate_input)
                 return aggregated_output
-        except Exception as ex:
+        except Exception as ex:  # pylint: disable=broad-exception-caught
             LOGGER.warning(
                 f"Error calculating aggregations for evaluator {run.evaluator_name}," f" failed with error {str(ex)}"
             )
@@ -118,11 +118,15 @@ def run(self, flow, data, evaluator_name=None, column_mapping=None, **kwargs):
         if not isinstance(input_df, pd.DataFrame):
             try:
                 json_data = load_jsonl(data)
-            except json.JSONDecodeError:
-                raise ValueError(f"Failed to parse data as JSON: {data}. Please provide a valid json lines data.")
+            except json.JSONDecodeError as exc:
+                raise ValueError(
+                    f"Failed to parse data as JSON: {data}. Please provide a valid json lines data."
+                ) from exc
 
             input_df = pd.DataFrame(json_data)
-        eval_future = self._thread_pool.submit(self._calculate_metric, flow, input_df, column_mapping, evaluator_name)
+        eval_future = self._thread_pool.submit(
+            self._calculate_metric, flow, input_df, column_mapping, evaluator_name
+        )  # pylint: disable=specify-parameter-names-in-call
         run = CodeRun(run=eval_future, input_data=data, evaluator_name=evaluator_name, aggregated_metrics=None)
         aggregation_future = self._thread_pool.submit(self._calculate_aggregations, evaluator=flow, run=run)
         run.aggregated_metrics = aggregation_future
@@ -137,7 +141,7 @@ def get_metrics(self, run):
             aggregated_metrics = run.get_aggregated_metrics()
             print("Aggregated metrics")
             print(aggregated_metrics)
-        except Exception as ex:
+        except Exception as ex:  # pylint: disable=broad-exception-caught
             LOGGER.debug(f"Error calculating metrics for evaluator {run.evaluator_name}, failed with error {str(ex)}")
             return None
         return aggregated_metrics