Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## [Unreleased]

### Added

- `dp lint` command that uses [SQLFluff](https://github.com/sqlfluff/sqlfluff) to lint models and tests

## [0.19.0] - 2022-04-25

### Added
Expand Down
2 changes: 2 additions & 0 deletions data_pipelines_cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .cli_commands.docs import docs_command
from .cli_commands.generate.generate import generate_group
from .cli_commands.init import init_command
from .cli_commands.lint import lint_command
from .cli_commands.prepare_env import prepare_env_command
from .cli_commands.publish import publish_command
from .cli_commands.run import run_command
Expand Down Expand Up @@ -43,6 +44,7 @@ def cli() -> None:
_cli.add_command(docs_command)
_cli.add_command(generate_group)
_cli.add_command(init_command)
_cli.add_command(lint_command)
_cli.add_command(list_templates_command)
_cli.add_command(prepare_env_command)
_cli.add_command(publish_command)
Expand Down
160 changes: 160 additions & 0 deletions data_pipelines_cli/cli_commands/lint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
import pathlib
import tempfile
from configparser import ConfigParser
from typing import Any, List

import click
import yaml

from ..cli_constants import BUILD_DIR
from ..cli_utils import echo_info, echo_subinfo, echo_warning, subprocess_run
from ..config_generation import (
generate_profiles_yml,
read_dictionary_from_config_directory,
)
from ..dbt_utils import read_dbt_vars_from_configs
from ..errors import SQLLintError, SubprocessNonZeroExitError

SQLFLUFF_FIX_NOT_EVERYTHING_ERROR = 1
SQLFLUFF_LINT_ERROR = 65 # according to `sqlfluff.core.linter.LintingResult.stats`
SQLFLUFF_DIALECT_LOADING_ERROR = 66 # according to `sqlfluff.cli.commands.get_config`


def _get_dialect_or_default() -> str:
"""Read ``dbt.yml`` config file and return its ``target_type`` or just the ``ansi``."""
env, dbt_filename = "base", "dbt.yml"
dbt_env_config = read_dictionary_from_config_directory(
BUILD_DIR.joinpath("dag"), env, dbt_filename
) or read_dictionary_from_config_directory(pathlib.Path.cwd(), env, dbt_filename)
try:
dialect = dbt_env_config["target_type"]
echo_subinfo(f'Found target_type "{dialect}", attempting to use it as the SQL dialect.')
except KeyError:
dialect = "ansi"
echo_warning(
'Could not find `target_type` in `dbt.yml`. Using the default SQL dialect ("ansi").'
)
return dialect


def _get_source_tests_paths() -> List[pathlib.Path]:
with open(pathlib.Path.cwd().joinpath("dbt_project.yml"), "r") as f:
dbt_project_config = yaml.safe_load(f)
dir_names: List[str] = (
dbt_project_config.get("source-paths", [])
+ dbt_project_config.get("model-paths", [])
+ dbt_project_config.get("test-paths", [])
)
return list(map(lambda dir_name: pathlib.Path.cwd().joinpath(dir_name), dir_names))


def _insert_into_config_section(config: ConfigParser, section: str, key: str, value: Any) -> None:
if section not in config:
config[section] = {}
config[section][key] = value


def _create_temporary_sqlfluff_config(env: str) -> ConfigParser:
sqlfluff_config_path = pathlib.Path.cwd().joinpath(".sqlfluff")
config = ConfigParser()
if sqlfluff_config_path.exists():
config.read(sqlfluff_config_path)

_insert_into_config_section(config, "sqlfluff", "templater", "dbt")
_insert_into_config_section(
config,
"sqlfluff:templater:dbt",
"profiles_dir",
str(generate_profiles_yml(env, copy_config_dir=True).absolute()),
)
config["sqlfluff:templater:dbt:context"] = read_dbt_vars_from_configs(env)

return config


def _run_sqlfluff(command: str, dialect: str, env: str, additional_args: List[str]) -> None:
with tempfile.TemporaryDirectory() as tmp_dir:
tmp_config_path = pathlib.Path(tmp_dir).joinpath("sqlfluff.config")
with open(tmp_config_path, "w") as tmp_config:
_create_temporary_sqlfluff_config(env).write(tmp_config)

def sqlfluff_args(sql_dialect: str) -> List[str]:
return [
"sqlfluff",
command,
"--dialect",
sql_dialect,
"--config",
str(tmp_config_path),
*additional_args,
*map(str, _get_source_tests_paths()),
]

try:
subprocess_run(sqlfluff_args(dialect))
except SubprocessNonZeroExitError as err:
if err.exit_code == SQLFLUFF_DIALECT_LOADING_ERROR and dialect != "ansi":
subprocess_run(sqlfluff_args("ansi"))
else:
raise err


def _run_fix_sqlfluff(dialect: str, env: str) -> None:
try:
echo_subinfo("Attempting to fix SQLs. Not every error can be automatically fixed.")
_run_sqlfluff("fix", dialect, env, ["--force"])
except SubprocessNonZeroExitError as err:
if err.exit_code != SQLFLUFF_FIX_NOT_EVERYTHING_ERROR:
raise err


def _run_lint_sqlfluff(dialect: str, env: str) -> None:
try:
echo_subinfo("Linting SQLs.")
_run_sqlfluff("lint", dialect, env, [])
except SubprocessNonZeroExitError as err:
if err.exit_code == SQLFLUFF_LINT_ERROR:
raise SQLLintError
else:
raise err


def lint(fix: bool, env: str) -> None:
"""
Lint and format SQL.

:param fix: Whether to lint and fix linting errors, or just lint.
:type fix: bool
:param env: Name of the environment
:type env: str
"""
echo_info("Linting SQLs:")
dialect = _get_dialect_or_default()
if fix:
_run_fix_sqlfluff(dialect, env)
_run_lint_sqlfluff(dialect, env)


@click.command(
name="lint",
short_help="Lint and format SQL",
help="Lint and format SQL using SQLFluff.\n\n"
"For more information on rules and the workings of SQLFluff, "
"refer to https://docs.sqlfluff.com/",
)
@click.option(
"--no-fix",
is_flag=True,
default=False,
type=bool,
help="Whether to lint and fix linting errors, or just lint.",
)
@click.option(
"--env",
default="local",
type=str,
show_default=True,
help="Name of the environment",
)
def lint_command(no_fix: bool, env: str) -> None:
lint(not no_fix, env)
10 changes: 10 additions & 0 deletions data_pipelines_cli/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,16 @@ def __init__(self, project_path: str) -> None:
class SubprocessNonZeroExitError(DataPipelinesError):
"""Exception raised if subprocess exits with non-zero exit code"""

exit_code: int

def __init__(
self, subprocess_name: str, exit_code: int, subprocess_output: Optional[str] = None
) -> None:
super().__init__(
f"{subprocess_name} has exited with non-zero exit code: {exit_code}",
submessage=subprocess_output,
)
self.exit_code = exit_code


class SubprocessNotFound(DataPipelinesError):
Expand Down Expand Up @@ -91,3 +94,10 @@ class DockerErrorResponseError(DataPipelinesError):

def __init__(self, error_msg: str) -> None:
super().__init__("Error raised when using Docker.\n" + error_msg)


class SQLLintError(DataPipelinesError):
"""Exception raised if there are linting problems in some files."""

def __init__(self) -> None:
super().__init__("Fix SQL linting errors.")
2 changes: 2 additions & 0 deletions docs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@ sphinx-click>=4.0,<4.1
myst-parser>=0.17, <0.18
docutils>=0.17,<0.18
GitPython==3.1.26
git+https://github.com/swtwsk/copier@regex-update

9 changes: 8 additions & 1 deletion docs/source/data_pipelines_cli.cli_commands.rst
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,14 @@ data\_pipelines\_cli.cli\_commands.init module
:undoc-members:
:show-inheritance:

data\_pipelines\_cli.cli\_commands.lint module
----------------------------------------------

.. automodule:: data_pipelines_cli.cli_commands.lint
:members:
:undoc-members:
:show-inheritance:

data\_pipelines\_cli.cli\_commands.prepare\_env module
------------------------------------------------------

Expand Down Expand Up @@ -119,4 +127,3 @@ data\_pipelines\_cli.cli\_commands.update module
:members:
:undoc-members:
:show-inheritance:

6 changes: 4 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,11 @@
"questionary==1.10.0",
"pyyaml>=5.1, <6.0",
"types-PyYAML>=6.0",
"copier==5.1.0",
# due to the 'regex' conflict between copier and sqlfluff:
"copier @ git+https://github.com/swtwsk/copier@regex-update",
Comment on lines -15 to +16
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To remove when copier is updated

"Jinja2>=2.11,<2.12",
"fsspec",
"packaging>=20.4,<21.0",
"packaging>=20.9,<21.0",
]

EXTRA_FILESYSTEMS_REQUIRE = {
Expand All @@ -33,6 +34,7 @@
"docker": ["docker>=5.0"],
"datahub": ["acryl-datahub>=0.8.17, <0.8.18"],
"git": ["GitPython==3.1.26"],
"lint": ["sqlfluff>=0.12.0,<1.0", "sqlfluff-templater-dbt>=0.12.0,<1.0"],
"tests": [
"pytest>=6.2.2, <7.0.0",
"pytest-cov>=2.8.0, <3.0.0",
Expand Down
Loading