diff --git a/crates/lib/Cargo.toml b/crates/lib/Cargo.toml
index 1cf3e76bd..958bd0b6d 100644
--- a/crates/lib/Cargo.toml
+++ b/crates/lib/Cargo.toml
@@ -34,6 +34,7 @@ name = "depth_map"
 harness = false
 
 [features]
+default = ["python"]
 serde = ["dep:serde"]
 stringify = ["dep:serde_yaml", "serde"]
 python = ["pyo3"]
diff --git a/crates/lib/src/templaters.rs b/crates/lib/src/templaters.rs
index 3fd5a5649..e542c78af 100644
--- a/crates/lib/src/templaters.rs
+++ b/crates/lib/src/templaters.rs
@@ -6,9 +6,13 @@ use crate::core::config::FluffConfig;
 use crate::templaters::placeholder::PlaceholderTemplater;
 use crate::templaters::raw::RawTemplater;
 
+#[cfg(feature = "python")]
+use crate::templaters::jinja::JinjaTemplater;
 #[cfg(feature = "python")]
 use crate::templaters::python::PythonTemplater;
 
+#[cfg(feature = "python")]
+pub mod jinja;
 pub mod placeholder;
 #[cfg(feature = "python")]
 pub mod python;
@@ -21,6 +25,7 @@ pub fn templaters() -> Vec<Box<dyn Templater>> {
         Box::new(RawTemplater),
         Box::new(PlaceholderTemplater),
         Box::new(PythonTemplater),
+        Box::new(JinjaTemplater),
     ]
 }
 
diff --git a/crates/lib/src/templaters/jinja.rs b/crates/lib/src/templaters/jinja.rs
new file mode 100644
index 000000000..6bb2b64ea
--- /dev/null
+++ b/crates/lib/src/templaters/jinja.rs
@@ -0,0 +1,91 @@
+use super::python::PythonTemplatedFile;
+use super::Templater;
+use pyo3::prelude::*;
+use pyo3::types::{PyDict, PySlice};
+use pyo3::{Py, PyAny, Python};
+use sqruff_lib_core::errors::SQLFluffUserError;
+use sqruff_lib_core::templaters::base::TemplatedFile;
+
+/// Templater named `jinja` that delegates rendering to embedded Python code.
+pub struct JinjaTemplater;
+
+// NOTE(review): this embeds `python_templater.py`, not the `jinja_templater.py`
+// added in the same change; confirm which script is meant to supply
+// `process_from_rust` for this templater.
+const JINJA_FILE: &str = include_str!("python_templater.py");
+
+impl Templater for JinjaTemplater {
+    /// Returns the name of this templater, `"jinja"`.
+    fn name(&self) -> &'static str {
+        "jinja"
+    }
+
+    /// Returns a one-line, human-readable summary of this templater.
+    fn description(&self) -> &'static str {
+        "Renders Jinja-templated SQL by calling into an embedded Python interpreter."
+    }
+
+    /// Templates `in_str` by calling `process_from_rust` in the embedded Python module.
+    ///
+    /// `config` and `formatter` are not used yet; an empty dict is passed to
+    /// Python as the third positional argument.
+    ///
+    /// # Errors
+    ///
+    /// Returns a [`SQLFluffUserError`] when the Python module cannot be loaded,
+    /// the call raises, or the result cannot be extracted.
+    fn process(
+        &self,
+        in_str: &str,
+        f_name: &str,
+        config: Option<&crate::core::config::FluffConfig>,
+        formatter: Option<&crate::cli::formatters::OutputStreamFormatter>,
+    ) -> Result<TemplatedFile, SQLFluffUserError> {
+        let templated_file = Python::with_gil(|py| -> PyResult<TemplatedFile> {
+            let fun: Py<PyAny> = PyModule::from_code_bound(py, JINJA_FILE, "", "")?
+                .getattr("process_from_rust")?
+                .into();
+
+            // pass object with Rust tuple of positional arguments
+            let py_dict = PyDict::new_bound(py);
+            let args = (in_str.to_string(), f_name.to_string(), py_dict);
+            let returned = fun.call1(py, args);
+
+            // Parse the returned value
+            let returned = returned?;
+            let templated_file: PythonTemplatedFile = returned.extract(py)?;
+            Ok(templated_file.to_templated_file())
+        })
+        .map_err(|e| SQLFluffUserError::new(format!("Python templater error: {:?}", e)))?;
+
+        Ok(templated_file)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::core::config::FluffConfig;
+
+    use super::*;
+
+    const JINJA_STRING: &str = "SELECT * FROM {% for c in blah %}{{c}}{% if not loop.last %}, {% endif %}{% endfor %} WHERE {{condition}}\n\n";
+
+    #[test]
+    fn test_jinja_templater() {
+        let source = r"
+    [sqruff]
+    templater = jinja
+        ";
+        let config = FluffConfig::from_source(source);
+        let templater = JinjaTemplater;
+
+        let processed = templater
+            .process(JINJA_STRING, "test.sql", Some(&config), None)
+            .unwrap();
+
+        assert_eq!(
+            processed.to_string(),
+            "SELECT * FROM f, o, o WHERE a < 10\n\n"
+        )
+    }
+}
diff --git a/crates/lib/src/templaters/jinja_templater.py b/crates/lib/src/templaters/jinja_templater.py
new file mode 100644
index 000000000..32e1a3d40
--- /dev/null
+++ b/crates/lib/src/templaters/jinja_templater.py
@@ -0,0 +1,1227 @@
+"""Defines the templaters."""
+
+import copy
+import logging
+import os.path
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
+    Set,
+    Tuple,
+    Type,
+    Union,
+    cast,
+)
+
+import jinja2.nodes
+import jinja2.parser
+from jinja2 import (
+    Environment,
+    FileSystemLoader,
+    TemplateError,
+    TemplateSyntaxError,
+    meta,
+)
+from jinja2.exceptions import TemplateNotFound, UndefinedError
+from jinja2.ext import Extension
+from jinja2.sandbox import 
SandboxedEnvironment
+
+from templaters.python_templater import PythonTemplater, SQLTemplaterError, TemplatedFile, RawFileSlice, \
+    TemplatedFileSlice
+
+if TYPE_CHECKING:  # pragma: no cover
+    from jinja2.runtime import Macro
+
+# Instantiate the templater logger
+templater_logger = logging.getLogger("sqlfluff.templater")
+
+
+def is_zero_slice(s: slice) -> bool:
+    """Return true if this is a zero slice (its start equals its stop)."""
+    is_zero: bool = s.stop == s.start
+    return is_zero
+
+class UndefinedRecorder:
+    """Similar to jinja2.StrictUndefined, but remembers, not fails."""
+
+    # Tell Jinja this object is safe to call and does not alter data.
+    # https://jinja.palletsprojects.com/en/3.0.x/sandbox/#jinja2.sandbox.SandboxedEnvironment.is_safe_callable
+    unsafe_callable = False
+    alters_data = False
+
+    def __init__(self, name: str, undefined_set: Set[str]) -> None:
+        self.name = name
+        # Reference to the undefined set to modify. It is assumed that the
+        # calling code keeps a reference to this variable so it can
+        # continue to access it after modification by this class.
+        self.undefined_set = undefined_set
+
+    def __str__(self) -> str:
+        """Treat undefined vars as empty, but remember for later."""
+        self.undefined_set.add(self.name)
+        return ""
+
+    def __getattr__(self, item: str) -> "UndefinedRecorder":
+        """Don't fail on attribute access, remember instead."""
+        self.undefined_set.add(self.name)
+        return UndefinedRecorder(f"{self.name}.{item}", self.undefined_set)
+
+    def __getitem__(self, item: str) -> "UndefinedRecorder":
+        """Don't fail when subscripted, remember instead."""
+        self.undefined_set.add(self.name)
+        return UndefinedRecorder(f"{self.name}.{item}", self.undefined_set)
+
+    def __call__(self, *args: Any, **kwargs: Any) -> "UndefinedRecorder":
+        """Don't fail when called unlike parent class."""
+        return UndefinedRecorder(f"{self.name}()", self.undefined_set)
+
+    def __iter__(self) -> Iterator["UndefinedRecorder"]:
+        """Don't fail when iterated, remember instead."""
+        self.undefined_set.add(self.name)
+        yield UndefinedRecorder(f"iter({self.name})", self.undefined_set)
+
+
+class JinjaTemplater(PythonTemplater):
+    """A templater using the jinja2 library.
+
+    See: https://jinja.palletsprojects.com/
+    """
+
+    class Libraries:
+        """Mock namespace for user-defined Jinja library."""
+
+        pass
+
+    @staticmethod
+    def _extract_macros_from_template(
+        template: str, env: Environment, ctx: Dict[str, Any]
+    ) -> Dict[str, "Macro"]:
+        """Take a template string and extract any macros from it.
+
+        Lovingly inspired by http://codyaray.com/2015/05/auto-load-jinja2-macros
+
+        Raises:
+            TemplateSyntaxError: If the macro we try to load has invalid
+                syntax. We assume that outer functions will catch this
+                exception and handle it appropriately.
+        """
+        from jinja2.runtime import Macro  # noqa
+
+        # Iterate through keys exported from the loaded template string
+        context: Dict[str, Macro] = {}
+        # NOTE: `env.from_string()` will raise TemplateSyntaxError if `template`
+        # is invalid.
+        macro_template = env.from_string(template, globals=ctx)
+
+        # This is kind of low level and hacky but it works
+        try:
+            for k in macro_template.module.__dict__:
+                attr = getattr(macro_template.module, k)
+                # Is it a macro? If so, install it under the name of the macro
+                if isinstance(attr, Macro):
+                    context[k] = attr
+        except UndefinedError:
+            # This occurs if any file in the macro path references an
+            # undefined Jinja variable. It's safe to ignore this. Any
+            # meaningful issues will surface later at linting time.
+            pass
+        # Return the context
+        return context
+
+    @classmethod
+    def _extract_macros_from_path(
+        cls,
+        path: List[str],
+        env: Environment,
+        ctx: Dict[str, Any],
+        exclude_paths: Optional[List[str]] = None,
+    ) -> Dict[str, "Macro"]:
+        """Take a list of paths and extract macros from them.
+
+        Args:
+            path (List[str]): A list of paths.
+            env (Environment): The environment object.
+            ctx (Dict): The context dictionary.
+            exclude_paths (Optional[List[str]]): A list of paths to exclude
+
+        Returns:
+            dict: A dictionary containing the extracted macros.
+
+        Raises:
+            ValueError: If a path does not exist.
+            SQLTemplaterError: If there is an error in the Jinja macro file.
+        """
+        macro_ctx: Dict[str, "Macro"] = {}
+        for path_entry in path:
+            # Does it exist? It should as this check was done on config load.
+            if not os.path.exists(path_entry):
+                raise ValueError(f"Path does not exist: {path_entry}")
+
+            if os.path.isfile(path_entry):
+                if exclude_paths:
+                    if cls._exclude_macros(
+                        macro_path=path_entry, exclude_macros_path=exclude_paths
+                    ):
+                        continue
+                # It's a file. Extract macros from it.
+                with open(path_entry) as opened_file:
+                    template = opened_file.read()
+                # Update the context with macros from the file.
+                try:
+                    macro_ctx.update(
+                        cls._extract_macros_from_template(template, env=env, ctx=ctx)
+                    )
+                except TemplateSyntaxError as err:
+                    raise SQLTemplaterError(
+                        f"Error in Jinja macro file {os.path.relpath(path_entry)}: "
+                        f"{err.message}",
+                        line_no=err.lineno,
+                        line_pos=1,
+                    ) from err
+            else:
+                # It's a directory. Walk it and recurse into every `.sql` file.
+                for dirpath, _, files in os.walk(path_entry):
+                    for fname in files:
+                        if fname.endswith(".sql"):
+                            macro_ctx.update(
+                                cls._extract_macros_from_path(
+                                    [os.path.join(dirpath, fname)],
+                                    env=env,
+                                    ctx=ctx,
+                                    exclude_paths=exclude_paths,
+                                )
+                            )
+        return macro_ctx
+
+    # TODO Potentially reimplement
+    # def _extract_macros_from_config(
+    #     self, config: FluffConfig, env: Environment, ctx: Dict[str, Any]
+    # ) -> Dict[str, "Macro"]:
+    #     """Take a config and load any macros from it.
+    #
+    #     Args:
+    #         config: The config to extract macros from.
+    #         env: The environment.
+    #         ctx: The context.
+    #
+    #     Returns:
+    #         dict: A dictionary containing the extracted macros.
+    #     """
+    #     if config:
+    #         loaded_context = (
+    #             config.get_section((self.templater_selector, self.name, "macros")) or {}
+    #         )
+    #     else:  # pragma: no cover TODO?
+    #         loaded_context = {}
+    #
+    #     # Iterate to load macros
+    #     macro_ctx: Dict[str, "Macro"] = {}
+    #     for value in loaded_context.values():
+    #         try:
+    #             macro_ctx.update(
+    #                 self._extract_macros_from_template(value, env=env, ctx=ctx)
+    #             )
+    #         except TemplateSyntaxError as err:
+    #             raise SQLFluffUserError(
+    #                 f"Error loading user provided macro:\n`{value}`\n> {err}."
+    #             )
+    #     return macro_ctx
+
+    # def _extract_libraries_from_config(self, config: FluffConfig) -> Dict[str, Any]:
+    #     """Extracts libraries from the given configuration.
+    #
+    #     This function iterates over the modules in the library path and
+    #     imports them dynamically. The imported modules are then added to a 'Libraries'
+    #     object, which is returned as a dictionary excluding magic methods.
+ # + # Args: + # config: The configuration object. + # + # Returns: + # dict: A dictionary containing the extracted libraries. + # """ + # # If a more global library_path is set, let that take precedence. + # library_path = config.get("library_path") or config.get_section( + # ( + # self.templater_selector, + # self.name, + # "library_path", + # ) + # ) + # if not library_path: + # return {} + # + # libraries = JinjaTemplater.Libraries() + # + # # If library_path has __init__.py we parse it as one module, else we parse it + # # a set of modules + # is_library_module = os.path.exists(os.path.join(library_path, "__init__.py")) + # library_module_name = os.path.basename(library_path) + # + # # Need to go one level up to parse as a module correctly + # walk_path = ( + # os.path.join(library_path, "..") if is_library_module else library_path + # ) + # + # for module_finder, module_name, _ in pkgutil.walk_packages([walk_path]): + # # skip other modules that can be near module_dir + # if is_library_module and not module_name.startswith(library_module_name): + # continue + # + # # import_module is deprecated as of python 3.4. This follows roughly + # # the guidance of the python docs: + # # https://docs.python.org/3/library/importlib.html#approximating-importlib-import-module + # spec = module_finder.find_spec(module_name, None) + # assert ( + # spec + # ), f"Module {module_name} failed to be found despite being listed." + # module = importlib.util.module_from_spec(spec) + # sys.modules[module_name] = module + # assert spec.loader, f"Module {module_name} missing expected loader." + # spec.loader.exec_module(module) + # + # if "." 
in module_name:  # nested modules have `.` in module_name
+    #             *module_path, last_module_name = module_name.split(".")
+    #             # find parent module recursively
+    #             parent_module = reduce(
+    #                 lambda res, path_part: getattr(res, path_part),
+    #                 module_path,
+    #                 libraries,
+    #             )
+    #
+    #             # set attribute on module object to make jinja working correctly
+    #             setattr(parent_module, last_module_name, module)
+    #         else:
+    #             # set attr on `libraries` obj to make it work in jinja nicely
+    #             setattr(libraries, module_name, module)
+    #
+    #     if is_library_module:
+    #         # when library is module we have one more root module in hierarchy and we
+    #         # remove it
+    #         libraries = getattr(libraries, library_module_name)
+    #
+    #     # remove magic methods from result
+    #     return {k: v for k, v in libraries.__dict__.items() if not k.startswith("__")}
+
+    @classmethod
+    def _crawl_tree(
+        cls, tree: jinja2.nodes.Node, variable_names: Set[str], raw: str
+    ) -> Iterator[SQLTemplaterError]:
+        """Yield an error for each Name node in the tree whose name is in `variable_names`."""
+        # First iterate through children (depth-first, children before self)
+        for elem in tree.iter_child_nodes():
+            yield from cls._crawl_tree(elem, variable_names, raw)
+        # Then assess self; line_pos is the 1-based column of the first match on that line
+        if (
+            isinstance(tree, jinja2.nodes.Name)
+            and getattr(tree, "name") in variable_names
+        ):
+            line_no: int = getattr(tree, "lineno")
+            tree_name: str = getattr(tree, "name")
+            line = raw.split("\n")[line_no - 1]
+            pos = line.index(tree_name) + 1
+            yield SQLTemplaterError(
+                f"Undefined jinja template variable: {tree_name!r}",
+                line_no=line_no,
+                line_pos=pos,
+            )
+
+    # def _get_jinja_env(self, config: Optional[FluffConfig] = None) -> Environment:
+    #     """Get a properly configured jinja environment.
+    #
+    #     This method returns a properly configured jinja environment. It
+    #     first checks if the 'ignore' key is present in the config dictionary and
+    #     if it contains the value 'templating'. 
If so, it creates a subclass of + # FileSystemLoader called SafeFileSystemLoader that overrides the + # get_source method to handle missing templates when templating is ignored. + # If 'ignore' is not present or does not contain 'templating', it uses the + # regular FileSystemLoader. It then sets the extensions to ['jinja2.ext.do'] + # and adds the DBTTestExtension if the _apply_dbt_builtins method returns + # True. Finally, it returns a SandboxedEnvironment object with the + # specified settings. + # + # Args: + # config (dict, optional): A dictionary containing configuration settings. + # + # Returns: + # jinja2.Environment: A properly configured jinja environment. + # """ + # loader: Optional[FileSystemLoader] + # macros_path = self._get_macros_path(config, "load_macros_from_path") + # loader_search_path = self._get_loader_search_path(config) + # final_search_path = (loader_search_path or []) + (macros_path or []) + # + # ignore_templating = config and "templating" in config.get("ignore") + # if ignore_templating: + # + # class SafeFileSystemLoader(FileSystemLoader): + # def get_source( + # self, environment: Environment, name: str + # ) -> Tuple[str, str, Callable[..., Any]]: + # try: + # if not isinstance(name, DummyUndefined): + # return super().get_source(environment, name) + # raise TemplateNotFound(str(name)) + # except TemplateNotFound: + # # When ignore=templating is set, treat missing files + # # or attempts to load an "Undefined" file as the first + # # 'base' part of the name / filename rather than failing. 
+ # templater_logger.debug( + # "Providing dummy contents for Jinja macro file: %s", name + # ) + # value = os.path.splitext(os.path.basename(str(name)))[0] + # return value, f"{value}.sql", lambda: False + # + # loader = SafeFileSystemLoader(final_search_path or []) + # else: + # loader = FileSystemLoader(final_search_path) if final_search_path else None + # extensions: List[Union[str, Type[Extension]]] = ["jinja2.ext.do"] + # if self._apply_dbt_builtins(config): + # extensions.append(DBTTestExtension) + # + # return SandboxedEnvironment( + # # We explicitly want to preserve newlines. + # keep_trailing_newline=True, + # # The do extension allows the "do" directive + # autoescape=False, + # extensions=extensions, + # loader=loader, + # ) + + # def _get_macros_path( + # self, config: Optional[FluffConfig], key: str + # ) -> Optional[List[str]]: + # """Get the list of macros paths from the provided config object. + # + # This method searches for a config section specified by the + # templater_selector, name, and key specified. If the section is + # found, it retrieves the value associated with that section and splits it into + # a list of strings using a comma as the delimiter. The resulting list is + # stripped of whitespace and empty strings and returned. If the section is not + # found or the resulting list is empty, it returns None. + # + # Args: + # config (FluffConfig): The config object to search for the macros path + # section. + # key (str): Key to load the macros path from the config file. + # Also used for loading the excluding macros path from config. + # + # Returns: + # Optional[List[str]]: The list of macros paths if found, None otherwise. 
+ # """ + # if config: + # macros_path = config.get_section((self.templater_selector, self.name, key)) + # if macros_path: + # result = [s.strip() for s in macros_path.split(",") if s.strip()] + # if result: + # return result + # return None + + # def _get_loader_search_path( + # self, config: Optional[FluffConfig] + # ) -> Optional[List[str]]: + # """Get the list of Jinja loader search paths from the provided config object. + # + # This method searches for a config section specified by the + # templater_selector, name, and 'loader_search_path' keys. If the section is + # found, it retrieves the value associated with that section and splits it into + # a list of strings using a comma as the delimiter. The resulting list is + # stripped of whitespace and empty strings and returned. If the section is not + # found or the resulting list is empty, it returns None. + # + # Args: + # config (FluffConfig): The config object to search for the loader search + # path section. + # + # Returns: + # Optional[List[str]]: The list of loader search paths if found, None + # otherwise. + # """ + # if config: + # loader_search_path = config.get_section( + # ( + # self.templater_selector, + # self.name, + # "loader_search_path", + # ) + # ) + # if loader_search_path: + # result = [s.strip() for s in loader_search_path.split(",") if s.strip()] + # if result: + # return result + # return None + + # def _get_jinja_analyzer(self, raw_str: str, env: Environment) -> JinjaAnalyzer: + # """Creates a new object derived from JinjaAnalyzer. + # + # Derived classes can provide their own analyzers (e.g. to support custom Jinja + # tags). + # """ + # return JinjaAnalyzer(raw_str, env) + + # def _apply_dbt_builtins(self, config: Optional[FluffConfig]) -> bool: + # """Check if dbt builtins should be applied from the provided config object. + # + # This method searches for a config section specified by the + # templater_selector, name, and 'apply_dbt_builtins' keys. 
If the section + # is found, it returns the value associated with that section. If the + # section is not found, it returns False. + # + # Args: + # config (FluffConfig): The config object to search for the apply_dbt_builtins + # section. + # + # Returns: + # bool: True if dbt builtins should be applied, False otherwise. + # """ + # if config: + # apply_dbt_builtins = config.get_section( + # ( + # self.templater_selector, + # self.name, + # "apply_dbt_builtins", + # ) + # ) + # # If the config is totally absent for this templater, default to False, + # # but for any other value that isn't boolean, throw an error. + # if apply_dbt_builtins is None: + # apply_dbt_builtins = False + # assert isinstance(apply_dbt_builtins, bool), ( + # f"`apply_dbt_builtins` for {self.templater_selector}.{self.name} " + # f"must be True/False, not {apply_dbt_builtins!r}" + # ) + # return apply_dbt_builtins + # return False + + # def _get_env_context( + # self, + # fname: Optional[str], + # config: Optional[FluffConfig], + # env: Environment, + # ) -> Dict[str, Any]: + # """Get the templating context from the config. + # + # NOTE: This closely mirrors the `get_context` method which we inherit from the + # python templater, but extends the signature. For that reason we define a new + # method here, which internally refers to `get_context`. + # + # Args: + # fname (str, optional): The name of the file. + # config (dict, optional): The configuration. + # env: The Jinja Environment. + # + # Returns: + # dict: The templating context. + # """ + # # Load the context + # live_context = self.get_context(fname, config) + # # Apply dbt builtin functions if we're allowed. 
+ # if config: + # # first make libraries available in the context + # # so they can be used by the macros too + # libraries = self._extract_libraries_from_config(config=config) + # live_context.update(libraries) + # + # jinja_filters = libraries.get("SQLFLUFF_JINJA_FILTERS") + # if jinja_filters: + # env.filters.update(jinja_filters) + # + # if self._apply_dbt_builtins(config): + # for name in DBT_BUILTINS: + # # Only apply if it hasn't already been set at this stage. + # if name not in live_context: + # live_context[name] = DBT_BUILTINS[name] + # + # # Load macros from path (if applicable) + # if config: + # macros_path = self._get_macros_path(config, "load_macros_from_path") + # exclude_macros_path = self._get_macros_path( + # config, "exclude_macros_from_path" + # ) + # if macros_path: + # live_context.update( + # self._extract_macros_from_path( + # macros_path, + # env=env, + # ctx=live_context, + # exclude_paths=exclude_macros_path, + # ) + # ) + # + # # Load config macros, these will take precedence over macros from the path + # live_context.update( + # self._extract_macros_from_config( + # config=config, env=env, ctx=live_context + # ) + # ) + # + # return live_context + + # def construct_render_func( + # self, fname: Optional[str] = None, + # # config: Optional[FluffConfig] = None + # ) -> Tuple[Environment, Dict[str, Any], Callable[[str], str]]: + # """Builds and returns objects needed to create and run templates. + # + # Args: + # fname (Optional[str]): The name of the file. + # config (Optional[dict]): The configuration settings. + # + # Returns: + # Tuple[Environment, dict, Callable[[str], str]]: A tuple + # containing the following: + # - env (Environment): An instance of the 'Environment' class. + # - live_context (dict): A dictionary containing the live context. + # - render_func (Callable[[str], str]): A callable function + # that is used to instantiate templates. 
+    #     """
+    #     # Load the context
+    #     env = self._get_jinja_env(config)
+    #     live_context = self._get_env_context(fname, config, env)
+    #
+    #     def render_func(in_str: str) -> str:
+    #         """Used by JinjaTracer to instantiate templates.
+    #
+    #         This function is a closure capturing internal state from process().
+    #         Note that creating templates involves quite a bit of state known to
+    #         _this_ function but not to JinjaTracer.
+    #
+    #         https://www.programiz.com/python-programming/closure
+    #         """
+    #         try:
+    #             template = env.from_string(in_str, globals=live_context)
+    #         except TemplateSyntaxError as err:  # pragma: no cover
+    #             # NOTE: If the template fails to parse, then this clause
+    #             # will be triggered. However in normal that should never
+    #             # happen because the template should already have been
+    #             # validated by the point this is called. Typically that
+    #             # happens when searching for undefined variables.
+    #             raise SQLTemplaterError(
+    #                 f"Late failure to parse jinja template: {err}.",
+    #                 line_no=err.lineno,
+    #             )
+    #         return template.render()
+    #
+    #     return env, live_context, render_func
+
+    def _generate_violations_for_undefined_variables(
+        self,
+        in_str: str,
+        syntax_tree: jinja2.nodes.Template,
+        undefined_variables: Set[str],
+    ) -> List[SQLTemplaterError]:
+        """Generates violations for any undefined variables."""
+        violations: List[SQLTemplaterError] = []
+        if undefined_variables:
+            # Go through and find out where they are:
+            for template_err_val in self._crawl_tree(
+                syntax_tree, undefined_variables, in_str
+            ):
+                violations.append(template_err_val)
+        return violations
+
+    @staticmethod
+    def _init_undefined_tracking(
+        live_context: Dict[str, Any],
+        potentially_undefined_variables: Iterable[str],
+        ignore_templating: bool = False,
+    ) -> Set[str]:
+        """Sets up tracing of undefined template variables.
+
+        NOTE: This works by mutating the `live_context` which
+        is being used by the environment.
+        """
+        # NOTE: This set is modified by the `UndefinedRecorder` when run.
+        undefined_variables: Set[str] = set()
+
+        for val in potentially_undefined_variables:
+            if val not in live_context:
+                if ignore_templating:
+                    live_context[val] = DummyUndefined.create(val)
+                else:
+                    live_context[val] = UndefinedRecorder(val, undefined_variables)
+
+        return undefined_variables
+
+    def process(
+        self,
+        *,
+        in_str: str,
+        fname: str,
+        context: Optional[Dict[str, Any]] = None,
+        config: Optional["FluffConfig"] = None,
+        # formatter: Optional[FormatterInterface] = None,
+    ) -> Tuple[TemplatedFile, List[SQLTemplaterError]]:
+        """Process a string and return the new string.
+
+        Note that the arguments are enforced as keywords
+        because Templaters can have differences in their `process`
+        method signature. A Templater that only supports reading
+        from a file would need the following signature:
+            process(*, fname, in_str=None, config=None)
+            (arguments are swapped)
+
+        Args:
+            in_str (str): The input string.
+            fname (str, optional): The filename of this string. This is
+                mostly for loading config files at runtime.
+            config (FluffConfig): A specific config to use for this
+                templating operation. Only necessary for some templaters.
+            formatter (CallbackFormatter): Not accepted yet (commented out above).
+
+        Raises:
+            ValueError: Not raised at present; the config check below is disabled.
+            SQLTemplaterError: If templating fails fatally, then this method
+                should raise a :obj:`SQLTemplaterError` instead which will be
+                caught and displayed appropriately.
+
+        Returns:
+            Tuple[TemplatedFile, List[SQLTemplaterError]]: A tuple containing the
+                templated file and a list of violations.
+        """
+        # if not config:  # pragma: no cover
+        #     raise ValueError(
+        #         "For the jinja templater, the `process()` method requires a config "
+        #         "object."
+        #     )
+
+        env, live_context, render_func = self.construct_render_func(
+            fname=fname, config=config
+        )
+
+        # Attempt to identify any undeclared variables or syntax errors.
+        # The majority of variables will be found during the _crawl_tree
+        # step rather than this first Exception which serves only to catch
+        # catastrophic errors.
+        try:
+            syntax_tree = env.parse(in_str)
+            potentially_undefined_variables = meta.find_undeclared_variables(
+                syntax_tree
+            )
+        except Exception as err:
+            templater_error = SQLTemplaterError(
+                "Failed to parse Jinja syntax. Correct the syntax or select an "
+                "alternative templater. Error: " + str(err)
+            )
+            # Capture a line number if we can.
+            if isinstance(err, TemplateSyntaxError):
+                templater_error.line_no = err.lineno
+            raise templater_error
+
+        undefined_variables = self._init_undefined_tracking(
+            live_context,
+            potentially_undefined_variables,
+            ignore_templating=bool(config and "templating" in config.get("ignore")),
+        )
+
+        try:
+            # Slice the file once rendered.
+            raw_sliced, sliced_file, out_str = self.slice_file(
+                in_str,
+                render_func=render_func,
+                config=config,
+            )
+            return (
+                TemplatedFile(
+                    source_str=in_str,
+                    templated_str=out_str,
+                    fname=fname,
+                    sliced_file=sliced_file,
+                    raw_sliced=raw_sliced,
+                ),
+                self._generate_violations_for_undefined_variables(
+                    in_str, syntax_tree, undefined_variables
+                ),
+            )
+        except (TemplateError, TypeError) as err:
+            templater_logger.info("Unrecoverable Jinja Error: %s", err, exc_info=True)
+            raise SQLTemplaterError(
+                (
+                    "Unrecoverable failure in Jinja templating: {}. Have you "
+                    "correctly configured your variables? "
+                    "https://docs.sqlfluff.com/en/latest/perma/variables.html"
+                ).format(err),
+                # We don't have actual line number information, but specify
+                # line 1 so users can ignore with "noqa" if they want. (The
+                # default is line 0, which can't be ignored because it's not
+                # a valid line number.)
+                line_no=1,
+                line_pos=1,
+            )
+
+    def slice_file(
+        self,
+        raw_str: str,
+        render_func: Callable[[str], str],
+        config: Optional["FluffConfig"] = None,
+        append_to_templated: str = "",
+    ) -> Tuple[List[RawFileSlice], List[TemplatedFileSlice], str]:
+        """Slice the file to determine regions where we can fix.
+
+        Args:
+            raw_str (str): The raw string to be sliced.
+            render_func (Callable[[str], str]): The rendering function to be used.
+            config (optional): Optional configuration. Not read by this method.
+            append_to_templated: Optional string to append to the template.
+
+        Returns:
+            Tuple[List[RawFileSlice], List[TemplatedFileSlice], str]:
+                A tuple containing a list of raw file slices, a list of
+                templated file slices, and the templated string.
+        """
+        # The JinjaTracer slicing algorithm is more robust, but it requires
+        # us to create and render a second template (not raw_str).
+
+        templater_logger.info("Slicing File Template")
+        templater_logger.debug(" Raw String: %r", raw_str[:80])
+        analyzer = self._get_jinja_analyzer(raw_str, self._get_jinja_env())
+        tracer = analyzer.analyze(render_func)
+        trace = tracer.trace(append_to_templated=append_to_templated)
+        return trace.raw_sliced, trace.sliced_file, trace.templated_str
+
+    @staticmethod
+    def _rectify_templated_slices(
+        length_deltas: Dict[int, int], sliced_template: List[TemplatedFileSlice]
+    ) -> List[TemplatedFileSlice]:
+        """This method rectifies the source slices of a variant template.
+
+        :TRICKY: We want to yield variants that _look like_ they were
+        rendered from the original template. However, they were actually
+        rendered from a modified template, which means they have source
+        indices which won't line up with the source files. We correct that
+        here by using the length deltas generated earlier from the
+        modifications.
+
+        This should ensure that lint issues and fixes for the variants are
+        handled correctly and can be combined with those from the original
+        template.
+        """
+        # NOTE: We sort the stack because it's important that it's in order
+        # because we're going to be popping from one end of it. There's no
+        # guarantee that the items are in a particular order a) because it's
+        # a dict and b) because they may have been generated out of order.
+        delta_stack = sorted(length_deltas.items(), key=lambda t: t[0])
+
+        adjusted_slices: List[TemplatedFileSlice] = []
+        carried_delta = 0
+        for tfs in sliced_template:
+            if delta_stack:
+                idx, d = delta_stack[0]
+                if idx == tfs.source_slice.start + carried_delta:
+                    adjusted_slices.append(
+                        tfs._replace(
+                            # "stretch" the slice by adjusting the end more
+                            # than the start.
+                            source_slice=slice(
+                                tfs.source_slice.start + carried_delta,
+                                tfs.source_slice.stop + carried_delta - d,
+                            )
+                        )
+                    )
+                    carried_delta -= d
+                    delta_stack.pop(0)
+                    continue
+
+            # No delta match. Just shift both ends by the carried delta.
+            adjusted_slices.append(
+                tfs._replace(
+                    source_slice=slice(
+                        tfs.source_slice.start + carried_delta,
+                        tfs.source_slice.stop + carried_delta,
+                    )
+                )
+            )
+        return adjusted_slices
+
+    @staticmethod
+    def _calculate_variant_score(
+        raw_sliced: List[RawFileSlice],
+        sliced_file: List[TemplatedFileSlice],
+        uncovered_slices: Set[int],
+        original_source_slices: Dict[int, slice],
+    ) -> int:
+        """Compute a score for the variant based on the size of covered slices.
+
+        NOTE: We need to map this back to the positions in the original
+        file, and only have the positions in the modified file here.
+        That means we translate back via the slice index in the raw file.
+        """
+        # First, work out the literal positions in the modified file which
+        # are now covered.
+        covered_source_positions = {
+            tfs.source_slice.start
+            for tfs in sliced_file
+            if tfs.slice_type == "literal" and not is_zero_slice(tfs.templated_slice)
+        }
+        # Second, convert these back into indices so we can use them to
+        # refer to the unmodified source file.
+        covered_raw_slice_idxs = [
+            idx
+            for idx, raw_slice in enumerate(raw_sliced)
+            if raw_slice.source_idx in covered_source_positions
+        ]
+
+        return sum(
+            slice_length(original_source_slices[idx])
+            for idx in covered_raw_slice_idxs
+            if idx in uncovered_slices
+        )
+
+    def _handle_unreached_code(
+        self,
+        in_str: str,
+        render_func: Callable[[str], str],
+        uncovered_slices: Set[int],
+        append_to_templated: str = "",
+    ) -> Iterator[Tuple[List[RawFileSlice], List[TemplatedFileSlice], str]]:
+        """Address uncovered slices by tweaking the template to hit them.
+
+        Args:
+            in_str (:obj:`str`): The raw source file.
+            render_func (:obj:`callable`): The render func for the templater.
+            uncovered_slices (:obj:`set` of :obj:`int`): Indices of slices in the raw
+                file which are not rendered in the original rendering. These are the
+                slices we'll attempt to hit by modifying the template. NOTE: These are
+                indices in the _sequence of slices_, not _character indices_ in the
+                raw source file.
+            append_to_templated (:obj:`str`, optional): Optional string to append
+                to the templated file.
+        """
+        analyzer = self._get_jinja_analyzer(in_str, self._get_jinja_env())
+        tracer_copy = analyzer.analyze(render_func)
+
+        max_variants_generated = 10
+        max_variants_returned = 5
+        variants: Dict[str, Tuple[int, JinjaTrace, Dict[int, int]]] = {}
+
+        # Create a mapping of the original source slices before modification so
+        # we can adjust the positions post-modification.
+        original_source_slices = {
+            idx: raw_slice.source_slice()
+            for idx, raw_slice in enumerate(tracer_copy.raw_sliced)
+        }
+
+        for uncovered_slice in sorted(uncovered_slices)[:max_variants_generated]:
+            tracer_probe = copy.deepcopy(tracer_copy)
+            tracer_trace = copy.deepcopy(tracer_copy)
+            override_raw_slices = []
+            # `length_deltas` is to keep track of the length changes associated
+            # with the changes we're making so we can correct the positions in
+            # the resulting template.
+ length_deltas: Dict[int, int] = {} + # Find a path that takes us to 'uncovered_slice'. + choices = tracer_probe.move_to_slice(uncovered_slice, 0) + for branch, options in choices.items(): + raw_file_slice = tracer_probe.raw_sliced[branch] + if raw_file_slice.tag in ("if", "elif"): + # Replace the existing "if" of "elif" expression with a new, + # hardcoded value that hits the target slice in the template + # (here that is options[0]). + new_value = "True" if options[0] == branch + 1 else "False" + new_source = f"{{% {raw_file_slice.tag} {new_value} %}}" + tracer_trace.raw_slice_info[ + raw_file_slice + ].alternate_code = new_source + override_raw_slices.append(branch) + length_deltas[raw_file_slice.source_idx] = len(new_source) - len( + raw_file_slice.raw + ) + + # Render and analyze the template with the overrides. + variant_key = tuple( + ( + cast(str, tracer_trace.raw_slice_info[rs].alternate_code) + if idx in override_raw_slices + and tracer_trace.raw_slice_info[rs].alternate_code is not None + else rs.raw + ) + for idx, rs in enumerate(tracer_trace.raw_sliced) + ) + # In some cases (especially with nested if statements), we may + # generate a variant that duplicates an existing variant. Skip + # those. + variant_raw_str = "".join(variant_key) + if variant_raw_str not in variants: + analyzer = self._get_jinja_analyzer( + variant_raw_str, self._get_jinja_env() + ) + tracer_trace = analyzer.analyze(render_func) + try: + trace = tracer_trace.trace( + append_to_templated=append_to_templated, + ) + except Exception: + # If we get an error tracing the variant, skip it. This may + # happen for a variety of reasons. Basically there's no + # guarantee that the variant will be valid Jinja. + continue + else: + # Compute a score for the variant based on the size of initially + # uncovered literal slices it hits. 
def process_with_variants(
    self,
    *,
    in_str: str,
    fname: str,
    config: Optional[FluffConfig] = None,
    formatter: Optional[Any] = None,
) -> Iterator[Tuple[TemplatedFile, List[SQLTemplaterError]]]:
    """Process a string and return one or more variant renderings.

    Note that the arguments are enforced as keywords
    because Templaters can have differences in their
    `process` method signature.
    A Templater that only supports reading from a file
    would need the following signature:
        process(*, fname, in_str=None, config=None)
    (arguments are swapped)

    Args:
        in_str (:obj:`str`): The input string.
        fname (:obj:`str`, optional): The filename of this string. This is
            mostly for loading config files at runtime.
        config (:obj:`FluffConfig`, optional): A specific config to use for
            this templating operation. Only necessary for some templaters.
        formatter (optional): Optional object for output. It is only
            forwarded to `process()`.

    Yields:
        A `(TemplatedFile, violations)` tuple for the primary rendering,
        followed by one tuple per variant rendering which reaches literals
        that the primary rendering did not. Every variant carries the
        violations of the primary rendering.
    """
    # `config` and `formatter` must be real parameters: the body forwards
    # both, so leaving them out of the signature raised a NameError on
    # every call. They default to None so existing two-argument callers
    # keep working.
    # NOTE(review): this assumes `process()` accepts `config`/`formatter`
    # and `construct_render_func()` accepts `config` - confirm.
    templated_file, violations = self.process(
        in_str=in_str, fname=fname, config=config, formatter=formatter
    )
    yield templated_file, violations

    # Find uncovered code (if any), tweak the template to hit that code.
    # First, identify the literals which _are_ covered.
    covered_literal_positions = {
        tfs.source_slice.start
        for tfs in templated_file.sliced_file
        # It's covered if it's rendered
        if not is_zero_slice(tfs.templated_slice)
    }
    templater_logger.debug(
        "Covered literal positions %s", covered_literal_positions
    )

    # Indices into the sequence of raw slices (not character offsets) of
    # the literals which never produced any output.
    uncovered_literal_idxs = {
        idx
        for idx, raw_slice in enumerate(templated_file.raw_sliced)
        if raw_slice.slice_type == "literal"
        and raw_slice.source_idx not in covered_literal_positions
    }
    templater_logger.debug(
        "Uncovered literals correspond to slices %s", uncovered_literal_idxs
    )

    # NOTE: No validation required as all validation done in the `.process()`
    # call above.
    _, _, render_func = self.construct_render_func(fname=fname, config=config)

    for raw_sliced, sliced_file, templated_str in self._handle_unreached_code(
        in_str, render_func, uncovered_literal_idxs
    ):
        yield (
            TemplatedFile(
                source_str=in_str,
                templated_str=templated_str,
                fname=fname,
                sliced_file=sliced_file,
                raw_sliced=raw_sliced,
            ),
            violations,
        )
+ + These macros will be ignored and not loaded into context + + Args: + macro_path (str): Str of the path to the macro + exclude_macros_path (List[str]): Str of the path to the macros to exclude + + Returns: + bool: True if the macro should be excluded + """ + for exclude_path in exclude_macros_path: + macro_path_normalized = os.path.normpath(os.path.abspath(macro_path)) + exclude_path_normalized = os.path.normpath(exclude_path) + if exclude_path_normalized in macro_path_normalized: + templater_logger.debug("Skipping this macro file: %s", macro_path) + return True + return False + + +class DummyUndefined(jinja2.Undefined): + """Acts as a dummy value to try and avoid template failures. + + Inherits from jinja2.Undefined so Jinja's default() filter will + treat it as a missing value, even though it has a non-empty value + in normal contexts. + """ + + # Tell Jinja this object is safe to call and does not alter data. + # https://jinja.palletsprojects.com/en/2.9.x/sandbox/#jinja2.sandbox.SandboxedEnvironment.is_safe_callable + unsafe_callable = False + # https://jinja.palletsprojects.com/en/3.0.x/sandbox/#jinja2.sandbox.SandboxedEnvironment.is_safe_callable + alters_data = False + + def __init__(self, name: str) -> None: + super().__init__() + self.name = name + + def __str__(self) -> str: + return self.name.replace(".", "_") + + @classmethod + def create(cls, name: str) -> "DummyUndefined": + """Factory method. + + When ignoring=templating is configured, use 'name' as the value for + undefined variables. We deliberately avoid recording and reporting + undefined variables as errors. Using 'name' as the value won't always + work, but using 'name', combined with implementing the magic methods + (such as __eq__, see above), works well in most cases. 
+ """ + templater_logger.debug( + "Providing dummy value for undefined Jinja variable: %s", name + ) + result = DummyUndefined(name) + return result + + def __getattr__(self, item: str) -> "DummyUndefined": + """Intercept any calls to undefined attributes. + + Args: + item (str): The name of the attribute. + + Returns: + object: A dynamically created instance of this class. + """ + return self.create(f"{self.name}.{item}") + + # Implement the most common magic methods. This helps avoid + # templating errors for undefined variables. + # https://www.tutorialsteacher.com/python/magic-methods-in-python + def _self_impl(self, *args: Any, **kwargs: Any) -> "DummyUndefined": + """Return an instance of the class itself. + + Args: + *args: Variable length argument list. + **kwargs: Arbitrary keyword arguments. + + Returns: + object: An instance of the class itself. + """ + return self + + def _bool_impl(self, *args: Any, **kwargs: Any) -> bool: + """Return a boolean value. + + Args: + *args: Variable length argument list. + **kwargs: Arbitrary keyword arguments. + + Returns: + bool: A boolean value. + """ + return True + + # We're intentionally changing the behaviour here compared to the base + # class of Undefined. That means we're going to silence the `assignment` + # mypy warnings. Operations on an undefined result in another undefined. + __add__ = __sub__ = __mul__ = _self_impl # type: ignore[assignment] + __floordiv__ = __truediv__ = _self_impl # type: ignore[assignment] + __mod__ = __pow__ = _self_impl # type: ignore[assignment] + __pos__ = __neg__ = _self_impl # type: ignore[assignment] + __lshift__ = __rshift__ = _self_impl + __getitem__ = _self_impl # type: ignore[assignment] + __invert__ = _self_impl + __call__ = _self_impl # type: ignore[assignment] + # Boolean operations on an undefined are handled separately. 
def process_from_rust(
    string: str,
    fname: str,
    live_context: Dict[str, Any],
) -> TemplatedFile:
    """Process the call from the rust side.

    Args:
        string: The raw template source to render.
        fname: Name of the file being templated; also used in the error
            message.
        live_context: Variables made available to the template. Passed both
            as the templater's `override_context` and as the `context` of
            the `process()` call.

    Returns:
        The templated file produced by `JinjaTemplater.process`.

    Raises:
        ValueError: If templating reported any errors. The message lists
            them, so the caller (which only sees the exception) can show
            what went wrong rather than an empty `ValueError`.
    """
    templater = JinjaTemplater(override_context=live_context)
    output, errors = templater.process(
        in_str=string,
        fname=fname,
        context=live_context,
    )
    if errors:
        details = "; ".join(str(error) for error in errors)
        raise ValueError(f"Jinja templating of {fname!r} failed: {details}")
    return output
class RelationEmulator:
    """Stand-in for the `this` relation object from dbt.

    Calling an instance, or reading any attribute it does not define,
    hands back the very same instance, so chained template expressions keep
    resolving. Attributes whose name starts with `is_` evaluate to True,
    and rendering the object yields its identifier.
    """

    # Tell Jinja this object is safe to call and does not alter data.
    # https://jinja.palletsprojects.com/en/3.0.x/sandbox/#jinja2.sandbox.SandboxedEnvironment.is_safe_callable
    alters_data = False
    unsafe_callable = False

    # Class-level values; `identifier` is replaced per instance in __init__.
    database = "this_database"
    schema = "this_schema"
    identifier = "this_model"

    def __init__(self, identifier: str = "this_model") -> None:
        self.identifier = identifier

    def __str__(self) -> str:
        return self.identifier

    def __call__(self, *args: Any, **kwargs: Any) -> "RelationEmulator":
        """Calling relation(*) gives back this same relation."""
        return self

    def __getattr__(self, name: str) -> Union["RelationEmulator", bool]:
        """Resolve attributes that are not defined on the class or instance.

        NOTE: Names beginning with `is_` yield the boolean True; every
        other name yields this same relation.
        """
        return True if name.startswith("is_") else self
+""" + +# import logging +# from collections import defaultdict +# from pathlib import Path +# from typing import List, NamedTuple, Union +# +import pytest + +from templaters.jinja_templater import process_from_rust + +# from jinja2 import Environment, nodes +# from jinja2.exceptions import UndefinedError +# from jinja2.ext import Extension +# from jinja2.nodes import Node +# from jinja2.parser import Parser +# +# from .python_templater import FluffConfig, Linter +# from .python_templater import SQLFluffSkipFile, SQLFluffUserError, SQLTemplaterError +# from .python_templater import BaseSegment, RawFileSlice, TemplatedFile +# from .jinja_templater import JinjaTemplater, DummyUndefined +# from .jinja_templater_tracers import JinjaAnalyzer, JinjaTagConfiguration + +JINJA_STRING = ( + "SELECT * FROM {% for c in blah %}{{c}}{% if not loop.last %}, " + "{% endif %}{% endfor %} WHERE {{condition}}\n\n" +) + +JINJA_MACRO_CALL_SQL = ( + "{% macro render_name(title) %}\n" + " '{{ title }}. foo' as {{ caller() }}\n" + "{% endmacro %}\n" + "SELECT\n" + " {% call render_name('Sir') %}\n" + " bar\n" + " {% endcall %}\n" + "FROM baz\n" +) + + +# def get_parsed(path: str) -> BaseSegment: +# """Testing helper to parse paths.""" +# linter = Linter() +# # Get the first file matching the path string +# first_path = next(linter.parse_path(path)) +# # Delegate parse assertions to the `.tree` property +# return first_path.tree + + +@pytest.mark.parametrize( + "instr, expected_outstr", + [ + ( + JINJA_STRING, + "SELECT * FROM f, o, o WHERE a < 10\n\n", + ), + # Test for issue #968. This was previously raising an UnboundLocalError. 
def test__templater_jinja(instr: str, expected_outstr: str) -> None:
    """Test jinja templating and the treatment of whitespace."""
    # `process_from_rust` returns the templated file on its own (it raises
    # instead of returning errors), so the result must not be unpacked as
    # a two-element tuple.
    templated_file = process_from_rust(
        string=instr, fname="test", live_context=dict(blah="foo", condition="a < 10")
    )

    assert str(templated_file) == expected_outstr
+# expected_raw_sliced__source_list: List[str] +# +# +# @pytest.mark.parametrize( +# "case", +# [ +# RawTemplatedTestCase( +# name="basic_block", +# instr="\n\n{% set x = 42 %}\nSELECT 1, 2\n", +# templated_str="\n\n\nSELECT 1, 2\n", +# expected_templated_sliced__source_list=[ +# "\n\n", +# "{% set x = 42 %}", +# "\nSELECT 1, 2\n", +# ], +# expected_templated_sliced__templated_list=[ +# "\n\n", +# "", +# "\nSELECT 1, 2\n", +# ], +# expected_raw_sliced__source_list=[ +# "\n\n", +# "{% set x = 42 %}", +# "\nSELECT 1, 2\n", +# ], +# ), +# RawTemplatedTestCase( +# name="strip_left_block", +# instr="\n\n{%- set x = 42 %}\nSELECT 1, 2\n", +# templated_str="\nSELECT 1, 2\n", +# expected_templated_sliced__source_list=[ +# "\n\n", +# "{%- set x = 42 %}", +# "\nSELECT 1, 2\n", +# ], +# expected_templated_sliced__templated_list=[ +# "", +# "", +# "\nSELECT 1, 2\n", +# ], +# expected_raw_sliced__source_list=[ +# "\n\n", +# "{%- set x = 42 %}", +# "\nSELECT 1, 2\n", +# ], +# ), +# RawTemplatedTestCase( +# name="strip_both_block", +# instr="\n\n{%- set x = 42 -%}\nSELECT 1, 2\n", +# templated_str="SELECT 1, 2\n", +# expected_templated_sliced__source_list=[ +# "\n\n", +# "{%- set x = 42 -%}", +# "\n", +# "SELECT 1, 2\n", +# ], +# expected_templated_sliced__templated_list=[ +# "", +# "", +# "", +# "SELECT 1, 2\n", +# ], +# expected_raw_sliced__source_list=[ +# "\n\n", +# "{%- set x = 42 -%}", +# "\n", +# "SELECT 1, 2\n", +# ], +# ), +# RawTemplatedTestCase( +# name="strip_and_templated_whitespace", +# instr="SELECT {{- ' ' -}} 1{{ ' , 2' -}}\n", +# templated_str="SELECT 1 , 2", +# expected_templated_sliced__source_list=[ +# "SELECT", +# " ", +# "{{- ' ' -}}", +# " ", +# "1", +# "{{ ' , 2' -}}", +# "\n", +# ], +# expected_templated_sliced__templated_list=[ +# "SELECT", +# "", # Placeholder for consumed whitespace +# " ", # Placeholder for templated whitespace +# "", # Placeholder for consumed whitespace +# "1", +# " , 2", +# "", # Placeholder for consumed newline +# ], +# 
expected_raw_sliced__source_list=[ +# "SELECT", +# " ", +# "{{- ' ' -}}", +# " ", +# "1", +# "{{ ' , 2' -}}", +# "\n", +# ], +# ), +# RawTemplatedTestCase( +# name="strip_both_block_hard", +# instr="SELECT {%- set x = 42 %} 1 {%- if true -%} , 2{% endif -%}\n", +# templated_str="SELECT 1, 2", +# expected_templated_sliced__source_list=[ +# "SELECT", +# # NB: Even though the jinja tag consumes whitespace, we still +# # get it here as a placeholder. +# " ", +# "{%- set x = 42 %}", +# " 1", +# # This whitespace is a separate from the 1 because it's consumed. +# " ", +# "{%- if true -%}", +# " ", +# ", 2", +# "{% endif -%}", +# "\n", +# ], +# expected_templated_sliced__templated_list=[ +# "SELECT", +# "", # Consumed whitespace placeholder +# "", # Jinja block placeholder +# " 1", +# "", # Consumed whitespace +# "", # Jinja block placeholder +# "", # More consumed whitespace +# ", 2", +# "", # Jinja block +# "", # Consumed final newline. +# ], +# expected_raw_sliced__source_list=[ +# "SELECT", +# " ", +# "{%- set x = 42 %}", +# " 1", +# " ", +# "{%- if true -%}", +# " ", +# ", 2", +# "{% endif -%}", +# "\n", +# ], +# ), +# RawTemplatedTestCase( +# name="basic_data", +# instr="""select +# c1, +# {{ 'c' }}2 as user_id +# """, +# templated_str="""select +# c1, +# c2 as user_id +# """, +# expected_templated_sliced__source_list=[ +# "select\n c1,\n ", +# "{{ 'c' }}", +# "2 as user_id\n", +# ], +# expected_templated_sliced__templated_list=[ +# "select\n c1,\n ", +# "c", +# "2 as user_id\n", +# ], +# expected_raw_sliced__source_list=[ +# "select\n c1,\n ", +# "{{ 'c' }}", +# "2 as user_id\n", +# ], +# ), +# # Note this is basically identical to the "basic_data" case above. +# # "Right strip" is not actually a thing in Jinja. 
+# RawTemplatedTestCase( +# name="strip_right_data", +# instr="""SELECT +# {{ 'col1,' -}} +# col2 +# """, +# templated_str="""SELECT +# col1,col2 +# """, +# expected_templated_sliced__source_list=[ +# "SELECT\n ", +# "{{ 'col1,' -}}", +# "\n ", +# "col2\n", +# ], +# expected_templated_sliced__templated_list=[ +# "SELECT\n ", +# "col1,", +# "", +# "col2\n", +# ], +# expected_raw_sliced__source_list=[ +# "SELECT\n ", +# "{{ 'col1,' -}}", +# "\n ", +# "col2\n", +# ], +# ), +# RawTemplatedTestCase( +# name="strip_both_data", +# instr="""select +# c1, +# {{- 'c' -}} +# 2 as user_id +# """, +# templated_str="""select +# c1,c2 as user_id +# """, +# expected_templated_sliced__source_list=[ +# "select\n c1,", +# "\n ", +# "{{- 'c' -}}", +# "\n", +# "2 as user_id\n", +# ], +# expected_templated_sliced__templated_list=[ +# "select\n c1,", +# "", +# "c", +# "", +# "2 as user_id\n", +# ], +# expected_raw_sliced__source_list=[ +# "select\n c1,", +# "\n ", +# "{{- 'c' -}}", +# "\n", +# "2 as user_id\n", +# ], +# ), +# RawTemplatedTestCase( +# name="strip_both_comment", +# instr="""select +# c1, +# {#- Column 2 -#} c2 as user_id +# """, +# templated_str="""select +# c1,c2 as user_id +# """, +# expected_templated_sliced__source_list=[ +# "select\n c1,", +# "\n ", +# "{#- Column 2 -#}", +# " ", +# "c2 as user_id\n", +# ], +# expected_templated_sliced__templated_list=[ +# "select\n c1,", +# "", +# "", +# "", +# "c2 as user_id\n", +# ], +# expected_raw_sliced__source_list=[ +# "select\n c1,", +# "\n ", +# "{#- Column 2 -#}", +# " ", +# "c2 as user_id\n", +# ], +# ), +# RawTemplatedTestCase( +# name="union_all_loop1", +# instr="""{% set products = [ +# 'table1', +# 'table2', +# ] %} +# +# {% for product in products %} +# SELECT +# brand +# FROM +# {{ product }} +# {% if not loop.last -%} UNION ALL {%- endif %} +# {% endfor %} +# """, +# templated_str=( +# "\n\n\nSELECT\n brand\nFROM\n table1\nUNION ALL\n\nSELECT\n " +# "brand\nFROM\n table2\n\n\n" +# ), +# 
expected_templated_sliced__source_list=[ +# "{% set products = [\n 'table1',\n 'table2',\n ] %}", +# "\n\n", +# "{% for product in products %}", +# "\nSELECT\n brand\nFROM\n ", +# "{{ product }}", +# "\n", +# "{% if not loop.last -%}", +# " ", +# "UNION ALL", +# " ", +# "{%- endif %}", +# "\n", +# "{% endfor %}", +# "\nSELECT\n brand\nFROM\n ", +# "{{ product }}", +# "\n", +# "{% if not loop.last -%}", +# "{%- endif %}", +# "\n", +# "{% endfor %}", +# "\n", +# ], +# expected_templated_sliced__templated_list=[ +# "", +# "\n\n", +# "", +# "\nSELECT\n brand\nFROM\n ", +# "table1", +# "\n", +# "", +# "", +# "UNION ALL", +# "", +# "", +# "\n", +# "", +# "\nSELECT\n brand\nFROM\n ", +# "table2", +# "\n", +# "", +# "", +# "\n", +# "", +# "\n", +# ], +# expected_raw_sliced__source_list=[ +# "{% set products = [\n 'table1',\n 'table2',\n ] %}", +# "\n\n", +# "{% for product in products %}", +# "\nSELECT\n brand\nFROM\n ", +# "{{ product }}", +# "\n", +# "{% if not loop.last -%}", +# " ", +# "UNION ALL", +# " ", +# "{%- endif %}", +# "\n", +# "{% endfor %}", +# "\n", +# ], +# ), +# RawTemplatedTestCase( +# "set_multiple_variables_and_define_macro", +# """{% macro echo(text) %} +# {{text}} +# {% endmacro %} +# +# {% set a, b = 1, 2 %} +# +# SELECT +# {{ echo(a) }}, +# {{ echo(b) }}""", +# "\n\n\n\nSELECT\n \n1\n,\n \n2\n", +# [ +# "{% macro echo(text) %}", +# "\n", +# "{{text}}", +# "\n", +# "{% endmacro %}", +# "\n\n", +# "{% set a, b = 1, 2 %}", +# "\n\nSELECT\n ", +# "{{ echo(a) }}", +# ",\n ", +# "{{ echo(b) }}", +# ], +# [ +# "", +# "", +# "", +# "", +# "", +# "\n\n", +# "", +# "\n\nSELECT\n ", +# "\n1\n", +# ",\n ", +# "\n2\n", +# ], +# [ +# "{% macro echo(text) %}", +# "\n", +# "{{text}}", +# "\n", +# "{% endmacro %}", +# "\n\n", +# "{% set a, b = 1, 2 %}", +# "\n\nSELECT\n ", +# "{{ echo(a) }}", +# ",\n ", +# "{{ echo(b) }}", +# ], +# ), +# ], +# ids=lambda case: case.name, +# ) +# def test__templater_jinja_slices(case: RawTemplatedTestCase): +# """Test that Jinja 
templater slices raw and templated file correctly.""" +# t = JinjaTemplater() +# templated_file, _ = t.process( +# in_str=case.instr, +# fname="test", +# config=FluffConfig(overrides={"dialect": "ansi"}), +# ) +# assert templated_file is not None +# assert templated_file.source_str == case.instr +# assert templated_file.templated_str == case.templated_str +# # Build and check the list of source strings referenced by "sliced_file". +# actual_ts_source_list = [ +# case.instr[ts.source_slice] for ts in templated_file.sliced_file +# ] +# assert actual_ts_source_list == case.expected_templated_sliced__source_list +# +# # Build and check the list of templated strings referenced by "sliced_file". +# actual_ts_templated_list = [ +# templated_file.templated_str[ts.templated_slice] +# for ts in templated_file.sliced_file +# ] +# assert actual_ts_templated_list == case.expected_templated_sliced__templated_list +# +# # Build and check the list of source strings referenced by "raw_sliced". +# previous_rs = None +# actual_rs_source_list: List[RawFileSlice] = [] +# for rs in templated_file.raw_sliced + [None]: # type: ignore +# if previous_rs: +# if rs: +# actual_source = case.instr[previous_rs.source_idx : rs.source_idx] +# else: +# actual_source = case.instr[previous_rs.source_idx :] +# actual_rs_source_list.append(actual_source) +# previous_rs = rs +# assert actual_rs_source_list == case.expected_raw_sliced__source_list +# +# +# def test_templater_set_block_handling(): +# """Test handling of literals in {% set %} blocks. +# +# Specifically, verify they are not modified in the alternate template. +# """ +# +# def run_query(sql): +# # Prior to the bug fix, this assertion failed. This was bad because, +# # inside JinjaTracer, dbt templates similar to the one in this test +# # would call the database with funky SQL (including weird strings it +# # uses internally like: 00000000000000000000000000000002. 
+# assert sql == "\n\nselect 1 from foobarfoobarfoobarfoobar_dev\n\n" +# return sql +# +# t = JinjaTemplater(override_context=dict(run_query=run_query)) +# instr = """{% set my_query1 %} +# select 1 from foobarfoobarfoobarfoobar_{{ "dev" }} +# {% endset %} +# {% set my_query2 %} +# {{ my_query1 }} +# {% endset %} +# +# {{ run_query(my_query2) }} +# """ +# outstr, vs = t.process( +# in_str=instr, fname="test", config=FluffConfig(overrides={"dialect": "ansi"}) +# ) +# assert str(outstr) == "\n\n\n\n\nselect 1 from foobarfoobarfoobarfoobar_dev\n\n\n" +# assert len(vs) == 0 +# +# +# def test__templater_jinja_error_variable(): +# """Test missing variable error handling in the jinja templater.""" +# t = JinjaTemplater(override_context=dict(blah="foo")) +# instr = JINJA_STRING +# outstr, vs = t.process( +# in_str=instr, fname="test", config=FluffConfig(overrides={"dialect": "ansi"}) +# ) +# assert str(outstr) == "SELECT * FROM f, o, o WHERE \n\n" +# # Check we have violations. +# assert len(vs) > 0 +# # Check one of them is a templating error on line 1 +# assert any(v.rule_code() == "TMP" and v.line_no == 1 for v in vs) +# +# +# def test__templater_jinja_dynamic_variable_no_violations(): +# """Test no templater violation for variable defined within template.""" +# t = JinjaTemplater(override_context=dict(blah="foo")) +# instr = """{% if True %} +# {% set some_var %}1{% endset %} +# SELECT {{some_var}} +# {% endif %} +# """ +# outstr, vs = t.process( +# in_str=instr, fname="test", config=FluffConfig(overrides={"dialect": "ansi"}) +# ) +# assert str(outstr) == "\n \n SELECT 1\n\n" +# # Check we have no violations. 
+# assert len(vs) == 0 +# +# +# def test__templater_jinja_error_syntax(): +# """Test syntax problems in the jinja templater.""" +# t = JinjaTemplater() +# instr = "SELECT {{foo} FROM jinja_error\n" +# with pytest.raises(SQLTemplaterError) as excinfo: +# t.process( +# in_str=instr, +# fname="test", +# config=FluffConfig(overrides={"dialect": "ansi"}), +# ) +# templater_exception = excinfo.value +# assert templater_exception.rule_code() == "TMP" +# assert templater_exception.line_no == 1 +# assert "Failed to parse Jinja syntax" in str(templater_exception) +# +# +# def test__templater_jinja_error_catastrophic(): +# """Test error handling in the jinja templater.""" +# t = JinjaTemplater(override_context=dict(blah=7)) +# instr = JINJA_STRING +# with pytest.raises(SQLTemplaterError) as excinfo: +# t.process( +# in_str=instr, +# fname="test", +# config=FluffConfig(overrides={"dialect": "ansi"}), +# ) +# templater_exception = excinfo.value +# assert templater_exception.rule_code() == "TMP" +# assert templater_exception.line_no == 1 +# assert "Unrecoverable failure in Jinja templating" in str(templater_exception) +# +# +# def test__templater_jinja_error_macro_path_does_not_exist(): +# """Tests that an error is raised if macro path doesn't exist.""" +# with pytest.raises(ValueError) as e: +# JinjaTemplater().construct_render_func( +# config=FluffConfig.from_path( +# "test/fixtures/templater/jinja_macro_path_does_not_exist" +# ) +# ) +# assert str(e.value).startswith("Path does not exist") +# +# +# def test__templater_jinja_error_macro_invalid(): +# """Tests that an error is raised if a macro is invalid.""" +# invalid_macro_config_string = ( +# "[sqlfluff]\n" +# "templater = jinja\n" +# "dialect = ansi\n" +# "[sqlfluff:templater:jinja:macros]\n" +# "a_macro_def = {% macro pkg.my_macro() %}pass{% endmacro %}\n" +# ) +# config = FluffConfig.from_string(invalid_macro_config_string) +# with pytest.raises(SQLFluffUserError) as e: +# 
JinjaTemplater().construct_render_func(config=config) +# error_string = str(e.value) +# assert error_string.startswith("Error loading user provided macro") +# assert "{% macro pkg.my_macro() %}pass{% endmacro %}" in error_string +# +# +# def test__templater_jinja_lint_empty(): +# """Check that parsing a file which renders to an empty string. +# +# No exception should be raised, and we should get a single templated element. +# """ +# lntr = Linter(dialect="ansi") +# parsed = lntr.parse_string(in_str='{{ "" }}') +# parsed_variant = parsed.parsed_variants[0] +# assert parsed_variant.templated_file.source_str == '{{ "" }}' +# assert parsed_variant.templated_file.templated_str == "" +# # Get the types of the segments +# print(f"Segments: {parsed_variant.tree.raw_segments}") +# seg_types = [seg.get_type() for seg in parsed_variant.tree.raw_segments] +# assert seg_types == ["placeholder", "end_of_file"] +# +# +# def assert_structure(yaml_loader, path, code_only=True, include_meta=False): +# """Check that a parsed sql file matches the yaml file with the same name.""" +# parsed = get_parsed(path + ".sql") +# # Whitespace is important here to test how that's treated +# tpl = parsed.to_tuple(code_only=code_only, show_raw=True, include_meta=include_meta) +# # Check nothing unparsable +# if "unparsable" in parsed.type_set(): +# print(parsed.stringify()) +# raise ValueError("Input file is unparsable.") +# _, expected = yaml_loader(path + ".yml") +# assert tpl == expected +# +# +# @pytest.mark.parametrize( +# "subpath,code_only,include_meta", +# [ +# # Config Scalar +# ("jinja_a/jinja", True, False), +# # Macros +# ("jinja_b/jinja", False, False), +# # dbt builtins +# ("jinja_c_dbt/dbt_builtins_cross_ref", True, False), +# ("jinja_c_dbt/dbt_builtins_config", True, False), +# ("jinja_c_dbt/dbt_builtins_is_incremental", True, False), +# ("jinja_c_dbt/dbt_builtins_ref", True, False), +# ("jinja_c_dbt/dbt_builtins_source", True, False), +# ("jinja_c_dbt/dbt_builtins_this", True, 
False), +# ("jinja_c_dbt/dbt_builtins_this_callable", True, False), +# ("jinja_c_dbt/dbt_builtins_var_default", True, False), +# ("jinja_c_dbt/dbt_builtins_test", True, False), +# # do directive +# ("jinja_e/jinja", True, False), +# # case sensitivity and python literals +# ("jinja_f/jinja", True, False), +# # Macro loading from a folder +# ("jinja_g_macros/jinja", True, False), +# # Excluding macros +# ("jinja_exclude_macro_path/jinja", True, False), +# # Excluding macros with running from subdirectory +# ("jinja_exclude_macro_path/model_directory/jinja_sub_directory", True, False), +# # jinja raw tag +# ("jinja_h_macros/jinja", True, False), +# ("jinja_i_raw/raw_tag", True, False), +# ("jinja_i_raw/raw_tag_2", True, False), +# # Library Loading from a folder +# ("jinja_j_libraries/jinja", True, False), +# # Priority of macros +# ("jinja_k_config_override_path_macros/jinja", True, False), +# # Placeholders and metas +# ("jinja_l_metas/001", False, True), +# ("jinja_l_metas/002", False, True), +# ("jinja_l_metas/003", False, True), +# ("jinja_l_metas/004", False, True), +# ("jinja_l_metas/005", False, True), +# ("jinja_l_metas/006", False, True), +# ("jinja_l_metas/007", False, True), +# ("jinja_l_metas/008", False, True), +# ("jinja_l_metas/009", False, True), +# ("jinja_l_metas/010", False, True), +# ("jinja_l_metas/011", False, True), +# # Library Loading from a folder when library is module +# ("jinja_m_libraries_module/jinja", True, False), +# ("jinja_n_nested_macros/jinja", True, False), +# # Test more dbt configurations +# ("jinja_o_config_override_dbt_builtins/override_dbt_builtins", True, False), +# ("jinja_p_disable_dbt_builtins/disable_dbt_builtins", True, False), +# # Load all the macros +# ("jinja_q_multiple_path_macros/jinja", True, False), +# ("jinja_s_filters_in_library/jinja", True, False), +# # Jinja loader search path, without also loading macros into global namespace +# ("jinja_t_loader_search_path/jinja", True, False), +# ], +# ) +# def 
test__templater_full(subpath, code_only, include_meta, yaml_loader, caplog): +# """Check structure can be parsed from jinja templated files.""" +# # Log the templater and lexer throughout this test +# caplog.set_level(logging.DEBUG, logger="sqlfluff.templater") +# caplog.set_level(logging.DEBUG, logger="sqlfluff.lexer") +# +# assert_structure( +# yaml_loader, +# "test/fixtures/templater/" + subpath, +# code_only=code_only, +# include_meta=include_meta, +# ) +# +# +# def test__templater_jinja_block_matching(caplog): +# """Test the block UUID matching works with a complicated case.""" +# caplog.set_level(logging.DEBUG, logger="sqlfluff.lexer") +# path = "test/fixtures/templater/jinja_l_metas/002.sql" +# # Parse the file. +# parsed = get_parsed(path) +# # We only care about the template elements +# template_segments = [ +# seg +# for seg in parsed.raw_segments +# if seg.is_type("template_loop") +# or ( +# seg.is_type("placeholder") +# and seg.block_type in ("block_start", "block_end", "block_mid") +# ) +# ] +# +# # Group them together by block UUID +# assert all(seg.block_uuid for seg in template_segments), ( +# "All templated segments should have a block uuid!" +# ) +# grouped = defaultdict(list) +# for seg in template_segments: +# grouped[seg.block_uuid].append(seg.pos_marker.working_loc) +# +# print(grouped) +# +# # Now the matching block IDs should be found at the following positions. +# # NOTE: These are working locations in the rendered file. +# groups = { +# "for actions clause 1": [(6, 5), (9, 5), (12, 5), (15, 5)], +# "for actions clause 2": [(17, 5), (21, 5), (29, 5), (37, 5)], +# # NOTE: all the if loop clauses are grouped together. 
+# "if loop.first": [ +# (18, 9), +# (20, 9), +# (20, 9), +# (22, 9), +# (22, 9), +# (28, 9), +# (30, 9), +# (30, 9), +# (36, 9), +# ], +# } +# +# # Check all are accounted for: +# for clause in groups.keys(): +# for block_uuid, locations in grouped.items(): +# if groups[clause] == locations: +# print(f"Found {clause}, locations with UUID: {block_uuid}") +# break +# else: +# raise ValueError(f"Couldn't find appropriate grouping of blocks: {clause}") +# +# +# class DerivedJinjaAnalyzer(JinjaAnalyzer): +# """An analyzer that includes some custom Jinja tags. +# +# This is used for tests that show the analyzer can be extended for custom plugin +# templaters that support custom tags. +# """ +# +# @classmethod +# def _get_tag_configuration(cls, tag: str) -> JinjaTagConfiguration: +# tag_map = { +# "up": JinjaTagConfiguration( +# block_type="block_start", +# block_tracking=True, +# ), +# "down": JinjaTagConfiguration( +# block_type="block_mid", +# block_tracking=True, +# ), +# "end": JinjaTagConfiguration( +# block_type="block_end", +# block_tracking=True, +# ), +# } +# return tag_map.get(tag, super()._get_tag_configuration(tag)) +# +# +# @pytest.mark.parametrize( +# "test,result,analyzer_class", +# [ +# ("", [], JinjaAnalyzer), +# ("foo", [("foo", "literal", 0)], JinjaAnalyzer), +# ( +# "foo {{bar}} z ", +# [ +# ("foo ", "literal", 0), +# ("{{bar}}", "templated", 4), +# (" z ", "literal", 11), +# ], +# JinjaAnalyzer, +# ), +# ( +# ( +# "SELECT {# A comment #} {{field}} {% for i in [1, 3]%}, " +# "fld_{{i}}{% endfor %} FROM my_schema.{{my_table}} " +# ), +# [ +# ("SELECT ", "literal", 0), +# ("{# A comment #}", "comment", 7), +# (" ", "literal", 22), +# ("{{field}}", "templated", 23), +# (" ", "literal", 32), +# ("{% for i in [1, 3]%}", "block_start", 33, 1, "for"), +# (", fld_", "literal", 53, 1), +# ("{{i}}", "templated", 59, 1), +# ("{% endfor %}", "block_end", 64, 1, "endfor"), +# (" FROM my_schema.", "literal", 76, 2), +# ("{{my_table}}", "templated", 92, 2), +# (" 
", "literal", 104, 2), +# ], +# JinjaAnalyzer, +# ), +# ( +# "{% set thing %}FOO{% endset %} BAR", +# [ +# ("{% set thing %}", "block_start", 0, 1, "set"), +# ("FOO", "literal", 15, 1), +# ("{% endset %}", "block_end", 18, 1, "endset"), +# (" BAR", "literal", 30, 2), +# ], +# JinjaAnalyzer, +# ), +# ( +# # Tests Jinja "block assignment" syntax. Also tests the use of +# # template substitution within the block: {{ "dev" }}. +# """{% set my_query %} +# select 1 from foobarfoobarfoobarfoobar_{{ "dev" }} +# {% endset %} +# {{ my_query }} +# """, +# [ +# ("{% set my_query %}", "block_start", 0, 1, "set"), +# ("\nselect 1 from foobarfoobarfoobarfoobar_", "literal", 18, 1), +# ('{{ "dev" }}', "templated", 58, 1), +# ("\n", "literal", 69, 1), +# ("{% endset %}", "block_end", 70, 1, "endset"), +# ("\n", "literal", 82, 2), +# ("{{ my_query }}", "templated", 83, 2), +# ("\n", "literal", 97, 2), +# ], +# JinjaAnalyzer, +# ), +# # Tests for jinja blocks that consume whitespace. +# ( +# """SELECT 1 FROM {%+if true-%} {{ref('foo')}} {%-endif%}""", +# [ +# ("SELECT 1 FROM ", "literal", 0), +# ("{%+if true-%}", "block_start", 14, 1, "if"), +# (" ", "literal", 27, 1), +# ("{{ref('foo')}}", "templated", 28, 1), +# (" ", "literal", 42, 1), +# ("{%-endif%}", "block_end", 43, 1, "endif"), +# ], +# JinjaAnalyzer, +# ), +# ( +# """{% for item in some_list -%} +# SELECT * +# FROM some_table +# {{ "UNION ALL\n" if not loop.last }} +# {%- endfor %}""", +# [ +# ("{% for item in some_list -%}", "block_start", 0, 1, "for"), +# # This gets consumed in the templated file, but it's still here. 
+# ("\n ", "literal", 28, 1), +# ("SELECT *\n FROM some_table\n", "literal", 33, 1), +# ('{{ "UNION ALL\n" if not loop.last }}', "templated", 62, 1), +# ("\n", "literal", 97, 1), +# ("{%- endfor %}", "block_end", 98, 1, "endfor"), +# ], +# JinjaAnalyzer, +# ), +# ( +# JINJA_MACRO_CALL_SQL, +# [ +# ("{% macro render_name(title) %}", "block_start", 0, 1, "macro"), +# ("\n '", "literal", 30, 1), +# ("{{ title }}", "templated", 34, 1), +# (". foo' as ", "literal", 45, 1), +# ("{{ caller() }}", "templated", 55, 1), +# ("\n", "literal", 69, 1), +# ("{% endmacro %}", "block_end", 70, 1, "endmacro"), +# ("\nSELECT\n ", "literal", 84, 2), +# ("{% call render_name('Sir') %}", "block_start", 96, 3, "call"), +# ("\n bar\n ", "literal", 125, 3), +# ("{% endcall %}", "block_end", 142, 3, "endcall"), +# ("\nFROM baz\n", "literal", 155, 4), +# ], +# JinjaAnalyzer, +# ), +# ( +# # Test of tag heuristics in the default _get_tag_configuration +# """{% randomtagstart %} +# SELECT 1; +# {% elphony %} +# SELECT 2; +# {% endsomethingweird %}""", +# [ +# ("{% randomtagstart %}", "block_start", 0, 1, "randomtagstart"), +# ("\n SELECT 1;\n", "literal", 20, 1), +# ("{% elphony %}", "block_mid", 35, 1, "elphony"), +# ("\n SELECT 2;\n", "literal", 48, 1), +# ("{% endsomethingweird %}", "block_end", 63, 1, "endsomethingweird"), +# ], +# JinjaAnalyzer, +# ), +# ( +# # Basic test with a derived JinjaAnalyzer that supports some custom tags +# """{% up 'create table xyz' %} +# CREATE TABLE xyz (id int); +# {% down %} +# DROP TABLE xyz; +# {% end %}""", +# [ +# ("{% up 'create table xyz' %}", "block_start", 0, 1, "up"), +# ("\n CREATE TABLE xyz (id int);\n", "literal", 27, 1), +# ("{% down %}", "block_mid", 59, 1, "down"), +# ("\n DROP TABLE xyz;\n", "literal", 69, 1), +# ("{% end %}", "block_end", 90, 1, "end"), +# ], +# DerivedJinjaAnalyzer, +# ), +# ], +# ) +# def test__templater_jinja_slice_template(test, result, analyzer_class): +# """Test _slice_template.""" +# templater = JinjaTemplater() +# 
env, _, render_func = templater.construct_render_func() +# +# analyzer = analyzer_class(test, env) +# analyzer.analyze(render_func=render_func) +# resp = analyzer.raw_sliced +# # check contiguous (unless there's a comment in it) +# if "{#" not in test: +# assert "".join(elem.raw for elem in resp) == test +# # check indices +# idx = 0 +# for raw_slice in resp: +# assert raw_slice.source_idx == idx +# idx += len(raw_slice.raw) +# # Check total result +# assert resp == [RawFileSlice(*args) for args in result] +# +# +# class DBMigrationExtension(Extension): +# """Example of a hypothetical custom Jinja extension. +# +# This extension might ostensibly be used to represent up/down database migrations. +# """ +# +# tags = {"up"} +# +# def parse(self, parser: Parser) -> Union[Node, List[Node]]: +# """Parse the up/down blocks.""" +# # {% up 'migration name' %} +# next(parser.stream) # skip the "up" token +# parser.parse_expression() # skip the name of this migration +# up_body = parser.parse_statements(("name:down",)) +# # {% down %} +# next(parser.stream) # skip the "down" token +# down_body = parser.parse_statements(("name:end",)) +# # {% end %} +# next(parser.stream) +# +# # This is just a test, so output the blocks verbatim one after the other: +# return [nodes.Scope(up_body), nodes.Scope(down_body)] +# +# +# class DerivedJinjaTemplater(JinjaTemplater): +# """A templater that includes some custom Jinja tags. +# +# This is used for tests that show the templater can be extended for custom plugin +# templaters that support custom tags. +# """ +# +# name = "derivedtemplater" +# +# def _get_jinja_env(self, config=None): +# env = super()._get_jinja_env(config) +# env.add_extension(DBMigrationExtension) +# return env +# +# def _get_jinja_analyzer(self, raw_str: str, env: Environment) -> JinjaAnalyzer: +# return DerivedJinjaAnalyzer(raw_str, env) +# +# +# def _statement(*args, **kwargs): +# # NOTE: The standard dbt statement() call returns nothing. 
+# return "" +# +# +# def _load_result(*args, **kwargs): +# return "_load_result" +# +# +# @pytest.mark.parametrize( +# "raw_file,override_context,result,templater_class", +# [ +# ("", None, [], JinjaTemplater), +# ( +# "foo", +# None, +# [("literal", slice(0, 3, None), slice(0, 3, None))], +# JinjaTemplater, +# ), +# # Example with no loops +# ( +# "SELECT {{blah}}, boo {# comment #} from something", +# dict(blah="foobar"), +# [ +# ("literal", slice(0, 7, None), slice(0, 7, None)), +# ("templated", slice(7, 15, None), slice(7, 13, None)), +# ("literal", slice(15, 21, None), slice(13, 19, None)), +# ("comment", slice(21, 34, None), slice(19, 19, None)), +# ("literal", slice(34, 49, None), slice(19, 34, None)), +# ], +# JinjaTemplater, +# ), +# # Example with loops +# ( +# ( +# "SELECT {# A comment #} {{field}} {% for i in [1, 3, 7]%}, " +# "fld_{{i}}_x{% endfor %} FROM my_schema.{{my_table}} " +# ), +# dict(field="foobar", my_table="barfoo"), +# [ +# ("literal", slice(0, 7, None), slice(0, 7, None)), +# ("comment", slice(7, 22, None), slice(7, 7, None)), +# ("literal", slice(22, 23, None), slice(7, 8, None)), +# ("templated", slice(23, 32, None), slice(8, 14, None)), +# ("literal", slice(32, 33, None), slice(14, 15, None)), +# ("block_start", slice(33, 56, None), slice(15, 15, None)), +# ("literal", slice(56, 62, None), slice(15, 21, None)), +# ("templated", slice(62, 67, None), slice(21, 22, None)), +# ("literal", slice(67, 69, None), slice(22, 24, None)), +# ("block_end", slice(69, 81, None), slice(24, 24, None)), +# ("literal", slice(56, 62, None), slice(24, 30, None)), +# ("templated", slice(62, 67, None), slice(30, 31, None)), +# ("literal", slice(67, 69, None), slice(31, 33, None)), +# ("block_end", slice(69, 81, None), slice(33, 33, None)), +# ("literal", slice(56, 62, None), slice(33, 39, None)), +# ("templated", slice(62, 67, None), slice(39, 40, None)), +# ("literal", slice(67, 69, None), slice(40, 42, None)), +# ("block_end", slice(69, 81, None), 
slice(42, 42, None)), +# ("literal", slice(81, 97, None), slice(42, 58, None)), +# ("templated", slice(97, 109, None), slice(58, 64, None)), +# ("literal", slice(109, 110, None), slice(64, 65, None)), +# ], +# JinjaTemplater, +# ), +# # Example with loops (and utilising the end slice code) +# ( +# ( +# "SELECT {# A comment #} {{field}} {% for i in [1, 3, 7]%}, " +# "fld_{{i}}{% endfor %} FROM my_schema.{{my_table}} " +# ), +# dict(field="foobar", my_table="barfoo"), +# [ +# ("literal", slice(0, 7, None), slice(0, 7, None)), +# ("comment", slice(7, 22, None), slice(7, 7, None)), +# ("literal", slice(22, 23, None), slice(7, 8, None)), +# ("templated", slice(23, 32, None), slice(8, 14, None)), +# ("literal", slice(32, 33, None), slice(14, 15, None)), +# ("block_start", slice(33, 56, None), slice(15, 15, None)), +# ("literal", slice(56, 62, None), slice(15, 21, None)), +# ("templated", slice(62, 67, None), slice(21, 22, None)), +# ("block_end", slice(67, 79, None), slice(22, 22, None)), +# ("literal", slice(56, 62, None), slice(22, 28, None)), +# ("templated", slice(62, 67, None), slice(28, 29, None)), +# ("block_end", slice(67, 79, None), slice(29, 29, None)), +# ("literal", slice(56, 62, None), slice(29, 35, None)), +# ("templated", slice(62, 67, None), slice(35, 36, None)), +# ("block_end", slice(67, 79, None), slice(36, 36, None)), +# ("literal", slice(79, 95, None), slice(36, 52, None)), +# ("templated", slice(95, 107, None), slice(52, 58, None)), +# ("literal", slice(107, 108, None), slice(58, 59, None)), +# ], +# JinjaTemplater, +# ), +# # Test a trailing split, and some variables which don't refer anything. 
+# ( +# "{{ config(materialized='view') }}\n\nSELECT 1 FROM {{ source('finance', " +# "'reconciled_cash_facts') }}\n\n", +# dict( +# config=lambda *args, **kwargs: "", +# source=lambda *args, **kwargs: "finance_reconciled_cash_facts", +# ), +# [ +# ("templated", slice(0, 33, None), slice(0, 0, None)), +# ("literal", slice(33, 49, None), slice(0, 16, None)), +# ("templated", slice(49, 97, None), slice(16, 45, None)), +# ("literal", slice(97, 99, None), slice(45, 47, None)), +# ], +# JinjaTemplater, +# ), +# # Test splitting with a loop. +# ( +# "SELECT\n " +# "{% for i in [1, 2, 3] %}\n , " +# "c_{{i}}+42 AS the_meaning_of_li{{ 'f' * i }}\n " +# "{% endfor %}\n" +# "FROM my_table", +# None, +# [ +# ("literal", slice(0, 11, None), slice(0, 11, None)), +# ("block_start", slice(11, 35, None), slice(11, 11, None)), +# ("literal", slice(35, 48, None), slice(11, 24, None)), +# ("templated", slice(48, 53, None), slice(24, 25, None)), +# ("literal", slice(53, 77, None), slice(25, 49, None)), +# ("templated", slice(77, 90, None), slice(49, 50, None)), +# ("literal", slice(90, 95, None), slice(50, 55, None)), +# ("block_end", slice(95, 107, None), slice(55, 55, None)), +# ("literal", slice(35, 48, None), slice(55, 68, None)), +# ("templated", slice(48, 53, None), slice(68, 69, None)), +# ("literal", slice(53, 77, None), slice(69, 93, None)), +# ("templated", slice(77, 90, None), slice(93, 95, None)), +# ("literal", slice(90, 95, None), slice(95, 100, None)), +# ("block_end", slice(95, 107, None), slice(100, 100, None)), +# ("literal", slice(35, 48, None), slice(100, 113, None)), +# ("templated", slice(48, 53, None), slice(113, 114, None)), +# ("literal", slice(53, 77, None), slice(114, 138, None)), +# ("templated", slice(77, 90, None), slice(138, 141, None)), +# ("literal", slice(90, 95, None), slice(141, 146, None)), +# ("block_end", slice(95, 107, None), slice(146, 146, None)), +# ("literal", slice(107, 121, None), slice(146, 160, None)), +# ], +# JinjaTemplater, +# ), +# # 
Test an example where a block is removed entirely. +# ( +# "{% set thing %}FOO{% endset %} SELECT 1", +# None, +# [ +# ("block_start", slice(0, 15, None), slice(0, 0, None)), +# ("literal", slice(15, 18, None), slice(0, 0, None)), +# ("block_end", slice(18, 30, None), slice(0, 0, None)), +# ("literal", slice(30, 39, None), slice(0, 9, None)), +# ], +# JinjaTemplater, +# ), +# ( +# # Tests Jinja "include" directive. +# """{% include 'subdir/include_comment.sql' %} +# +# SELECT 1 +# """, +# None, +# [ +# ("templated", slice(0, 42, None), slice(0, 18, None)), +# ("literal", slice(42, 53, None), slice(18, 29, None)), +# ], +# JinjaTemplater, +# ), +# ( +# # Tests Jinja "import" directive. +# """{% import 'echo.sql' as echo %} +# +# SELECT 1 +# """, +# None, +# [ +# ("templated", slice(0, 31, None), slice(0, 0, None)), +# ("literal", slice(31, 42, None), slice(0, 11, None)), +# ], +# JinjaTemplater, +# ), +# ( +# # Tests Jinja "from import" directive.. +# """{% from 'echo.sql' import echo %} +# {% from 'echoecho.sql' import echoecho %} +# +# SELECT +# {{ echo("foo") }}, +# {{ echoecho("bar") }} +# """, +# None, +# [ +# ("templated", slice(0, 33, None), slice(0, 0, None)), +# ("literal", slice(33, 34, None), slice(0, 1, None)), +# ("templated", slice(34, 75, None), slice(1, 1, None)), +# ("literal", slice(75, 88, None), slice(1, 14, None)), +# ("templated", slice(88, 105, None), slice(14, 19, None)), +# ("literal", slice(105, 111, None), slice(19, 25, None)), +# ("templated", slice(111, 132, None), slice(25, 34, None)), +# ("literal", slice(132, 133, None), slice(34, 35, None)), +# ], +# JinjaTemplater, +# ), +# ( +# # Tests Jinja "do" directive. Should be treated as a +# # templated instead of block - issue 4603. 
+# """{% do true %} +# +# {% if true %} +# select 1 +# {% endif %}""", +# None, +# [ +# ("templated", slice(0, 13, None), slice(0, 0, None)), +# ("literal", slice(13, 15, None), slice(0, 2, None)), +# ("block_start", slice(15, 28, None), slice(2, 2, None)), +# ("literal", slice(28, 42, None), slice(2, 16, None)), +# ("block_end", slice(42, 53, None), slice(16, 16, None)), +# ], +# JinjaTemplater, +# ), +# ( +# # Tests issue 2541, a bug where the {%- endfor %} was causing +# # IndexError: list index out of range. +# """{% for x in ['A', 'B'] %} +# {% if x != 'A' %} +# SELECT 'E' +# {% endif %} +# {%- endfor %} +# """, +# None, +# [ +# ("block_start", slice(0, 25, None), slice(0, 0, None)), +# ("literal", slice(25, 30, None), slice(0, 5, None)), +# ("block_start", slice(30, 47, None), slice(5, 5, None)), +# ("block_end", slice(67, 78, None), slice(5, 5, None)), +# ("literal", slice(78, 79, None), slice(5, 5, None)), +# ("block_end", slice(79, 92, None), slice(5, 5, None)), +# ("literal", slice(25, 30, None), slice(5, 10, None)), +# ("block_start", slice(30, 47, None), slice(10, 10, None)), +# ("literal", slice(47, 67, None), slice(10, 30, None)), +# ("block_end", slice(67, 78, None), slice(30, 30, None)), +# ("literal", slice(78, 79, None), slice(30, 30, None)), +# ("block_end", slice(79, 92, None), slice(30, 30, None)), +# ("literal", slice(92, 93, None), slice(30, 31, None)), +# ], +# JinjaTemplater, +# ), +# ( +# # Similar to the test above for issue 2541, but it's even trickier: +# # whitespace control everywhere and NO NEWLINES or other characters +# # between Jinja segments. In order to get a thorough-enough trace, +# # JinjaTracer has to build the alternate template with whitespace +# # control removed, as this increases the amount of trace output. 
+# "{%- for x in ['A', 'B'] -%}" +# "{%- if x == 'B' -%}" +# "SELECT 'B';" +# "{%- endif -%}" +# "{%- if x == 'A' -%}" +# "SELECT 'A';" +# "{%- endif -%}" +# "{%- endfor -%}", +# None, +# [ +# ("block_start", slice(0, 27, None), slice(0, 0, None)), +# ("block_start", slice(27, 46, None), slice(0, 0, None)), +# ("block_end", slice(57, 70, None), slice(0, 0, None)), +# ("block_start", slice(70, 89, None), slice(0, 0, None)), +# ("literal", slice(89, 100, None), slice(0, 11, None)), +# ("block_end", slice(100, 113, None), slice(11, 11, None)), +# ("block_end", slice(113, 127, None), slice(11, 11, None)), +# ("block_start", slice(27, 46, None), slice(11, 11, None)), +# ("literal", slice(46, 57, None), slice(11, 22, None)), +# ("block_end", slice(57, 70, None), slice(22, 22, None)), +# ("block_start", slice(70, 89, None), slice(22, 22, None)), +# ("block_end", slice(100, 113, None), slice(22, 22, None)), +# ("block_end", slice(113, 127, None), slice(22, 22, None)), +# ], +# JinjaTemplater, +# ), +# ( +# # Test for issue 2786. Also lots of whitespace control. In this +# # case, removing whitespace control alone wasn't enough. In order +# # to get a good trace, JinjaTracer had to be updated so the +# # alternate template included output for the discarded whitespace. 
+# """select +# id, +# {%- for features in ["value4", "value5"] %} +# {%- if features in ["value7"] %} +# {{features}} +# {%- if not loop.last -%},{% endif %} +# {%- else -%} +# {{features}} +# {%- if not loop.last -%},{% endif %} +# {%- endif -%} +# {%- endfor %} +# from my_table +# """, +# None, +# [ +# ("literal", slice(0, 14, None), slice(0, 14, None)), +# ("literal", slice(14, 19, None), slice(14, 14, None)), +# ("block_start", slice(19, 62, None), slice(14, 14, None)), +# ("literal", slice(62, 71, None), slice(14, 14, None)), +# ("block_start", slice(71, 103, None), slice(14, 14, None)), +# ("block_mid", slice(186, 198, None), slice(14, 14, None)), +# ("literal", slice(198, 211, None), slice(14, 14, None)), +# ("templated", slice(211, 223, None), slice(14, 20, None)), +# ("literal", slice(223, 236, None), slice(20, 20, None)), +# ("block_start", slice(236, 260, None), slice(20, 20, None)), +# ("literal", slice(260, 261, None), slice(20, 21, None)), +# ("block_end", slice(261, 272, None), slice(21, 21, None)), +# ("literal", slice(272, 281, None), slice(21, 21, None)), +# ("block_end", slice(281, 294, None), slice(21, 21, None)), +# ("literal", slice(294, 299, None), slice(21, 21, None)), +# ("block_end", slice(299, 312, None), slice(21, 21, None)), +# ("literal", slice(62, 71, None), slice(21, 21, None)), +# ("block_start", slice(71, 103, None), slice(21, 21, None)), +# ("block_mid", slice(186, 198, None), slice(21, 21, None)), +# ("literal", slice(198, 211, None), slice(21, 21, None)), +# ("templated", slice(211, 223, None), slice(21, 27, None)), +# ("literal", slice(223, 236, None), slice(27, 27, None)), +# ("block_start", slice(236, 260, None), slice(27, 27, None)), +# ("block_end", slice(261, 272, None), slice(27, 27, None)), +# ("literal", slice(272, 281, None), slice(27, 27, None)), +# ("block_end", slice(281, 294, None), slice(27, 27, None)), +# ("literal", slice(294, 299, None), slice(27, 27, None)), +# ("block_end", slice(299, 312, None), slice(27, 
27, None)), +# ("literal", slice(312, 327, None), slice(27, 42, None)), +# ], +# JinjaTemplater, +# ), +# ( +# # Test for issue 2835. There's no space between "col" and "=". +# # Also tests for issue 3750 that self contained set statements +# # are parsed as "templated" and not "block_start". +# """{% set col= "col1" %} +# SELECT {{ col }} +# """, +# None, +# [ +# ("templated", slice(0, 21, None), slice(0, 0, None)), +# ("literal", slice(21, 29, None), slice(0, 8, None)), +# ("templated", slice(29, 38, None), slice(8, 12, None)), +# ("literal", slice(38, 39, None), slice(12, 13, None)), +# ], +# JinjaTemplater, +# ), +# ( +# # Another test for issue 2835. The {% for %} loop inside the +# # {% set %} caused JinjaTracer to think the {% set %} ended +# # at the {% endfor %} +# """{% set some_part_of_the_query %} +# {% for col in ["col1"] %} +# {{col}} +# {% endfor %} +# {% endset %} +# +# SELECT {{some_part_of_the_query}} +# FROM SOME_TABLE +# """, +# None, +# [ +# ("block_start", slice(0, 32, None), slice(0, 0, None)), +# ("literal", slice(32, 37, None), slice(0, 0, None)), +# ("block_start", slice(37, 62, None), slice(0, 0, None)), +# ("literal", slice(62, 67, None), slice(0, 0, None)), +# ("templated", slice(67, 74, None), slice(0, 0, None)), +# ("literal", slice(74, 79, None), slice(0, 0, None)), +# ("block_end", slice(79, 91, None), slice(0, 0, None)), +# ("literal", slice(91, 92, None), slice(0, 0, None)), +# ("block_end", slice(92, 104, None), slice(0, 0, None)), +# ("literal", slice(104, 113, None), slice(0, 9, None)), +# ("templated", slice(113, 139, None), slice(9, 29, None)), +# ("literal", slice(139, 156, None), slice(29, 46, None)), +# ], +# JinjaTemplater, +# ), +# ( +# # Third test for issue 2835. This was the original SQL provided in +# # the issue report. +# # Also tests for issue 3750 that self contained set statements +# # are parsed as "templated" and not "block_start". 
+# """{% set whitelisted= [ +# {'name': 'COL_1'}, +# {'name': 'COL_2'}, +# {'name': 'COL_3'} +# ] %} +# +# {% set some_part_of_the_query %} +# {% for col in whitelisted %} +# {{col.name}}{{ ", " if not loop.last }} +# {% endfor %} +# {% endset %} +# +# SELECT {{some_part_of_the_query}} +# FROM SOME_TABLE +# """, +# None, +# [ +# ("templated", slice(0, 94, None), slice(0, 0, None)), +# ("literal", slice(94, 96, None), slice(0, 2, None)), +# ("block_start", slice(96, 128, None), slice(2, 2, None)), +# ("literal", slice(128, 133, None), slice(2, 2, None)), +# ("block_start", slice(133, 161, None), slice(2, 2, None)), +# ("literal", slice(161, 166, None), slice(2, 2, None)), +# ("templated", slice(166, 178, None), slice(2, 2, None)), +# ("templated", slice(178, 205, None), slice(2, 2, None)), +# ("literal", slice(205, 210, None), slice(2, 2, None)), +# ("block_end", slice(210, 222, None), slice(2, 2, None)), +# ("literal", slice(222, 223, None), slice(2, 2, None)), +# ("block_end", slice(223, 235, None), slice(2, 2, None)), +# ("literal", slice(235, 244, None), slice(2, 11, None)), +# ("templated", slice(244, 270, None), slice(11, 66, None)), +# ("literal", slice(270, 287, None), slice(66, 83, None)), +# ], +# JinjaTemplater, +# ), +# ( +# # Test for issue 2822: Handle slicing when there's no newline after +# # the Jinja block end. +# "{% if true %}\nSELECT 1 + 1\n{%- endif %}", +# None, +# [ +# ("block_start", slice(0, 13, None), slice(0, 0, None)), +# ("literal", slice(13, 26, None), slice(0, 13, None)), +# ("literal", slice(26, 27, None), slice(13, 13, None)), +# ("block_end", slice(27, 39, None), slice(13, 13, None)), +# ], +# JinjaTemplater, +# ), +# ( +# # Test for issue 3434: Handle {% block %}. 
+# "SELECT {% block table_name %}block_contents{% endblock %} " +# "FROM {{ self.table_name() }}\n", +# None, +# [ +# ("literal", slice(0, 7, None), slice(0, 7, None)), +# ("literal", slice(29, 43, None), slice(7, 21, None)), +# ("block_start", slice(7, 29, None), slice(21, 21, None)), +# ("literal", slice(29, 43, None), slice(21, 21, None)), +# ("block_end", slice(43, 57, None), slice(21, 21, None)), +# ("literal", slice(57, 63, None), slice(21, 27, None)), +# ("templated", slice(63, 86, None), slice(27, 27, None)), +# ("literal", slice(29, 43, None), slice(27, 41, None)), +# ("literal", slice(86, 87, None), slice(41, 42, None)), +# ], +# JinjaTemplater, +# ), +# ( +# # Another test for issue 3434: Similar to the first, but uses +# # the block inside a loop. +# """{% block table_name %}block_contents{% endblock %} +# SELECT +# {% for j in [4, 5, 6] %} +# FROM {{ j }}{{ self.table_name() }} +# {% endfor %} +# """, +# None, +# [ +# ("literal", slice(22, 36, None), slice(0, 14, None)), +# ("block_start", slice(0, 22, None), slice(14, 14, None)), +# ("literal", slice(22, 36, None), slice(14, 14, None)), +# ("block_end", slice(36, 50, None), slice(14, 14, None)), +# ("literal", slice(50, 58, None), slice(14, 22, None)), +# ("block_start", slice(58, 82, None), slice(22, 22, None)), +# ("literal", slice(82, 88, None), slice(22, 28, None)), +# ("templated", slice(88, 95, None), slice(28, 29, None)), +# ("templated", slice(95, 118, None), slice(29, 29, None)), +# ("literal", slice(22, 36, None), slice(29, 43, None)), +# ("literal", slice(118, 119, None), slice(43, 44, None)), +# ("block_end", slice(119, 131, None), slice(44, 44, None)), +# ("literal", slice(82, 88, None), slice(44, 50, None)), +# ("templated", slice(88, 95, None), slice(50, 51, None)), +# ("templated", slice(95, 118, None), slice(51, 51, None)), +# ("literal", slice(22, 36, None), slice(51, 65, None)), +# ("literal", slice(118, 119, None), slice(65, 66, None)), +# ("block_end", slice(119, 131, None), 
slice(66, 66, None)), +# ("literal", slice(82, 88, None), slice(66, 72, None)), +# ("templated", slice(88, 95, None), slice(72, 73, None)), +# ("templated", slice(95, 118, None), slice(73, 73, None)), +# ("literal", slice(22, 36, None), slice(73, 87, None)), +# ("literal", slice(118, 119, None), slice(87, 88, None)), +# ("block_end", slice(119, 131, None), slice(88, 88, None)), +# ("literal", slice(131, 132, None), slice(88, 89, None)), +# ], +# JinjaTemplater, +# ), +# ( +# "{{ statement('variables', fetch_result=true) }}\n", +# dict( +# statement=_statement, +# load_result=_load_result, +# ), +# [ +# ("templated", slice(0, 47, None), slice(0, 0, None)), +# ("literal", slice(47, 48, None), slice(0, 1, None)), +# ], +# JinjaTemplater, +# ), +# ( +# "{% call statement('variables', fetch_result=true) %}\n" +# "select 1 as test\n" +# "{% endcall %}\n" +# "select 2 as foo\n", +# dict( +# statement=_statement, +# load_result=_load_result, +# ), +# [ +# ("block_start", slice(0, 52, None), slice(0, 0, None)), +# ("literal", slice(52, 70, None), slice(0, 0, None)), +# ("block_end", slice(70, 83, None), slice(0, 0, None)), +# ("literal", slice(83, 100, None), slice(0, 17, None)), +# ], +# JinjaTemplater, +# ), +# ( +# JINJA_MACRO_CALL_SQL, +# None, +# [ +# # First all of this is the call block. +# ("block_start", slice(0, 30, None), slice(0, 0, None)), +# ("literal", slice(30, 34, None), slice(0, 0, None)), +# ("templated", slice(34, 45, None), slice(0, 0, None)), +# ("literal", slice(45, 55, None), slice(0, 0, None)), +# ("templated", slice(55, 69, None), slice(0, 0, None)), +# ("literal", slice(69, 70, None), slice(0, 0, None)), +# ("block_end", slice(70, 84, None), slice(0, 0, None)), +# # Then the actual query. +# ("literal", slice(84, 96, None), slice(0, 12, None)), +# # The block_start (call) contains the actual content. 
+# ("block_start", slice(96, 125, None), slice(12, 47, None)), +# # The middle and end of the call, have zero length in the template +# ("literal", slice(125, 142, None), slice(47, 47, None)), +# ("block_end", slice(142, 155, None), slice(47, 47, None)), +# ("literal", slice(155, 165, None), slice(47, 57, None)), +# ], +# JinjaTemplater, +# ), +# ( +# # Simple test of a derived templater with custom tags +# """{% up 'create table xyz' %} +# CREATE TABLE xyz (id int); +# {% down %} +# DROP TABLE xyz; +# {% end %}""", +# None, +# [ +# ("block_start", slice(0, 27, None), slice(0, 0, None)), +# ("literal", slice(27, 59, None), slice(0, 32, None)), +# ("block_mid", slice(59, 69, None), slice(32, 32, None)), +# ("literal", slice(69, 90, None), slice(32, 53, None)), +# ("block_end", slice(90, 99, None), slice(53, 53, None)), +# ], +# DerivedJinjaTemplater, +# ), +# ( +# # test for issue 6121: The first rendered element +# # inside the loop is far from the start position of the loop. +# """ +# {% for i in range(2) %}{% set a = 0 %}{% set b = 0 %}{% set c = 0 %} +# SELECT 1; +# {% endfor %} +# """, +# None, +# [ +# ("literal", slice(0, 1, None), slice(0, 1, None)), +# ("block_start", slice(1, 24, None), slice(1, 1, None)), +# ("templated", slice(24, 39, None), slice(1, 1, None)), +# ("templated", slice(39, 54, None), slice(1, 1, None)), +# ("templated", slice(54, 69, None), slice(1, 1, None)), +# ("literal", slice(69, 80, None), slice(1, 12, None)), +# ("block_end", slice(80, 92, None), slice(12, 12, None)), +# ("templated", slice(24, 39, None), slice(12, 12, None)), +# ("templated", slice(39, 54, None), slice(12, 12, None)), +# ("templated", slice(54, 69, None), slice(12, 12, None)), +# ("literal", slice(69, 80, None), slice(12, 23, None)), +# ("block_end", slice(80, 92, None), slice(23, 23, None)), +# ("literal", slice(92, 93, None), slice(23, 24, None)), +# ], +# JinjaTemplater, +# ), +# ], +# ) +# def test__templater_jinja_slice_file( +# raw_file, override_context, 
result, templater_class, caplog +# ): +# """Test slice_file.""" +# templater = templater_class(override_context=override_context) +# _, _, render_func = templater.construct_render_func( +# config=FluffConfig.from_path( +# "test/fixtures/templater/jinja_slice_template_macros" +# ) +# ) +# +# with caplog.at_level(logging.DEBUG, logger="sqlfluff.templater"): +# raw_sliced, sliced_file, templated_str = templater.slice_file( +# raw_file, render_func=render_func +# ) +# # Create a TemplatedFile from the results. This runs some useful sanity +# # checks. +# _ = TemplatedFile(raw_file, "<>", templated_str, sliced_file, raw_sliced) +# # Check contiguous on the TEMPLATED VERSION +# print(sliced_file) +# prev_slice = None +# for elem in sliced_file: +# print(elem) +# if prev_slice: +# assert elem[2].start == prev_slice.stop +# prev_slice = elem[2] +# # Check that all literal segments have a raw slice +# for elem in sliced_file: +# if elem[0] == "literal": +# assert elem[1] is not None +# # check result +# actual = [ +# ( +# templated_file_slice.slice_type, +# templated_file_slice.source_slice, +# templated_file_slice.templated_slice, +# ) +# for templated_file_slice in sliced_file +# ] +# assert actual == result +# +# +# def test__templater_jinja_large_file_check(): +# """Test large file skipping. +# +# The check is separately called on each .process() method +# so it makes sense to test a few templaters. +# """ +# # First check we can process the file normally without specific config. +# # i.e. check the defaults work and the default is high. +# JinjaTemplater().process( +# in_str="SELECT 1", +# fname="", +# config=FluffConfig(overrides={"dialect": "ansi"}), +# ) +# # Second check setting the value low disables the check +# JinjaTemplater().process( +# in_str="SELECT 1", +# fname="", +# config=FluffConfig( +# overrides={"dialect": "ansi", "large_file_skip_char_limit": 0} +# ), +# ) +# # Finally check we raise a skip exception when config is set low. 
+# with pytest.raises(SQLFluffSkipFile) as excinfo: +# JinjaTemplater().process( +# in_str="SELECT 1", +# fname="", +# config=FluffConfig( +# overrides={"dialect": "ansi", "large_file_skip_char_limit": 2}, +# ), +# ) +# +# assert "Length of file" in str(excinfo.value) +# +# +# @pytest.mark.parametrize( +# "in_str, ignore, expected_violation", +# [ +# ( +# """WITH a AS ({{ b(c=d, e=f) }}) SELECT * FROM final""", +# "", +# SQLTemplaterError("Undefined jinja template variable: 'b'"), +# ), +# ("""WITH a AS ({{ b(c=d, e=f) }}) SELECT * FROM final""", "templating", None), +# ( +# # https://github.com/sqlfluff/sqlfluff/issues/6360 +# """{% for tbl in tbl_list %}SELECT a FROM {{ tbl }};{% endfor %}""", +# "", +# SQLTemplaterError("Undefined jinja template variable: 'tbl_list'"), +# ), +# ( +# """SELECT a FROM {{ tbl['name'] }};""", +# "", +# SQLTemplaterError("Undefined jinja template variable: 'tbl'"), +# ), +# ], +# ) +# def test_jinja_undefined_callable(in_str, ignore, expected_violation): +# """Test undefined callable returns TemplatedFile and sensible error.""" +# templater = JinjaTemplater() +# templated_file, violations = templater.process( +# in_str=in_str, +# fname="test.sql", +# config=FluffConfig(overrides={"dialect": "ansi", "ignore": ignore}), +# ) +# # This was previously failing to process, due to UndefinedRecorder not +# # supporting __call__(), also Jinja thinking it was not *safe* to call. +# assert templated_file is not None +# if expected_violation: +# assert len(violations) == 1 +# isinstance(violations[0], type(expected_violation)) +# assert str(violations[0]) == str(expected_violation) +# else: +# assert len(violations) == 0 +# +# +# def test_dummy_undefined_fail_with_undefined_error(): +# """Tests that a recursion error bug no longer occurs.""" +# ud = DummyUndefined("name") +# with pytest.raises(UndefinedError): +# # This was previously causing a recursion error. 
+# ud._fail_with_undefined_error() +# +# +# def test_undefined_magic_methods(): +# """Test all the magic methods defined on DummyUndefined.""" +# ud = DummyUndefined("name") +# +# # _self_impl +# assert ud + ud is ud +# assert ud - ud is ud +# assert ud / ud is ud +# assert ud // ud is ud +# assert ud % ud is ud +# assert ud**ud is ud +# assert +ud is ud +# assert -ud is ud +# assert ud << ud is ud +# assert ud[ud] is ud +# assert ~ud is ud +# assert ud(ud) is ud +# +# # _bool_impl +# assert ud and ud +# assert ud or ud +# assert ud ^ ud +# assert bool(ud) +# assert ud < ud +# assert ud <= ud +# assert ud == ud +# assert ud != ud +# assert ud >= ud +# assert ud > ud +# +# assert ud + ud is ud +# +# +# @pytest.mark.parametrize( +# "sql_path, expected_renderings", +# [ +# pytest.param( +# "simple_if_true.sql", +# [ +# "\nSELECT 1\n\n", +# "\nSELECT 2\n\n", +# ], +# id="simple_if_true", +# ), +# pytest.param( +# "simple_if_false.sql", +# [ +# "\nSELECT 2\n\n", +# "\nSELECT 1\n\n", +# ], +# id="simple_if_false", +# ), +# pytest.param( +# "if_elif_else.sql", +# [ +# "\nSELECT 1\n\n", +# "\nSELECT 2\n\n", +# "\nSELECT 3\n\n", +# ], +# id="if_elif_else", +# ), +# pytest.param( +# "if_else_if_nested.sql", +# [ +# "\nSELECT 1\n\n", +# "\n\nSELECT 2\n\n\n", +# "\n\nSELECT 3\n\n\n", +# ], +# id="if_else_if_nested", +# ), +# # This test case exercises the scoring function. Generates up to 10 +# # variants, but only the top 5 are returned. +# pytest.param( +# "if_elif_else_chain_scoring.sql", +# [ +# "\nSELECT 1\n\n", +# "\nSELECT 100000000\n\n", +# "\nSELECT 10000000\n\n", +# "\nSELECT 1000000\n\n", +# "\nSELECT 100000\n\n", +# "\nSELECT 10000\n\n", +# ], +# id="if_elif_else_chain_scoring", +# ), +# # This test case results in a TypeError executing the variant. This +# # should be ignored, and only the primary should be returned. 
+# pytest.param( +# "if_true_elif_type_error_else.sql", +# [ +# "\nSELECT 1\n\n", +# "\nSELECT 2\n\n", +# ], +# id="if_true_elif_type_error_else", +# ), +# # https://github.com/sqlfluff/sqlfluff/issues/5803 +# pytest.param( +# "inline_select.sql", +# [ +# "select 2\n", +# "select 1\n", +# ], +# id="inline_select", +# ), +# ], +# ) +# def test__templater_lint_unreached_code(sql_path: str, expected_renderings): +# """Test that Jinja templater slices raw and templated file correctly.""" +# test_dir = Path("test/fixtures/templater/jinja_lint_unreached_code") +# t = JinjaTemplater() +# renderings = [] +# raw_slicings = [] +# final_source_slices = [] +# for templated_file, _ in t.process_with_variants( +# in_str=(test_dir / sql_path).read_text(), +# fname=str(sql_path), +# config=FluffConfig.from_path(str(test_dir)), +# ): +# renderings.append(templated_file.templated_str) +# raw_slicings.append(templated_file.raw_sliced) +# # Capture the final slice for all of them. +# final_source_slices.append(templated_file.sliced_file[-1].source_slice) +# assert renderings == expected_renderings +# # Compare all of the additional raw slicings to make sure they're the +# # same as the root. +# root_slicing = raw_slicings[0] +# for additional_slicing in raw_slicings[1:]: +# assert additional_slicing == root_slicing +# # Check that the final source slices also line up in the templated files. +# # NOTE: Clearly the `templated_slice` values _won't_ be the same. +# # We're doing the _final_ slice, because it's very likely to be the same +# # _type_ and if it's in the right place, we can assume that all of the +# # others probably are. 
+# root_final_slice = final_source_slices[0] +# for additional_final_slice in final_source_slices[1:]: +# assert additional_final_slice == root_final_slice diff --git a/crates/lib/src/templaters/jinja_templater_tracers.py b/crates/lib/src/templaters/jinja_templater_tracers.py new file mode 100644 index 000000000..b18a5d336 --- /dev/null +++ b/crates/lib/src/templaters/jinja_templater_tracers.py @@ -0,0 +1,905 @@ +"""'Trace' Jinja template execution to map output back to the raw template. + +This is a newer slicing algorithm that handles cases heuristic.py does not. +""" + +# Import annotations for py 3.7 to allow `regex.Match[str]` +from __future__ import annotations + +import logging +from dataclasses import dataclass, field +from typing import ( + Callable, + ClassVar, + Dict, + List, + NamedTuple, + Optional, + Tuple, + Union, + cast, +) + +import regex +from jinja2 import Environment +from jinja2.exceptions import TemplateSyntaxError + +from templaters.python_templater import RawFileSlice, TemplatedFileSlice + +# Instantiate the templater logger +templater_logger = logging.getLogger("sqlfluff.templater") + + +class JinjaTrace(NamedTuple): + """Returned by JinjaTracer.trace().""" + + # Template output + templated_str: str + # Raw (i.e. before rendering) Jinja template sliced into tokens + raw_sliced: List[RawFileSlice] + # Rendered Jinja template (i.e. 
output) mapped back to raw_str source + sliced_file: List[TemplatedFileSlice] + + +@dataclass +class RawSliceInfo: + """JinjaTracer-specific info about each RawFileSlice.""" + + unique_alternate_id: Optional[str] + alternate_code: Optional[str] + next_slice_indices: List[int] = field(default_factory=list) + inside_block: bool = field(default=False) # {% block %} + + +class JinjaTracer: + """Records execution path of a Jinja template.""" + + def __init__( + self, + raw_str: str, + raw_sliced: List[RawFileSlice], + raw_slice_info: Dict[RawFileSlice, RawSliceInfo], + sliced_file: List[TemplatedFileSlice], + render_func: Callable[[str], str], + ): + # Input + self.raw_str = raw_str + self.raw_sliced = raw_sliced + self.raw_slice_info = raw_slice_info + self.sliced_file = sliced_file + self.render_func = render_func + + # Internal bookkeeping + self.program_counter: int = 0 + self.source_idx: int = 0 + + def trace( + self, + append_to_templated: str = "", + ) -> JinjaTrace: + """Executes raw_str. Returns template output and trace.""" + trace_template_str = "".join( + ( + cast(str, self.raw_slice_info[rs].alternate_code) + if self.raw_slice_info[rs].alternate_code is not None + else rs.raw + ) + for rs in self.raw_sliced + ) + trace_template_output = self.render_func(trace_template_str) + # Split output by section. Each section has two possible formats. + trace_entries: List[regex.Match[str]] = list( + regex.finditer(r"\0", trace_template_output) + ) + # If the file has no templated entries, we should just iterate + # through the raw slices to add all the placeholders.
+ if not trace_entries: + for raw_idx, _ in enumerate(self.raw_sliced): + self.record_trace(0, raw_idx) + + for match_idx, match in enumerate(trace_entries): + pos1 = match.span()[0] + try: + pos2 = trace_entries[match_idx + 1].span()[0] + except IndexError: + pos2 = len(trace_template_output) + p = trace_template_output[pos1 + 1 : pos2] + m_id = regex.match(r"^([0-9a-f]+)(_(\d+))?", p) + if not m_id: + raise ValueError( # pragma: no cover + "Internal error. Trace template output does not match expected " + "format." + ) + if m_id.group(3): + # E.g. "00000000000000000000000000000001_83". The number after + # "_" is the length (in characters) of a corresponding literal + # in raw_str. + alt_id, slice_length = m_id.group(1), int(m_id.group(3)) + else: + # E.g. "00000000000000000000000000000002 a < 10". The characters + # after the slice ID are executable code from raw_str. + alt_id, slice_length = m_id.group(0), len(p[len(m_id.group(0)) + 1 :]) + + target_slice_idx = self.find_slice_index(alt_id) + target_inside_block = self.raw_slice_info[ + self.raw_sliced[target_slice_idx] + ].inside_block + if not target_inside_block: + # Normal case: Walk through the template. + self.move_to_slice(target_slice_idx, slice_length) + else: + # {% block %} executes code elsewhere in the template but does + # not move there. It's a bit like macro invocation. + self.record_trace(slice_length, target_slice_idx) + + # TRICKY: The 'append_to_templated' parameter is only used by the dbt + # templater, passing "\n" for this parameter if we need to add one back. + # (The Jinja templater does not pass this parameter, so + # 'append_to_templated' gets the default value of "", empty string.) + # For more detail, see the comments near the call to slice_file() in + # plugins/sqlfluff-templater-dbt/sqlfluff_templater_dbt/templater.py. 
+ templated_str = self.render_func(self.raw_str) + append_to_templated + return JinjaTrace(templated_str, self.raw_sliced, self.sliced_file) + + def find_slice_index(self, slice_identifier: Union[int, str]) -> int: + """Given a slice identifier, return its index. + + A slice identifier is a string like 00000000000000000000000000000002. + """ + raw_slices_search_result = [ + idx + for idx, rs in enumerate(self.raw_sliced) + if self.raw_slice_info[rs].unique_alternate_id == slice_identifier + ] + if len(raw_slices_search_result) != 1: + raise ValueError( # pragma: no cover + f"Internal error. Unable to locate slice for {slice_identifier}." + ) + return raw_slices_search_result[0] + + def move_to_slice( + self, + target_slice_idx: int, + target_slice_length: int, + ) -> Dict[int, List[int]]: + """Given a template location, walk execution to that point. + + This updates the internal `program_counter` to the appropriate + location. + + Returns: + :obj:`dict`: For each step in the template, a :obj:`list` of + which steps are accessible. In many cases each step will + only have one accessible next step (the following one), + however for branches in the program there may be more than + one. + """ + step_candidates = {} + while self.program_counter < len(self.raw_sliced): + self.record_trace( + target_slice_length if self.program_counter == target_slice_idx else 0 + ) + current_raw_slice = self.raw_sliced[self.program_counter] + if self.program_counter == target_slice_idx: + # Reached the target slice. Go to next location and stop. + self.program_counter += 1 + break + + # Choose the next step. + # We could simply go to the next slice (sequential execution). + candidates = [self.program_counter + 1] + # If we have other options, consider those. + candidates.extend( + filter( + # They're a valid possibility if + # they don't take us past the target. 
+ lambda idx: idx <= target_slice_idx, + self.raw_slice_info[current_raw_slice].next_slice_indices, + ) + ) + # Choose the candidate that takes us closest to the target. + candidates.sort(key=lambda c: abs(target_slice_idx - c)) + # Save all the candidates for each step so we can return them later. + step_candidates[self.program_counter] = candidates + # Step forward to the best step found. + + # https://github.com/sqlfluff/sqlfluff/issues/6121 + next_indices = self.raw_slice_info[current_raw_slice].next_slice_indices + if ( + current_raw_slice.tag == "endfor" # noqa + # Elements of inside_set_macro_or_call have empty next_slice_indices + and next_indices + # The next_slice_indices[0] of the 'endfor' is the first element of the + # loop. If the target is within the current loop, the program_counter + # should move to the first element of this loop. Otherwise, it will + # exit this loop and lose the content rendered in the next iteration + and next_indices[0] <= target_slice_idx < self.program_counter + ): + self.program_counter = next_indices[0] + else: + self.program_counter = candidates[0] + + # Return the candidates at each step. + return step_candidates + + def record_trace( + self, + target_slice_length: int, + slice_idx: Optional[int] = None, + slice_type: Optional[str] = None, + ) -> None: + """Add the specified (default: current) location to the trace. + + Args: + target_slice_length (int): The length of the target slice. + slice_idx (Optional[int], optional): The index of the slice. + Defaults to None. + slice_type (Optional[str], optional): The type of the slice. + Defaults to None.
+ """ + if slice_idx is None: + slice_idx = self.program_counter + if slice_type is None: + slice_type = self.raw_sliced[slice_idx].slice_type + self.sliced_file.append( + TemplatedFileSlice( + slice_type, + slice( + self.raw_sliced[slice_idx].source_idx, + ( + self.raw_sliced[slice_idx + 1].source_idx + if slice_idx + 1 < len(self.raw_sliced) + else len(self.raw_str) + ), + ), + slice(self.source_idx, self.source_idx + target_slice_length), + ) + ) + if target_slice_length: + self.source_idx += target_slice_length + + +@dataclass(frozen=True) +class JinjaTagConfiguration: + """Provides information about a Jinja tag and how it affects JinjaAnalyzer behavior. + + Attributes: + block_type (str): The block type that the Jinja tag maps to; eventually stored + in TemplatedFileSlice.slice_type and RawFileSlice.slice_type. + block_tracking (bool): Whether the Jinja tag should be traced by JinjaTracer. + If True, the Jinja tag will be treated as a conditional block similar to a + "for/endfor" or "if/else/endif" block, and JinjaTracer will track potential + execution path through the block. + block_may_loop (bool): Whether the Jinja tag begins a block that might loop, + similar to a "for" tag. If True, JinjaTracer will track the execution path + through the block and record a potential backward jump to the loop + beginning. 
+ """ + + block_type: str + block_tracking: bool = False + block_may_loop: bool = False + + +class JinjaAnalyzer: + """Analyzes a Jinja template to prepare for tracing.""" + + re_open_tag = regex.compile(r"^\s*({[{%])[\+\-]?\s*") + re_close_tag = regex.compile(r"\s*[\+\-]?([}%]})\s*$") + + def __init__(self, raw_str: str, env: Environment) -> None: + # Input + self.raw_str: str = raw_str + self.env = env + + # Output + self.raw_sliced: List[RawFileSlice] = [] + self.raw_slice_info: Dict[RawFileSlice, RawSliceInfo] = {} + self.sliced_file: List[TemplatedFileSlice] = [] + + # Internal bookkeeping + self.slice_id: int = 0 + # {% set %} or {% macro %} or {% call %} + self.inside_set_macro_or_call: bool = False + self.inside_block = False # {% block %} + self.stack: List[int] = [] + self.idx_raw: int = 0 + + __known_tag_configurations: ClassVar[dict[str, JinjaTagConfiguration]] = { + # Conditional blocks: "if/elif/else/endif" blocks + "if": JinjaTagConfiguration( + block_type="block_start", + block_tracking=True, + ), + "elif": JinjaTagConfiguration( + block_type="block_mid", + block_tracking=True, + ), + # NOTE: "else" is also used in for loops if there are no iterations + "else": JinjaTagConfiguration( + block_type="block_mid", + block_tracking=True, + ), + "endif": JinjaTagConfiguration( + block_type="block_end", + block_tracking=True, + ), + # Conditional blocks: "for" loops + "for": JinjaTagConfiguration( + block_type="block_start", + block_tracking=True, + block_may_loop=True, + ), + "endfor": JinjaTagConfiguration( + block_type="block_end", + block_tracking=True, + ), + # Inclusions and imports + # :TRICKY: Syntactically, the Jinja {% include %} directive looks like + # a block, but its behavior is basically syntactic sugar for + # {{ open("somefile").read() }}. Thus, treat it as templated code. + # It's a similar situation with {% import %} and {% from ... import %}.
+ "include": JinjaTagConfiguration( + block_type="templated", + ), + "import": JinjaTagConfiguration( + block_type="templated", + ), + "from": JinjaTagConfiguration( + block_type="templated", + ), + "extends": JinjaTagConfiguration( + block_type="block_start", + ), + # Macros and macro-like tags + "macro": JinjaTagConfiguration( + block_type="block_start", + ), + "endmacro": JinjaTagConfiguration( + block_type="block_end", + ), + "call": JinjaTagConfiguration( + block_type="block_start", + ), + "endcall": JinjaTagConfiguration( + block_type="block_end", + ), + "set": JinjaTagConfiguration( + block_type="block_start", + ), + "endset": JinjaTagConfiguration( + block_type="block_end", + ), + "block": JinjaTagConfiguration( + block_type="block_start", + ), + "endblock": JinjaTagConfiguration( + block_type="block_end", + ), + "filter": JinjaTagConfiguration( + block_type="block_start", + ), + "endfilter": JinjaTagConfiguration( + block_type="block_end", + ), + # Common extensions + # Expression statement (like {{ ... }} but doesn't actually print anything) + "do": JinjaTagConfiguration( + block_type="templated", + ), + } + + @classmethod + def _get_tag_configuration(cls, tag: str) -> JinjaTagConfiguration: + """Return information about the behaviors of a tag.""" + # Ideally, we should have a known configuration for this Jinja tag. Derived + # classes can override this method to provide additional information about the + # tags they know about. + if tag in cls.__known_tag_configurations: + return cls.__known_tag_configurations[tag] + + # If we don't have a firm configuration for this tag that is most likely + # provided by a Jinja extension, we'll try to make some guesses about it based + # on some heuristics. But there's a decent chance we'll get this wrong, and + # the user should instead consider overriding this method in a derived class to + # handle their tag types. 
+ if tag.startswith("end"): + return JinjaTagConfiguration( + block_type="block_end", + ) + elif tag.startswith("el"): + # else, elif + return JinjaTagConfiguration( + block_type="block_mid", + ) + return JinjaTagConfiguration( + block_type="block_start", + ) + + def _get_jinja_tracer( + self, + raw_str: str, + raw_sliced: List[RawFileSlice], + raw_slice_info: Dict[RawFileSlice, RawSliceInfo], + sliced_file: List[TemplatedFileSlice], + render_func: Callable[[str], str], + ) -> JinjaTracer: + """Creates a new object derived from JinjaTracer. + + Derived classes can provide their own tracers with custom functionality. + """ + return JinjaTracer( + raw_str, raw_sliced, raw_slice_info, sliced_file, render_func + ) + + def next_slice_id(self) -> str: + """Returns a new, unique slice ID.""" + result = "{0:#0{1}x}".format(self.slice_id, 34)[2:] + self.slice_id += 1 + return result + + def slice_info_for_literal(self, length: int, prefix: str = "") -> RawSliceInfo: + """Returns a RawSliceInfo for a literal. + + In the alternate template, literals are replaced with a uniquely + numbered, easy-to-parse literal. JinjaTracer uses this output as + a "breadcrumb trail" to deduce the execution path through the template. + + This is important even if the original literal (i.e. in the raw SQL + file) was empty, as is the case when Jinja whitespace control is used + (e.g. "{%- endif -%}"), because fewer breadcrumbs means JinjaTracer has + to *guess* the path, in which case it assumes simple, straight-line + execution, which can easily be wrong with loops and conditionals. 
+ """ + unique_alternate_id = self.next_slice_id() + alternate_code = f"\0{prefix}{unique_alternate_id}_{length}" + return self.make_raw_slice_info( + unique_alternate_id, alternate_code, inside_block=self.inside_block + ) + + def update_inside_set_call_macro_or_block( + self, + block_type: str, + trimmed_parts: List[str], + m_open: Optional[regex.Match[str]], + m_close: Optional[regex.Match[str]], + tag_contents: List[str], + ) -> Tuple[Optional[RawSliceInfo], str]: + """Based on block tag, update whether in a set/call/macro/block section.""" + if block_type == "block_start" and trimmed_parts[0] in ( + "block", + "call", + "macro", + "set", + ): + # Jinja supports two forms of {% set %}: + # - {% set variable = value %} + # - {% set variable %}value{% endset %} + # https://jinja.palletsprojects.com/en/2.10.x/templates/#block-assignments + # When the second format is used, set one of the fields + # 'inside_set_macro_or_call' or 'inside_block' to True. This info is + # used elsewhere, as other code inside these regions requires + # special handling. (Generally speaking, JinjaAnalyzer ignores + # the contents of these blocks, treating them like opaque templated + # regions.) + try: + # Entering a set/macro block. Build a source string consisting + # of just this one Jinja command and see if it parses. If so, + # it's a standalone command. OTOH, if it fails with "Unexpected + # end of template", it was the opening command for a block. + self.env.from_string( + f"{self.env.block_start_string} {' '.join(trimmed_parts)} " + f"{self.env.block_end_string}" + ) + # Here we should mutate the block type to just templated + # so we don't treat it as a block. + # https://github.com/sqlfluff/sqlfluff/issues/3750 + block_type = "templated" + except TemplateSyntaxError as e: + if ( + isinstance(e.message, str) + and "Unexpected end of template" in e.message + ): + # It was opening a block, thus we're inside a set, macro, or + # block.
+ if trimmed_parts[0] == "block": + self.inside_block = True + else: + result = None + if trimmed_parts[0] == "call": + assert m_open and m_close + result = self.track_call(m_open, m_close, tag_contents) + self.inside_set_macro_or_call = True + return result, block_type + else: + raise # pragma: no cover + elif block_type == "block_end": + if trimmed_parts[0] in ("endcall", "endmacro", "endset"): + # Exiting a set or macro or block. + self.inside_set_macro_or_call = False + elif trimmed_parts[0] == "endblock": + # Exiting a {% block %} block. + self.inside_block = False + return None, block_type + + def make_raw_slice_info( + self, + unique_alternate_id: Optional[str], + alternate_code: Optional[str], + inside_block: bool = False, + ) -> RawSliceInfo: + """Create RawSliceInfo as given, or "empty" if in set/macro block.""" + if not self.inside_set_macro_or_call: + return RawSliceInfo(unique_alternate_id, alternate_code, [], inside_block) + else: + return RawSliceInfo(None, None, [], False) + + # We decide the "kind" of element we're dealing with using its _closing_ + # tag rather than its opening tag. The types here map back to similar types + # of sections in the python slicer. + block_types = { + "variable_end": "templated", + "block_end": "block", + "comment_end": "comment", + # Raw tags should behave like blocks. Note that + # raw_end and raw_begin are whole tags rather + # than blocks and comments where we get partial + # tags. + "raw_end": "block", + "raw_begin": "block", + } + + def analyze(self, render_func: Callable[[str], str]) -> JinjaTracer: + """Slice template in jinja.""" + # str_buff and str_parts are two ways we keep track of tokens received + # from Jinja. str_buff concatenates them together, while str_parts + # accumulates the individual strings. We generally prefer using + # str_parts. That's because Jinja doesn't just split on whitespace, so + # by keeping tokens as Jinja returns them, the code is more robust. 
+ # Consider the following: + # {% set col= "col1" %} + # Note there's no space after col. Jinja splits this up for us. If we + # simply concatenated the parts together and later split on whitespace, + # we'd need some ugly, fragile logic to handle various whitespace + # possibilities: + # {% set col= "col1" %} + # {% set col = "col1" %} + # {% set col ="col1" %} + # By using str_parts and letting Jinja handle this, it just works. + + str_buff = "" + str_parts = [] + + # https://jinja.palletsprojects.com/en/2.11.x/api/#jinja2.Environment.lex + block_idx = 0 + for _, elem_type, raw in self.env.lex(self.raw_str): + if elem_type == "data": + self.track_literal(raw, block_idx) + continue + str_buff += raw + str_parts.append(raw) + + if elem_type.endswith("_begin"): + self.handle_left_whitespace_stripping(raw, block_idx) + + raw_slice_info: RawSliceInfo = self.make_raw_slice_info(None, None) + tag_contents = [] + # raw_end and raw_begin behave a little differently in + # that the whole tag shows up in one go rather than getting + # parts of the tag at a time. 
+ m_open = None + m_close = None + if elem_type.endswith("_end") or elem_type == "raw_begin": + block_type = self.block_types[elem_type] + block_tag = None + # Handle starts and ends of blocks + if block_type in ("block", "templated"): + m_open = self.re_open_tag.search(str_parts[0]) + m_close = self.re_close_tag.search(str_parts[-1]) + if m_open and m_close: + tag_contents = self.extract_tag_contents( + str_parts, m_close, m_open, str_buff + ) + + if block_type == "block" and tag_contents: + block_type = self._get_tag_configuration( + tag_contents[0] + ).block_type + block_tag = tag_contents[0] + if block_type == "templated" and tag_contents: + assert m_open and m_close + raw_slice_info = self.track_templated( + m_open, m_close, tag_contents + ) + ( + raw_slice_info_temp, + block_type, + ) = self.update_inside_set_call_macro_or_block( + block_type, tag_contents, m_open, m_close, tag_contents + ) + if raw_slice_info_temp: + raw_slice_info = raw_slice_info_temp + m_strip_right = regex.search( + r"\s+$", raw, regex.MULTILINE | regex.DOTALL + ) + if block_type == "block_start": + block_idx += 1 + if elem_type.endswith("_end") and raw.startswith("-") and m_strip_right: + # Right whitespace was stripped after closing block. Split + # off the trailing whitespace into a separate slice. The + # desired behavior is to behave similarly as the left + # stripping case. Note that the stakes are a bit lower here, + # because lex() hasn't *omitted* any characters from the + # strings it returns, it has simply grouped them differently + # than we want. 
+ trailing_chars = len(m_strip_right.group(0)) + self.raw_sliced.append( + RawFileSlice( + str_buff[:-trailing_chars], + block_type, + self.idx_raw, + block_idx, + block_tag, + ) + ) + self.raw_slice_info[self.raw_sliced[-1]] = raw_slice_info + slice_idx = len(self.raw_sliced) - 1 + self.idx_raw += len(str_buff) - trailing_chars + if block_type == "block_end": + block_idx += 1 + self.raw_sliced.append( + RawFileSlice( + str_buff[-trailing_chars:], + "literal", + self.idx_raw, + block_idx, + ) + ) + self.raw_slice_info[self.raw_sliced[-1]] = ( + self.slice_info_for_literal(0) + ) + self.idx_raw += trailing_chars + else: + self.raw_sliced.append( + RawFileSlice( + str_buff, + block_type, + self.idx_raw, + block_idx, + block_tag, + ) + ) + self.raw_slice_info[self.raw_sliced[-1]] = raw_slice_info + slice_idx = len(self.raw_sliced) - 1 + self.idx_raw += len(str_buff) + if block_type == "block_end": + block_idx += 1 + if block_type.startswith("block"): + self.track_block_end(block_type, tag_contents[0]) + self.update_next_slice_indices( + slice_idx, block_type, tag_contents[0] + ) + str_buff = "" + str_parts = [] + return self._get_jinja_tracer( + self.raw_str, + self.raw_sliced, + self.raw_slice_info, + self.sliced_file, + render_func, + ) + + def track_templated( + self, + m_open: regex.Match[str], + m_close: regex.Match[str], + tag_contents: List[str], + ) -> RawSliceInfo: + """Compute tracking info for Jinja templated region, e.g. {{ foo }}. + + Args: + m_open (regex.Match): A regex match object representing the opening tag. + m_close (regex.Match): A regex match object representing the closing tag. + tag_contents (List[str]): A list of strings representing the contents of the + tag. + + Returns: + RawSliceInfo: A RawSliceInfo object containing the computed + tracking info. + """ + unique_alternate_id = self.next_slice_id() + open_ = m_open.group(1) + close_ = m_close.group(1) + # Here, we still need to evaluate the original tag contents, e.g. 
in + # case it has intentional side effects, but also return a slice ID + # for tracking. + alternate_code = ( + f"\0{unique_alternate_id} {open_} {''.join(tag_contents)} {close_}" + ) + return self.make_raw_slice_info(unique_alternate_id, alternate_code) + + def track_call( + self, + m_open: regex.Match[str], + m_close: regex.Match[str], + tag_contents: List[str], + ) -> RawSliceInfo: + """Set up tracking for "{% call ... %}". + + Args: + m_open (regex.Match): A regex match object representing the opening tag. + m_close (regex.Match): A regex match object representing the closing tag. + tag_contents (List[str]): A list of strings representing the contents of the + tag. + + Returns: + RawSliceInfo: A RawSliceInfo object containing the computed + tracking info. + """ + unique_alternate_id = self.next_slice_id() + open_ = m_open.group(1) + close_ = m_close.group(1) + # Here, we still need to evaluate the original tag contents, e.g. in + # case it has intentional side effects, but also return a slice ID + # for tracking. + alternate_code = ( + f"\0{unique_alternate_id} {open_} {''.join(tag_contents)} {close_}" + ) + return self.make_raw_slice_info(unique_alternate_id, alternate_code) + + def track_literal(self, raw: str, block_idx: int) -> None: + """Set up tracking for a Jinja literal.""" + self.raw_sliced.append( + RawFileSlice( + raw, + "literal", + self.idx_raw, + block_idx, + ) + ) + # Replace literal text with a unique ID. + self.raw_slice_info[self.raw_sliced[-1]] = self.slice_info_for_literal( + len(raw), "" + ) + self.idx_raw += len(raw) + + @staticmethod + def extract_tag_contents( + str_parts: List[str], + m_close: regex.Match[str], + m_open: regex.Match[str], + str_buff: str, + ) -> List[str]: + """Given Jinja tag info, return the stuff inside the braces. + + I.e. Trim off the brackets and the whitespace. + + Args: + str_parts (List[str]): A list of string parts. + m_close (regex.Match[str]): The regex match for the closing tag. 
+ m_open (regex.Match[str]): The regex match for the opening tag. + str_buff (str): The string buffer. + + Returns: + List[str]: The trimmed parts inside the Jinja tag. + """ + if len(str_parts) >= 3: + # Handle a tag received as individual parts. + trimmed_parts = str_parts[1:-1] + if trimmed_parts[0].isspace(): + del trimmed_parts[0] + if trimmed_parts[-1].isspace(): + del trimmed_parts[-1] + else: + # Handle a tag received in one go. + trimmed_content = str_buff[len(m_open.group(0)) : -len(m_close.group(0))] + trimmed_parts = trimmed_content.split() + return trimmed_parts + + def track_block_end(self, block_type: str, tag_name: str) -> None: + """On ending a 'for' or 'if' block, set up tracking. + + Args: + block_type (str): The type of block ('block_start', 'block_mid', + 'block_end'). + tag_name (str): The name of the tag ('for', 'if', or other configured tag). + """ + if ( + block_type == "block_end" + and self._get_tag_configuration(tag_name).block_tracking + ): + # Replace RawSliceInfo for this slice with one that has alternate ID + # and code for tracking. This ensures, for instance, that if a file + # ends with "{% endif %} (with no newline following), that we still + # generate a TemplateSliceInfo for it. 
+ unique_alternate_id = self.next_slice_id() + alternate_code = f"{self.raw_sliced[-1].raw}\0{unique_alternate_id}_0" + self.raw_slice_info[self.raw_sliced[-1]] = self.make_raw_slice_info( + unique_alternate_id, alternate_code + ) + + def update_next_slice_indices( + self, slice_idx: int, block_type: str, tag_name: str + ) -> None: + """Based on block, update conditional jump info.""" + if ( + block_type == "block_start" + and self._get_tag_configuration(tag_name).block_tracking + ): + self.stack.append(slice_idx) + return None + elif not self.stack: + return None + + _idx = self.stack[-1] + _raw_slice = self.raw_sliced[_idx] + _slice_info = self.raw_slice_info[_raw_slice] + if ( + block_type == "block_mid" + and self._get_tag_configuration(tag_name).block_tracking + ): + # Record potential forward jump over this block. + _slice_info.next_slice_indices.append(slice_idx) + self.stack.pop() + self.stack.append(slice_idx) + elif ( + block_type == "block_end" + and self._get_tag_configuration(tag_name).block_tracking + ): + if not self.inside_set_macro_or_call: + # Record potential forward jump over this block. + _slice_info.next_slice_indices.append(slice_idx) + self.stack.pop() + if _raw_slice.slice_type == "block_start": + assert _raw_slice.tag + if self._get_tag_configuration(_raw_slice.tag).block_may_loop: + # Record potential backward jump to the loop beginning. + self.raw_slice_info[ + self.raw_sliced[slice_idx] + ].next_slice_indices.append(_idx + 1) + + def handle_left_whitespace_stripping(self, token: str, block_idx: int) -> None: + """If block open uses whitespace stripping, record it. + + When a "begin" tag (whether block, comment, or data) uses whitespace + stripping + (https://jinja.palletsprojects.com/en/3.0.x/templates/#whitespace-control) + the Jinja lex() function handles this by discarding adjacent whitespace + from 'raw_str'. 
For more insight, see the tokeniter() function in this file: + https://github.com/pallets/jinja/blob/main/src/jinja2/lexer.py + + We want to detect and correct for this in order to: + - Correctly update "idx" (if this is wrong, that's a potential + DISASTER because lint fixes use this info to update the source file, + and incorrect values often result in CORRUPTING the user's file so + it's no longer valid SQL. :-O + - Guarantee that the slices we return fully "cover" the contents of + 'in_str'. + + We detect skipped characters by looking ahead in in_str for the token + just returned from lex(). The token text will either be at the current + 'idx_raw' position (if whitespace stripping did not occur) OR it'll be + farther along in 'raw_str', but we're GUARANTEED that lex() only skips + over WHITESPACE; nothing else. + """ + # Find the token returned. Did lex() skip over any characters? + num_chars_skipped = self.raw_str.index(token, self.idx_raw) - self.idx_raw + if not num_chars_skipped: + return + + # Yes. It skipped over some characters. Compute a string + # containing the skipped characters. + skipped_str = self.raw_str[self.idx_raw : self.idx_raw + num_chars_skipped] + + # Sanity check: Verify that Jinja only skips over + # WHITESPACE, never anything else. + if not skipped_str.isspace(): # pragma: no cover + templater_logger.warning( + "Jinja lex() skipped non-whitespace: %s", skipped_str + ) + # Treat the skipped whitespace as a literal. 
+ self.raw_sliced.append( + RawFileSlice(skipped_str, "literal", self.idx_raw, block_idx) + ) + self.raw_slice_info[self.raw_sliced[-1]] = self.slice_info_for_literal(0) + self.idx_raw += num_chars_skipped diff --git a/crates/lib/src/templaters/python.rs b/crates/lib/src/templaters/python.rs index 2271e4d17..f2a01ea5b 100644 --- a/crates/lib/src/templaters/python.rs +++ b/crates/lib/src/templaters/python.rs @@ -200,7 +200,7 @@ impl<'py> FromPyObject<'py> for PythonRawFileSlice { } #[derive(FromPyObject)] -struct PythonTemplatedFile { +pub struct PythonTemplatedFile { source_str: String, fname: String, templated_str: Option, @@ -209,7 +209,7 @@ struct PythonTemplatedFile { } impl PythonTemplatedFile { - fn to_templated_file(&self) -> TemplatedFile { + pub fn to_templated_file(&self) -> TemplatedFile { TemplatedFile::new( self.source_str.to_string(), self.fname.to_string(), diff --git a/pyproject.toml b/pyproject.toml index 8b5090c7d..735ce158c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,10 @@ name = "sqruff" version = "0.0.1" readme = "README.md" requires-python = ">=3.9" -dependencies = [] +dependencies = [ + "Jinja2>=3.0.0", + "regex" +] [project.optional-dependencies] dev = [