Commit
* Initial code for data query library supporting Log Analytics, Security Graph and WDATP
* Updates to base64unpack and iocextract; updating and expanding test cases
* Adding unit tests and fixing some things
* Adding more unit tests and refactoring the driver/provider structure. Also simplified the entityschema code a little to use the internal __dict__ for properties
* A few fixes + black formatting
* Updating .gitignore to ignore vscode settings
* Fixing linting errors
* Flake8 line length and pylint errors
* Updating the version to match Pete's PR
Showing 35 changed files with 3,469 additions and 525 deletions.
@@ -104,3 +104,4 @@ venv.bak/
 .mypy_cache/
 /msticpy.code-workspace
 /docs/source/_build/**
+**/.vscode*
@@ -1,2 +1,2 @@
 """Version file."""
-VERSION = "0.1.7"
+VERSION = "0.1.8"
@@ -0,0 +1,188 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------
"""Data provider loader."""
from functools import partial
from os import path
from typing import Union, Any

import pandas as pd

from .drivers import DriverBase, KqlDriver, SecurityGraphDriver
from .query_store import QueryStore
from .param_extractor import extract_query_params
from ..nbtools.query_defns import DataEnvironment
from ..nbtools.utility import export
from .._version import VERSION

__version__ = VERSION
__author__ = "Ian Hellen"

_PROVIDER_DIR = "providers"
_QUERY_DEF_DIR = "queries"

_ENVIRONMENT_DRIVERS = {
    DataEnvironment.LogAnalytics: KqlDriver,
    DataEnvironment.AzureSecurityCenter: KqlDriver,
    DataEnvironment.SecurityGraph: SecurityGraphDriver,
}


class AttribHolder:
    """Empty class used to create hierarchical attributes."""

    def __len__(self):
        """Return number of items in the attribute collection."""
        return len(self.__dict__)

    def __iter__(self):
        """Return iterator over the attributes."""
        return iter(self.__dict__.items())


@export
class QueryProvider:
    """
    Container for query store and query execution provider.
    Instances of this class hold the query set and execution
    methods for a specific data environment.
    """

    def __init__(
        self, data_environment: Union[str, DataEnvironment], driver: DriverBase = None
    ):
        """
        Query provider interface to queries.
        Parameters
        ----------
        data_environment : Union[str, DataEnvironment]
            Name or Enum of environment for the QueryProvider
        driver : DriverBase, optional
            Override the builtin driver (query execution class)
            and use your own driver (must inherit from
            `DriverBase`)
        See Also
        --------
        DataProviderBase : base class for data query providers.
        """
        if isinstance(data_environment, str):
            data_environment = DataEnvironment.parse(data_environment)

        self._environment = data_environment.name

        if driver is None:
            driver_class = _ENVIRONMENT_DRIVERS[data_environment]
            if issubclass(driver_class, DriverBase):
                driver = driver_class()
            else:
                raise LookupError(
                    "Could not find suitable data provider for",
                    f" {data_environment.name}",
                )

        self._query_provider = driver

        # Find the path of this module and build sub-path
        query_path = path.join(path.dirname(__file__), _QUERY_DEF_DIR)

        # Load data query definitions for environment
        data_environments = QueryStore.import_files(
            source_path=query_path, recursive=True
        )
        self._query_store = data_environments[data_environment.name]

        self.all_queries = AttribHolder()
        self._add_query_functions()

    def connect(self, connection_str: str, **kwargs):
        """
        Connect to data source.
        Parameters
        ----------
        connection_str : str
            Connection string for the data source
        """
        return self._query_provider.connect(connection_str=connection_str, **kwargs)

    def import_query_file(self, query_file: str):
        """
        Import a yaml data source definition.
        Parameters
        ----------
        query_file : str
            Path to the file to import
        """
        self._query_store.import_file(query_file)

    def list_queries(self):
        """
        Return list of family.query in the store.
        Returns
        -------
        Iterable[str]
            List of queries
        """
        return self._query_store.query_names

    def query_help(self, query_name):
        """Print help for query."""
        self._query_store[query_name].help()

    def _execute_query(self, *args, **kwargs) -> Union[pd.DataFrame, Any]:
        if not self._query_provider.loaded:
            raise ValueError("Provider is not loaded.")
        if not self._query_provider.connected:
            raise ValueError(
                "No connection to a data source.",
                "Please call connect(connection_str) and retry.",
            )
        query_name = kwargs.pop("query_name")
        family = kwargs.pop("data_family")

        query_source = self._query_store.get_query(
            data_family=family, query_name=query_name
        )
        if "help" in args or "?" in args:
            query_source.help()
            return None

        params, missing = extract_query_params(query_source, *args, **kwargs)
        if missing:
            query_source.help()
            raise ValueError(f"No values found for these parameters: {missing}")

        query_str = query_source.create_query(**params)
        return self._query_provider.query(query_str)

    def _add_query_functions(self):
        """Add queries to the module as callable methods."""
        for qual_query_name in self.list_queries():

            family, query_name = qual_query_name.split(".")
            if not hasattr(self, family):
                setattr(self, family, AttribHolder())
            query_family = getattr(self, family)

            # Create the partial function
            query_func = partial(
                self._execute_query, data_family=family, query_name=query_name
            )
            query_func.__doc__ = self._query_store.get_query(
                family, query_name
            ).create_doc_string()

            setattr(query_family, query_name, query_func)
            setattr(self.all_queries, query_name, query_func)
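
For orientation, here is a hedged sketch of how this new QueryProvider might be used. Only QueryProvider, connect(), list_queries() and all_queries come from the code above; the module import path, the connection string placeholder and the list_alerts query name are illustrative assumptions.

# Hypothetical usage sketch - import path and query names are assumptions.
from msticpy.data.data_providers import QueryProvider  # assumed module path

# "LogAnalytics" resolves to KqlDriver via _ENVIRONMENT_DRIVERS.
qry_prov = QueryProvider(data_environment="LogAnalytics")

# Connect the underlying driver to the data source.
qry_prov.connect(connection_str="<your Log Analytics connection string>")

# Queries loaded from the yaml definitions are attached as attributes,
# grouped by data family and also flattened onto all_queries.
print(qry_prov.list_queries())

# Hypothetical query name - substitute one returned by list_queries().
alerts_df = qry_prov.all_queries.list_alerts(start="2019-02-01", end="2019-02-08")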
@@ -0,0 +1,134 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------
"""Data query definition reader."""
from typing import Tuple, Dict, Iterable, Any
from pathlib import Path
import yaml

from ..nbtools.query_defns import DataFamily, DataEnvironment
from .._version import VERSION

__version__ = VERSION
__author__ = "Ian Hellen"


def find_yaml_files(source_path: str, recursive: bool = False) -> Iterable[Path]:
    """Return iterable of yaml files found in `source_path`.
    Parameters
    ----------
    source_path : str
        The source path to search in.
    recursive : bool, optional
        Whether to recurse through subfolders.
        By default False
    Returns
    -------
    Iterable[Path]
        File paths of yaml files found.
    """
    recurse_pfx = "**/" if recursive else ""
    file_glob = Path(source_path).glob(f"{recurse_pfx}*.yaml")
    for file_path in file_glob:
        if not file_path.is_file():
            continue
        yield file_path


def read_query_def_file(query_file: str) -> Tuple[Dict, Dict, Dict]:
    """
    Read a yaml data query definition file.
    Parameters
    ----------
    query_file : str
        Path to yaml query definition file
    Returns
    -------
    Tuple[Dict, Dict, Dict]
        Tuple of dictionaries.
        sources - dictionary of query definitions
        defaults - the default parameters from the file
        metadata - the global metadata from the file
    """
    data_map = None
    with open(query_file) as f_handle:
        # use safe_load instead of load
        data_map = yaml.safe_load(f_handle)

    validate_query_defs(query_def_dict=data_map)

    defaults = data_map.get("defaults", {})
    sources = data_map.get("sources", {})
    metadata = data_map.get("metadata", {})

    return sources, defaults, metadata


def validate_query_defs(query_def_dict: Dict[str, Any]) -> bool:
    """Validate content of query definition.
    Parameters
    ----------
    query_def_dict : dict
        Dictionary of query definition yaml file contents.
    Returns
    -------
    bool
        True if validation succeeds.
    Raises
    ------
    ValueError
        The validation failure reason is returned in the
        exception message (arg[0])
    """
    # verify that sources and metadata are in the data dict
    if "sources" not in query_def_dict or not query_def_dict["sources"]:
        raise ValueError("Imported file has no sources defined")
    if "metadata" not in query_def_dict or not query_def_dict["metadata"]:
        raise ValueError("Imported file has no metadata defined")

    # data_environments and data_families must be defined with at least
    # one value
    _validate_data_categories(query_def_dict)

    return True


def _validate_data_categories(query_def_dict: Dict):
    if (
        "data_environments" not in query_def_dict["metadata"]
        or not query_def_dict["metadata"]["data_environments"]
    ):
        raise ValueError("Imported file has no data_environments defined")

    for env in query_def_dict["metadata"]["data_environments"]:
        if not DataEnvironment.parse(env):
            raise ValueError(
                f"Unknown data environment {env} in metadata. ",
                "Valid values are\n",
                ", ".join([e.name for e in DataEnvironment]),
            )
    if (
        "data_families" not in query_def_dict["metadata"]
        or not query_def_dict["metadata"]["data_families"]
    ):
        raise ValueError("Imported file has no data families defined")

    for fam in query_def_dict["metadata"]["data_families"]:
        if not DataFamily.parse(fam):
            raise ValueError(
                f"Unknown data family {fam} in metadata. ",
                "Valid values are\n",
                ", ".join([f.name for f in DataFamily]),
            )
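
To make the expected file shape concrete, here is a hedged sketch of a definition dictionary that passes validate_query_defs. Only the top-level keys and the data_environments / data_families checks come from the code above; the import path, the WindowsSecurity family name and the empty source entry are assumptions (the inner schema of a source is not validated by this function).

# Hedged sketch: minimal dictionary shape accepted by validate_query_defs().
from msticpy.data.data_query_reader import validate_query_defs  # assumed module path

query_defs = {
    "metadata": {
        "data_environments": ["LogAnalytics"],   # must parse as a DataEnvironment member
        "data_families": ["WindowsSecurity"],    # must parse as a DataFamily member (assumed name)
    },
    "defaults": {},                              # optional - read_query_def_file falls back to {}
    "sources": {
        "list_example_events": {},               # at least one source entry is required
    },
}

assert validate_query_defs(query_def_dict=query_defs)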
@@ -0,0 +1,10 @@
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for
# license information.
# --------------------------------------------------------------------------
"""Data provider sub-package."""
# flake8: noqa: F403
from .driver_base import DriverBase
from .kql_driver import KqlDriver
from .security_graph_driver import SecurityGraphDriver
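
QueryProvider also accepts a user-supplied driver. The stub below only illustrates the call surface QueryProvider exercises in this commit (.loaded, .connected, .connect() and .query()); the commit's docstring says a real replacement must inherit from DriverBase, whose definition is not shown above, so treat this as an assumption rather than the actual base-class contract.

# Plain stub showing the driver surface QueryProvider calls - not DriverBase itself.
import pandas as pd


class CsvStubDriver:
    """Toy stand-in that serves every query from a local CSV file."""

    def __init__(self):
        self.loaded = True        # checked by QueryProvider._execute_query
        self.connected = False    # set True once connect() has been called
        self._data = None

    def connect(self, connection_str: str, **kwargs):
        # Here the "connection string" is just a path to a CSV file.
        self._data = pd.read_csv(connection_str)
        self.connected = True

    def query(self, query: str) -> pd.DataFrame:
        # Ignore the query text and return the whole table - illustration only.
        return self._data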