Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support args via config file #1124

Draft
wants to merge 8 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
327 changes: 200 additions & 127 deletions cartography/cli.py

Large diffs are not rendered by default.

38 changes: 38 additions & 0 deletions cartography/config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
class Config:
# TODO reorder the params
# TODO add comments for the new attributes
"""
A common interface for cartography configuration.

Expand All @@ -19,6 +21,8 @@ class Config:
:param neo4j_database: The name of the database in Neo4j to connect to. If not specified, uses your Neo4j database
settings to infer which database is set to default.
See https://neo4j.com/docs/api/python-driver/4.4/api.html#database. Optional.
:type selected_modules: str
:param selected_modules: Comma-separated list of cartography top-level modules to sync. Optional.
:type update_tag: int
:param update_tag: Update tag for a cartography sync run. Optional.
:type aws_sync_all_profiles: bool
Expand Down Expand Up @@ -85,6 +89,8 @@ class Config:
:param gsuite_auth_method: Auth method (delegated, oauth) used for Google Workspace. Optional.
:type gsuite_config: str
:param gsuite_config: Base64 encoded config object or config file path for Google Workspace. Optional.
:type config: str
:param config: Path to cartography.yaml config file. Optional.
"""

def __init__(
Expand All @@ -94,6 +100,7 @@ def __init__(
neo4j_password=None,
neo4j_max_connection_lifetime=None,
neo4j_database=None,
selected_modules=None,
update_tag=None,
aws_sync_all_profiles=False,
aws_best_effort_mode=False,
Expand All @@ -102,40 +109,58 @@ def __init__(
azure_tenant_id=None,
azure_client_id=None,
azure_client_secret=None,
azure_client_secret_env_var=None,
aws_requested_syncs=None,
analysis_job_directory=None,
crxcavator_api_base_uri=None,
crxcavator_api_key=None,
crxcavator_api_key_env_var=None,
oci_sync_all_profiles=None,
okta_org_id=None,
okta_api_key=None,
okta_saml_role_regex=None,
okta_api_key_env_var=None,
github_config=None,
github_config_env_var=None,
digitalocean_token=None,
digitalocean_token_env_var=None,
permission_relationships_file=None,
jamf_base_uri=None,
jamf_user=None,
jamf_password=None,
jamf_password_env_var=None,
k8s_kubeconfig=None,
statsd_enabled=False,
statsd_prefix=None,
statsd_host=None,
statsd_port=None,
pagerduty_api_key=None,
pagerduty_request_timeout=None,
pagerduty_api_key_env_var=None,
nist_cve_url=None,
cve_enabled=False,
crowdstrike_client_id=None,
crowdstrike_client_secret=None,
crowdstrike_api_url=None,
crowdstrike_client_id_env_var=None,
crowdstrike_client_secret_env_var=None,
gsuite_auth_method=None,
gsuite_config=None,
gsuite_tokens_env_var=None,
config=None,
verbose=None,
quiet=None,
neo4j_password_prompt=None,
neo4j_password_env_var=None,
):
self.neo4j_uri = neo4j_uri
self.neo4j_user = neo4j_user
self.neo4j_password = neo4j_password
self.neo4j_password_prompt = neo4j_password_prompt
self.neo4j_password_env_var = neo4j_password_env_var
self.neo4j_max_connection_lifetime = neo4j_max_connection_lifetime
self.neo4j_database = neo4j_database
self.selected_modules = selected_modules
self.update_tag = update_tag
self.aws_sync_all_profiles = aws_sync_all_profiles
self.aws_best_effort_mode = aws_best_effort_mode
Expand All @@ -144,31 +169,44 @@ def __init__(
# Each remaining constructor argument is stored unchanged on the instance under the same name.
self.azure_tenant_id = azure_tenant_id
self.azure_client_id = azure_client_id
self.azure_client_secret = azure_client_secret
self.azure_client_secret_env_var = azure_client_secret_env_var
self.aws_requested_syncs = aws_requested_syncs
self.analysis_job_directory = analysis_job_directory
self.crxcavator_api_base_uri = crxcavator_api_base_uri
self.crxcavator_api_key = crxcavator_api_key
self.crxcavator_api_key_env_var = crxcavator_api_key_env_var
self.oci_sync_all_profiles = oci_sync_all_profiles
self.okta_org_id = okta_org_id
self.okta_api_key = okta_api_key
# No trailing comma here: a trailing comma would store a 1-tuple instead of the value itself.
self.okta_api_key_env_var = okta_api_key_env_var
self.okta_saml_role_regex = okta_saml_role_regex
self.github_config = github_config
self.github_config_env_var = github_config_env_var
self.digitalocean_token = digitalocean_token
self.digitalocean_token_env_var = digitalocean_token_env_var
self.permission_relationships_file = permission_relationships_file
self.jamf_base_uri = jamf_base_uri
self.jamf_user = jamf_user
self.jamf_password = jamf_password
self.jamf_password_env_var = jamf_password_env_var
self.k8s_kubeconfig = k8s_kubeconfig
self.statsd_enabled = statsd_enabled
self.statsd_prefix = statsd_prefix
self.statsd_host = statsd_host
self.statsd_port = statsd_port
self.pagerduty_api_key = pagerduty_api_key
self.pagerduty_request_timeout = pagerduty_request_timeout
self.pagerduty_api_key_env_var = pagerduty_api_key_env_var
self.nist_cve_url = nist_cve_url
self.cve_enabled = cve_enabled
self.crowdstrike_client_id = crowdstrike_client_id
self.crowdstrike_client_secret = crowdstrike_client_secret
self.crowdstrike_api_url = crowdstrike_api_url
self.crowdstrike_client_id_env_var = crowdstrike_client_id_env_var
self.crowdstrike_client_secret_env_var = crowdstrike_client_secret_env_var
self.gsuite_auth_method = gsuite_auth_method
self.gsuite_config = gsuite_config
self.gsuite_tokens_env_var = gsuite_tokens_env_var
self.config = config
self.verbose = verbose
self.quiet = quiet
69 changes: 55 additions & 14 deletions cartography/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,24 @@
logger = logging.getLogger(__name__)


# Registry mapping each top-level module name to the function that runs it.
# Insertion order is preserved so that the default sync always runs `analysis` at the very end.
TOP_LEVEL_MODULES = OrderedDict([
    ('create-indexes', cartography.intel.create_indexes.run),
    ('aws', cartography.intel.aws.start_aws_ingestion),
    ('azure', cartography.intel.azure.start_azure_ingestion),
    ('crowdstrike', cartography.intel.crowdstrike.start_crowdstrike_ingestion),
    ('gcp', cartography.intel.gcp.start_gcp_ingestion),
    ('gsuite', cartography.intel.gsuite.start_gsuite_ingestion),
    ('crxcavator', cartography.intel.crxcavator.start_extension_ingestion),
    ('cve', cartography.intel.cve.start_cve_ingestion),
    ('oci', cartography.intel.oci.start_oci_ingestion),
    ('okta', cartography.intel.okta.start_okta_ingestion),
    ('github', cartography.intel.github.start_github_ingestion),
    ('digitalocean', cartography.intel.digitalocean.start_digitalocean_ingestion),
    ('kubernetes', cartography.intel.kubernetes.start_k8s_ingestion),
    ('analysis', cartography.intel.analysis.run),
])


class Sync:
"""
A cartography sync task.
Expand Down Expand Up @@ -172,19 +190,42 @@ def build_default_sync() -> Sync:
"""
sync = Sync()
sync.add_stages([
('create-indexes', cartography.intel.create_indexes.run),
('aws', cartography.intel.aws.start_aws_ingestion),
('azure', cartography.intel.azure.start_azure_ingestion),
('crowdstrike', cartography.intel.crowdstrike.start_crowdstrike_ingestion),
('gcp', cartography.intel.gcp.start_gcp_ingestion),
('gsuite', cartography.intel.gsuite.start_gsuite_ingestion),
('crxcavator', cartography.intel.crxcavator.start_extension_ingestion),
('cve', cartography.intel.cve.start_cve_ingestion),
('oci', cartography.intel.oci.start_oci_ingestion),
('okta', cartography.intel.okta.start_okta_ingestion),
('github', cartography.intel.github.start_github_ingestion),
('digitalocean', cartography.intel.digitalocean.start_digitalocean_ingestion),
('kubernetes', cartography.intel.kubernetes.start_k8s_ingestion),
('analysis', cartography.intel.analysis.run),
(stage_name, stage_func) for stage_name, stage_func in TOP_LEVEL_MODULES.items()
])
return sync


def parse_and_validate_selected_modules(selected_modules: str) -> List[str]:
    """
    Split the user-provided comma separated module string and check every entry against TOP_LEVEL_MODULES.
    :param selected_modules: comma separated string of module names; whitespace around each name is ignored
    :return: The stripped module names, in the order the user gave them
    :raises ValueError: if any entry is not a key of TOP_LEVEL_MODULES
    """
    requested: List[str] = [name.strip() for name in selected_modules.split(',')]

    for name in requested:
        if name in TOP_LEVEL_MODULES:
            continue

        # Fail on the first unknown entry and show the user every accepted module name.
        valid_modules = ', '.join(TOP_LEVEL_MODULES)
        raise ValueError(
            f'Error parsing `selected_modules`. You specified "{selected_modules}". '
            f'Please check that your string is formatted properly. '
            f'Example valid input looks like "aws,gcp,analysis" or "azure, oci, crowdstrike". '
            f'Our full list of valid values is: {valid_modules}.',
        )

    return requested


def build_sync(selected_modules_as_str: str) -> Sync:
    """
    Build a cartography sync object whose stages are exactly the modules named in the user-specified comma separated
    list, in the order given.
    :param selected_modules_as_str: comma separated string of module names to run
    :return: A Sync with one stage per selected module
    """
    module_names = parse_and_validate_selected_modules(selected_modules_as_str)
    custom_sync = Sync()
    # Pair each validated name with its function from TOP_LEVEL_MODULES.
    stages = [(name, TOP_LEVEL_MODULES[name]) for name in module_names]
    custom_sync.add_stages(stages)
    return custom_sync
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,4 @@ allow_redefinition = true
ignore_errors = true

[coverage:report]
fail_under = 30
fail_under = 40
45 changes: 45 additions & 0 deletions tests/data/test_cartography_conf.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
analysis_job_directory: null
aws_best_effort_mode: false
aws_requested_syncs: null
aws_sync_all_profiles: false
azure_client_id: null
azure_client_secret_env_var: null
azure_sp_auth: false
azure_sync_all_subscriptions: false
azure_tenant_id: null
crowdstrike_api_url: null
crowdstrike_client_id_env_var: null
crowdstrike_client_secret_env_var: null
crxcavator_api_base_uri: https://api.crxcavator.io/v1
crxcavator_api_key_env_var: null
cve_enabled: false
digitalocean_token_env_var: null
github_config_env_var: null
gsuite_auth_method: delegated
gsuite_tokens_env_var: GSUITE_GOOGLE_APPLICATION_CREDENTIALS
jamf_base_uri: null
jamf_password_env_var: null
jamf_user: null
k8s_kubeconfig: null
neo4j_database: null
neo4j_max_connection_lifetime: 3600
neo4j_password_env_var: null
neo4j_password_prompt: false
neo4j_uri: bolt://localhost:7687
neo4j_user: null
nist_cve_url: https://nvd.nist.gov/feeds/json/cve/1.1
oci_sync_all_profiles: false
okta_api_key_env_var: null
okta_org_id: null
okta_saml_role_regex: ^aws\#\S+\#(?{{role}}[\w\-]+)\#(?{{accountid}}\d+)$
pagerduty_api_key_env_var: null
pagerduty_request_timeout: null
permission_relationships_file: cartography/data/permission_relationships.yaml
quiet: false
selected_modules: aws
statsd_enabled: false
statsd_host: 127.0.0.1
statsd_port: 8125
statsd_prefix: ''
update_tag: null
verbose: false
34 changes: 30 additions & 4 deletions tests/integration/cartography/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,37 @@
import unittest.mock
from unittest.mock import MagicMock
from unittest.mock import patch

import cartography.cli
from cartography.cli import CLI
from tests.integration import settings


def test_cli():
sync = unittest.mock.MagicMock()
cli = cartography.cli.CLI(sync, 'test')
cli.main(["--neo4j-uri", settings.get("NEO4J_URL")])
"""
Simulate running `cartography --neo4j-uri URI sync` and ensure the sync gets run.
"""
sync = MagicMock()
cli = CLI(sync, 'test')
cli.main(["--neo4j-uri", settings.get("NEO4J_URL"), "sync"])
sync.run.assert_called_once()


@patch.object(cartography.cli, 'run_with_config', return_value=0)
def test_cli_load_yaml(mock_run_with_config: MagicMock):
    """
    Simulate running `cartography --config tests/data/test_cartography_conf.yaml sync` and ensure that
    `run_with_config` gets called.
    """
    # Arrange
    cli = CLI(prog='cartography')

    # Act
    cli.main(["--config", "tests/data/test_cartography_conf.yaml", "sync"])

    # Assert
    mock_run_with_config.assert_called()


# TODO test that the okta_saml_role_regex reaches the AWS module as expected
49 changes: 49 additions & 0 deletions tests/unit/cartography/test_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from unittest.mock import MagicMock
from unittest.mock import patch

import cartography.sync
from cartography.cli import CLI


def test_cli_selected_modules():
    """
    Check that the argparser created by the CLI parses the --selected-modules arg of the sync subcommand.
    """
    # Arrange
    cli = CLI(prog='cartography')

    # Act
    parsed_args = cli.parser.parse_args(["sync", "--selected-modules", "aws"])

    # Assert that the parser knows that we want to run the aws module
    assert parsed_args.selected_modules == 'aws'
    # TODO - remove this when ready; dumping `vars(parsed_args)` here and forcing a failure with `assert False`
    # is an easy way to hook in and get yaml for copy pasta.


@patch.object(cartography.cli, 'run_with_config', return_value=0)
def test_cli_main(mock_run_with_config: MagicMock):
    """
    Run CLI.main() with `sync --selected-modules aws` and ensure that `run_with_config` gets called.
    """
    # Arrange
    cli = CLI(prog='cartography')

    # Act
    cli.main(["sync", "--selected-modules", "aws"])

    # Assert
    mock_run_with_config.assert_called()
Loading