diff --git a/cartography/cli.py b/cartography/cli.py index 10418b395..a88986c76 100644 --- a/cartography/cli.py +++ b/cartography/cli.py @@ -3,11 +3,16 @@ import logging import os import sys +from typing import List +from typing import Optional +from typing import Union + +import yaml import cartography.config -import cartography.sync import cartography.util from cartography.intel.aws.util.common import parse_and_validate_aws_requested_syncs +from cartography.sync import run_with_config logger = logging.getLogger(__name__) @@ -21,12 +26,12 @@ class CLI: :param prog: The name of the command line program. This will be displayed in usage and help output. """ - def __init__(self, sync, prog=None): + def __init__(self, sync: Optional[cartography.sync.Sync] = None, prog: Optional[str] = None): + self.sync = sync if sync else cartography.sync.build_default_sync() self.prog = prog - self.sync = sync self.parser = self._build_parser() - def _build_parser(self): + def _build_parser(self) -> argparse.ArgumentParser: """ :rtype: argparse.ArgumentParser :return: A cartography argument parser. Calling parse_args on the argument parser will return an object which @@ -47,19 +52,34 @@ def _build_parser(self): ), epilog='For more documentation please visit: https://github.com/lyft/cartography', ) - parser.add_argument( + + log_level_group = parser.add_mutually_exclusive_group() + log_level_group.add_argument( '-v', '--verbose', action='store_true', help='Enable verbose logging for cartography.', ) - parser.add_argument( + log_level_group.add_argument( '-q', '--quiet', action='store_true', help='Restrict cartography logging to warnings and errors only.', ) + parser.add_argument( + '-c', + '--config', + type=str, + default=None, + help=( + 'Path to cartography config file. If specified, all other arguments below are ignored. If not ' + 'specified (default), uses only CLI args.' 
+ ), + ) + + neo4j_arg_group = parser.add_argument_group('neo4j') + neo4j_arg_group.add_argument( '--neo4j-uri', type=str, default='bolt://localhost:7687', @@ -69,19 +89,19 @@ def _build_parser(self): 'structure of a Neo4j URI.' ), ) - parser.add_argument( + neo4j_arg_group.add_argument( '--neo4j-user', type=str, default=None, help='A username with which to authenticate to Neo4j.', ) - parser.add_argument( + neo4j_arg_group.add_argument( '--neo4j-password-env-var', type=str, default=None, help='The name of an environment variable containing a password with which to authenticate to Neo4j.', ) - parser.add_argument( + neo4j_arg_group.add_argument( '--neo4j-password-prompt', action='store_true', help=( @@ -89,7 +109,7 @@ def _build_parser(self): 'supersedes other methods of supplying a Neo4j password.' ), ) - parser.add_argument( + neo4j_arg_group.add_argument( '--neo4j-max-connection-lifetime', type=int, default=3600, @@ -100,7 +120,7 @@ def _build_parser(self): '.' ), ) - parser.add_argument( + neo4j_arg_group.add_argument( '--neo4j-database', type=str, default=None, @@ -110,8 +130,59 @@ def _build_parser(self): 'See https://neo4j.com/docs/api/python-driver/4.4/api.html#database.' ), ) - # TODO add the below parameters to a 'sync' subparser - parser.add_argument( + + statsd_arg_group = parser.add_argument_group('statsd') + statsd_arg_group.add_argument( + '--statsd-enabled', + action='store_true', + help=( + 'If set, enables sending metrics using statsd to a server of your choice.' + ), + ) + statsd_arg_group.add_argument( + '--statsd-prefix', + type=str, + default='', + help=( + 'The string to prefix statsd metrics with. Only used if --statsd-enabled is on. Default = empty string.' + ), + ) + statsd_arg_group.add_argument( + '--statsd-host', + type=str, + default='127.0.0.1', + help=( + 'The IP address of your statsd server. Only used if --statsd-enabled is on. Default = 127.0.0.1.' 
+ ), + ) + statsd_arg_group.add_argument( + '--statsd-port', + type=int, + default=8125, + help=( + 'The port of your statsd server. Only used if --statsd-enabled is on. Default = UDP 8125.' + ), + ) + + # 'sync' subparser + subparsers = parser.add_subparsers() + sync_subparser = subparsers.add_parser('sync') + sync_subparser.add_argument( + '--selected-modules', + type=str, + default=None, + help=( + 'Comma-separated list of cartography top-level modules to sync. Example 1: "aws,gcp" to run AWS and GCP ' + 'modules. See the full list available in source code at cartography.sync. ' + 'If not specified, cartography by default will run all modules available and log warnings when it ' + 'does not find credentials configured for them. ' + # TODO remove this mention about the create-indexes module when everything is using auto-indexes. + 'We recommend that you always specify the `create-indexes` module first in this list. ' + 'If you specify the `analysis` module, we recommend that you include it as the LAST item of this list, ' + '(because it does not make sense to perform analysis on an empty/out-of-date graph).' + ), + ) + sync_subparser.add_argument( '--update-tag', type=int, default=None, @@ -121,7 +192,9 @@ def _build_parser(self): 'removed from the graph. By default, cartography will use a UNIX timestamp as the update tag.' ), ) - parser.add_argument( + + aws_arg_group = sync_subparser.add_argument_group('aws') + aws_arg_group.add_argument( '--aws-sync-all-profiles', action='store_true', help=( @@ -136,7 +209,7 @@ def _build_parser(self): 'respects the AWS CLI/SDK environment variables and does not override them.' ), ) - parser.add_argument( + aws_arg_group.add_argument( '--aws-best-effort-mode', action='store_true', help=( @@ -144,7 +217,28 @@ def _build_parser(self): 'syncing other accounts and delay raising an exception until the very end.' 
), ) - parser.add_argument( + aws_arg_group.add_argument( + '--aws-requested-syncs', + type=str, + default=None, + help=( + 'Comma-separated list of AWS resources to sync. Example 1: "ecr,s3,ec2:instance" for ECR, S3, and all ' + 'EC2 instance resources. See the full list available in source code at cartography.intel.aws.resources.' + ' If not specified, cartography by default will run all AWS sync modules available.' + ), + ) + aws_arg_group.add_argument( + '--permission-relationships-file', + type=str, + default="cartography/data/permission_relationships.yaml", + help=( + 'The path to the permission relationships mapping file.' + 'If omitted the default permission relationships will be created' + ), + ) + + oci_arg_group = sync_subparser.add_argument_group('oci') + oci_arg_group.add_argument( '--oci-sync-all-profiles', action='store_true', help=( @@ -155,7 +249,8 @@ def _build_parser(self): 'default OCI credentials available in your environment to run the OCI sync once.' ), ) - parser.add_argument( + azure_arg_group = sync_subparser.add_argument_group('azure') + azure_arg_group.add_argument( '--azure-sync-all-subscriptions', action='store_true', help=( @@ -163,14 +258,14 @@ def _build_parser(self): 'discover all configured Azure subscriptions.' ), ) - parser.add_argument( + azure_arg_group.add_argument( '--azure-sp-auth', action='store_true', help=( 'Use Service Principal authentication for Azure sync.' ), ) - parser.add_argument( + azure_arg_group.add_argument( '--azure-tenant-id', type=str, default=None, @@ -178,7 +273,7 @@ def _build_parser(self): 'Azure Tenant Id for Service Principal Authentication.' ), ) - parser.add_argument( + azure_arg_group.add_argument( '--azure-client-id', type=str, default=None, @@ -186,7 +281,7 @@ def _build_parser(self): 'Azure Client Id for Service Principal Authentication.' 
), ) - parser.add_argument( + azure_arg_group.add_argument( '--azure-client-secret-env-var', type=str, default=None, @@ -194,17 +289,9 @@ def _build_parser(self): 'The name of environment variable containing Azure Client Secret for Service Principal Authentication.' ), ) - parser.add_argument( - '--aws-requested-syncs', - type=str, - default=None, - help=( - 'Comma-separated list of AWS resources to sync. Example 1: "ecr,s3,ec2:instance" for ECR, S3, and all ' - 'EC2 instance resources. See the full list available in source code at cartography.intel.aws.resources.' - ' If not specified, cartography by default will run all AWS sync modules available.' - ), - ) - parser.add_argument( + + crxcavator_arg_group = sync_subparser.add_argument_group('crxcavator') + crxcavator_arg_group.add_argument( '--crxcavator-api-base-uri', type=str, default='https://api.crxcavator.io/v1', @@ -212,7 +299,7 @@ def _build_parser(self): 'Base URI for the CRXcavator API. Defaults to public API endpoint.' ), ) - parser.add_argument( + crxcavator_arg_group.add_argument( '--crxcavator-api-key-env-var', type=str, default=None, @@ -221,7 +308,7 @@ def _build_parser(self): 'Required if you are using the CRXcavator intel module. Ignored otherwise.' ), ) - parser.add_argument( + sync_subparser.add_argument( '--analysis-job-directory', type=str, default=None, @@ -233,7 +320,9 @@ def _build_parser(self): 'jobs are executed.' ), ) - parser.add_argument( + + okta_arg_group = sync_subparser.add_argument_group('okta') + okta_arg_group.add_argument( '--okta-org-id', type=str, default=None, @@ -241,7 +330,7 @@ def _build_parser(self): 'Okta organizational id to sync. Required if you are using the Okta intel module. Ignored otherwise.' ), ) - parser.add_argument( + okta_arg_group.add_argument( '--okta-api-key-env-var', type=str, default=None, @@ -250,7 +339,7 @@ def _build_parser(self): 'Required if you are using the Okta intel module. Ignored otherwise.' 
), ) - parser.add_argument( + okta_arg_group.add_argument( '--okta-saml-role-regex', type=str, default=r"^aws\#\S+\#(?{{role}}[\w\-]+)\#(?{{accountid}}\d+)$", @@ -261,7 +350,9 @@ def _build_parser(self): 'The regex must contain the {{role}} and {{accountid}} tags' ), ) - parser.add_argument( + + github_arg_group = sync_subparser.add_argument_group('github') + github_arg_group.add_argument( '--github-config-env-var', type=str, default=None, @@ -270,7 +361,9 @@ def _build_parser(self): 'Required if you are using the GitHub intel module. Ignored otherwise.' ), ) - parser.add_argument( + + digitalocean_arg_group = sync_subparser.add_argument_group('digitalocean') + digitalocean_arg_group.add_argument( '--digitalocean-token-env-var', type=str, default=None, @@ -279,16 +372,9 @@ def _build_parser(self): 'Required if you are using the DigitalOcean intel module. Ignored otherwise.' ), ) - parser.add_argument( - '--permission-relationships-file', - type=str, - default="cartography/data/permission_relationships.yaml", - help=( - 'The path to the permission relationships mapping file.' - 'If omitted the default permission relationships will be created' - ), - ) - parser.add_argument( + + jamf_arg_group = sync_subparser.add_argument_group('jamf') + jamf_arg_group.add_argument( '--jamf-base-uri', type=str, default=None, @@ -297,19 +383,21 @@ def _build_parser(self): 'Required if you are using the Jamf intel module. Ignored otherwise.' 
), ) - parser.add_argument( + jamf_arg_group.add_argument( '--jamf-user', type=str, default=None, help='A username with which to authenticate to Jamf.', ) - parser.add_argument( + jamf_arg_group.add_argument( '--jamf-password-env-var', type=str, default=None, help='The name of an environment variable containing a password with which to authenticate to Jamf.', ) - parser.add_argument( + + k8s_arg_group = sync_subparser.add_argument_group('k8s') + k8s_arg_group.add_argument( '--k8s-kubeconfig', default=None, type=str, @@ -317,7 +405,9 @@ def _build_parser(self): 'The path to kubeconfig file specifying context to access K8s cluster(s).' ), ) - parser.add_argument( + + cve_arg_group = sync_subparser.add_argument_group('cve') + cve_arg_group.add_argument( '--nist-cve-url', type=str, default='https://nvd.nist.gov/feeds/json/cve/1.1', @@ -325,45 +415,16 @@ def _build_parser(self): 'The base url for the NIST CVE data. Default = https://nvd.nist.gov/feeds/json/cve/1.1' ), ) - parser.add_argument( + cve_arg_group.add_argument( '--cve-enabled', action='store_true', help=( 'If set, CVE data will be synced from NIST.' ), ) - parser.add_argument( - '--statsd-enabled', - action='store_true', - help=( - 'If set, enables sending metrics using statsd to a server of your choice.' - ), - ) - parser.add_argument( - '--statsd-prefix', - type=str, - default='', - help=( - 'The string to prefix statsd metrics with. Only used if --statsd-enabled is on. Default = empty string.' - ), - ) - parser.add_argument( - '--statsd-host', - type=str, - default='127.0.0.1', - help=( - 'The IP address of your statsd server. Only used if --statsd-enabled is on. Default = 127.0.0.1.' - ), - ) - parser.add_argument( - '--statsd-port', - type=int, - default=8125, - help=( - 'The port of your statsd server. Only used if --statsd-enabled is on. Default = UDP 8125.' 
- ), - ) - parser.add_argument( + + pagerduty_arg_group = sync_subparser.add_argument_group('pagerduty') + pagerduty_arg_group.add_argument( '--pagerduty-api-key-env-var', type=str, default=None, @@ -371,7 +432,7 @@ def _build_parser(self): 'The name of environment variable containing the pagerduty API key for authentication.' ), ) - parser.add_argument( + pagerduty_arg_group.add_argument( '--pagerduty-request-timeout', type=int, default=None, @@ -379,7 +440,9 @@ def _build_parser(self): 'Seconds to timeout for pagerduty API sessions.' ), ) - parser.add_argument( + + crowdstrike_arg_group = sync_subparser.add_argument_group('crowdstrike') + crowdstrike_arg_group.add_argument( '--crowdstrike-client-id-env-var', type=str, default=None, @@ -387,7 +450,7 @@ def _build_parser(self): 'The name of environment variable containing the crowdstrike client id for authentication.' ), ) - parser.add_argument( + crowdstrike_arg_group.add_argument( '--crowdstrike-client-secret-env-var', type=str, default=None, @@ -395,7 +458,7 @@ def _build_parser(self): 'The name of environment variable containing the crowdstrike secret key for authentication.' ), ) - parser.add_argument( + crowdstrike_arg_group.add_argument( '--crowdstrike-api-url', type=str, default=None, @@ -403,7 +466,9 @@ def _build_parser(self): 'The crowdstrike URL, if using self-hosted. Defaults to the public crowdstrike API URL otherwise.' ), ) - parser.add_argument( + + gsuite_arg_group = sync_subparser.add_argument_group('gsuite') + gsuite_arg_group.add_argument( '--gsuite-auth-method', type=str, default='delegated', @@ -412,7 +477,7 @@ def _build_parser(self): 'The method used by GSuite to authenticate. delegated is the legacy one.' 
), ) - parser.add_argument( + gsuite_arg_group.add_argument( '--gsuite-tokens-env-var', type=str, default='GSUITE_GOOGLE_APPLICATION_CREDENTIALS', @@ -422,15 +487,36 @@ def _build_parser(self): ) return parser - def main(self, argv: str) -> int: + def main(self, argv: List[str]) -> int: """ Entrypoint for the command line interface. :type argv: string :param argv: The parameters supplied to the command line program. """ - # TODO support parameter lookup in environment variables if not present on command line - config: argparse.Namespace = self.parser.parse_args(argv) + config: Union[argparse.Namespace, cartography.config.Config] = self.parser.parse_args(argv) + # If a yaml config is specified, make sure we use only the yaml file's values. We will do no merging. + if config.config: + if not os.path.isabs(config.config): + config.config = os.path.join(os.getcwd(), config.config) + with open(config.config) as yaml_file: + config = cartography.config.Config(**yaml.safe_load(yaml_file)) + + # TODO (consider) the code would be more functional if this returned a new object instead of doing this in place + self._post_process_config(config) + + # Run cartography + try: + return run_with_config(self.sync, config) + except KeyboardInterrupt: + return cartography.util.STATUS_KEYBOARD_INTERRUPT + + def _post_process_config(self, config: Union[argparse.Namespace, cartography.config.Config]): + # TODO ensure that this function creates the same default values as argparse in _build_parser(). + # TODO add tests for the expected shape of a config object. + """ + Use values specified from either the cartography CLI or config file to set all other necessary config items. 
+ """ # Logging config if config.verbose: logging.getLogger('cartography').setLevel(logging.DEBUG) @@ -439,7 +525,9 @@ def main(self, argv: str) -> int: else: logging.getLogger('cartography').setLevel(logging.INFO) logger.debug("Launching cartography with CLI configuration: %r", vars(config)) + # Neo4j config + config.neo4j_password = None if config.neo4j_user: config.neo4j_password = None if config.neo4j_password_prompt: @@ -454,8 +542,10 @@ def main(self, argv: str) -> int: config.neo4j_password = os.environ.get(config.neo4j_password_env_var) if not config.neo4j_password: logger.warning("Neo4j username was provided but a password could not be found.") - else: - config.neo4j_password = None + + # Selected modules + if config.selected_modules: + self.sync = cartography.sync.build_sync(config.selected_modules) # AWS config if config.aws_requested_syncs: @@ -463,44 +553,41 @@ def main(self, argv: str) -> int: parse_and_validate_aws_requested_syncs(config.aws_requested_syncs) # Azure config + config.azure_client_secret = None if config.azure_sp_auth and config.azure_client_secret_env_var: logger.debug( "Reading Client Secret for Azure Service Principal Authentication from environment variable %s", config.azure_client_secret_env_var, ) config.azure_client_secret = os.environ.get(config.azure_client_secret_env_var) - else: - config.azure_client_secret = None # Okta config + config.okta_api_key = None if config.okta_org_id and config.okta_api_key_env_var: logger.debug(f"Reading API key for Okta from environment variable {config.okta_api_key_env_var}") config.okta_api_key = os.environ.get(config.okta_api_key_env_var) - else: - config.okta_api_key = None # CRXcavator config + config.crxcavator_api_key = None if config.crxcavator_api_base_uri and config.crxcavator_api_key_env_var: logger.debug(f"Reading API key for CRXcavator from env variable {config.crxcavator_api_key_env_var}.") config.crxcavator_api_key = os.environ.get(config.crxcavator_api_key_env_var) - else: - 
config.crxcavator_api_key = None # GitHub config + config.github_config = None if config.github_config_env_var: logger.debug(f"Reading config string for GitHub from environment variable {config.github_config_env_var}") config.github_config = os.environ.get(config.github_config_env_var) - else: - config.github_config = None # DigitalOcean config + config.digitalocean_token = None if config.digitalocean_token_env_var: logger.debug(f"Reading token for DigitalOcean from env variable {config.digitalocean_token_env_var}") config.digitalocean_token = os.environ.get(config.digitalocean_token_env_var) - else: - config.digitalocean_token = None # Jamf config + config.jamf_user = config.jamf_user if config.jamf_base_uri else None + config.jamf_password = None if config.jamf_base_uri: if config.jamf_user: config.jamf_password = None @@ -515,9 +602,6 @@ def main(self, argv: str) -> int: logger.warning("A Jamf base URI was provided but a user was not.") if not config.jamf_password: logger.warning("A Jamf password could not be found.") - else: - config.jamf_user = None - config.jamf_password = None if config.statsd_enabled: logger.debug( @@ -526,44 +610,34 @@ def main(self, argv: str) -> int: ) # Pagerduty config + config.pagerduty_api_key = None if config.pagerduty_api_key_env_var: logger.debug(f"Reading API key for PagerDuty from environment variable {config.pagerduty_api_key_env_var}") config.pagerduty_api_key = os.environ.get(config.pagerduty_api_key_env_var) - else: - config.pagerduty_api_key = None # Crowdstrike config + config.crowdstrike_client_id = None if config.crowdstrike_client_id_env_var: logger.debug( f"Reading API key for Crowdstrike from environment variable {config.crowdstrike_client_id_env_var}", ) config.crowdstrike_client_id = os.environ.get(config.crowdstrike_client_id_env_var) - else: - config.crowdstrike_client_id = None + config.crowdstrike_client_secret = None if config.crowdstrike_client_secret_env_var: logger.debug( f"Reading API key for Crowdstrike from environment variable 
{config.crowdstrike_client_secret_env_var}", ) config.crowdstrike_client_secret = os.environ.get(config.crowdstrike_client_secret_env_var) - else: - config.crowdstrike_client_secret = None # GSuite config + config.gsuite_config = None if config.gsuite_tokens_env_var: logger.debug(f"Reading config string for GSuite from environment variable {config.gsuite_tokens_env_var}") config.gsuite_config = os.environ.get(config.gsuite_tokens_env_var) - else: - config.github_config = None - - # Run cartography - try: - return cartography.sync.run_with_config(self.sync, config) - except KeyboardInterrupt: - return cartography.util.STATUS_KEYBOARD_INTERRUPT -def main(argv=None): +def main(argv: Optional[List[str]] = None): """ Entrypoint for the default cartography command line interface. @@ -577,5 +651,4 @@ def main(argv=None): logging.getLogger('googleapiclient').setLevel(logging.WARNING) logging.getLogger('neo4j').setLevel(logging.WARNING) argv = argv if argv is not None else sys.argv[1:] - default_sync = cartography.sync.build_default_sync() - sys.exit(CLI(default_sync, prog='cartography').main(argv)) + sys.exit(CLI(prog='cartography').main(argv)) diff --git a/cartography/config.py b/cartography/config.py index 7eb809906..62d28f92c 100644 --- a/cartography/config.py +++ b/cartography/config.py @@ -1,4 +1,6 @@ class Config: + # TODO reorder the params + # TODO add comments for the new attributes """ A common interface for cartography configuration. @@ -19,6 +21,8 @@ class Config: :param neo4j_database: The name of the database in Neo4j to connect to. If not specified, uses your Neo4j database settings to infer which database is set to default. See https://neo4j.com/docs/api/python-driver/4.4/api.html#database. Optional. + :type selected_modules: str + :param selected_modules: Comma-separated list of cartography top-level modules to sync. Optional. :type update_tag: int :param update_tag: Update tag for a cartography sync run. Optional. 
:type aws_sync_all_profiles: bool @@ -85,6 +89,8 @@ class Config: :param gsuite_auth_method: Auth method (delegated, oauth) used for Google Workspace. Optional. :type gsuite_config: str :param gsuite_config: Base64 encoded config object or config file path for Google Workspace. Optional. + :type config: str + :param config: Path to cartography.yaml config file. Optional. """ def __init__( @@ -94,6 +100,7 @@ def __init__( neo4j_password=None, neo4j_max_connection_lifetime=None, neo4j_database=None, + selected_modules=None, update_tag=None, aws_sync_all_profiles=False, aws_best_effort_mode=False, @@ -102,20 +109,26 @@ def __init__( azure_tenant_id=None, azure_client_id=None, azure_client_secret=None, + azure_client_secret_env_var=None, aws_requested_syncs=None, analysis_job_directory=None, crxcavator_api_base_uri=None, crxcavator_api_key=None, + crxcavator_api_key_env_var=None, oci_sync_all_profiles=None, okta_org_id=None, okta_api_key=None, okta_saml_role_regex=None, + okta_api_key_env_var=None, github_config=None, + github_config_env_var=None, digitalocean_token=None, + digitalocean_token_env_var=None, permission_relationships_file=None, jamf_base_uri=None, jamf_user=None, jamf_password=None, + jamf_password_env_var=None, k8s_kubeconfig=None, statsd_enabled=False, statsd_prefix=None, @@ -123,19 +136,31 @@ def __init__( statsd_port=None, pagerduty_api_key=None, pagerduty_request_timeout=None, + pagerduty_api_key_env_var=None, nist_cve_url=None, cve_enabled=False, crowdstrike_client_id=None, crowdstrike_client_secret=None, crowdstrike_api_url=None, + crowdstrike_client_id_env_var=None, + crowdstrike_client_secret_env_var=None, gsuite_auth_method=None, gsuite_config=None, + gsuite_tokens_env_var=None, + config=None, + verbose=None, + quiet=None, + neo4j_password_prompt=None, + neo4j_password_env_var=None, ): self.neo4j_uri = neo4j_uri self.neo4j_user = neo4j_user self.neo4j_password = neo4j_password + self.neo4j_password_prompt = neo4j_password_prompt + 
self.neo4j_password_env_var = neo4j_password_env_var self.neo4j_max_connection_lifetime = neo4j_max_connection_lifetime self.neo4j_database = neo4j_database + self.selected_modules = selected_modules self.update_tag = update_tag self.aws_sync_all_profiles = aws_sync_all_profiles self.aws_best_effort_mode = aws_best_effort_mode @@ -144,20 +169,26 @@ def __init__( self.azure_tenant_id = azure_tenant_id self.azure_client_id = azure_client_id self.azure_client_secret = azure_client_secret + self.azure_client_secret_env_var = azure_client_secret_env_var self.aws_requested_syncs = aws_requested_syncs self.analysis_job_directory = analysis_job_directory self.crxcavator_api_base_uri = crxcavator_api_base_uri self.crxcavator_api_key = crxcavator_api_key + self.crxcavator_api_key_env_var = crxcavator_api_key_env_var self.oci_sync_all_profiles = oci_sync_all_profiles self.okta_org_id = okta_org_id self.okta_api_key = okta_api_key + self.okta_api_key_env_var = okta_api_key_env_var self.okta_saml_role_regex = okta_saml_role_regex self.github_config = github_config + self.github_config_env_var = github_config_env_var self.digitalocean_token = digitalocean_token + self.digitalocean_token_env_var = digitalocean_token_env_var self.permission_relationships_file = permission_relationships_file self.jamf_base_uri = jamf_base_uri self.jamf_user = jamf_user self.jamf_password = jamf_password + self.jamf_password_env_var = jamf_password_env_var self.k8s_kubeconfig = k8s_kubeconfig self.statsd_enabled = statsd_enabled self.statsd_prefix = statsd_prefix @@ -165,10 +196,17 @@ def __init__( self.statsd_port = statsd_port self.pagerduty_api_key = pagerduty_api_key self.pagerduty_request_timeout = pagerduty_request_timeout + self.pagerduty_api_key_env_var = pagerduty_api_key_env_var self.nist_cve_url = nist_cve_url self.cve_enabled = cve_enabled self.crowdstrike_client_id = crowdstrike_client_id self.crowdstrike_client_secret = crowdstrike_client_secret self.crowdstrike_api_url = 
crowdstrike_api_url + self.crowdstrike_client_id_env_var = crowdstrike_client_id_env_var + self.crowdstrike_client_secret_env_var = crowdstrike_client_secret_env_var self.gsuite_auth_method = gsuite_auth_method self.gsuite_config = gsuite_config + self.gsuite_tokens_env_var = gsuite_tokens_env_var + self.config = config + self.verbose = verbose + self.quiet = quiet diff --git a/cartography/sync.py b/cartography/sync.py index 4ac02593c..b417bb286 100644 --- a/cartography/sync.py +++ b/cartography/sync.py @@ -33,6 +33,24 @@ logger = logging.getLogger(__name__) +TOP_LEVEL_MODULES = OrderedDict({ # preserve order so that the default sync always runs `analysis` at the very end + 'create-indexes': cartography.intel.create_indexes.run, + 'aws': cartography.intel.aws.start_aws_ingestion, + 'azure': cartography.intel.azure.start_azure_ingestion, + 'crowdstrike': cartography.intel.crowdstrike.start_crowdstrike_ingestion, + 'gcp': cartography.intel.gcp.start_gcp_ingestion, + 'gsuite': cartography.intel.gsuite.start_gsuite_ingestion, + 'crxcavator': cartography.intel.crxcavator.start_extension_ingestion, + 'cve': cartography.intel.cve.start_cve_ingestion, + 'oci': cartography.intel.oci.start_oci_ingestion, + 'okta': cartography.intel.okta.start_okta_ingestion, + 'github': cartography.intel.github.start_github_ingestion, + 'digitalocean': cartography.intel.digitalocean.start_digitalocean_ingestion, + 'kubernetes': cartography.intel.kubernetes.start_k8s_ingestion, + 'analysis': cartography.intel.analysis.run, +}) + + class Sync: """ A cartography sync task. 
@@ -172,19 +190,42 @@ def build_default_sync() -> Sync: """ sync = Sync() sync.add_stages([ - ('create-indexes', cartography.intel.create_indexes.run), - ('aws', cartography.intel.aws.start_aws_ingestion), - ('azure', cartography.intel.azure.start_azure_ingestion), - ('crowdstrike', cartography.intel.crowdstrike.start_crowdstrike_ingestion), - ('gcp', cartography.intel.gcp.start_gcp_ingestion), - ('gsuite', cartography.intel.gsuite.start_gsuite_ingestion), - ('crxcavator', cartography.intel.crxcavator.start_extension_ingestion), - ('cve', cartography.intel.cve.start_cve_ingestion), - ('oci', cartography.intel.oci.start_oci_ingestion), - ('okta', cartography.intel.okta.start_okta_ingestion), - ('github', cartography.intel.github.start_github_ingestion), - ('digitalocean', cartography.intel.digitalocean.start_digitalocean_ingestion), - ('kubernetes', cartography.intel.kubernetes.start_k8s_ingestion), - ('analysis', cartography.intel.analysis.run), + (stage_name, stage_func) for stage_name, stage_func in TOP_LEVEL_MODULES.items() ]) return sync + + +def parse_and_validate_selected_modules(selected_modules: str) -> List[str]: + """ + Ensures that user-selected modules passed through the CLI are valid and parses them to a list of str. + :param selected_modules: comma separated string of module names provided by user + :return: A validated list of module names that we will run + """ + validated_modules: List[str] = [] + for module in selected_modules.split(','): + module = module.strip() + + if module in TOP_LEVEL_MODULES.keys(): + validated_modules.append(module) + else: + valid_modules = ', '.join(TOP_LEVEL_MODULES.keys()) + raise ValueError( + f'Error parsing `selected_modules`. You specified "{selected_modules}". ' + f'Please check that your string is formatted properly. ' + f'Example valid input looks like "aws,gcp,analysis" or "azure, oci, crowdstrike". 
' + f'Our full list of valid values is: {valid_modules}.', + ) + return validated_modules + + +def build_sync(selected_modules_as_str: str) -> Sync: + """ + Returns a cartography sync object where all the sync stages are from the user-specified comma separated list of + modules to run. + """ + selected_modules = parse_and_validate_selected_modules(selected_modules_as_str) + sync = Sync() + sync.add_stages( + [(sync_name, TOP_LEVEL_MODULES[sync_name]) for sync_name in selected_modules], + ) + return sync diff --git a/setup.cfg b/setup.cfg index 312318941..2b93ff264 100644 --- a/setup.cfg +++ b/setup.cfg @@ -68,4 +68,4 @@ allow_redefinition = true ignore_errors = true [coverage:report] -fail_under = 30 +fail_under = 40 diff --git a/tests/data/test_cartography_conf.yaml b/tests/data/test_cartography_conf.yaml new file mode 100644 index 000000000..8139986a0 --- /dev/null +++ b/tests/data/test_cartography_conf.yaml @@ -0,0 +1,45 @@ +analysis_job_directory: null +aws_best_effort_mode: false +aws_requested_syncs: null +aws_sync_all_profiles: false +azure_client_id: null +azure_client_secret_env_var: null +azure_sp_auth: false +azure_sync_all_subscriptions: false +azure_tenant_id: null +crowdstrike_api_url: null +crowdstrike_client_id_env_var: null +crowdstrike_client_secret_env_var: null +crxcavator_api_base_uri: https://api.crxcavator.io/v1 +crxcavator_api_key_env_var: null +cve_enabled: false +digitalocean_token_env_var: null +github_config_env_var: null +gsuite_auth_method: delegated +gsuite_tokens_env_var: GSUITE_GOOGLE_APPLICATION_CREDENTIALS +jamf_base_uri: null +jamf_password_env_var: null +jamf_user: null +k8s_kubeconfig: null +neo4j_database: null +neo4j_max_connection_lifetime: 3600 +neo4j_password_env_var: null +neo4j_password_prompt: false +neo4j_uri: bolt://localhost:7687 +neo4j_user: null +nist_cve_url: https://nvd.nist.gov/feeds/json/cve/1.1 +oci_sync_all_profiles: false +okta_api_key_env_var: null +okta_org_id: null +okta_saml_role_regex: 
^aws\#\S+\#(?{{role}}[\w\-]+)\#(?{{accountid}}\d+)$ +pagerduty_api_key_env_var: null +pagerduty_request_timeout: null +permission_relationships_file: cartography/data/permission_relationships.yaml +quiet: false +selected_modules: aws +statsd_enabled: false +statsd_host: 127.0.0.1 +statsd_port: 8125 +statsd_prefix: '' +update_tag: null +verbose: false diff --git a/tests/integration/cartography/test_cli.py b/tests/integration/cartography/test_cli.py index 260575744..0c42d65fb 100644 --- a/tests/integration/cartography/test_cli.py +++ b/tests/integration/cartography/test_cli.py @@ -1,11 +1,37 @@ -import unittest.mock +from unittest.mock import MagicMock +from unittest.mock import patch import cartography.cli +from cartography.cli import CLI from tests.integration import settings def test_cli(): - sync = unittest.mock.MagicMock() - cli = cartography.cli.CLI(sync, 'test') - cli.main(["--neo4j-uri", settings.get("NEO4J_URL")]) + """ + Simulate running `cartography --neo4j-uri URI sync` and ensure the sync gets run. + """ + sync = MagicMock() + cli = CLI(sync, 'test') + cli.main(["--neo4j-uri", settings.get("NEO4J_URL"), "sync"]) sync.run.assert_called_once() + + +@patch.object(cartography.cli, 'run_with_config', return_value=0) +def test_cli_load_yaml(mock_run_with_config: MagicMock): + """ + Simulate running `cartography --config tests/data/test_cartography_conf.yaml` and ensure that the sync starts. 
+ """ + argv = [ + "--config", + "tests/data/test_cartography_conf.yaml", + "sync", + ] + + # Act + CLI(prog='cartography').main(argv) + + # Assert + mock_run_with_config.assert_called() + + +# TODO test that the okta_saml_role_regex reaches the AWS module as expected diff --git a/tests/unit/cartography/test_cli.py b/tests/unit/cartography/test_cli.py new file mode 100644 index 000000000..735a0cb1b --- /dev/null +++ b/tests/unit/cartography/test_cli.py @@ -0,0 +1,49 @@ +from unittest.mock import MagicMock +from unittest.mock import patch + +import cartography.sync +from cartography.cli import CLI + + +def test_cli_selected_modules(): + """ + Test that we correctly parse the --selected-modules arg + """ + # Arrange + argv = [ + "sync", + "--selected-modules", + "aws", + ] + + # Act + cli = CLI(prog='cartography') + + # Assert that the argparser created by the CLI knows that we want to run the aws module + parsed_args = cli.parser.parse_args(argv) + assert parsed_args.selected_modules == 'aws' + # TODO - remove this when ready; this is an easy way to hook in and get yaml for copy pasta + # + # args = vars(parsed_args) + # config = dump(args) + # + # + # assert False + + +@patch.object(cartography.cli, 'run_with_config', return_value=0) +def test_cli_main(mock_run_with_config: MagicMock): + """ + Test that processing a cartography Config object with CLI.main() works. 
+ """ + argv = [ + "sync", + "--selected-modules", + "aws", + ] + + # Act + CLI(prog='cartography').main(argv) + + # Assert + mock_run_with_config.assert_called() diff --git a/tests/unit/cartography/test_sync.py b/tests/unit/cartography/test_sync.py new file mode 100644 index 000000000..3b8b0b436 --- /dev/null +++ b/tests/unit/cartography/test_sync.py @@ -0,0 +1,51 @@ +import pytest + +from cartography.sync import build_default_sync +from cartography.sync import build_sync +from cartography.sync import parse_and_validate_selected_modules +from cartography.sync import TOP_LEVEL_MODULES + + +def test_build_default_sync(): + sync = build_default_sync() + # Use list because order matters + assert [name for name in sync._stages.keys()] == list(TOP_LEVEL_MODULES.keys()) + + +def test_build_sync(): + # Arrange + selected_modules = 'aws, gcp, analysis' + + # Act + sync = build_sync(selected_modules) + + # Assert + assert [name for name in sync._stages.keys()] == selected_modules.split(', ') + + +# TODO - this test enforces that we put analysis at the end. idk if we want to own this logic though. +# def test_build_sync_out_of_order_args(): +# # Arrange +# selected_modules = 'analysis,aws,gcp' +# +# # Act +# sync = build_sync(selected_modules) +# +# # Assert +# assert [name for name in sync._stages.keys()] == ['aws', 'gcp', 'analysis'] + + +def test_parse_and_validate_selected_modules(): + no_spaces = "aws,gcp,oci,analysis" + assert parse_and_validate_selected_modules(no_spaces) == ['aws', 'gcp', 'oci', 'analysis'] + + mismatch_spaces = 'gcp, oci,analysis' + assert parse_and_validate_selected_modules(mismatch_spaces) == ['gcp', 'oci', 'analysis'] + + sync_that_does_not_exist = 'gcp, thisdoesnotexist, aws' + with pytest.raises(ValueError): + parse_and_validate_selected_modules(sync_that_does_not_exist) + + absolute_garbage = '#@$@#RDFFHKjsdfkjsd,KDFJHW#@,' + with pytest.raises(ValueError): + parse_and_validate_selected_modules(absolute_garbage)