From 669e1300a488e5afafee56fd25b8f4e89ffadf7a Mon Sep 17 00:00:00 2001 From: Theodore Vasiloudis Date: Wed, 7 Oct 2020 11:23:35 -0700 Subject: [PATCH] Update README and data location. --- README.md | 6 ++-- ...detection-sagemaker-notebook-instance.yaml | 9 +++--- source/env_setup.py | 7 ++++- source/notebooks/src/package/config.py | 28 +++++++++---------- 4 files changed, 28 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 65aa888..c78d13f 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ With businesses moving online, fraud and abuse in online systems is constantly increasing as well. Traditionally, rule-based fraud detection systems are used to combat online fraud, but these rely on a static set of rules created by human experts. This project uses machine learning to create models for fraud detection that are dynamic, self-improving and maintainable. Importantly, they can scale with the online business. -Specifically, we show how to use Amazon SageMaker to train supervised and unsupervised machine learning models on historical transactions, so that they can predict the likelihood of incoming transactions being fraudulent or not. We also show how to deploy the models, once trained, to a REST API that can be integrated into an existing business software infracture. This project includes a demonstration of this process using a public, anonymized credit card transactions [dataset provided by ULB](https://www.kaggle.com/mlg-ulb/creditcardfraud), but can be easily modified to work with custom labelled or unlaballed data provided as a relational table in csv format. +Specifically, we show how to use Amazon SageMaker to train supervised and unsupervised machine learning models on historical transactions, so that they can predict the likelihood of incoming transactions being fraudulent or not. We also show how to deploy the models, once trained, to a REST API that can be integrated into an existing business software infrastructure. 
This project includes a demonstration of this process using a public, anonymized credit card transactions [dataset provided by ULB](https://www.kaggle.com/mlg-ulb/creditcardfraud), but can be easily modified to work with custom labelled or unlabelled data provided as a relational table in csv format. ## Getting Started @@ -10,8 +10,8 @@ To get started quickly, use the following quick-launch link to launch a CloudFor | Region | Stack | | ---- | ---- | -|US East (N. Virginia) | [](https://us-east-1.console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/create/review?templateURL=https://sagemaker-solutions-us-east-1.s3-us-east-1.amazonaws.com/Fraud-detection-using-machine-learning/deployment/fraud-detection-using-machine-learning.yaml&stackName=SageMaker-Fraud-Machine-Learning) | -|US East (Ohio) | [](https://us-east-2.console.aws.amazon.com/cloudformation/home?region=us-east-2#/stacks/create/review?templateURL=https://sagemaker-solutions-us-east-2.s3-us-east-2.amazonaws.com/Fraud-detection-using-machine-learning/deployment/fraud-detection-using-machine-learning.yaml&stackName=SageMaker-Fraud-Machine-Learning) | +|US East (N. 
Virginia) | [](https://us-east-1.console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/create/review?templateURL=https://sagemaker-solutions-us-east-1.s3.amazonaws.com/Fraud-detection-using-machine-learning/deployment/fraud-detection-using-machine-learning.yaml&stackName=SageMaker-Fraud-Machine-Learning) | +|US East (Ohio) | [](https://us-east-2.console.aws.amazon.com/cloudformation/home?region=us-east-2#/stacks/create/review?templateURL=https://sagemaker-solutions-us-east-2.s3.us-east-2.amazonaws.com/Fraud-detection-using-machine-learning/deployment/fraud-detection-using-machine-learning.yaml&stackName=SageMaker-Fraud-Machine-Learning) | |US West (Oregon) | [](https://us-west-2.console.aws.amazon.com/cloudformation/home?region=us-west-2#/stacks/create/review?templateURL=https://sagemaker-solutions-us-west-2.s3-us-west-2.amazonaws.com/Fraud-detection-using-machine-learning/deployment/fraud-detection-using-machine-learning.yaml&stackName=SageMaker-Fraud-Machine-Learning) | diff --git a/deployment/fraud-detection-sagemaker-notebook-instance.yaml b/deployment/fraud-detection-sagemaker-notebook-instance.yaml index be3e6da..17f2114 100644 --- a/deployment/fraud-detection-sagemaker-notebook-instance.yaml +++ b/deployment/fraud-detection-sagemaker-notebook-instance.yaml @@ -54,8 +54,9 @@ Resources: sudo -u ec2-user -i <> stack_outputs.json @@ -80,9 +81,9 @@ Resources: set -e # perform following actions as ec2-user sudo -u ec2-user -i < str: - stack_outputs_file = Path(CURRENT_FOLDER.parent, 'stack_outputs.json') + stack_outputs_file = Path(CURRENT_FOLDER, 'stack_outputs.json') with open(stack_outputs_file) as f: outputs = json.load(f) sagemaker_mode = outputs['SagemakerMode'] @@ -167,6 +168,10 @@ def env_setup_notebook_instance() -> None: def env_setup_studio() -> None: logging.info('Starting environment setup for Studio.') py_exec = get_executable() + logging.info('Extracting data.') + with ZipFile(f"{CURRENT_FOLDER}/creditcardfraud.zip", 'r') as zf: + 
zf.extractall(path=f"{CURRENT_FOLDER}/notebooks") + logging.info('Upgrading pip packages.') bash(f""" export PIP_DISABLE_PIP_VERSION_CHECK=1 diff --git a/source/notebooks/src/package/config.py b/source/notebooks/src/package/config.py index ed7b5ae..939f882 100644 --- a/source/notebooks/src/package/config.py +++ b/source/notebooks/src/package/config.py @@ -1,21 +1,21 @@ -from dotenv import load_dotenv -import os +import json from pathlib import Path from package import utils current_folder = utils.get_current_folder(globals()) -env_location = '../../../../.env' -dotenv_filepath = Path(current_folder, env_location).resolve() -assert dotenv_filepath.exists(), "Could not find .env file at {}".format(str(dotenv_filepath)) +cfn_stack_outputs_filepath = Path(current_folder, '../../../stack_outputs.json').resolve() +assert cfn_stack_outputs_filepath.exists(), "Could not find stack_outputs.json file at {}".format( + str(cfn_stack_outputs_filepath)) -load_dotenv() +with open(cfn_stack_outputs_filepath) as f: + cfn_stack_outputs = json.load(f) -STACK_NAME = os.environ['FRAUD_STACK_NAME'] -AWS_ACCOUNT_ID = os.environ['AWS_ACCOUNT_ID'] -AWS_REGION = os.environ['AWS_REGION'] -SAGEMAKER_IAM_ROLE = os.environ['SAGEMAKER_IAM_ROLE'] -SOLUTIONS_S3_BUCKET = os.environ['SOLUTIONS_S3_BUCKET'] - -MODEL_DATA_S3_BUCKET = os.environ['MODEL_DATA_S3_BUCKET'] -REST_API_GATEWAY = os.environ['REST_API_GATEWAY'] +STACK_NAME = cfn_stack_outputs['FraudStackName'] +SOLUTION_PREFIX = cfn_stack_outputs['SolutionPrefix'] +AWS_ACCOUNT_ID = cfn_stack_outputs['AwsAccountId'] +AWS_REGION = cfn_stack_outputs['AwsRegion'] +SAGEMAKER_IAM_ROLE = cfn_stack_outputs['IamRole'] +MODEL_DATA_S3_BUCKET = cfn_stack_outputs['ModelDataBucket'] +SOLUTIONS_S3_BUCKET = cfn_stack_outputs['SolutionsS3Bucket'] +REST_API_GATEWAY = cfn_stack_outputs['RESTAPIGateway']