From 669e1300a488e5afafee56fd25b8f4e89ffadf7a Mon Sep 17 00:00:00 2001
From: Theodore Vasiloudis <thvasilo@amazon.com>
Date: Wed, 7 Oct 2020 11:23:35 -0700
Subject: [PATCH] Update README and data location.

---
 README.md                                     |  6 ++--
 ...detection-sagemaker-notebook-instance.yaml |  9 +++---
 source/env_setup.py                           |  7 ++++-
 source/notebooks/src/package/config.py        | 28 +++++++++----------
 4 files changed, 28 insertions(+), 22 deletions(-)
diff --git a/README.md b/README.md
index 65aa888..c78d13f 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 With businesses moving online, fraud and abuse in online systems is constantly increasing as well. Traditionally, rule-based fraud detection systems are used to combat online fraud, but these rely on a static set of rules created by human experts. This project uses machine learning to create models for fraud detection that are dynamic, self-improving and maintainable. Importantly, they can scale with the online business.
 
-Specifically, we show how to use Amazon SageMaker to train supervised and unsupervised machine learning models on historical transactions, so that they can predict the likelihood of incoming transactions being fraudulent or not. We also show how to deploy the models, once trained, to a REST API that can be integrated into an existing business software infracture. This project includes a demonstration of this process using a public, anonymized credit card transactions [dataset provided by ULB](https://www.kaggle.com/mlg-ulb/creditcardfraud), but can be easily modified to work with custom labelled or unlaballed data provided as a relational table in csv format.
+Specifically, we show how to use Amazon SageMaker to train supervised and unsupervised machine learning models on historical transactions, so that they can predict the likelihood of incoming transactions being fraudulent or not. We also show how to deploy the models, once trained, to a REST API that can be integrated into an existing business software infrastructure. This project includes a demonstration of this process using a public, anonymized credit card transactions [dataset provided by ULB](https://www.kaggle.com/mlg-ulb/creditcardfraud), but can be easily modified to work with custom labelled or unlabelled data provided as a relational table in csv format.
 
 ## Getting Started
 
@@ -10,8 +10,8 @@ To get started quickly, use the following quick-launch link to launch a CloudFor
 
 | Region | Stack |
 | ---- | ---- |
-|US East (N. Virginia) | [<img src="https://s3.amazonaws.com/cloudformation-examples/cloudformation-launch-stack.png">](https://us-east-1.console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/create/review?templateURL=https://sagemaker-solutions-us-east-1.s3-us-east-1.amazonaws.com/Fraud-detection-using-machine-learning/deployment/fraud-detection-using-machine-learning.yaml&stackName=SageMaker-Fraud-Machine-Learning) |
-|US East (Ohio) | [<img src="https://s3.amazonaws.com/cloudformation-examples/cloudformation-launch-stack.png">](https://us-east-2.console.aws.amazon.com/cloudformation/home?region=us-east-2#/stacks/create/review?templateURL=https://sagemaker-solutions-us-east-2.s3-us-east-2.amazonaws.com/Fraud-detection-using-machine-learning/deployment/fraud-detection-using-machine-learning.yaml&stackName=SageMaker-Fraud-Machine-Learning) |
+|US East (N. Virginia) | [<img src="https://s3.amazonaws.com/cloudformation-examples/cloudformation-launch-stack.png">](https://us-east-1.console.aws.amazon.com/cloudformation/home?region=us-east-1#/stacks/create/review?templateURL=https://sagemaker-solutions-us-east-1.s3.amazonaws.com/Fraud-detection-using-machine-learning/deployment/fraud-detection-using-machine-learning.yaml&stackName=SageMaker-Fraud-Machine-Learning) |
+|US East (Ohio) | [<img src="https://s3.amazonaws.com/cloudformation-examples/cloudformation-launch-stack.png">](https://us-east-2.console.aws.amazon.com/cloudformation/home?region=us-east-2#/stacks/create/review?templateURL=https://sagemaker-solutions-us-east-2.s3.us-east-2.amazonaws.com/Fraud-detection-using-machine-learning/deployment/fraud-detection-using-machine-learning.yaml&stackName=SageMaker-Fraud-Machine-Learning) |
 |US West (Oregon) | [<img src="https://s3.amazonaws.com/cloudformation-examples/cloudformation-launch-stack.png">](https://us-west-2.console.aws.amazon.com/cloudformation/home?region=us-west-2#/stacks/create/review?templateURL=https://sagemaker-solutions-us-west-2.s3-us-west-2.amazonaws.com/Fraud-detection-using-machine-learning/deployment/fraud-detection-using-machine-learning.yaml&stackName=SageMaker-Fraud-Machine-Learning) |
 
 
diff --git a/deployment/fraud-detection-sagemaker-notebook-instance.yaml b/deployment/fraud-detection-sagemaker-notebook-instance.yaml
index be3e6da..17f2114 100644
--- a/deployment/fraud-detection-sagemaker-notebook-instance.yaml
+++ b/deployment/fraud-detection-sagemaker-notebook-instance.yaml
@@ -54,8 +54,9 @@ Resources:
                 sudo -u ec2-user -i <<EOF
                 cd /home/ec2-user/SageMaker
                 # copy source files
-                aws s3 sync s3://${SolutionsS3BucketNamePrefix}-${AWS::Region}/${SolutionName}/ .
-                unzip creditcardfraud.zip -d ./source/notebooks/
+                aws s3 sync s3://${SolutionsS3BucketNamePrefix}-${AWS::Region}/${SolutionName}/source .
+                unzip ./creditcardfraud.zip -d ./notebooks/
+                rm ./creditcardfraud.zip
                 # create stack_outputs.json with stack resources that are required in notebook(s)
                 touch stack_outputs.json
                 echo '{' >> stack_outputs.json
@@ -80,9 +81,9 @@ Resources:
               set -e
               # perform following actions as ec2-user
               sudo -u ec2-user -i <<EOF
-              /home/ec2-user/anaconda3/envs/python3/bin/python /home/ec2-user/SageMaker/source/env_setup.py --force --log-level DEBUG
+              /home/ec2-user/anaconda3/envs/python3/bin/python /home/ec2-user/SageMaker/env_setup.py --force --log-level DEBUG
               EOF
 Outputs:
   SageMakerNotebook:
     Description: "Opens the Jupyter notebook to get started with model training"
-    Value: !Sub "https://${SolutionPrefix}-notebook-instance.notebook.${AWS::Region}.sagemaker.aws/notebooks/source/notebooks/sagemaker_fraud_detection.ipynb"
+    Value: !Sub "https://${SolutionPrefix}-notebook-instance.notebook.${AWS::Region}.sagemaker.aws/notebooks/notebooks/sagemaker_fraud_detection.ipynb"
diff --git a/source/env_setup.py b/source/env_setup.py
index 49cde2e..0170161 100644
--- a/source/env_setup.py
+++ b/source/env_setup.py
@@ -4,6 +4,7 @@
 import subprocess
 import logging
 import sys
+from zipfile import ZipFile
 
 CURRENT_FILE = Path(__file__).resolve()
 CURRENT_FOLDER = CURRENT_FILE.parent
@@ -15,7 +16,7 @@
 # Common setup
 
 def get_sagemaker_mode() -> str:
-    stack_outputs_file = Path(CURRENT_FOLDER.parent, 'stack_outputs.json')
+    stack_outputs_file = Path(CURRENT_FOLDER, 'stack_outputs.json')
     with open(stack_outputs_file) as f:
         outputs = json.load(f)
     sagemaker_mode = outputs['SagemakerMode']
@@ -167,6 +168,10 @@ def env_setup_notebook_instance() -> None:
 def env_setup_studio() -> None:
     logging.info('Starting environment setup for Studio.')
     py_exec = get_executable()
+    logging.info('Extracting data.')
+    with ZipFile(f"{CURRENT_FOLDER}/creditcardfraud.zip", 'r') as zf:
+        zf.extractall(path=f"{CURRENT_FOLDER}/notebooks")
+
     logging.info('Upgrading pip packages.')
     bash(f"""
     export PIP_DISABLE_PIP_VERSION_CHECK=1
diff --git a/source/notebooks/src/package/config.py b/source/notebooks/src/package/config.py
index ed7b5ae..939f882 100644
--- a/source/notebooks/src/package/config.py
+++ b/source/notebooks/src/package/config.py
@@ -1,21 +1,21 @@
-from dotenv import load_dotenv
-import os
+import json
 from pathlib import Path
 
 from package import utils
 
 current_folder = utils.get_current_folder(globals())
-env_location = '../../../../.env'
-dotenv_filepath = Path(current_folder, env_location).resolve()
-assert dotenv_filepath.exists(), "Could not find .env file at {}".format(str(dotenv_filepath))
+cfn_stack_outputs_filepath = Path(current_folder, '../../../stack_outputs.json').resolve()
+assert cfn_stack_outputs_filepath.exists(), "Could not find stack_outputs.json file at {}".format(
+    str(cfn_stack_outputs_filepath))
 
-load_dotenv()
+with open(cfn_stack_outputs_filepath) as f:
+    cfn_stack_outputs = json.load(f)
 
-STACK_NAME = os.environ['FRAUD_STACK_NAME']
-AWS_ACCOUNT_ID = os.environ['AWS_ACCOUNT_ID']
-AWS_REGION = os.environ['AWS_REGION']
-SAGEMAKER_IAM_ROLE = os.environ['SAGEMAKER_IAM_ROLE']
-SOLUTIONS_S3_BUCKET = os.environ['SOLUTIONS_S3_BUCKET']
-
-MODEL_DATA_S3_BUCKET = os.environ['MODEL_DATA_S3_BUCKET']
-REST_API_GATEWAY = os.environ['REST_API_GATEWAY']
+STACK_NAME = cfn_stack_outputs['FraudStackName']
+SOLUTION_PREFIX = cfn_stack_outputs['SolutionPrefix']
+AWS_ACCOUNT_ID = cfn_stack_outputs['AwsAccountId']
+AWS_REGION = cfn_stack_outputs['AwsRegion']
+SAGEMAKER_IAM_ROLE = cfn_stack_outputs['IamRole']
+MODEL_DATA_S3_BUCKET = cfn_stack_outputs['ModelDataBucket']
+SOLUTIONS_S3_BUCKET = cfn_stack_outputs['SolutionsS3Bucket']
+REST_API_GATEWAY = cfn_stack_outputs['RESTAPIGateway']