Changes from all commits
52 commits
17c5fd7
updated travis for conda to update pip
Nov 5, 2018
d7d4ea3
updated README for build status, updated .travis.yml for codecoverage
Nov 5, 2018
99a0e75
travis debug
Nov 5, 2018
1ea37fc
travis debug
Nov 5, 2018
99f7ebc
travis debug
Nov 5, 2018
e259059
travis debug
Nov 5, 2018
1601d38
travis debug
Nov 5, 2018
4b4bb3e
travis debug
Nov 5, 2018
c7c6593
travis debug
Nov 5, 2018
19a13f2
travis debug
Nov 5, 2018
e9bafa8
travis debug
Nov 5, 2018
e6381f2
updated rio-tiler vesrion
Nov 5, 2018
e924aa7
updated testing for all_close
Nov 5, 2018
4a70153
updated test to use gtiff
Nov 5, 2018
319b07a
updated tests/test_main.py to compare as int
Nov 5, 2018
aa53751
updated .travis.yml for codecov
Nov 5, 2018
ec27791
updated .travis.yml for codecov
Nov 5, 2018
477d956
updtaed tile_generator to delete debug print statement
Nov 5, 2018
804a376
Tasking intereaction (#2)
dlindenbaum Nov 7, 2018
b2f7399
Tasking intereaction (#3)
dlindenbaum Nov 7, 2018
cc53c71
Merge branch 'dev' of github.com:SpaceNetChallenge/ml-export-tool int…
Nov 7, 2018
d9ad7fd
created base model for mlwork
Nov 7, 2018
3d7f682
updated baseline mlmodel class
Nov 7, 2018
9b10fe2
updated tile_generator to remove print statements
Nov 8, 2018
1d3ee36
updated to allow for pass through of indexes
Nov 8, 2018
0542482
updated to use desired_zoom_level as an absolute zoom level as oppose…
Nov 9, 2018
8e23860
updated to use requests library for downloading tiles
Nov 10, 2018
4b9ea80
updated notebooks/Write_Tile_To_COG as demonstration
Nov 10, 2018
95a87b3
updated tile_aggregrator to use pytorch datagenerator class
Nov 10, 2018
6e3500a
updated setup.py for affine, tqdm, pytorch and torchvision
Nov 10, 2018
bc76b97
updated ml_tools/mlbase to have tf_serving class
Nov 11, 2018
a546253
updated tile_aggregator to use torch dataloader for speed enhancement…
Nov 11, 2018
6063789
updated tile_generator for use of dataloader
Nov 11, 2018
889265c
updated noetbook for Tile_To_COG
Nov 11, 2018
b7b3b68
updated tests
Nov 12, 2018
628398a
updated Write_tile_To_COG
Nov 12, 2018
b592392
updated tile_aggregrator to have LZW rio-cog profile
Nov 12, 2018
2f2bbc3
updated setup.py for sat-stac requirement
Nov 28, 2018
92050c5
updated ml_export utils for s3 functionality
Dec 3, 2018
d97ce89
update nginx.conf for creater body size
Dec 3, 2018
c1f1b7f
added scripts processing_tile for demonstration of upload of tiles
Dec 3, 2018
9dd37bb
updated postprocessing for raster to geojson
Dec 3, 2018
af8f2d9
updated ml_tools opencv for inference
Dec 3, 2018
c48634b
updatign readme.md
Dec 3, 2018
6c10b98
added basic stac_tools
Dec 3, 2018
5536331
updated .travis.yml for opencv requirement
Dec 4, 2018
237c81c
Merge pull request #4 from SpaceNetChallenge/ml_interface
nrweir Dec 19, 2018
28743da
code cleanup
nrweir Dec 19, 2018
9484d25
restyling code and adding notes
nrweir Dec 19, 2018
38a19c1
adding docs and comments, code cleanup for remaining modules
nrweir Dec 19, 2018
f0753bb
fixing class name for MLModel
nrweir Dec 20, 2018
3fb54f1
Merge pull request #5 from SpaceNetChallenge/nw_doc
nrweir Dec 20, 2018
7 changes: 7 additions & 0 deletions .idea/dictionaries/dlindenbaum.xml

Some generated files are not rendered by default.

14 changes: 10 additions & 4 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -19,10 +19,16 @@ install:
   - conda update -q conda
   # Useful for debugging any issues with conda
   - conda info -a
-  - conda create --yes -n ml-export python=$TRAVIS_PYTHON_VERSION pip
+  - conda create --yes -n ml-export python=$TRAVIS_PYTHON_VERSION pip=18.1
   - source activate ml-export
-  - conda install -c conda-forge rtree
-  - pip install -q -e .[test]
+  - conda install -c conda-forge rtree pytest opencv
+  - conda info -a
+  - pip install -e .[test]
+  - conda info -a
+  - conda list
   # command to run tests
 script:
-  - pytest # or py.test for Python versions 3.5 and below
+  - source activate ml-export & pytest --cov=./ #--log-level=INFO #--cov=./# or py.test for Python versions 3.5 and below
+  - codecov


11 changes: 11 additions & 0 deletions Docker/Dockerfile
@@ -0,0 +1,11 @@
# Use the developmentseed/looking-glass TF Serving image from Docker Hub as the base image
FROM developmentseed/looking-glass:latest

# Install NGINX, used to reverse-proxy predictions from SageMaker to TF Serving
RUN apt-get update && apt-get install -y --no-install-recommends nginx git

# Copy NGINX configuration to the container
COPY nginx.conf /etc/nginx/nginx.conf

# starts NGINX and TF serving pointing to our model
ENTRYPOINT service nginx start | /usr/bin/tf_serving_entrypoint.sh
26 changes: 26 additions & 0 deletions Docker/nginx.conf
@@ -0,0 +1,26 @@
events {
# determines how many requests can simultaneously be served
# https://www.digitalocean.com/community/tutorials/how-to-optimize-nginx-configuration
# for more information
worker_connections 2048;
}

http {

client_max_body_size 50M;
server {
# configures the server to listen to the port 8080
listen 8080 deferred;

# redirects requests from SageMaker to TF Serving
location /invocations {
proxy_pass http://localhost:8501/v1/models/looking_glass_export:predict;
}

# Used by SageMaker to confirm the server is alive.
location /ping {
return 200 "OK";
}
}
}

32 changes: 31 additions & 1 deletion README.MD
@@ -1,4 +1,5 @@
# Creating an ML-Export Tool
[![Build Status](https://travis-ci.com/SpaceNetChallenge/ml-export-tool.svg?branch=dev)](https://travis-ci.com/SpaceNetChallenge/ml-export-tool)

## User Story

@@ -12,16 +13,45 @@ A user would like to perform machine learning against an area. They provide an
3. Output formats for result.


## Export End Points
# Interface End Points

### GET
1. TMS
2. Vector Tiles
3. GeoJson
4. Cloud Optimized GeoTiff

### Push:
1. New ML Prediction
2. New STAC-ITEM



## Storage Layer:
We will use the [SpatioTemporal Asset Catalog (STAC) spec](https://github.com/radiantearth/stac-spec) for storage documentation.
STAC-ITEMS: Each machine learning output will be stored as a STAC Item. Binary masks can be stored as Cloud Optimized GeoTIFFs to enable easy processing.

* STAC-ITEMS can be added to Catalog Collections based on TaskID.
STAC-ITEMS produced by the ML Service should have at least 3 assets:

* results_cog
* results_cog_binary
* results_cog_geojson

* STAC-ITEMs for other APIs can be documented:
* [AI for Earth](https://github.com/Microsoft/AIforEarth-API-Development/blob/master/Quickstart.md)
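As a sketch, a STAC Item produced by the ML service might carry the three required assets like this. All IDs, hrefs, bounds, and timestamps below are hypothetical, and only the asset keys come from the spec above:

```python
# Hypothetical minimal STAC Item for an ML result; field names follow the
# STAC Item spec, but every value here is invented for illustration.
stac_item = {
    "type": "Feature",
    "id": "ml-task-0001-tile-42",
    "bbox": [-43.30, -22.97, -43.25, -22.93],
    "geometry": {
        "type": "Polygon",
        "coordinates": [[[-43.30, -22.97], [-43.25, -22.97],
                         [-43.25, -22.93], [-43.30, -22.93],
                         [-43.30, -22.97]]],
    },
    "properties": {"datetime": "2018-12-03T00:00:00Z"},
    "assets": {
        # The three assets the ML Service should always attach:
        "results_cog": {"href": "s3://bucket/task/results.tif",
                        "type": "image/tiff"},
        "results_cog_binary": {"href": "s3://bucket/task/results_binary.tif",
                               "type": "image/tiff"},
        "results_cog_geojson": {"href": "s3://bucket/task/results.geojson",
                                "type": "application/geo+json"},
    },
}

print(sorted(stac_item["assets"]))
```

Items shaped like this can then be grouped into catalog collections by TaskID, as described above.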



### Why STAC:
* STAC is an industry initiative to create an overarching, cloud-native, searchable catalog of geospatial data.
* Machine Learning outputs in mapping situations are Spatial and Temporal Items.
* This will allow cumulative machine learning results to be queryable and should allow flexibility as we add new types of data.
* An example of the SpaceNet STAC-Browser can be found at [SpaceNet-STAC](https://spacenet-stac.netlify.com/)



## Test items:

Test Location 1:
Empty file added ml_export/api/__init__.py
Empty file.
3 changes: 3 additions & 0 deletions ml_export/ml_tools/__init__.py
@@ -0,0 +1,3 @@
"""ml-tools"""

__version__ = '0.1'
145 changes: 145 additions & 0 deletions ml_export/ml_tools/mlbase.py
@@ -0,0 +1,145 @@
import json
import logging

import numpy as np
import requests

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)


class MLModel:

    """MLModel base class.

    The ML model base class should have four methods:

    __init__: initialize the model from a JSON string describing it.
    load_model_dict: load the model into memory based on the provided
        model dictionary.
    predict: receive a np array of shape (3, 1024, 1024) and return a
        np array of shape (1, 1024, 1024).
    predict_batch: receive a list of np arrays of [np(3, 1024, 1024)] and
        return a list of [np(1, 1024, 1024)].

    Example model dictionary::

        model_dictionary = {'model_file': "test.hdf5",
                            "model_description": "Passthrough Model",
                            "model_version": "0.1",
                            "model_speed": 20,  # numpy arrays per second
                            }
    """

    def __init__(self, model_json_string, debug=False):
        """Initialize the model using a JSON string ID for the model."""
        self.logger = logging.getLogger(__name__)
        # Create a stream handler for the log messages
        logger_handler = logging.StreamHandler()
        # Create a formatter for formatting the log messages
        logger_formatter = logging.Formatter(
            '%(name)s - %(levelname)s - %(message)s')
        # Add the formatter to the handler
        logger_handler.setFormatter(logger_formatter)
        # Add the handler to the logger
        if debug:
            self.logger.setLevel(logging.DEBUG)
        else:
            self.logger.setLevel(logging.INFO)
        self.logger.addHandler(logger_handler)
        # Assign the model dictionary
        self.model_json = model_json_string
        # Load the model into memory
        self.load_model_dict()

    def estimate_time(self, tiles_length):
        """Return a completion estimate in seconds.

        ``model_speed`` is in numpy arrays per second, so the estimate
        is tiles / speed.
        """
        return tiles_length / self.model_dict['model_speed']

    def load_model_dict(self):
        self.model_dict = json.loads(self.model_json)

    def predict(self, np_array):
        # TODO: IMPLEMENT! For now, pass band 0 through with a channel axis.
        return np_array[None, 0, :, :]

    def predict_batch(self, list_np_array):
        # TODO: IMPLEMENT! For now, apply the same passthrough per array.
        list_np_array_results = []
        for np_array in list_np_array:
            list_np_array_results.append(np_array[None, 0, :, :])
        return list_np_array_results
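The intended contract can be exercised without importing the module; the slicing below mirrors the placeholder `predict`, and the arithmetic follows the docstring's `model_speed` units (arrays per second):

```python
import json
import numpy as np

# The model dictionary travels as a JSON string, as MLModel expects.
model_json = json.dumps({"model_file": "test.hdf5",
                         "model_description": "Passthrough Model",
                         "model_version": "0.1",
                         "model_speed": 20})  # numpy arrays per second
model_dict = json.loads(model_json)

tile = np.zeros((3, 1024, 1024), dtype=np.uint8)
# The placeholder predict() keeps band 0 and adds a leading channel axis.
prediction = tile[None, 0, :, :]
print(prediction.shape)               # (1, 1024, 1024)

# With model_speed in arrays/second, 100 tiles take 100 / 20 = 5 seconds.
print(100 / model_dict["model_speed"])  # 5.0
```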


class MLTFServing:
    """MLTFServing model class.

    Same interface as MLModel, but prediction is delegated to a
    TF Serving REST endpoint:

    __init__: initialize with the endpoint location.
    load_model_dict: load the model into memory based on the provided
        model dictionary.
    predict: receive a np array of shape (3, 1024, 1024) and return a
        np array of shape (1, 1024, 1024).
    predict_batch: receive a batch of np arrays of [np(3, 1024, 1024)] and
        return a list of [np(1, 1024, 1024)].
    """

    def __init__(self, api_location, output_num_channels=1, debug=False):
        """Initialize the model."""
        self.logger = logging.getLogger(__name__)
        # Create a stream handler for the log messages
        logger_handler = logging.StreamHandler()
        # Create a formatter for formatting the log messages
        logger_formatter = logging.Formatter(
            '%(name)s - %(levelname)s - %(message)s')
        # Add the formatter to the handler
        logger_handler.setFormatter(logger_formatter)
        # Add the handler to the logger
        if debug:
            self.logger.setLevel(logging.DEBUG)
        else:
            self.logger.setLevel(logging.INFO)
        self.logger.addHandler(logger_handler)

        # Assign the prediction endpoint
        self.predict_api_loc = api_location
        self.num_channels = output_num_channels
        self.model_speed = 1

        # Load the model into memory
        self.load_model_dict()

    def estimate_time(self, tiles_length):
        """Return a completion estimate in seconds."""
        return tiles_length / self.model_speed

    def load_model_dict(self):
        # TODO: IMPLEMENT
        return 0

    def predict(self, np_array):
        # TODO: IMPLEMENT
        return np_array[None, 0, :, :]

    def predict_batch(self, super_res_tile_batch):
        # Convert (N, C, H, W) uint8 tiles to (N, H, W, C) floats in [0, 1]
        inputs = np.moveaxis(super_res_tile_batch, 1, 3).astype(np.float32) / 255
        payload = {'inputs': inputs.tolist()}
        # Send the prediction request to TF Serving
        r = requests.post(self.predict_api_loc, json=payload)
        content = json.loads(r.content)
        # Reshape the flat outputs into (N, 256, 256) masks
        all_image_preds = np.asarray(content['outputs']).reshape(len(inputs),
                                                                 256, 256)
        # Restore the channel axis: (N, 1, 256, 256)
        all_image_preds = all_image_preds[:, np.newaxis, :, :]

        return all_image_preds
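The NCHW-to-NHWC preprocessing and the response reshaping in `predict_batch` can be sanity-checked without a running TF Serving instance; the response array below is fabricated to stand in for the server's `outputs` field:

```python
import numpy as np

# Two RGB tiles in NCHW layout, as the dataloader would supply them.
batch = np.full((2, 3, 256, 256), 255, dtype=np.uint8)

# Same preprocessing as predict_batch: NCHW -> NHWC, scaled to [0, 1].
inputs = np.moveaxis(batch, 1, 3).astype(np.float32) / 255
print(inputs.shape)   # (2, 256, 256, 3)
print(inputs.max())   # 1.0

# A TF Serving JSON response carries flat 'outputs'; reshape back into
# per-tile masks and restore the channel axis, as predict_batch does.
fake_outputs = np.zeros((2 * 256 * 256,))
preds = fake_outputs.reshape(len(inputs), 256, 256)[:, np.newaxis, :, :]
print(preds.shape)    # (2, 1, 256, 256)
```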
54 changes: 54 additions & 0 deletions ml_export/ml_tools/mlopencv.py
@@ -0,0 +1,54 @@
# Note: for Mac OS X compatibility, import something from shapely.geometry
# before importing fiona, geopandas, or rasterio:
# https://github.com/Toblerity/Shapely/issues/553
from shapely import geometry  # noqa: F401 (import-order workaround above)
import mercantile
from ml_export import tile_generator
from ml_export.ml_tools.mlbase import MLModel
from torch.utils.data import Dataset


class MLOpenCV(MLModel):

    def __init__(self, model):
        super().__init__(model)


class OpenCVClassDataset(Dataset):

    def __init__(self, root_tile_obj, raster_location,
                 desired_zoom_level, super_res_zoom_level,
                 cog=True,
                 tile_size=256,
                 indexes=None):

        self.root_tile_obj = root_tile_obj
        self.desired_zoom_level = desired_zoom_level
        self.super_res_zoom_level = super_res_zoom_level
        self.raster_location = raster_location
        self.cog = cog
        self.tile_size = tile_size

        if indexes is None:
            self.indexes = [1, 2, 3]
        else:
            self.indexes = indexes

        small_tile_object_list, small_tile_position_list = \
            tile_generator.create_super_tile_list(
                root_tile_obj, desired_zoom_level=desired_zoom_level)
        self.small_tile_object_list = small_tile_object_list
        self.small_tile_position_list = small_tile_position_list  # this isn't used anywhere?

    def __len__(self):
        return len(self.small_tile_object_list)

    def __getitem__(self, idx):
        super_res_tile = tile_generator.create_super_tile_image(
            self.small_tile_object_list[idx],
            self.raster_location,
            desired_zoom_level=self.super_res_zoom_level,
            indexes=self.indexes,
            tile_size=self.tile_size,
            cog=self.cog)
        return super_res_tile, mercantile.xy_bounds(
            *self.small_tile_object_list[idx])
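`OpenCVClassDataset` follows the torch `Dataset` protocol (`__len__` and `__getitem__`), which `DataLoader` relies on for batched, parallel tile fetching. The protocol can be illustrated with a torch-free stand-in; the class and tile names here are invented for illustration:

```python
class ToyTileDataset:
    """Torch-free stand-in showing the __len__/__getitem__ contract."""

    def __init__(self, tiles):
        self.tiles = tiles

    def __len__(self):
        # DataLoader uses this to know how many samples exist.
        return len(self.tiles)

    def __getitem__(self, idx):
        # OpenCVClassDataset returns (image, bounds); mimic that pair shape.
        return self.tiles[idx], ("bounds-of", self.tiles[idx])


ds = ToyTileDataset(["tile-a", "tile-b", "tile-c"])
print(len(ds))    # 3
print(ds[1][0])   # tile-b
```

Any object with these two methods can be wrapped in `torch.utils.data.DataLoader` for batching and multi-worker loading.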
45 changes: 45 additions & 0 deletions ml_export/postprocessing/__init__.py
@@ -0,0 +1,45 @@
import geopandas as gpd
import rasterio
import rasterio.features
import rasterio.warp
from shapely.geometry import shape


def create_geojson(raster_name, geojson_name, threshold=0.5):
    """Binarize and vectorize a raster file.

    This function takes an image mask with float values between ``0`` and
    ``1`` and converts it to a binary mask, which it then polygonizes. Use
    the `threshold` argument to set the minimum pixel intensity value that
    will be included in the vectorized polygon outputs.

    Arguments
    ---------
    raster_name : str
        Path to the raster mask file to vectorize.
    geojson_name : str
        Desired output path for the geojson file.
    threshold : float, optional
        Minimum pixel intensity to include in the vectorized polygons.
        Defaults to ``0.5``.

    """
    geomList = []
    with rasterio.open(raster_name) as dataset:
        # Read the dataset's valid data mask as an ndarray.
        data = dataset.read()
        data[data >= threshold] = 1
        data[data < threshold] = 0
        mask = data == 1
        # Extract feature shapes and values from the array.
        for geom, val in rasterio.features.shapes(data, mask=mask,
                                                  transform=dataset.transform):
            # Transform shapes from the dataset's own coordinate
            # reference system to CRS84 (EPSG:4326).
            geom = rasterio.warp.transform_geom(
                dataset.crs, 'EPSG:4326', geom, precision=6)
            # Convert the GeoJSON dict to a shapely geometry for geopandas.
            geomList.append(shape(geom))
    gdf = gpd.GeoDataFrame(geometry=geomList)
    gdf.crs = {'init': 'epsg:4326'}
    gdf.to_file(geojson_name, driver="GeoJSON")

    return geojson_name
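The in-place thresholding step that `create_geojson` applies before polygonizing can be checked in isolation with plain numpy, with no raster I/O:

```python
import numpy as np

data = np.array([[0.2, 0.5], [0.7, 0.49]], dtype=np.float32)
threshold = 0.5
# Same binarization as create_geojson: >= threshold becomes 1, rest 0.
data[data >= threshold] = 1
data[data < threshold] = 0
print(data.tolist())  # [[0.0, 1.0], [1.0, 0.0]]
```

The order matters only slightly: values already set to ``1`` are never below a threshold in ``[0, 1]``, so the second assignment cannot undo the first.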
Empty file.
Empty file.
16 changes: 16 additions & 0 deletions ml_export/stac_tools/stac_collection_base.py
@@ -0,0 +1,16 @@



def create_stac_collection(stac_version="0.6.0",
                           id='taskm01',
                           title='Tasking Manager',
                           keywords='Machine-Learning, Remote-Sensing, computervision, ml',
                           version='0.1',
                           license='CC-BY-SA-4.0',
                           providers='SpaceNet',
                           extent=None,
                           temporal=None):
    # TODO: IMPLEMENT
    pass
Empty file.