Merge pull request #8 from UPstartDeveloper/code-quality

UPstartDeveloper · web-flow · commit c81f7d2cd5e3 · 2021-09-02T11:58:37.000-04:00
Improve code documentation.
diff --git a/app.yaml b/app.yaml
@@ -1,3 +1,4 @@
+# Use this if you choose to deploy on the GCP App Engine!
 runtime: custom
 env: flex
 service: default
diff --git a/app/config.yaml b/app/config.yaml
@@ -1,5 +1,5 @@
 resize_shape: [256, 256]
 targets: ['Fire_Images']
-model_paths: ["fire_classifier_params.h5", "fire_classifier_layers.json"]
+model_file_paths: ["fire_classifier_params.h5", "fire_classifier_layers.json"]
 base_model_url: "https://github.com/UPstartDeveloper/Fire-Detection-API/releases/download/v0.0.2"
 model_sha256: "26f32ae0666bbb83e11968935db0ec2ab06623d1"
diff --git a/app/main.py b/app/main.py
@@ -1,18 +1,18 @@
 from fastapi import FastAPI, File, UploadFile
-
 from fire_classifier.predictor import ImagePredictor
 
+# init API
 app = FastAPI(
     title="Fire Detection API",
     description="Informs the probability that an image contains fire.",
 )
 
+# init ML inference object
 predictor_config_path = "./app/config.yaml"
-
-predictor = ImagePredictor.init_from_config_url(predictor_config_path)
+predictor = ImagePredictor.init_from_config_path(predictor_config_path)
 
 
 @app.post("/classify-image/")
 def create_upload_file(file: UploadFile = File(...)):
-    """Predicts the possibility that a RBG image contains fire."""
+    '''Predicts the possibility that an RGB image contains fire.'''
     return predictor.predict_from_file(file.file)
diff --git a/fire_classifier/predictor.py b/fire_classifier/predictor.py
@@ -1,68 +1,48 @@
 import argparse
 
 import numpy as np
-from typing import List
+from typing import Dict
 import yaml
 
-from fire_classifier.preprocessing_utilities import (
-    read_img_from_path,
-    read_from_file,
-)
-from fire_classifier.utils import download_model, load_model
+from fire_classifier.preprocessing_utilities import read_from_file
+from fire_classifier.utils import load_model
 
 
 class ImagePredictor:
-    def __init__(
-        self, model_paths: List[str], resize_size: List[int], 
-        base_download_url: str, targets: List[str]
-    ):
-        self.model_paths = model_paths
-        self.resize_size = resize_size
-        self.model = load_model(base_download_url, self.model_paths)
-        self.targets = targets
+    def __init__(self, config: Dict[str, int or str]):
+        self.model_paths = config["model_file_paths"]
+        self.resize_size = config["resize_shape"]
+        self.model = load_model(
+            config["base_model_url"], self.model_paths, config["model_sha256"]
+        )
+        self.targets = config["targets"]
 
     @classmethod
     def init_from_config_path(cls, config_path):
+        '''Parses the config file, and instantiates a new ImagePredictor'''
         # load details for setting up the model
         with open(config_path, "r") as f:
             config = yaml.load(f, yaml.SafeLoader)
-        # use the config data, to integrate the model into the new object
-        predictor = cls(
-            model_paths=config["model_paths"],
-            resize_size=config["resize_shape"],
-            base_download_url=config["base_model_url"],
-            targets=config["targets"],
-        )
+        # use the config data to integrate the model into the new instance
+        predictor = cls(config)
         return predictor
 
-    @classmethod
-    def init_from_config_url(cls, config_path):
-        # with open(config_path, "r") as f:
-        #     config = yaml.load(f, yaml.SafeLoader)
-
-        # download_model(
-        #     config["model_file_urls"], config["model_paths"], config["model_sha256"]
-        # )
-
-        return cls.init_from_config_path(config_path)
-
-    def predict_from_array(self, arr):
+    def predict_from_array(self, arr) -> Dict[str, float]:
+        '''Returns the probability that the sample belongs to each class.'''
         pred = self.model.predict(arr[np.newaxis, ...]).ravel().tolist()
-        pred = [round(x, 3) for x in pred]
-        return {k: v for k, v in zip(self.targets, pred)}
-
-    def predict_from_path(self, path):
-        arr = read_img_from_path(path)
-        return self.predict_from_array(arr)
+        pred = [round(x, 3) for x in pred]  # values between 0-1
+        return {class_label: prob for class_label, prob in zip(self.targets, pred)}
 
     def predict_from_file(self, file_object):
+        '''Converts uploaded image to a NumPy array and classifies it.'''
         arr = read_from_file(file_object)
         return self.predict_from_array(arr)
 
 
 if __name__ == "__main__":
     """
-    python predictor.py --predictor_config "../example/predictor_config.yaml"
+    Test out the predictor class via the CLI:
+        python predictor.py --predictor_config "../example/predictor_config.yaml"
 
     """
     parser = argparse.ArgumentParser()
diff --git a/fire_classifier/preprocessing_utilities.py b/fire_classifier/preprocessing_utilities.py
@@ -2,12 +2,20 @@
 import numpy as np
 
 
-def read_img_from_path(path):
-    img = cv2.imread(path, cv2.IMREAD_COLOR)
-    return img
+def read_from_file(file_object):
+    """
+    Produces a 3D array representing a color image.
 
+    NumPy creates a new 1D array from the file object,
+    and then using OpenCV we convert it to the proper 3D array
+    that the model can run inference on.
 
-def read_from_file(file_object):
+    Args:
+        file_object(fastapi.UploadFile): the uploaded image
+
+    Returns:
+        img_np: array-like object
+    """
     arr = np.fromstring(file_object.read(), np.uint8)
     img_np = cv2.imdecode(arr, cv2.IMREAD_COLOR)
 
diff --git a/fire_classifier/utils.py b/fire_classifier/utils.py
@@ -4,6 +4,18 @@
 
 
 def get_hash(filename):
+    """
+    Computes the SHA256 hash of a given file.
+
+    This can then be used to ensure the model file(s) downloaded
+    in this codebase are not corrupted.
+
+    Args:
+        filename(str): the name of the file
+
+    Returns:
+        bytes-like object
+    """
     sha256_hash = hashlib.sha256()
     with open(filename, "rb") as f:
         for byte_block in iter(lambda: f.read(4096), b""):
@@ -13,32 +25,77 @@ def get_hash(filename):
 
 
 def download_model(url, file_paths, file_sha256=None):
-    params_file, layers_file = file_paths
-    params_url, layers_url = (
-        f"{url}/{params_file}",
-        f"{url}/{layers_file}"
-    )
-    if (os.path.exists(params_file) and os.path.exists(layers_file)
-        # and get_hash(layers_file) == file_sha256
-    ):
-        print("File already exists")
-    else:  # download the model
-        keras.utils.get_file(
-            origin=layers_url, fname=layers_file,
-            cache_dir='.', cache_subdir="./model"
-        )
-        keras.utils.get_file(
-            origin=params_url, fname=params_file,
-            cache_dir='.', cache_subdir="./model"
-        )
-
-def load_model(url, file_paths):
-    '''Model reconstruction using H5 + JSON'''
+    """
+    Downloads the model files to the local ./model directory.
+
+    This will first check if the files are already present,
+    and not corrupted, before downloading from the address
+    specified in config.yaml.
+
+    Args:
+        url(str): the base url where the files are located
+        file_paths(List[str]): collection of all the files needed to
+                               eventually load the model
+        file_sha256(str): the supposed hash of one of the files
+                          we need to download. Checked against the
+                          one we may already have in the codebase.
+
+    Returns:
+        None
+    """
+    # Download only the model files that are needed
+    for model_file_path in file_paths:
+        if os.path.exists(model_file_path):
+            if get_hash(model_file_path) == file_sha256:
+                print(f"File already exists: {model_file_path}")
+        else:  # need to download the model
+            model_file_url = f"{url}/{model_file_path}"
+            keras.utils.get_file(
+                origin=model_file_url, fname=model_file_path, 
+                cache_dir=".", cache_subdir="./model"
+            )
+
+
+def load_model(url, file_paths, file_sha256=None, format='composite'):
+    """
+    Model reconstruction.
+
+    This will first download the model files (if needed), and then
+    load the model in memory using the given save format.
+
+    Args:
+        url(str): the base url where the files are located
+        file_paths(List[str]): collection of all the files needed to
+                               eventually load the model
+        file_sha256(str): the supposed hash of one of the files
+                          we need to download. Checked against the
+                          one we may already have in the codebase.
+        format(str): currently this only supports 'composite' 
+                     (which is for when the model is saved using a H5 + JSON)
+                     or 'h5' as the save format of the model.
+
+    Returns:
+        keras.Model object
+    """
+
+    def _model_from_composite_format():
+        '''Specific to using H5 + JSON as the save format'''
+        params_file, layers_file = file_paths
+        # load the model in memory
+        with open(f"./model/{layers_file}") as f:
+            model = keras.models.model_from_json(f.read())  # build the layers
+            model.load_weights(f"./model/{params_file}")  # load weights + biases
+        return model
+
+    def _model_from_h5():
+        '''Specific to using a single HDF5 (H5) file'''
+        params_file = file_paths[0]
+        return keras.models.load_model(params_file)
+    
     # First download the model, if needed
-    download_model(url, file_paths)
-    params_file, layers_file = file_paths
-    # Model reconstruction
-    with open(f"./model/{layers_file}") as f:
-        model = keras.models.model_from_json(f.read())
-        model.load_weights(f"./model/{params_file}")
-    return model
+    download_model(url, file_paths, file_sha256)
+    # load the model in memory
+    if format == 'composite':
+        return _model_from_composite_format()
+    else:  # assuming a single H5
+        return _model_from_h5()
diff --git a/setup.py b/setup.py
@@ -11,6 +11,7 @@
 except:
     REQUIRED = []
 
+# Use this if you plan to turn this project into a PyPI package!
 setup(
     name="YOUR_API_NAMME",
     version="0.1.0",

Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+# Use this if you choose to deploy on the GCP App Engine!`
`1`	`2`	`runtime: custom`
`2`	`3`	`env: flex`
`3`	`4`	`service: default`