Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
.DS_Store
.ipynb_checkpoints/
.idea/
*.iml

**/build
**/__pycache__
/.idea/
**/*.gitignore
/dist
**/*.egg-info
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@
* Go to the folder *microfaune_package*
* Run the command `pipenv run pip install .`

### Generate a distribution and upload it to Pypi

* Go to the folder *microfaune_ai_package*
* Run the commands<br/>
Generate the distribution `python3 setup.py sdist bdist_wheel`<br/>
Upload the distribution `twine upload dist/*`

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we want to keep the readme for users of the package so no need to explain how to generate a distribution? What do you think?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually if we keep "how to generate the distribution", this would be helpful for the team members of this season and the next ones.
Just note that in order to perform the real upload, one must already know the private SECRETS, so there is no risk in keeping things as they are unless you have identified other issues with that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, nice!

## Usage

Can be used as any package python:
Expand Down
Binary file not shown.
17 changes: 12 additions & 5 deletions microfaune/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.math import reduce_max
from tensorflow import math

from .audio import load_wav, create_spec

from microfaune.audio import load_wav, create_spec

RNN_WEIGHTS_FILE = os.path.abspath(
os.path.join(os.path.dirname(__file__),
"data/model_weights-20190919_220113.h5"))
"data/model_weights_tf2-20200912_173814.h5"))


class RNNDetector:
Expand Down Expand Up @@ -65,15 +66,15 @@ def create_model(self):
x = layers.ReLU()(x)
x = layers.MaxPool2D((1, 2))(x)

x = reduce_max(x, axis=-2)
x = math.reduce_max(x, axis=-2)

x = layers.Bidirectional(layers.GRU(64, return_sequences=True))(x)
x = layers.Bidirectional(layers.GRU(64, return_sequences=True))(x)

x = layers.TimeDistributed(layers.Dense(64, activation="sigmoid"))(x)
local_pred = layers.TimeDistributed(
layers.Dense(1, activation="sigmoid"))(x)
pred = reduce_max(local_pred, axis=-2)
pred = math.reduce_max(local_pred, axis=-2)
return keras.Model(inputs=spec, outputs=[pred, local_pred])

def compute_features(self, audio_signals):
Expand Down Expand Up @@ -144,3 +145,9 @@ def predict(self, X):
def free_mem(self):
    """Release GPU memory.

    Drops this object's reference to the Keras model so the interpreter can
    garbage-collect it; presumably this lets TensorFlow release the
    associated GPU memory — TODO confirm, TF may hold its own references.
    NOTE(review): after this call the detector presumably cannot predict
    again until the model is rebuilt or reloaded — verify against callers.
    """
    self._model = None

# if __name__ == '__main__' :
# detector = RNNDetector()
# global_score, local_score = detector.predict_on_wav(os.path.abspath(os.path.join(os.path.dirname(__file__), "media/SWIFT_20190723_050006.wav"))) # NB: Check that loaded wav file actually exists on your disk
# print(f"Global score: {global_score} - Local score: {local_score}")

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's try to avoid commented code.

48 changes: 48 additions & 0 deletions microfaune/detection_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import os
from collections import Counter
from pathlib import Path

from detection import RNNDetector
from domain.track import Track
from utils import misc_utils

class RNNDetectorValidator:
    """Evaluates an RNNDetector against human-annotated media files.

    Walks annotation JSON files, compares the stored model predictions with
    the human annotations and aggregates the confusion-matrix counts
    (TP / TN / FP / FN) into a single Counter.
    """

    def __init__(self, detector: RNNDetector):
        # Detector whose predictions are being validated.
        self.detector = detector

    def computeMetricsAgainstAnnotatedDirectory(self, directory_path: str) -> Counter:
        """Recursively aggregate metrics over every annotation file under directory_path."""
        print(f'Computing metrics for files in directory: {directory_path}')
        # NOTE(review): every non-directory entry is treated as a JSON
        # annotation file — a stray non-JSON file here will fail to parse.
        total = Counter()
        for entry in Path(directory_path).iterdir():
            if entry.is_dir():
                total += self.computeMetricsAgainstAnnotatedDirectory(os.path.join(directory_path, entry.name))
            else:
                total += self.computeMetricsAgainstAnnotatedFile(f'{directory_path}/{entry.name}')
        return total

    def computeMetricsAgainstAnnotatedFile(self, json_file_path: str) -> Counter:
        """Compute confusion-matrix metrics for a single annotation JSON file."""
        annotation = self._load_json_annotation_file(json_file_path)
        return self._compute_metrics_of_prediction_against_annotation(annotation)

    def _load_json_annotation_file(self, json_file_path: str) -> dict:
        # Thin wrapper so the JSON-reading strategy stays swappable in one place.
        return misc_utils.read_json_file(json_file_path)

    def _compute_metrics_of_prediction_against_annotation(self, media_file_annotation: dict) -> Counter:
        # One metrics Counter per track element, merged into a single Counter.
        track = Track()
        per_track = [track.compute_metrics_of_prediction_against_annotation(track_elmt)
                     for track_elmt in media_file_annotation.get("tracks")]
        return misc_utils.convert_counter_collection_to_counter(per_track)



# if __name__ == '__main__' :
# detector = RNNDetector()
# validator = RNNDetectorValidator(detector)
# # metrics = validator.computeMetricsAgainstAnnotatedFile( os.path.abspath(os.path.join(os.path.dirname(__file__), "media-annotation/SWIFT_20000101_022052.json")) )
# metrics = validator.computeMetricsAgainstAnnotatedDirectory( os.path.abspath(os.path.join(os.path.dirname(__file__), "media-annotation")) )
# print(f'Accuracy : {misc_utils.getAccuracy(metrics)}')
# print(f'Precision : {misc_utils.getPrecision(metrics)}')
# print(f'Recall : {misc_utils.getRecall(metrics)}')
# print(f'F1 : {misc_utils.getF1(metrics)}')
# print(f'Total METRICS : {metrics}')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the validation could be outside of the package for now. We can work on this in https://github.com/microfaune/microfaune

Empty file added microfaune/domain/__init__.py
Empty file.
141 changes: 141 additions & 0 deletions microfaune/domain/track.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
from functools import reduce
from collections import Counter
from utils import misc_utils as util
import operator
import numpy as np

# PREDICTION_SAMPLE_WIDTH_IN_MS = 20

class Track:
    """Compares one track's model predictions against its human annotations.

    A "track element" is one entry of the "tracks" array in the annotation
    JSON (example below): it carries the audio metadata, the model's
    per-sample predictions and the human annotation_set.
    """

    # Example of the JSON structure handled by this class:
    # "tracks": [
    #     {
    #         "id": 47,
    #         "name": "SWIFT_20000101_022052.wav",
    #         "file": "/media/SWIFT_20000101_022052.wav",
    #         "format": "wav",
    #         "project_id": 1,
    #         "duration": 60.0,
    #         "prediction": "",
    #         "annotation_set": [
    #             {
    #                 "id": 1,
    #                 "track_id": 47,
    #                 "value":"[
    #                     {\"id\":\"wavesurfer_4kgitqcktig\",\"start\":4.935061842665718,\"end\":10.509955195406347,\"annotation\":\"\"},
    #                     {\"id\":\"wavesurfer_afb13jpasm8\",\"start\":17.55982033205593,\"end\":22.95971703246838,\"annotation\":\"\"},
    #                     {\"id\":\"wavesurfer_jdu8bguik4\",\"start\":26.334652470226157,\"end\":30.184578821446145,\"annotation\":\"\"}
    #                 ]",
    #                 "user_id": 1,
    #                 "reviewed": false,
    #                 "reviewed_by_id": null,
    #                 "date_time": "2020-10-17T19:12:13.800Z",
    #                 "username": "admin",
    #                 "reviewer": null
    #             }
    #         ] --> end of annotation_set
    #     }, --> end of the track element
    # ] --> end of tracks

    ############################
    # Compute positive indexes #
    ############################
    def map_annotation_set_to_prediction_ndxes(self, track_elmt:dict) -> [(int,int)] :
        '''
        Map every annotated excerpt of a track element to prediction indexes.

        :param track_elmt: An element of 'tracks'
        :return: An array of (start, end) tuples corresponding to the indexes
                 in the prediction structure of a Track
        '''
        prediction_ndxes_of_annotation_set_elmt = []
        # One list of (start, end) tuples per annotation_set element.
        prediction_ndxes_of_annotation_set_elmt += map(lambda annotation_set_elmt :
                    self._map_annotation_set_elmt_to_prediction_ndxes(annotation_set_elmt.get("value"), track_elmt.get("duration"), len(track_elmt.get("prediction")) ) ,
                    track_elmt.get("annotation_set"))
        # Flatten the list of lists into a single list of tuples.
        prediction_ndxes_of_annotation_set_elmt = reduce(operator.concat, prediction_ndxes_of_annotation_set_elmt, [])
        return prediction_ndxes_of_annotation_set_elmt

    def _map_annotation_set_elmt_to_prediction_ndxes(self, value_list:[dict], track_duration:float, track_predictions_count:int ) -> [(int,int)]:
        '''
        Map the excerpts of one annotation_set element to prediction indexes.

        :param value_list: track.annotation_set.value[]
        :param track_duration: tracks.duration
        :param track_predictions_count: tracks.prediction.length
        :return: An array of (start, end) tuples corresponding to the indexes
                 in the prediction structure of a particular Track
        '''
        prediction_ndxes_of_value_elmt = []
        prediction_ndxes_of_value_elmt += map(lambda value : self._convert_from_annonation_elmt_time_to_prediction_order(value, track_duration, track_predictions_count),
                                              value_list)
        return prediction_ndxes_of_value_elmt


    def _convert_from_annonation_elmt_time_to_prediction_order(self, value:dict, track_duration:float, track_predictions_count:int ) -> (int,int):
        '''
        Convert one annotated time excerpt to (start, end) prediction indexes
        by the rule of three:
            track_duration (e.g. 60 sec) --is-represented-by--> track.prediction.length (e.g. 2814)
            start (sec)                  ------------------> start index = start * length // duration
        :param value: track.value
        :param track_duration: tracks.duration
        :param track_predictions_count: tracks.prediction.length
        :return: A tuple (start, end) of indexes in the prediction structure
                 for a particular track.value excerpt
        '''
        return ( int(value.get("start") * track_predictions_count // track_duration) ,
                 int(value.get("end") * track_predictions_count // track_duration) )


    ####################
    # Compute Metrics #
    ####################
    def compute_metrics_of_prediction_against_annotation(self, track_elmt:dict) -> Counter :
        '''
        Compute the confusion-matrix metrics of one track element.
        :param track_elmt: An element of 'tracks'
        :return: Counter with keys 'TP', 'TN', 'FP', 'FN'
        '''
        print(f'**Track** id:{track_elmt.get("id")} / name:{track_elmt.get("name")} / file:{track_elmt.get("file")} / duration:{track_elmt.get("duration")}')
        positive_tuples_ndexes = self.map_annotation_set_to_prediction_ndxes(track_elmt)
        return self.compute_track_elmt_metrics(positive_tuples_ndexes, track_elmt.get("prediction"))

    def compute_track_elmt_metrics(self, positive_tuples_ndexes : [(int, int)], predictions:[]) -> Counter :
        '''
        :param positive_tuples_ndexes: list of positive annotated tuple indexes
        :param predictions: list of predictions made by the model
        :return: Counter representing the confusion matrix metrics TP / TN / FP / FN
        '''
        # Sorting by start index is required by _compute_negative_tuples_ndexes.
        positive_tuples_ndexes.sort(key= lambda tuple: tuple[0])
        # 1 - Compute TP and FN (inside annotated-positive ranges)
        counter_tpfn = self._do_compute_track_elmt_metrics(positive_tuples_ndexes , predictions, 'tp_and_fn')
        # 2 - Compute 'negative_tuples_ndexes' used to compute TN and FP
        negative_tuples_ndexes = self._compute_negative_tuples_ndexes(positive_tuples_ndexes, predictions)
        # 3 - Compute FP and TN (inside the complementary, negative ranges)
        counter_fptn = self._do_compute_track_elmt_metrics(negative_tuples_ndexes , predictions, 'fp_and_tn')
        # 4 - Return the confusion-matrix element; missing keys default to 0.0
        metrics = Counter({'TP': util.ifNone(counter_tpfn.get(1.0), 0.0), 'FN': util.ifNone(counter_tpfn.get(0.0), 0.0),
                           'FP': util.ifNone(counter_fptn.get(1.0), 0.0), 'TN': util.ifNone(counter_fptn.get(0.0), 0.0) })
        print(f'\t{metrics}')
        return metrics

    def _do_compute_track_elmt_metrics(self, positive_or_negative_tuples_ndexes : [(int, int)], predictions:[], n_p_desc : str) -> Counter :
        '''
        Aggregate, over all given (start, end) ranges, the per-range Counters
        of binarized predictions into a single Counter.
        :param n_p_desc: label used only for (commented-out) debug output
        '''
        p_and_n = list(map(lambda positive_or_negative_tuple_ndexes: self._compute_metrics_according_to_ground_truth(positive_or_negative_tuple_ndexes, predictions),
                           positive_or_negative_tuples_ndexes))
        counter_positive_negative = util.convert_counter_collection_to_counter(p_and_n)
        # print(f'{n_p_desc} : {p_and_n}\ncounter_{n_p_desc} : {counter_positive_negative}\n')
        return counter_positive_negative


    def _compute_metrics_according_to_ground_truth(self, tuple_ndexes:(int, int), predictions:[]) -> Counter : #() :
        '''
        :param tuple_ndexes: TRUE positive (or) TRUE negative tuple index, according to the ground truth 'the annotation'
        :param predictions: predictions according to the RNN model prediction
        :return: In case tuple_ndexes represents a TRUE positive range => Counter { 0:x , 1:y } holding the number of FN (the 0) and TP (the 1)
                 In case tuple_ndexes represents a TRUE negative range => Counter { 0:x , 1:y } holding the number of TN (the 0) and FP (the 1)
        '''
        # floor(2 * p) binarizes each prediction at the 0.5 threshold:
        # p < 0.5 -> 0.0, 0.5 <= p < 1.0 -> 1.0.
        # NOTE(review): a prediction of exactly 1.0 maps to 2.0 and is then
        # counted under neither key 0.0 nor 1.0 — confirm predictions < 1.0.
        return Counter(np.floor(np.dot(np.array(predictions[tuple_ndexes[0]:tuple_ndexes[1] + 1:], np.float32), 2)) )


    def _compute_negative_tuples_ndexes(self, positive_tuples_ndexes : [(int, int)], predictions:[]) -> [(int, int)] :
        '''
        Build the (start, end) ranges complementary to the (sorted) positive
        ranges, covering the rest of the predictions array.
        NOTE(review): raises IndexError if positive_tuples_ndexes is empty —
        confirm every track always has at least one annotation.
        '''
        negative_tuples_ndexes = []
        # 1 - Set the 1st elmt of 'negative_tuples_ndexes'. It likely lies before the 1st elmt of 'positive_tuples_ndexes' BUT not always
        if positive_tuples_ndexes[0][0] != 0 :
            negative_tuples_ndexes.append( (0, positive_tuples_ndexes[0][0]-1) )
        # 2 - Initialize the remaining elements of 'negative_tuples_ndexes' BUT beware how the last elmt is initialized
        for i in range(0, len(positive_tuples_ndexes)) :
            negative_tuples_ndexes.append( (positive_tuples_ndexes[i][1]+1,
                                            positive_tuples_ndexes[i+1][0]-1 if i < len(positive_tuples_ndexes) - 1
                                            else len(predictions) - 1 ) )
        # 3 - Check that the last index didn't exceed the prediction size
        if negative_tuples_ndexes[-1][0] > len(predictions) - 1 :
            negative_tuples_ndexes.pop()
        return negative_tuples_ndexes
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as the previous comment: let's put validation in https://github.com/microfaune/microfaune

41 changes: 21 additions & 20 deletions microfaune/labeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,23 @@
from microfaune import audio, plot


def read_json_file(json_file_path):
""" Read json file with labels.

Parameters
----------
json_file_path : str
Path of json file.

Returns:
-------
data_dict : list
List of labels, each label is a dictionary item with entries 'id', 'start', 'end', 'annotation'
"""
with open(json_file_path) as json_data:
data_dict = json.load(json_data)
return data_dict
# def read_json_file(json_file_path):
# """ Read json file with labels.
#
# Parameters
# ----------
# json_file_path : str
# Path of json file.
#
# Returns:
# -------
# data_dict : list
# List of labels, each label is a dictionary item with entries 'id', 'start', 'end', 'annotation'
# """
# with open(json_file_path) as json_data:
# data_dict = json.load(json_data)
# return data_dict
from utils import misc_utils


def number_labels(json_file_path):
Expand All @@ -38,7 +39,7 @@ def number_labels(json_file_path):
nb_labels : int
Number of labels in json file
"""
data_dict = read_json_file(json_file_path)
data_dict = misc_utils.read_json_file(json_file_path)
nb_labels = len(data_dict)
return nb_labels

Expand All @@ -62,7 +63,7 @@ def prop_labeled(json_file_path, audio_file_path):
fs, data = audio.load_audio(audio_file_path)
total_duration = len(data) / fs

data_dict = read_json_file(json_file_path)
data_dict = misc_utils.read_json_file(json_file_path)

bird_song_duration = 0

Expand Down Expand Up @@ -90,7 +91,7 @@ def charac_function_audio(json_file_path, audio_file_path):
fs, data = audio.load_audio(audio_file_path)
charac_func = np.zeros((len(data), 1))

data_dict = read_json_file(json_file_path)
data_dict = misc_utils.read_json_file(json_file_path)

for label in data_dict:
indx_start = int(label['start'] * fs)
Expand Down Expand Up @@ -248,7 +249,7 @@ def extract_labels(json_path, start_time, duration):
labels: list
List of labelson the audio extract, each label is a dictionary with keys 'id', 'start', 'end' and 'annotation'
"""
data_dict = read_json_file(json_path)
data_dict = misc_utils.read_json_file(json_path)
labels = []

for label in data_dict:
Expand Down
54 changes: 54 additions & 0 deletions microfaune/media-annotation/SWIFT_20000101_022052.json

Large diffs are not rendered by default.

56 changes: 56 additions & 0 deletions microfaune/media-annotation/tmp1/SWIFT_20000101_122052.json

Large diffs are not rendered by default.

Binary file added microfaune/media/SWIFT_20190723_050006.wav
Binary file not shown.
Empty file added microfaune/utils/__init__.py
Empty file.
42 changes: 42 additions & 0 deletions microfaune/utils/misc_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@

import json
from collections import Counter


def read_json_file(json_file_path: str) -> dict:
    """Read and parse a JSON file.

    Parameters
    ----------
    json_file_path : str
        Path of the JSON file.

    Returns
    -------
    data_dict : dict or list
        Parsed JSON content. For annotation/label files this is a list of
        labels, each a dict with entries 'id', 'start', 'end', 'annotation';
        for track files it is a dict.
    """
    # Explicit encoding: the platform default is locale-dependent, while
    # JSON files are UTF-8 by specification (RFC 8259).
    with open(json_file_path, encoding="utf-8") as json_data:
        return json.load(json_data)

def convert_counter_collection_to_counter(counter_collection:[]) -> Counter :
    """Merge an iterable of Counter-like mappings into a single Counter.

    Each element is converted to a Counter and summed, so counts for the
    same key are added together; an empty collection yields an empty Counter.
    """
    return sum((Counter(item) for item in counter_collection), Counter())

def ifNone(valueToCheck, defaultValue ) :
    """Return valueToCheck, or defaultValue when valueToCheck is None.

    Note: only None triggers the default — falsy values such as 0 or ''
    are returned unchanged.
    """
    if valueToCheck is None:
        return defaultValue
    return valueToCheck

def getAccuracy(metrics: Counter) -> float:
    """Accuracy = (TP + TN) / (TP + TN + FP + FN)."""
    correct = metrics.get('TP') + metrics.get('TN')
    total = metrics.get('TP') + metrics.get('TN') + metrics.get('FP') + metrics.get('FN')
    return correct / total

def getPrecision(metrics: Counter) -> float:
    """Precision = TP / (TP + FP)."""
    true_positives = metrics.get('TP')
    return true_positives / (true_positives + metrics.get('FP'))

def getRecall(metrics: Counter) -> float:
    """Recall = TP / (TP + FN)."""
    true_positives = metrics.get('TP')
    return true_positives / (true_positives + metrics.get('FN'))

def getF1(metrics: Counter) -> float:
    """F1 score = harmonic mean of precision and recall: 2PR / (P + R)."""
    precision = getPrecision(metrics)
    recall = getRecall(metrics)
    return 2 * precision * recall / (precision + recall)
Loading