-
Notifications
You must be signed in to change notification settings - Fork 3
Requirements.txt using TensorFlow-2.3.0 + Use a training model TF-2.x compatible + Let setup.py be distribution aware #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
d738d00
581995a
f20dee4
6e65150
8bca9eb
a91d35c
63b279e
8fdc982
2cbe515
1ad9ab6
16ceb06
a76f4a3
837d8f1
776289a
cded9a5
2b51992
525d21a
d7308ab
4ebd680
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| .DS_Store | ||
| .ipynb_checkpoints/ | ||
| .idea/ | ||
| *.iml | ||
|
|
||
| **/build | ||
| **/__pycache__ | ||
| /.idea/ | ||
| **/*.gitignore | ||
| /dist | ||
| **/*.egg-info |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -4,13 +4,14 @@ | |
|
|
||
| from tensorflow import keras | ||
| from tensorflow.keras import layers | ||
| from tensorflow.math import reduce_max | ||
| from tensorflow import math | ||
|
|
||
| from .audio import load_wav, create_spec | ||
|
|
||
| from microfaune.audio import load_wav, create_spec | ||
|
|
||
| RNN_WEIGHTS_FILE = os.path.abspath( | ||
| os.path.join(os.path.dirname(__file__), | ||
| "data/model_weights-20190919_220113.h5")) | ||
| "data/model_weights_tf2-20200912_173814.h5")) | ||
|
|
||
|
|
||
| class RNNDetector: | ||
|
|
@@ -65,15 +66,15 @@ def create_model(self): | |
| x = layers.ReLU()(x) | ||
| x = layers.MaxPool2D((1, 2))(x) | ||
|
|
||
| x = reduce_max(x, axis=-2) | ||
| x = math.reduce_max(x, axis=-2) | ||
|
|
||
| x = layers.Bidirectional(layers.GRU(64, return_sequences=True))(x) | ||
| x = layers.Bidirectional(layers.GRU(64, return_sequences=True))(x) | ||
|
|
||
| x = layers.TimeDistributed(layers.Dense(64, activation="sigmoid"))(x) | ||
| local_pred = layers.TimeDistributed( | ||
| layers.Dense(1, activation="sigmoid"))(x) | ||
| pred = reduce_max(local_pred, axis=-2) | ||
| pred = math.reduce_max(local_pred, axis=-2) | ||
| return keras.Model(inputs=spec, outputs=[pred, local_pred]) | ||
|
|
||
| def compute_features(self, audio_signals): | ||
|
|
@@ -144,3 +145,9 @@ def predict(self, X): | |
| def free_mem(self): | ||
| """Release GPU memory.""" | ||
| self._model = None | ||
|
|
||
| # if __name__ == '__main__' : | ||
| # detector = RNNDetector() | ||
| # global_score, local_score = detector.predict_on_wav(os.path.abspath(os.path.join(os.path.dirname(__file__), "media/SWIFT_20190723_050006.wav"))) # NB: Check that loaded wav file actually exists on your disk | ||
| # print(f"Global score: {global_score} - Local score: {local_score}") |
|
|
||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's try to avoid commented code. |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| import os | ||
| from collections import Counter | ||
| from pathlib import Path | ||
|
|
||
| from detection import RNNDetector | ||
| from domain.track import Track | ||
| from utils import misc_utils | ||
|
|
||
| class RNNDetectorValidator: | ||
|
|
||
| def __init__(self, detector:RNNDetector): | ||
| self.detector = detector | ||
|
|
||
| def computeMetricsAgainstAnnotatedDirectory(self, directory_path :str) -> Counter: | ||
| print(f'Computing metrics for files in directory: {directory_path}') | ||
| metrics_dir = Counter() | ||
| entries = Path(directory_path) | ||
| for entry in entries.iterdir(): | ||
| metrics_dir += self.computeMetricsAgainstAnnotatedDirectory(os.path.join(directory_path, entry.name)) \ | ||
| if entry.is_dir() else self.computeMetricsAgainstAnnotatedFile(f'{directory_path}/{entry.name}') | ||
| return metrics_dir | ||
|
|
||
| def computeMetricsAgainstAnnotatedFile(self, json_file_path :str) -> Counter: | ||
| media_file_annotation = self._load_json_annotation_file(json_file_path) | ||
| return self._compute_metrics_of_prediction_against_annotation(media_file_annotation) | ||
|
|
||
| def _load_json_annotation_file(self, json_file_path :str) -> dict : | ||
| return misc_utils.read_json_file(json_file_path) | ||
|
|
||
| def _compute_metrics_of_prediction_against_annotation(self, media_file_annotation:dict) -> Counter: | ||
| track = Track() | ||
| metrics_counter = [] | ||
| metrics_counter += map(lambda track_elmt : track.compute_metrics_of_prediction_against_annotation(track_elmt) , | ||
| media_file_annotation.get("tracks")) | ||
| return misc_utils.convert_counter_collection_to_counter(metrics_counter) | ||
|
|
||
|
|
||
|
|
||
| # if __name__ == '__main__' : | ||
| # detector = RNNDetector() | ||
| # validator = RNNDetectorValidator(detector) | ||
| # # metrics = validator.computeMetricsAgainstAnnotatedFile( os.path.abspath(os.path.join(os.path.dirname(__file__), "media-annotation/SWIFT_20000101_022052.json")) ) | ||
| # metrics = validator.computeMetricsAgainstAnnotatedDirectory( os.path.abspath(os.path.join(os.path.dirname(__file__), "media-annotation")) ) | ||
| # print(f'Accuracy : {misc_utils.getAccuracy(metrics)}') | ||
| # print(f'Precision : {misc_utils.getPrecision(metrics)}') | ||
| # print(f'Recall : {misc_utils.getRecall(metrics)}') | ||
| # print(f'F1 : {misc_utils.getF1(metrics)}') | ||
| # print(f'Total METRICS : {metrics}') | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the validation could be outside of the package for now. We can work on this in https://github.com/microfaune/microfaune |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,141 @@ | ||
| from functools import reduce | ||
| from collections import Counter | ||
| from utils import misc_utils as util | ||
| import operator | ||
| import numpy as np | ||
|
|
||
| # PREDICTION_SAMPLE_WIDTH_IN_MS = 20 | ||
|
|
||
| class Track: | ||
|
|
||
| # "tracks": [ | ||
| # { | ||
| # "id": 47, | ||
| # "name": "SWIFT_20000101_022052.wav", | ||
| # "file": "/media/SWIFT_20000101_022052.wav", | ||
| # "format": "wav", | ||
| # "project_id": 1, | ||
| # "duration": 60.0, | ||
| # "prediction": "", | ||
| # "annotation_set": [ | ||
| # { | ||
| # "id": 1, | ||
| # "track_id": 47, | ||
| # "value":"[ | ||
| # {\"id\":\"wavesurfer_4kgitqcktig\",\"start\":4.935061842665718,\"end\":10.509955195406347,\"annotation\":\"\"}, | ||
| # {\"id\":\"wavesurfer_afb13jpasm8\",\"start\":17.55982033205593,\"end\":22.95971703246838,\"annotation\":\"\"}, | ||
| # {\"id\":\"wavesurfer_jdu8bguik4\",\"start\":26.334652470226157,\"end\":30.184578821446145,\"annotation\":\"\"} | ||
| # ]", | ||
| # "user_id": 1, | ||
| # "reviewed": false, | ||
| # "reviewed_by_id": null, | ||
| # "date_time": "2020-10-17T19:12:13.800Z", | ||
| # "username": "admin", | ||
| # "reviewer": null | ||
| # } | ||
| # ] --> End of annotation_set | ||
| # }, --> End of the TrackElmt | ||
| # ] --> End of Tracks | ||
|
|
||
|
|
||
| ############################ | ||
| # Compute positive indexes # | ||
| ############################ | ||
| def map_annotation_set_to_prediction_ndxes(self, track_elmt:dict) -> [(int,int)] : | ||
| ''' | ||
| :param track_elmt: An element of 'tracks' | ||
| :return: An array of (start,end) excerpt corresponding to the indexes in Prediction structure of Track[] | ||
| ''' | ||
| prediction_ndxes_of_annotation_set_elmt = [] | ||
| prediction_ndxes_of_annotation_set_elmt += map(lambda annotation_set_elmt : | ||
| self._map_annotation_set_elmt_to_prediction_ndxes(annotation_set_elmt.get("value"), track_elmt.get("duration"), len(track_elmt.get("prediction")) ) , | ||
| track_elmt.get("annotation_set")) | ||
| prediction_ndxes_of_annotation_set_elmt = reduce(operator.concat, prediction_ndxes_of_annotation_set_elmt, []) | ||
| return prediction_ndxes_of_annotation_set_elmt | ||
|
|
||
| def _map_annotation_set_elmt_to_prediction_ndxes(self, value_list:[dict], track_duration:float, track_predictions_count:int ) -> [(int,int)]: | ||
| ''' | ||
| :param value_list: track.annotation_set.value[] | ||
| :param track_duration: tracks.duration | ||
| :param track_predictions_count: tracks.prediction.length | ||
| :return: An array of (start,end) excerpt corresponding to the indexes in Prediction structure of a particular Track | ||
| ''' | ||
| prediction_ndxes_of_value_elmt = [] | ||
| prediction_ndxes_of_value_elmt += map(lambda value : self._convert_from_annonation_elmt_time_to_prediction_order(value, track_duration, track_predictions_count), | ||
| value_list) | ||
| return prediction_ndxes_of_value_elmt | ||
|
|
||
|
|
||
| def _convert_from_annonation_elmt_time_to_prediction_order(self, value:dict, track_duration:float, track_predictions_count:int ) -> (int,int): | ||
| ''' | ||
| Formula of a tuple (start,end) excerpt corresponding to the indexes in Prediction structure of a particular track.value | ||
| track_duration(60 sec) ---represented-by--> track.prediction.length (2814) | ||
| start -------------------> ? | ||
| :param value: track.value | ||
| :param track_duration: tracks.duration | ||
| :param track_predictions_count: tracks.prediction.length | ||
| :return: A tuple (start,end) excerpt corresponding to the indexes in Prediction structure of a particular track.value | ||
| ''' | ||
| return ( int(value.get("start") * track_predictions_count // track_duration) , | ||
| int(value.get("end") * track_predictions_count // track_duration) ) | ||
|
|
||
|
|
||
| #################### | ||
| # Compute Metrics # | ||
| #################### | ||
| def compute_metrics_of_prediction_against_annotation(self, track_elmt:dict) -> Counter : | ||
| print(f'**Track** id:{track_elmt.get("id")} / name:{track_elmt.get("name")} / file:{track_elmt.get("file")} / duration:{track_elmt.get("duration")}') | ||
| positive_tuples_ndexes = self.map_annotation_set_to_prediction_ndxes(track_elmt) | ||
| return self.compute_track_elmt_metrics(positive_tuples_ndexes, track_elmt.get("prediction")) | ||
|
|
||
| def compute_track_elmt_metrics(self, positive_tuples_ndexes : [(int, int)], predictions:[]) -> Counter : | ||
| ''' | ||
| :param positive_tuples_ndexes: list of positive annotated tuple indexes | ||
| :param predictions: list of predictions made by the model | ||
| :return: Counter representing the confusion matrix metrics TP / TN / FP / FN | ||
| ''' | ||
| positive_tuples_ndexes.sort(key= lambda tuple: tuple[0]) | ||
| # 1 - Compute TP and FN | ||
| counter_tpfn = self._do_compute_track_elmt_metrics(positive_tuples_ndexes , predictions, 'tp_and_fn') | ||
| # 2 - Compute 'negative_tuples_ndexes' used to compute TN and FP | ||
| negative_tuples_ndexes = self._compute_negative_tuples_ndexes(positive_tuples_ndexes, predictions) | ||
| # 3 - Compute FP and TN | ||
| counter_fptn = self._do_compute_track_elmt_metrics(negative_tuples_ndexes , predictions, 'fp_and_tn') | ||
| # 4 - Return the Confusion Matrix element | ||
| metrics = Counter({'TP': util.ifNone(counter_tpfn.get(1.0), 0.0), 'FN': util.ifNone(counter_tpfn.get(0.0), 0.0), | ||
| 'FP': util.ifNone(counter_fptn.get(1.0), 0.0), 'TN': util.ifNone(counter_fptn.get(0.0), 0.0) }) | ||
| print(f'\t{metrics}') | ||
| return metrics | ||
|
|
||
| def _do_compute_track_elmt_metrics(self, positive_or_negative_tuples_ndexes : [(int, int)], predictions:[], n_p_desc : str) -> Counter : | ||
| p_and_n = list(map(lambda positive_or_negative_tuple_ndexes: self._compute_metrics_according_to_ground_truth(positive_or_negative_tuple_ndexes, predictions), | ||
| positive_or_negative_tuples_ndexes)) | ||
| counter_positive_negative = util.convert_counter_collection_to_counter(p_and_n) | ||
| # print(f'{n_p_desc} : {p_and_n}\ncounter_{n_p_desc} : {counter_positive_negative}\n') | ||
| return counter_positive_negative | ||
|
|
||
|
|
||
| def _compute_metrics_according_to_ground_truth(self, tuple_ndexes:(int, int), predictions:[]) -> Counter : #() : | ||
| ''' | ||
| :param tuple_ndexes: TRUE positive (or) TRUE negative tuple index, according to the ground truth 'the annotation' | ||
| :param predictions: predictions according to the RNN model prediction | ||
| :return: In case of tuple_ndexes represents TRUE positive => Counter dictionary { 0:x , 1:y} holding the number of FN (the 0) and TP (the 1) | ||
| In case of tuple_ndexes represents TRUE negative => Counter dictionary { 0:x , 1:y} holding the number of TN (the 0) and FP (the 1) | ||
| ''' | ||
| return Counter(np.floor(np.dot(np.array(predictions[tuple_ndexes[0]:tuple_ndexes[1] + 1:], np.float32), 2)) ) | ||
|
|
||
|
|
||
| def _compute_negative_tuples_ndexes(self, positive_tuples_ndexes : [(int, int)], predictions:[]) -> [(int, int)] : | ||
| negative_tuples_ndexes = [] | ||
| # 1 - Set the 1st elmt of 'negative_tuples_ndexes'. It is likely before the 1st elmt of 'positive_tuples_ndexes' BUT not sure | ||
| if positive_tuples_ndexes[0][0] != 0 : | ||
| negative_tuples_ndexes.append( (0, positive_tuples_ndexes[0][0]-1) ) | ||
| # 2 - Initialize the remaining elements of 'negative_tuples_ndexes' BUT beware how the last elmt is initialized | ||
| for i in range(0, len(positive_tuples_ndexes)) : | ||
| negative_tuples_ndexes.append( (positive_tuples_ndexes[i][1]+1, | ||
| positive_tuples_ndexes[i+1][0]-1 if i < len(positive_tuples_ndexes) - 1 | ||
| else len(predictions) - 1 ) ) | ||
| # 3 - Check that the last index didn't exceed the prediction size | ||
| if negative_tuples_ndexes[-1][0] > len(predictions) - 1 : | ||
| negative_tuples_ndexes.pop() | ||
| return negative_tuples_ndexes | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as the previous comment: let's put validation in https://github.com/microfaune/microfaune |
||
Large diffs are not rendered by default.
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
|
|
||
| import json | ||
| from collections import Counter | ||
|
|
||
|
|
||
| def read_json_file(json_file_path:str) -> dict: | ||
| """ Read json file with labels. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| json_file_path : str | ||
| Path of json file. | ||
|
|
||
| Returns: | ||
| ------- | ||
| data_dict : dict | ||
| Dictionary of labels; each label is a dictionary item with entries 'id', 'start', 'end', 'annotation' | ||
| """ | ||
| with open(json_file_path) as json_data: | ||
| data_dict = json.load(json_data) | ||
| return data_dict | ||
|
|
||
| def convert_counter_collection_to_counter(counter_collection:[]) -> Counter : | ||
| counter_aggregation = Counter() | ||
| for elmt in counter_collection : | ||
| counter_aggregation += Counter(elmt) | ||
| return counter_aggregation | ||
|
|
||
| def ifNone(valueToCheck, defaultValue ) : | ||
| return defaultValue if valueToCheck is None else valueToCheck | ||
|
|
||
| def getAccuracy(metrics: Counter) -> float: | ||
| return (metrics.get('TP') + metrics.get('TN')) / (metrics.get('TP') + metrics.get('TN') + metrics.get('FP') + metrics.get('FN')) | ||
|
|
||
| def getPrecision(metrics: Counter) -> float: | ||
| return metrics.get('TP') / (metrics.get('TP') + metrics.get('FP')) | ||
|
|
||
| def getRecall(metrics: Counter) -> float: | ||
| return metrics.get('TP') / (metrics.get('TP') + metrics.get('FN')) | ||
|
|
||
| def getF1(metrics: Counter) -> float: | ||
| return 2 * getPrecision(metrics) * getRecall(metrics) / (getPrecision(metrics) + getRecall(metrics)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we want to keep the readme for users of the package so no need to explain how to generate a distribution? What do you think?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually if we keep "how to generate the distribution", this would be helpful for the team members of this season and the next ones.
Just notice that in order to be able to perform the real upload, we should already have knowledge of the SECRETS that are private, so there no risk to keep things as they are unless you identified other issues with that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, nice!