Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
.DS_Store
.ipynb_checkpoints/
.idea/
*.iml

**/build
**/__pycache__
/.idea/
**/*.gitignore
/dist
**/*.egg-info
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@
* Go to the folder *microfaune_package*
* Run the command `pipenv run pip install .`

### Generate a distribution and upload it to Pypi

* Go to the folder *microfaune_ai_package*
* Run the commands<br/>
Generate the distribution `python3 setup.py sdist bdist_wheel`<br/>
Upload the distribution `twine upload dist/*`

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we want to keep the readme for users of the package so no need to explain how to generate a distribution? What do you think?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually if we keep "how to generate the distribution", this would be helpful for the team members of this season and the next ones.
Just note that in order to perform the real upload, one must already know the private SECRETS, so there is no risk in keeping things as they are unless you have identified other issues with that.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, nice!

## Usage

Can be used as any package python:
Expand Down
Binary file not shown.
17 changes: 12 additions & 5 deletions microfaune/detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,14 @@

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.math import reduce_max
from tensorflow import math

from .audio import load_wav, create_spec

from microfaune.audio import load_wav, create_spec

RNN_WEIGHTS_FILE = os.path.abspath(
os.path.join(os.path.dirname(__file__),
"data/model_weights-20190919_220113.h5"))
"data/model_weights_tf2-20200912_173814.h5"))


class RNNDetector:
Expand Down Expand Up @@ -65,15 +66,15 @@ def create_model(self):
x = layers.ReLU()(x)
x = layers.MaxPool2D((1, 2))(x)

x = reduce_max(x, axis=-2)
x = math.reduce_max(x, axis=-2)

x = layers.Bidirectional(layers.GRU(64, return_sequences=True))(x)
x = layers.Bidirectional(layers.GRU(64, return_sequences=True))(x)

x = layers.TimeDistributed(layers.Dense(64, activation="sigmoid"))(x)
local_pred = layers.TimeDistributed(
layers.Dense(1, activation="sigmoid"))(x)
pred = reduce_max(local_pred, axis=-2)
pred = math.reduce_max(local_pred, axis=-2)
return keras.Model(inputs=spec, outputs=[pred, local_pred])

def compute_features(self, audio_signals):
Expand Down Expand Up @@ -144,3 +145,9 @@ def predict(self, X):
def free_mem(self):
    """Release GPU memory.

    Drops this object's reference to the Keras model so the interpreter can
    garbage-collect it; presumably this lets TensorFlow release the
    associated GPU memory — TODO confirm, TF may hold its own references.
    NOTE(review): after this call the detector presumably cannot predict
    again until the model is rebuilt or reloaded — verify against callers.
    """
    self._model = None

# if __name__ == '__main__' :
# detector = RNNDetector()
# global_score, local_score = detector.predict_on_wav(os.path.abspath(os.path.join(os.path.dirname(__file__), "media/SWIFT_20190723_050006.wav"))) # NB: Check that loaded wav file actually exists on your disk
# print(f"Global score: {global_score} - Local score: {local_score}")

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's try to avoid commented code.

48 changes: 48 additions & 0 deletions microfaune/detection_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import os
from collections import Counter
from pathlib import Path

from detection import RNNDetector
from domain.track import Track
from utils import misc_utils

class RNNDetectorValidator:
    """Evaluates an RNNDetector against human-annotated media files.

    Walks annotation JSON files, compares the stored model predictions with
    the human annotations and aggregates the confusion-matrix counts
    (TP / TN / FP / FN) into a single Counter.
    """

    def __init__(self, detector: RNNDetector):
        # Detector whose predictions are being validated.
        self.detector = detector

    def computeMetricsAgainstAnnotatedDirectory(self, directory_path: str) -> Counter:
        """Recursively aggregate metrics over every annotation file under directory_path."""
        print(f'Computing metrics for files in directory: {directory_path}')
        # NOTE(review): every non-directory entry is treated as a JSON
        # annotation file — a stray non-JSON file here will fail to parse.
        total = Counter()
        for entry in Path(directory_path).iterdir():
            if entry.is_dir():
                total += self.computeMetricsAgainstAnnotatedDirectory(os.path.join(directory_path, entry.name))
            else:
                total += self.computeMetricsAgainstAnnotatedFile(f'{directory_path}/{entry.name}')
        return total

    def computeMetricsAgainstAnnotatedFile(self, json_file_path: str) -> Counter:
        """Compute confusion-matrix metrics for a single annotation JSON file."""
        annotation = self._load_json_annotation_file(json_file_path)
        return self._compute_metrics_of_prediction_against_annotation(annotation)

    def _load_json_annotation_file(self, json_file_path: str) -> dict:
        # Thin wrapper so the JSON-reading strategy stays swappable in one place.
        return misc_utils.read_json_file(json_file_path)

    def _compute_metrics_of_prediction_against_annotation(self, media_file_annotation: dict) -> Counter:
        # One metrics Counter per track element, merged into a single Counter.
        track = Track()
        per_track = [track.compute_metrics_of_prediction_against_annotation(track_elmt)
                     for track_elmt in media_file_annotation.get("tracks")]
        return misc_utils.convert_counter_collection_to_counter(per_track)



# if __name__ == '__main__' :
# detector = RNNDetector()
# validator = RNNDetectorValidator(detector)
# # metrics = validator.computeMetricsAgainstAnnotatedFile( os.path.abspath(os.path.join(os.path.dirname(__file__), "media-annotation/SWIFT_20000101_022052.json")) )
# metrics = validator.computeMetricsAgainstAnnotatedDirectory( os.path.abspath(os.path.join(os.path.dirname(__file__), "media-annotation")) )
# print(f'Accuracy : {misc_utils.getAccuracy(metrics)}')
# print(f'Precision : {misc_utils.getPrecision(metrics)}')
# print(f'Recall : {misc_utils.getRecall(metrics)}')
# print(f'F1 : {misc_utils.getF1(metrics)}')
# print(f'Total METRICS : {metrics}')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the validation could be outside of the package for now. We can work on this in https://github.com/microfaune/microfaune

Empty file added microfaune/domain/__init__.py
Empty file.
141 changes: 141 additions & 0 deletions microfaune/domain/track.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
from functools import reduce
from collections import Counter
from utils import misc_utils as util
import operator
import numpy as np

# PREDICTION_SAMPLE_WIDTH_IN_MS = 20

class Track:
    """Compares one track's model predictions against its human annotations.

    A "track element" is one entry of the "tracks" array in the annotation
    JSON (example below): it carries the audio metadata, the model's
    per-sample predictions and the human annotation_set.
    """

    # Example of the JSON structure handled by this class:
    # "tracks": [
    #     {
    #         "id": 47,
    #         "name": "SWIFT_20000101_022052.wav",
    #         "file": "/media/SWIFT_20000101_022052.wav",
    #         "format": "wav",
    #         "project_id": 1,
    #         "duration": 60.0,
    #         "prediction": "",
    #         "annotation_set": [
    #             {
    #                 "id": 1,
    #                 "track_id": 47,
    #                 "value":"[
    #                     {\"id\":\"wavesurfer_4kgitqcktig\",\"start\":4.935061842665718,\"end\":10.509955195406347,\"annotation\":\"\"},
    #                     {\"id\":\"wavesurfer_afb13jpasm8\",\"start\":17.55982033205593,\"end\":22.95971703246838,\"annotation\":\"\"},
    #                     {\"id\":\"wavesurfer_jdu8bguik4\",\"start\":26.334652470226157,\"end\":30.184578821446145,\"annotation\":\"\"}
    #                 ]",
    #                 "user_id": 1,
    #                 "reviewed": false,
    #                 "reviewed_by_id": null,
    #                 "date_time": "2020-10-17T19:12:13.800Z",
    #                 "username": "admin",
    #                 "reviewer": null
    #             }
    #         ] --> end of annotation_set
    #     }, --> end of the track element
    # ] --> end of tracks

    ############################
    # Compute positive indexes #
    ############################
    def map_annotation_set_to_prediction_ndxes(self, track_elmt:dict) -> [(int,int)] :
        '''
        Map every annotated excerpt of a track element to prediction indexes.

        :param track_elmt: An element of 'tracks'
        :return: An array of (start, end) tuples corresponding to the indexes
                 in the prediction structure of a Track
        '''
        prediction_ndxes_of_annotation_set_elmt = []
        # One list of (start, end) tuples per annotation_set element.
        prediction_ndxes_of_annotation_set_elmt += map(lambda annotation_set_elmt :
                    self._map_annotation_set_elmt_to_prediction_ndxes(annotation_set_elmt.get("value"), track_elmt.get("duration"), len(track_elmt.get("prediction")) ) ,
                    track_elmt.get("annotation_set"))
        # Flatten the list of lists into a single list of tuples.
        prediction_ndxes_of_annotation_set_elmt = reduce(operator.concat, prediction_ndxes_of_annotation_set_elmt, [])
        return prediction_ndxes_of_annotation_set_elmt

    def _map_annotation_set_elmt_to_prediction_ndxes(self, value_list:[dict], track_duration:float, track_predictions_count:int ) -> [(int,int)]:
        '''
        Map the excerpts of one annotation_set element to prediction indexes.

        :param value_list: track.annotation_set.value[]
        :param track_duration: tracks.duration
        :param track_predictions_count: tracks.prediction.length
        :return: An array of (start, end) tuples corresponding to the indexes
                 in the prediction structure of a particular Track
        '''
        prediction_ndxes_of_value_elmt = []
        prediction_ndxes_of_value_elmt += map(lambda value : self._convert_from_annonation_elmt_time_to_prediction_order(value, track_duration, track_predictions_count),
                                              value_list)
        return prediction_ndxes_of_value_elmt


    def _convert_from_annonation_elmt_time_to_prediction_order(self, value:dict, track_duration:float, track_predictions_count:int ) -> (int,int):
        '''
        Convert one annotated time excerpt to (start, end) prediction indexes
        by the rule of three:
            track_duration (e.g. 60 sec) --is-represented-by--> track.prediction.length (e.g. 2814)
            start (sec)                  ------------------> start index = start * length // duration
        :param value: track.value
        :param track_duration: tracks.duration
        :param track_predictions_count: tracks.prediction.length
        :return: A tuple (start, end) of indexes in the prediction structure
                 for a particular track.value excerpt
        '''
        return ( int(value.get("start") * track_predictions_count // track_duration) ,
                 int(value.get("end") * track_predictions_count // track_duration) )


    ####################
    # Compute Metrics #
    ####################
    def compute_metrics_of_prediction_against_annotation(self, track_elmt:dict) -> Counter :
        '''
        Compute the confusion-matrix metrics of one track element.
        :param track_elmt: An element of 'tracks'
        :return: Counter with keys 'TP', 'TN', 'FP', 'FN'
        '''
        print(f'**Track** id:{track_elmt.get("id")} / name:{track_elmt.get("name")} / file:{track_elmt.get("file")} / duration:{track_elmt.get("duration")}')
        positive_tuples_ndexes = self.map_annotation_set_to_prediction_ndxes(track_elmt)
        return self.compute_track_elmt_metrics(positive_tuples_ndexes, track_elmt.get("prediction"))

    def compute_track_elmt_metrics(self, positive_tuples_ndexes : [(int, int)], predictions:[]) -> Counter :
        '''
        :param positive_tuples_ndexes: list of positive annotated tuple indexes
        :param predictions: list of predictions made by the model
        :return: Counter representing the confusion matrix metrics TP / TN / FP / FN
        '''
        # Sorting by start index is required by _compute_negative_tuples_ndexes.
        positive_tuples_ndexes.sort(key= lambda tuple: tuple[0])
        # 1 - Compute TP and FN (inside annotated-positive ranges)
        counter_tpfn = self._do_compute_track_elmt_metrics(positive_tuples_ndexes , predictions, 'tp_and_fn')
        # 2 - Compute 'negative_tuples_ndexes' used to compute TN and FP
        negative_tuples_ndexes = self._compute_negative_tuples_ndexes(positive_tuples_ndexes, predictions)
        # 3 - Compute FP and TN (inside the complementary, negative ranges)
        counter_fptn = self._do_compute_track_elmt_metrics(negative_tuples_ndexes , predictions, 'fp_and_tn')
        # 4 - Return the confusion-matrix element; missing keys default to 0.0
        metrics = Counter({'TP': util.ifNone(counter_tpfn.get(1.0), 0.0), 'FN': util.ifNone(counter_tpfn.get(0.0), 0.0),
                           'FP': util.ifNone(counter_fptn.get(1.0), 0.0), 'TN': util.ifNone(counter_fptn.get(0.0), 0.0) })
        print(f'\t{metrics}')
        return metrics

    def _do_compute_track_elmt_metrics(self, positive_or_negative_tuples_ndexes : [(int, int)], predictions:[], n_p_desc : str) -> Counter :
        '''
        Aggregate, over all given (start, end) ranges, the per-range Counters
        of binarized predictions into a single Counter.
        :param n_p_desc: label used only for (commented-out) debug output
        '''
        p_and_n = list(map(lambda positive_or_negative_tuple_ndexes: self._compute_metrics_according_to_ground_truth(positive_or_negative_tuple_ndexes, predictions),
                           positive_or_negative_tuples_ndexes))
        counter_positive_negative = util.convert_counter_collection_to_counter(p_and_n)
        # print(f'{n_p_desc} : {p_and_n}\ncounter_{n_p_desc} : {counter_positive_negative}\n')
        return counter_positive_negative


    def _compute_metrics_according_to_ground_truth(self, tuple_ndexes:(int, int), predictions:[]) -> Counter : #() :
        '''
        :param tuple_ndexes: TRUE positive (or) TRUE negative tuple index, according to the ground truth 'the annotation'
        :param predictions: predictions according to the RNN model prediction
        :return: In case tuple_ndexes represents a TRUE positive range => Counter { 0:x , 1:y } holding the number of FN (the 0) and TP (the 1)
                 In case tuple_ndexes represents a TRUE negative range => Counter { 0:x , 1:y } holding the number of TN (the 0) and FP (the 1)
        '''
        # floor(2 * p) binarizes each prediction at the 0.5 threshold:
        # p < 0.5 -> 0.0, 0.5 <= p < 1.0 -> 1.0.
        # NOTE(review): a prediction of exactly 1.0 maps to 2.0 and is then
        # counted under neither key 0.0 nor 1.0 — confirm predictions < 1.0.
        return Counter(np.floor(np.dot(np.array(predictions[tuple_ndexes[0]:tuple_ndexes[1] + 1:], np.float32), 2)) )


    def _compute_negative_tuples_ndexes(self, positive_tuples_ndexes : [(int, int)], predictions:[]) -> [(int, int)] :
        '''
        Build the (start, end) ranges complementary to the (sorted) positive
        ranges, covering the rest of the predictions array.
        NOTE(review): raises IndexError if positive_tuples_ndexes is empty —
        confirm every track always has at least one annotation.
        '''
        negative_tuples_ndexes = []
        # 1 - Set the 1st elmt of 'negative_tuples_ndexes'. It likely lies before the 1st elmt of 'positive_tuples_ndexes' BUT not always
        if positive_tuples_ndexes[0][0] != 0 :
            negative_tuples_ndexes.append( (0, positive_tuples_ndexes[0][0]-1) )
        # 2 - Initialize the remaining elements of 'negative_tuples_ndexes' BUT beware how the last elmt is initialized
        for i in range(0, len(positive_tuples_ndexes)) :
            negative_tuples_ndexes.append( (positive_tuples_ndexes[i][1]+1,
                                            positive_tuples_ndexes[i+1][0]-1 if i < len(positive_tuples_ndexes) - 1
                                            else len(predictions) - 1 ) )
        # 3 - Check that the last index didn't exceed the prediction size
        if negative_tuples_ndexes[-1][0] > len(predictions) - 1 :
            negative_tuples_ndexes.pop()
        return negative_tuples_ndexes
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as the previous comment: let's put validation in https://github.com/microfaune/microfaune

41 changes: 21 additions & 20 deletions microfaune/labeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,23 @@
from microfaune import audio, plot


def read_json_file(json_file_path):
""" Read json file with labels.

Parameters
----------
json_file_path : str
Path of json file.

Returns:
-------
data_dict : list
List of labels, each label is a dictionary item with entries 'id', 'start', 'end', 'annotation'
"""
with open(json_file_path) as json_data:
data_dict = json.load(json_data)
return data_dict
# def read_json_file(json_file_path):
# """ Read json file with labels.
#
# Parameters
# ----------
# json_file_path : str
# Path of json file.
#
# Returns:
# -------
# data_dict : list
# List of labels, each label is a dictionary item with entries 'id', 'start', 'end', 'annotation'
# """
# with open(json_file_path) as json_data:
# data_dict = json.load(json_data)
# return data_dict
from utils import misc_utils


def number_labels(json_file_path):
Expand All @@ -38,7 +39,7 @@ def number_labels(json_file_path):
nb_labels : int
Number of labels in json file
"""
data_dict = read_json_file(json_file_path)
data_dict = misc_utils.read_json_file(json_file_path)
nb_labels = len(data_dict)
return nb_labels

Expand All @@ -62,7 +63,7 @@ def prop_labeled(json_file_path, audio_file_path):
fs, data = audio.load_audio(audio_file_path)
total_duration = len(data) / fs

data_dict = read_json_file(json_file_path)
data_dict = misc_utils.read_json_file(json_file_path)

bird_song_duration = 0

Expand Down Expand Up @@ -90,7 +91,7 @@ def charac_function_audio(json_file_path, audio_file_path):
fs, data = audio.load_audio(audio_file_path)
charac_func = np.zeros((len(data), 1))

data_dict = read_json_file(json_file_path)
data_dict = misc_utils.read_json_file(json_file_path)

for label in data_dict:
indx_start = int(label['start'] * fs)
Expand Down Expand Up @@ -248,7 +249,7 @@ def extract_labels(json_path, start_time, duration):
labels: list
List of labelson the audio extract, each label is a dictionary with keys 'id', 'start', 'end' and 'annotation'
"""
data_dict = read_json_file(json_path)
data_dict = misc_utils.read_json_file(json_path)
labels = []

for label in data_dict:
Expand Down
54 changes: 54 additions & 0 deletions microfaune/media-annotation/SWIFT_20000101_022052.json

Large diffs are not rendered by default.

56 changes: 56 additions & 0 deletions microfaune/media-annotation/tmp1/SWIFT_20000101_122052.json

Large diffs are not rendered by default.

Binary file added microfaune/media/SWIFT_20190723_050006.wav
Binary file not shown.
Empty file added microfaune/utils/__init__.py
Empty file.
42 changes: 42 additions & 0 deletions microfaune/utils/misc_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@

import json
from collections import Counter


def read_json_file(json_file_path: str) -> dict:
    """Read and parse a JSON file.

    Parameters
    ----------
    json_file_path : str
        Path of the JSON file.

    Returns
    -------
    data_dict : dict or list
        Parsed JSON content. For annotation/label files this is a list of
        labels, each a dict with entries 'id', 'start', 'end', 'annotation';
        for track files it is a dict.
    """
    # Explicit encoding: the platform default is locale-dependent, while
    # JSON files are UTF-8 by specification (RFC 8259).
    with open(json_file_path, encoding="utf-8") as json_data:
        return json.load(json_data)

def convert_counter_collection_to_counter(counter_collection:[]) -> Counter :
    """Merge an iterable of Counter-like mappings into a single Counter.

    Each element is converted to a Counter and summed, so counts for the
    same key are added together; an empty collection yields an empty Counter.
    """
    return sum((Counter(item) for item in counter_collection), Counter())

def ifNone(valueToCheck, defaultValue ) :
    """Return valueToCheck, or defaultValue when valueToCheck is None.

    Note: only None triggers the default — falsy values such as 0 or ''
    are returned unchanged.
    """
    if valueToCheck is None:
        return defaultValue
    return valueToCheck

def getAccuracy(metrics: Counter) -> float:
    """Accuracy = (TP + TN) / (TP + TN + FP + FN)."""
    correct = metrics.get('TP') + metrics.get('TN')
    total = metrics.get('TP') + metrics.get('TN') + metrics.get('FP') + metrics.get('FN')
    return correct / total

def getPrecision(metrics: Counter) -> float:
    """Precision = TP / (TP + FP)."""
    true_positives = metrics.get('TP')
    return true_positives / (true_positives + metrics.get('FP'))

def getRecall(metrics: Counter) -> float:
    """Recall = TP / (TP + FN)."""
    true_positives = metrics.get('TP')
    return true_positives / (true_positives + metrics.get('FN'))

def getF1(metrics: Counter) -> float:
    """F1 score = harmonic mean of precision and recall: 2PR / (P + R)."""
    precision = getPrecision(metrics)
    recall = getRecall(metrics)
    return 2 * precision * recall / (precision + recall)
Loading