diff --git a/README.md b/README.md index 1b6c65e..930ef91 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,28 @@ # Subsync + **Synchronize your subtitles using machine learning** Subsync analyses and processes the sound from your media files and uses machine learning to detect speech. Speech detection is used to shift existing subtitles for a perfect match in audio and text! ## Features - - [x] Machine learning model for voice activity detection (*not recognition*) - - [x] Shift subtitle as a whole for best match - - [x] Sync every sentence in the subtitle individually - - [ ] Sync using existing matched subtitle in a different laguage + +- [x] Machine learning model for voice activity detection (*not recognition*) +- [x] Shift subtitle as a whole for best match +- [x] Sync every sentence in the subtitle individually +- [ ] Sync using existing matched subtitle in a different language ## Dependencies -* ffmpeg (https://www.ffmpeg.org/download.html) + +* ffmpeg (<https://www.ffmpeg.org/download.html>) ## Installation + ```bash pip install subsync ``` ## Help + ``` usage: subsync [-h] [--version] [--graph] [-d SECONDS] [-m SECONDS] [-s] [--logfile PATH] @@ -46,4 +51,5 @@ optional arguments: ``` ## Special thanks -[[1] Automatic Subtitle Synchronization through Machine Learning](https://machinelearnings.co/automatic-subtitle-synchronization-e188a9275617) + +[[1] Automatic Subtitle Synchronization through Machine Learning](https://machinelearnings.co/automatic-subtitle-synchronization-e188a9275617) diff --git a/requirements.txt b/requirements.txt index 444bfae..3544402 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,18 +3,18 @@ astor==0.6.2 audioread==2.1.5 bleach==1.5.0 cffi==1.11.5 -chardet==3.0.4 +chardet>=3.0.4 cycler==0.10.0 decorator==4.2.1 gast==0.2.0 graphviz==0.8.2 grpcio==1.10.0 -h5py==2.8.0rc1 +h5py>2.8.0 html5lib==0.9999999 joblib==0.11 Keras==2.1.5 kiwisolver==1.0.1 -librosa==0.6.0 +librosa>=0.6.0 llvmlite==0.22.0 Markdown==2.6.11 matplotlib==2.2.2 @@ -29,12 +29,12 @@ 
python-dateutil==2.7.0 pytz==2018.3 PyYAML==3.12 resampy==0.2.0 -scikit-learn==0.19.1 +scikit-learn>=0.19.1 scipy==1.0.0 six==1.11.0 sklearn==0.0 tensorboard==1.6.0 -tensorflow==1.5.0 -tensorflow-tensorboard==1.5.1 +tensorflow>1.5.0 +tensorflow-tensorboard>=1.5.1 termcolor==1.1.0 Werkzeug==0.14.1 diff --git a/subsync/media.py b/subsync/media.py index 6019fe7..8cc12a5 100644 --- a/subsync/media.py +++ b/subsync/media.py @@ -13,6 +13,8 @@ import numpy as np import sklearn +from sklearn import metrics + from .ffmpeg import Transcode from .log import logger @@ -150,7 +152,7 @@ def logloss(self, pred, actual, margin=12): for i, offset in enumerate(range(-blocks, blocks)): snippet = np.roll(actual, offset) try: - logloss[i] = sklearn.metrics.log_loss(snippet[blocks:-blocks], pred[blocks:-blocks]) + logloss[i] = metrics.log_loss(snippet[blocks:-blocks], pred[blocks:-blocks]) except (ValueError, RuntimeWarning): pass indices[i] = offset diff --git a/subsync/net.py b/subsync/net.py index 76b06e3..bbc870d 100644 --- a/subsync/net.py +++ b/subsync/net.py @@ -23,12 +23,13 @@ def summary(self): def load_graph(self, frozen_graph_filename): - with tf.gfile.GFile(frozen_graph_filename, "rb") as f: - graph_def = tf.GraphDef() + # Load the graph with TensorFlow 1.x compatibility mode + with tf.compat.v1.gfile.GFile(frozen_graph_filename, "rb") as f: + graph_def = tf.compat.v1.GraphDef() graph_def.ParseFromString(f.read()) with tf.Graph().as_default() as graph: - tf.import_graph_def( + tf.compat.v1.import_graph_def( graph_def, input_map=None, return_elements=None, @@ -40,5 +41,6 @@ def load_graph(self, frozen_graph_filename): def predict(self, mfcc): print("Predicting values...") - with tf.Session(graph=self.graph) as sess: + with tf.compat.v1.Session(graph=self.graph) as sess: return sess.run(self.output, feed_dict={self.input: mfcc}) +