From d448db6dc3f84c2af78cc4fde6c0ab55fb243a4e Mon Sep 17 00:00:00 2001 From: John Ryan Date: Mon, 14 Aug 2023 13:56:36 +0100 Subject: [PATCH 1/3] Configure language from python --- whispercpp.pyx | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/whispercpp.pyx b/whispercpp.pyx index 92af4c1..1ce83b8 100644 --- a/whispercpp.pyx +++ b/whispercpp.pyx @@ -16,7 +16,6 @@ cimport numpy as cnp cdef int SAMPLE_RATE = 16000 cdef char* TEST_FILE = 'test.wav' cdef char* DEFAULT_MODEL = 'tiny' -cdef char* LANGUAGE = b'en' cdef int N_THREADS = os.cpu_count() MODELS = { @@ -68,14 +67,14 @@ cdef cnp.ndarray[cnp.float32_t, ndim=1, mode="c"] load_audio(bytes file, int sr return frames -cdef whisper_full_params default_params() nogil: +cdef whisper_full_params default_params(char* language) nogil: cdef whisper_full_params params = whisper_full_default_params( whisper_sampling_strategy.WHISPER_SAMPLING_GREEDY ) params.print_realtime = True params.print_progress = True params.translate = False - params.language = LANGUAGE + params.language = language n_threads = N_THREADS return params @@ -96,14 +95,13 @@ cdef class Whisper: else: self.ctx = whisper_init_from_file(model_b) - self.params = default_params() whisper_print_system_info() def __dealloc__(self): whisper_free(self.ctx) - def transcribe(self, filename=TEST_FILE): + def transcribe(self, filename=TEST_FILE, lang="auto"): print("Loading data..") if (type(filename) == np.ndarray) : temp = filename @@ -115,9 +113,13 @@ cdef class Whisper: cdef cnp.ndarray[cnp.float32_t, ndim=1, mode="c"] frames = temp + cdef char* c_string = lang # Convert bytes to char* + + params = default_params(c_string) print("Transcribing..") - return whisper_full(self.ctx, self.params, &frames[0], len(frames)) + + return whisper_full(self.ctx, params, &frames[0], len(frames)) def extract_text(self, int res): print("Extracting text...") From 433842383393a84f16a33f0c012f18a5a1786f5e Mon Sep 17 00:00:00 2001 From: John Ryan Date: Mon, 14 Aug 2023 14:01:15 +0100 Subject: [PATCH 2/3] Byte convert inside --- whispercpp.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/whispercpp.pyx b/whispercpp.pyx index 1ce83b8..ac020c0 100644 --- a/whispercpp.pyx +++ b/whispercpp.pyx @@ -113,7 +113,8 @@ cdef class Whisper: cdef cnp.ndarray[cnp.float32_t, ndim=1, mode="c"] frames = temp - cdef char* c_string = lang # Convert bytes to char* + language_bytes = str(lang).encode("utf-8") + cdef char* c_string = language_bytes # Convert bytes to char* params = default_params(c_string) From 439d49d16d010d33fd841db7b364985deee6889d Mon Sep 17 00:00:00 2001 From: John Ryan Date: Mon, 14 Aug 2023 14:21:52 +0100 Subject: [PATCH 3/3] Updated readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 694d193..09d8ed6 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ from whispercpp import Whisper w = Whisper('tiny') -result = w.transcribe("myfile.mp3") +result = w.transcribe("myfile.mp3", lang="en") text = w.extract_text(result) ```