From 1316331be3609339bc61d37854afda64180ecec4 Mon Sep 17 00:00:00 2001 From: mrq Date: Sun, 5 Mar 2023 05:22:35 +0000 Subject: [PATCH] forgot to try and have it try and auto-detect for openai/whisper when no language is specified --- src/utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/utils.py b/src/utils.py index 89dd1d1..d23f046 100755 --- a/src/utils.py +++ b/src/utils.py @@ -941,6 +941,9 @@ def whisper_transcribe( file, language=None ): load_whisper_model(language=language) if not args.whisper_cpp: + if not language: + language = None + return whisper_model.transcribe(file, language=language) res = whisper_model.transcribe(file) @@ -998,12 +1001,13 @@ def prepare_dataset( files, outdir, language=None, progress=None ): with open(f'{outdir}/whisper.json', 'w', encoding="utf-8") as f: f.write(json.dumps(results, indent='\t')) + joined = '\n'.join(transcription) with open(f'{outdir}/train.txt', 'w', encoding="utf-8") as f: - f.write("\n".join(transcription)) + f.write(joined) unload_whisper() - return f"Processed dataset to: {outdir}" + return f"Processed dataset to: {outdir}\n{joined}" def calc_iterations( epochs, lines, batch_size ): iterations = int(epochs * lines / float(batch_size))