Fix: let openai/whisper auto-detect the language when no language is specified (this was previously forgotten)

2023-03-05 05:22:35 +00:00 · 2023-03-05 05:22:35 +00:00 · 1316331be3
commit 1316331be3
parent 3e220ed306
1 changed files with 6 additions and 2 deletions
--- a/src/utils.py
+++ b/src/utils.py
@ -941,6 +941,9 @@ def whisper_transcribe( file, language=None ):
 		load_whisper_model(language=language)

 	if not args.whisper_cpp:
+		if not language:
+			language = None
+			
 		return whisper_model.transcribe(file, language=language)

 	res = whisper_model.transcribe(file)
@ -998,12 +1001,13 @@ def prepare_dataset( files, outdir, language=None, progress=None ):
 	with open(f'{outdir}/whisper.json', 'w', encoding="utf-8") as f:
 		f.write(json.dumps(results, indent='\t'))
 	
+	joined = '\n'.join(transcription)
 	with open(f'{outdir}/train.txt', 'w', encoding="utf-8") as f:
-		f.write("\n".join(transcription))
+		f.write(joined)

 	unload_whisper()

-	return f"Processed dataset to: {outdir}"
+	return f"Processed dataset to: {outdir}\n{joined}"

 def calc_iterations( epochs, lines, batch_size ):
 	iterations = int(epochs * lines / float(batch_size))