From df5ba634c0ed68f7be2b957f9696705d7a91cd76 Mon Sep 17 00:00:00 2001 From: mrq Date: Tue, 7 Mar 2023 05:43:26 +0000 Subject: [PATCH] brain dead --- src/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/utils.py b/src/utils.py index fecdb0a..982252c 100755 --- a/src/utils.py +++ b/src/utils.py @@ -1201,6 +1201,10 @@ def prepare_dataset( files, outdir, language=None, skip_existings=False, progres unload_whisper() joined = "\n".join(transcription) + if not skip_existings: + with open(f'{outdir}/train.txt', 'w', encoding="utf-8") as f: + f.write(joined) + return f"Processed dataset to: {outdir}\n{joined}" def calc_iterations( epochs, lines, batch_size ):