I'm not too sure if manually invoking gc actually closes all the open files from whisperx (or ROCm), but the issue seems to have gone away alongside setting 'ulimit -Sn' to half the output of 'ulimit -Hn'

This commit is contained in:
mrq 2023-03-06 16:39:37 +00:00
parent 999878d9c6
commit 12c51b6057

View File

@@ -1054,6 +1054,7 @@ def prepare_dataset( files, outdir, language=None, skip_existings=False, progres
results = {}
transcription = []
files = sorted(files)
previous_list = []
if skip_existings and os.path.exists(f'{outdir}/train.txt'):
@@ -1103,6 +1104,8 @@ def prepare_dataset( files, outdir, language=None, skip_existings=False, progres
transcription.append(line)
with open(f'{outdir}/train.txt', 'a', encoding="utf-8") as f:
f.write(f'{line}\n')
do_gc()
with open(f'{outdir}/whisper.json', 'w', encoding="utf-8") as f:
f.write(json.dumps(results, indent='\t'))