This commit is contained in:
mrq 2025-02-20 13:39:22 -06:00
parent 92139b6da9
commit 4a4a46c14f

View File

@ -84,18 +84,10 @@ def process_batched_jobs( jobs, speaker_id="", device=None, raise_exceptions=Tru
# sort to avoid egregious padding # sort to avoid egregious padding
jobs = sorted(jobs, key=lambda x: x[1].shape[-1], reverse=True) jobs = sorted(jobs, key=lambda x: x[1].shape[-1], reverse=True)
buffer = []
batches = [] batches = []
while jobs:
for job in jobs: batches.append(jobs[:batch_size])
buffer.append(job) jobs = jobs[batch_size:]
if len(buffer) >= batch_size:
batches.append(buffer)
buffer = []
if buffer:
batches.append(buffer)
buffer = []
for batch in tqdm(batches, desc=f'Quantizing {speaker_id} (batch size: {batch_size})'): for batch in tqdm(batches, desc=f'Quantizing {speaker_id} (batch size: {batch_size})'):
wavs = [] wavs = []
@ -283,11 +275,13 @@ def process(
if f'{group_name}/{speaker_id}' not in dataset: if f'{group_name}/{speaker_id}' not in dataset:
dataset.append(f'{group_name}/{speaker_id}') dataset.append(f'{group_name}/{speaker_id}')
jobs = [] jobs = []
use_slices = slice == True or (slice == "auto" and len(metadata.keys()) == 1) or group_name in always_slice_groups use_slices = slice == True or (slice == "auto" and len(metadata.keys()) == 1) or group_name in always_slice_groups
for filename in sorted(metadata.keys()): for filename in sorted(metadata.keys()):
if min_utterances and len(metadata[filename].keys()) < min_utterances:
continue
inpath = Path(f'./{input_audio}/{group_name}/{speaker_id}/{filename}') inpath = Path(f'./{input_audio}/{group_name}/{speaker_id}/{filename}')
""" """
@ -335,9 +329,6 @@ def process(
else: else:
i = 0 i = 0
presliced = not inpath.exists() presliced = not inpath.exists()
if min_utterances and len(metadata[filename]["segments"]) < min_utterances:
continue
for segment in metadata[filename]["segments"]: for segment in metadata[filename]["segments"]:
id = pad(i, 4) id = pad(i, 4)