maybe fixed errors with DAC backend, added option to limit by duration in emb.process (because I only really need short utterances right now and I'm not ready to spend a week on processing everything again)

This commit is contained in:
mrq 2025-02-06 12:37:18 -06:00
parent 299cc88821
commit 712ce4af5d
2 changed files with 8 additions and 0 deletions

View File

@ -4,6 +4,8 @@ from dac import DACFile
from audiotools import AudioSignal
from dac.utils import load_model as __load_dac_model
from typing import Union
from pathlib import Path
"""
Patch decode to skip things related to the metadata (namely the waveform trimming)
So far it seems the raw waveform can just be returned without any post-processing

View File

@ -174,6 +174,7 @@ def process(
stride_offset=0,
slice="auto",
batch_size=1,
max_duration=None,
low_memory=False,
@ -326,6 +327,9 @@ def process(
start = int((segment['start']-0.05) * sample_rate)
end = int((segment['end']+0.5) * sample_rate)
if max_duration and (end - start) / sample_rate > max_duration:
continue
if not presliced:
if start < 0:
start = 0
@ -364,6 +368,7 @@ def main():
parser.add_argument("--stride-offset", type=int, default=0)
parser.add_argument("--slice", type=str, default="auto")
parser.add_argument("--batch-size", type=int, default=0)
parser.add_argument("--max-duration", type=int, default=0)
parser.add_argument("--device", type=str, default="cuda")
parser.add_argument("--dtype", type=str, default="bfloat16")
@ -394,6 +399,7 @@ def main():
stride_offset=args.stride_offset,
slice=args.slice,
batch_size=args.batch_size,
max_duration=args.max_duration,
low_memory=args.low_memory,