seems I actually don't need RVQ bins 9-32 with the 24Khz DAC model........ (time to requantize my audio...)
This commit is contained in:
parent
33b7f81b94
commit
9f738fbd5b
|
@ -145,8 +145,6 @@ def _load_vocos_model(device="cuda", levels=cfg.model.max_levels):
|
|||
def _load_dac_model(device="cuda", levels=cfg.model.max_levels):
|
||||
kwargs = dict(model_type="44khz",model_bitrate="8kbps",tag="latest")
|
||||
|
||||
# just use the 44K model, the 24K model has 32 codebooks...
|
||||
"""
|
||||
# yes there's a better way, something like f'{cfg.sample.rate//1000}hz'
|
||||
if cfg.sample_rate == 44_000:
|
||||
kwargs["model_type"] = "44kz"
|
||||
|
@ -156,7 +154,6 @@ def _load_dac_model(device="cuda", levels=cfg.model.max_levels):
|
|||
kwargs["model_type"] = "16khz"
|
||||
else:
|
||||
raise Exception(f'unsupported sample rate: {cfg.sample_rate}')
|
||||
"""
|
||||
|
||||
model = __load_dac_model(**kwargs)
|
||||
model = model.to(device)
|
||||
|
@ -168,7 +165,9 @@ def _load_dac_model(device="cuda", levels=cfg.model.max_levels):
|
|||
# updating it here will affect the sample rate the waveform is resampled to on encoding
|
||||
if cfg.variable_sample_rate:
|
||||
model.sample_rate = cfg.sample_rate
|
||||
|
||||
model.backend = "dac"
|
||||
model.model_type = kwargs["model_type"]
|
||||
|
||||
return model
|
||||
|
||||
|
@ -274,6 +273,11 @@ def encode(wav: Tensor, sr: int = cfg.sample_rate, device="cuda", levels=cfg.mod
|
|||
model = _load_dac_model(device, levels=levels)
|
||||
signal = AudioSignal(wav, sample_rate=sr)
|
||||
artifact = model.compress(signal, 5.0, verbose=False, n_quantizers=levels if isinstance(levels, int) else None)
|
||||
|
||||
# trim to 8 codebooks if 24Khz
|
||||
if model.model_type == "24khz":
|
||||
artifact.codes = artifact.codes[:, :8, :]
|
||||
|
||||
return artifact.codes if not return_metadata else artifact
|
||||
|
||||
# vocos does not encode wavs to encodecs, so just use normal encodec
|
||||
|
|
Loading…
Reference in New Issue
Block a user