Correcting my mistaken assumption that I could feed raw 24KHz audio into the 44KHz DAC model without much of an issue (there are issues)
This commit is contained in:
parent
9f738fbd5b
commit
215800484d
BIN
data/qnt.dac
BIN
data/qnt.dac
Binary file not shown.
|
@ -157,13 +157,24 @@ class Dataset:
|
|||
|
||||
tasks_list: list[str] = field(default_factory=lambda: ["tts"])
|
||||
|
||||
_frames_per_second: int = 0 # in encodec, each frame is 75 codes, in dac, each frame is 41
|
||||
_frames_per_second: int = 0 # allows setting your own hint
|
||||
|
||||
@cached_property
|
||||
def frames_per_second(self):
|
||||
if self._frames_per_second > 0:
|
||||
return self._frames_per_second
|
||||
return 41 if cfg.inference.audio_backend == "dac" else 75
|
||||
|
||||
if cfg.inference.audio_backend == "dac":
|
||||
# using the 44KHz model with 24KHz sources has a frame rate of 41Hz
|
||||
if cfg.variable_sample_rate and cfg.sample_rate == 24_000:
|
||||
return 41
|
||||
if cfg.sample_rate == 44_000:
|
||||
return 86
|
||||
if cfg.sample_rate == 16_000:
|
||||
return 50
|
||||
|
||||
# 24Khz Encodec / Vocos and incidentally DAC are all at 75Hz
|
||||
return 75
|
||||
|
||||
@property
|
||||
def min_phones(self):
|
||||
|
@ -562,7 +573,7 @@ class Config(_Config):
|
|||
tokenizer: str = "./tokenizer.json"
|
||||
|
||||
sample_rate: int = 24_000
|
||||
variable_sample_rate: bool = True # for DAC, this will override the model automatically resampling to 44KHz.
|
||||
variable_sample_rate: bool = False # NOT recommended, as running directly 24Khz audio in the 44Khz DAC model will have detrimental quality loss
|
||||
|
||||
@property
|
||||
def distributed(self):
|
||||
|
|
|
@ -143,17 +143,19 @@ def _load_vocos_model(device="cuda", levels=cfg.model.max_levels):
|
|||
|
||||
@cache
|
||||
def _load_dac_model(device="cuda", levels=cfg.model.max_levels):
|
||||
kwargs = dict(model_type="44khz",model_bitrate="8kbps",tag="latest")
|
||||
|
||||
# yes there's a better way, something like f'{cfg.sample_rate//1000}khz'
|
||||
if cfg.sample_rate == 44_000:
|
||||
kwargs["model_type"] = "44kz"
|
||||
elif cfg.sample_rate == 24_000:
|
||||
kwargs["model_type"] = "24khz"
|
||||
elif cfg.sample_rate == 16_000:
|
||||
kwargs["model_type"] = "16khz"
|
||||
else:
|
||||
raise Exception(f'unsupported sample rate: {cfg.sample_rate}')
|
||||
kwargs = dict(model_type="24khz",model_bitrate="8kbps",tag="latest")
|
||||
"""
|
||||
if not cfg.variable_sample_rate:
|
||||
# yes there's a better way, something like f'{cfg.sample_rate//1000}khz'
|
||||
if cfg.sample_rate == 44_000:
|
||||
kwargs["model_type"] = "44kz"
|
||||
elif cfg.sample_rate == 24_000:
|
||||
kwargs["model_type"] = "24khz"
|
||||
elif cfg.sample_rate == 16_000:
|
||||
kwargs["model_type"] = "16khz"
|
||||
else:
|
||||
raise Exception(f'unsupported sample rate: {cfg.sample_rate}')
|
||||
"""
|
||||
|
||||
model = __load_dac_model(**kwargs)
|
||||
model = model.to(device)
|
||||
|
|
Loading…
Reference in New Issue
Block a user