Correcting my mistake of assuming I could just feed raw 24kHz audio into the 44kHz DAC model without too much of an issue (there are issues)
This commit is contained in:
parent
9f738fbd5b
commit
215800484d
BIN
data/qnt.dac
BIN
data/qnt.dac
Binary file not shown.
|
@ -157,13 +157,24 @@ class Dataset:
|
||||||
|
|
||||||
tasks_list: list[str] = field(default_factory=lambda: ["tts"])
|
tasks_list: list[str] = field(default_factory=lambda: ["tts"])
|
||||||
|
|
||||||
_frames_per_second: int = 0 # in encodec, each frame is 75 codes, in dac, each frame is 41
|
_frames_per_second: int = 0 # allows setting your own hint
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def frames_per_second(self):
|
def frames_per_second(self):
|
||||||
if self._frames_per_second > 0:
|
if self._frames_per_second > 0:
|
||||||
return self._frames_per_second
|
return self._frames_per_second
|
||||||
return 41 if cfg.inference.audio_backend == "dac" else 75
|
|
||||||
|
if cfg.inference.audio_backend == "dac":
|
||||||
|
# using the 44KHz model with 24KHz sources has a frame rate of 41Hz
|
||||||
|
if cfg.variable_sample_rate and cfg.sample_rate == 24_000:
|
||||||
|
return 41
|
||||||
|
if cfg.sample_rate == 44_000:
|
||||||
|
return 86
|
||||||
|
if cfg.sample_rate == 16_000:
|
||||||
|
return 50
|
||||||
|
|
||||||
|
# 24Khz Encodec / Vocos and incidentally DAC are all at 75Hz
|
||||||
|
return 75
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def min_phones(self):
|
def min_phones(self):
|
||||||
|
@ -562,7 +573,7 @@ class Config(_Config):
|
||||||
tokenizer: str = "./tokenizer.json"
|
tokenizer: str = "./tokenizer.json"
|
||||||
|
|
||||||
sample_rate: int = 24_000
|
sample_rate: int = 24_000
|
||||||
variable_sample_rate: bool = True # for DAC, this will override the model automatically resampling to 44KHz.
|
variable_sample_rate: bool = False # NOT recommended, as running directly 24Khz audio in the 44Khz DAC model will have detrimental quality loss
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def distributed(self):
|
def distributed(self):
|
||||||
|
|
|
@ -143,8 +143,9 @@ def _load_vocos_model(device="cuda", levels=cfg.model.max_levels):
|
||||||
|
|
||||||
@cache
|
@cache
|
||||||
def _load_dac_model(device="cuda", levels=cfg.model.max_levels):
|
def _load_dac_model(device="cuda", levels=cfg.model.max_levels):
|
||||||
kwargs = dict(model_type="44khz",model_bitrate="8kbps",tag="latest")
|
kwargs = dict(model_type="24khz",model_bitrate="8kbps",tag="latest")
|
||||||
|
"""
|
||||||
|
if not cfg.variable_sample_rate:
|
||||||
# yes there's a better way, something like f'{cfg.sample.rate//1000}hz'
|
# yes there's a better way, something like f'{cfg.sample.rate//1000}hz'
|
||||||
if cfg.sample_rate == 44_000:
|
if cfg.sample_rate == 44_000:
|
||||||
kwargs["model_type"] = "44kz"
|
kwargs["model_type"] = "44kz"
|
||||||
|
@ -154,6 +155,7 @@ def _load_dac_model(device="cuda", levels=cfg.model.max_levels):
|
||||||
kwargs["model_type"] = "16khz"
|
kwargs["model_type"] = "16khz"
|
||||||
else:
|
else:
|
||||||
raise Exception(f'unsupported sample rate: {cfg.sample_rate}')
|
raise Exception(f'unsupported sample rate: {cfg.sample_rate}')
|
||||||
|
"""
|
||||||
|
|
||||||
model = __load_dac_model(**kwargs)
|
model = __load_dac_model(**kwargs)
|
||||||
model = model.to(device)
|
model = model.to(device)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user