diff --git a/tortoise/api.py b/tortoise/api.py index 2f987ad..65c7d6e 100644 --- a/tortoise/api.py +++ b/tortoise/api.py @@ -165,7 +165,7 @@ class TextToSpeech: Main entry point into Tortoise. """ - def __init__(self, autoregressive_batch_size=16, models_dir='.models', enable_redaction=False): + def __init__(self, autoregressive_batch_size=16, models_dir='.models', enable_redaction=True): """ Constructor :param autoregressive_batch_size: Specifies how many samples to generate per batch. Lower this if you are seeing diff --git a/tortoise/utils/wav2vec_alignment.py b/tortoise/utils/wav2vec_alignment.py index 4a05659..bfcb7e1 100644 --- a/tortoise/utils/wav2vec_alignment.py +++ b/tortoise/utils/wav2vec_alignment.py @@ -50,7 +50,7 @@ class Wav2VecAlignment: def __init__(self): self.model = Wav2Vec2ForCTC.from_pretrained("jbetker/wav2vec2-large-robust-ft-libritts-voxpopuli").cpu() self.feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(f"facebook/wav2vec2-large-960h") - self.tokenizer = Wav2Vec2CTCTokenizer.from_pretrained('jbetker/tacotron_symbols') + self.tokenizer = Wav2Vec2CTCTokenizer.from_pretrained('jbetker/tacotron-symbols') def align(self, audio, expected_text, audio_sample_rate=24000): orig_len = audio.shape[-1]