From 2060b6f21c340c3ab2e6a56f5ae3515de2cdcf45 Mon Sep 17 00:00:00 2001 From: mrq Date: Tue, 22 Aug 2023 14:22:39 +0000 Subject: [PATCH] fixed issue with sliced audio being the wrong sample rate --- README.md | 4 ++-- src/utils.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index b34fd68..f04b5c4 100755 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # AI Voice Cloning -This [repo](https://git.ecker.tech/mrq/ai-voice-cloning)/[rentry](https://rentry.org/AI-Voice-Cloning/) aims to serve as both a foolproof guide for setting up AI voice cloning tools for legitimate, local use on Windows/Linux, as well as a stepping stone for anons that genuinely want to play around with [TorToiSe](https://github.com/neonbjb/tortoise-tts). +> **Note** This project has been in dire need of being rewritten from the ground up for some time. Apologies for any cruft from my rather spaghetti code. -Similar to my own findings for Stable Diffusion image generation, this rentry may appear a little disheveled as I note my new findings with TorToiSe. Please keep this in mind if the guide seems to shift a bit or sound confusing. +This [repo](https://git.ecker.tech/mrq/ai-voice-cloning)/[rentry](https://rentry.org/AI-Voice-Cloning/) aims to serve as both a foolproof guide for setting up AI voice cloning tools for legitimate, local use on Windows/Linux, as well as a stepping stone for anons that genuinely want to play around with [TorToiSe](https://github.com/neonbjb/tortoise-tts). >\>Ugh... why bother when I can just abuse 11.AI? 
diff --git a/src/utils.py b/src/utils.py index cf93489..1eaefb7 100755 --- a/src/utils.py +++ b/src/utils.py @@ -2359,7 +2359,8 @@ def slice_dataset( voice, trim_silence=True, start_offset=0, end_offset=0, resul print(message) messages.append(message) continue - # sliced, _ = resample( sliced, sample_rate, TARGET_SAMPLE_RATE ) + + sliced, _ = resample( sliced, sample_rate, TARGET_SAMPLE_RATE ) if waveform.shape[0] == 2: waveform = waveform[:1] @@ -3067,7 +3068,7 @@ def get_voice( name, dir=get_voice_dir(), load_latents=True, extensions=["wav", if ext not in extensions: continue - voice.append(f'{subj}/{file}') + voice.append(f'{subj}/{file}') return sorted( voice )