James Betker 2022-03-15 10:36:34 -06:00
parent bb03cbb9fc
commit 0419a64107
4 changed files with 8 additions and 9 deletions

View File

@@ -104,9 +104,9 @@ def process_folder(folder, output_path, base_path, progress_file, max_files):
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-path', type=str, help='Path to search for split files (should be the direct output of phase 1)',
                        default='Y:\\split\\big_podcast')
                        default='Y:\\split\\youtube')
    parser.add_argument('-progress_file', type=str, help='Place to store all folders that have already been processed', default='Y:\\filtered\\big_podcast\\already_processed.txt')
    parser.add_argument('-output_path', type=str, help='Path where sampled&filtered files are sent', default='Y:\\filtered\\big_podcast')
    parser.add_argument('-output_path', type=str, help='Path where sampled&filtered files are sent', default='Y:\\filtered\\youtube')
    parser.add_argument('-num_threads', type=int, help='Number of concurrent workers processing files.', default=6)
    parser.add_argument('-max_samples_per_folder', type=int, help='Maximum number of clips that can be extracted from each folder.', default=1000)
    parser.add_argument('-classifier_model_opt', type=str, help='Train/test options file that configures the model used to classify the audio clips.',
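The hunk header above references process_folder(folder, output_path, base_path, progress_file, max_files). For orientation, here is a rough, hypothetical sketch of how a driver typically wires these arguments together; the folder-scanning and progress-file handling below are assumptions for illustration, not this repository's actual implementation:

# Hypothetical driver sketch: walk the split-file tree, skip folders already
# listed in the progress file, and fan the rest out to a thread pool.
import os
from concurrent.futures import ThreadPoolExecutor

def run(args, process_folder):
    done = set()
    if os.path.exists(args.progress_file):
        with open(args.progress_file) as f:
            done = {line.strip() for line in f}
    folders = [os.path.join(args.path, d) for d in os.listdir(args.path)
               if os.path.isdir(os.path.join(args.path, d)) and d not in done]
    with ThreadPoolExecutor(max_workers=args.num_threads) as pool:
        for folder in folders:
            pool.submit(process_folder, folder, args.output_path, args.path,
                        args.progress_file, args.max_samples_per_folder)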

View File

@@ -10,7 +10,7 @@ class Vocoder:
        self.model = WaveGlow(n_mel_channels=80, n_flows=12, n_group=8, n_early_size=2, n_early_every=4, WN_config={'n_layers': 8, 'n_channels': 256, 'kernel_size': 3})
        sd = torch.load('../experiments/waveglow_256channels_universal_v5.pth')
        self.model.load_state_dict(sd)
        self.model = self.model.to('cuda')
        self.model = self.model.cpu()
        self.model.eval()

    def transform_mel_to_audio(self, mel):
@@ -22,8 +22,6 @@ class Vocoder:
if __name__ == '__main__':
    vocoder = Vocoder()
    m = torch.load('test_mels.pth')
    for i, b in enumerate(m):
        plot_spectrogram(b.cpu())
        wav = vocoder.transform_mel_to_audio(b)
        wavfile.write(f'{i}.wav', 22050, wav[0].cpu().numpy())
    m = torch.load('C:\\Users\\jbetk\\Documents\\tmp\\some_audio\\00008.mel').cpu()
    wav = vocoder.transform_mel_to_audio(m)
    wavfile.write(f'0.wav', 22050, wav[0].cpu().numpy())
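This change moves the WaveGlow vocoder, and the mel loaded in the __main__ block, onto the CPU. A minimal sketch of the same inference path, assuming the Vocoder class defined in this file and a saved mel tensor of shape [1, 80, T]; the mel path below is illustrative:

# Sketch: CPU-only vocoding of one saved mel spectrogram (illustrative path).
import torch
from scipy.io import wavfile

vocoder = Vocoder()                       # WaveGlow weights now live on the CPU
mel = torch.load('some_clip.mel').cpu()   # assumed shape [1, 80, T]
with torch.no_grad():
    audio = vocoder.transform_mel_to_audio(mel)
wavfile.write('out.wav', 22050, audio[0].cpu().numpy())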

View File

@@ -42,7 +42,7 @@ if __name__ == '__main__':
        'less_heads': {'networks': {'generator': {'kwargs': {'num_heads': 2}}}},
        'eff_off': {'networks': {'generator': {'kwargs': {'efficient_convs': False}}}},
        'more_time': {'networks': {'generator': {'kwargs': {'time_embed_dim_multiplier': 8}}}},
        'deeper_res': {'networks': {'generator': {'kwargs': {'num_res_blocks': [3, 3, 3, 3, 3, 4, 4]}}}},
        'scale_shift_off': {'networks': {'generator': {'kwargs': {'use_scale_shift_norm': False}}}},
        'shallow_res': {'networks': {'generator': {'kwargs': {'num_res_blocks': [1, 1, 1, 1, 1, 2, 2]}}}},
    }
    opt = option.parse(base_opt, is_train=True)
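Each entry in the dict above is a named set of nested overrides layered onto the parsed base options to produce one sweep configuration. The repository's own merge helper is not visible in this hunk; a generic recursive merge that would accomplish the same thing looks roughly like this (function and variable names are illustrative):

# Hypothetical recursive merge of a nested override dict onto the base options.
import copy

def merge_overrides(base: dict, overrides: dict) -> dict:
    merged = copy.deepcopy(base)
    for key, value in overrides.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = merge_overrides(merged[key], value)
        else:
            merged[key] = value
    return merged

# e.g. opt_shallow = merge_overrides(opt, {'networks': {'generator': {'kwargs': {'num_res_blocks': [1, 1, 1, 1, 1, 2, 2]}}}})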

View File

@@ -592,6 +592,7 @@ def load_audio(audiopath, sampling_rate, raw_data=None):
def load_wav_to_torch(full_path):
    import scipy.io.wavfile
    sampling_rate, data = scipy.io.wavfile.read(full_path)
    if data.dtype == np.int32:
        norm_fix = 2 ** 31
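The norm_fix branch above rescales 32-bit integer PCM into floating point. A hedged sketch of the dtype handling this pattern generally implies; only the int32 case appears in this hunk, so the int16 and float branches below are assumptions:

# Sketch: normalize integer WAV samples to float32 roughly in [-1, 1].
import numpy as np
import scipy.io.wavfile
import torch

def load_wav_to_torch_sketch(full_path):
    sampling_rate, data = scipy.io.wavfile.read(full_path)
    if data.dtype == np.int32:
        norm_fix = 2 ** 31
    elif data.dtype == np.int16:
        norm_fix = 2 ** 15
    elif data.dtype in (np.float32, np.float64):
        norm_fix = 1.0
    else:
        raise NotImplementedError(f'Unsupported wav dtype: {data.dtype}')
    return torch.FloatTensor(data.astype(np.float32)) / norm_fix, sampling_rate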