forked from mrq/DL-Art-School
misc
This commit is contained in:
parent
bb03cbb9fc
commit
0419a64107
|
@ -104,9 +104,9 @@ def process_folder(folder, output_path, base_path, progress_file, max_files):
|
|||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('-path', type=str, help='Path to search for split files (should be the direct output of phase 1)',
|
||||
default='Y:\\split\\big_podcast')
|
||||
default='Y:\\split\\youtube')
|
||||
parser.add_argument('-progress_file', type=str, help='Place to store all folders that have already been processed', default='Y:\\filtered\\big_podcast\\already_processed.txt')
|
||||
parser.add_argument('-output_path', type=str, help='Path where sampled&filtered files are sent', default='Y:\\filtered\\big_podcast')
|
||||
parser.add_argument('-output_path', type=str, help='Path where sampled&filtered files are sent', default='Y:\\filtered\\youtube')
|
||||
parser.add_argument('-num_threads', type=int, help='Number of concurrent workers processing files.', default=6)
|
||||
parser.add_argument('-max_samples_per_folder', type=int, help='Maximum number of clips that can be extracted from each folder.', default=1000)
|
||||
parser.add_argument('-classifier_model_opt', type=str, help='Train/test options file that configures the model used to classify the audio clips.',
|
||||
|
|
|
@ -10,7 +10,7 @@ class Vocoder:
|
|||
self.model = WaveGlow(n_mel_channels=80, n_flows=12, n_group=8, n_early_size=2, n_early_every=4, WN_config={'n_layers': 8, 'n_channels': 256, 'kernel_size': 3})
|
||||
sd = torch.load('../experiments/waveglow_256channels_universal_v5.pth')
|
||||
self.model.load_state_dict(sd)
|
||||
self.model = self.model.to('cuda')
|
||||
self.model = self.model.cpu()
|
||||
self.model.eval()
|
||||
|
||||
def transform_mel_to_audio(self, mel):
|
||||
|
@ -22,8 +22,6 @@ class Vocoder:
|
|||
|
||||
if __name__ == '__main__':
|
||||
vocoder = Vocoder()
|
||||
m = torch.load('test_mels.pth')
|
||||
for i, b in enumerate(m):
|
||||
plot_spectrogram(b.cpu())
|
||||
wav = vocoder.transform_mel_to_audio(b)
|
||||
wavfile.write(f'{i}.wav', 22050, wav[0].cpu().numpy())
|
||||
m = torch.load('C:\\Users\\jbetk\\Documents\\tmp\\some_audio\\00008.mel').cpu()
|
||||
wav = vocoder.transform_mel_to_audio(m)
|
||||
wavfile.write(f'0.wav', 22050, wav[0].cpu().numpy())
|
|
@ -42,7 +42,7 @@ if __name__ == '__main__':
|
|||
'less_heads': {'networks': {'generator': {'kwargs': {'num_heads': 2}}}},
|
||||
'eff_off': {'networks': {'generator': {'kwargs': {'efficient_convs': False}}}},
|
||||
'more_time': {'networks': {'generator': {'kwargs': {'time_embed_dim_multiplier': 8}}}},
|
||||
'deeper_res': {'networks': {'generator': {'kwargs': {'num_res_blocks': [3, 3, 3, 3, 3, 4, 4]}}}},
|
||||
'scale_shift_off': {'networks': {'generator': {'kwargs': {'use_scale_shift_norm': False}}}},
|
||||
'shallow_res': {'networks': {'generator': {'kwargs': {'num_res_blocks': [1, 1, 1, 1, 1, 2, 2]}}}},
|
||||
}
|
||||
opt = option.parse(base_opt, is_train=True)
|
||||
|
|
|
@ -592,6 +592,7 @@ def load_audio(audiopath, sampling_rate, raw_data=None):
|
|||
|
||||
|
||||
def load_wav_to_torch(full_path):
|
||||
import scipy.io.wavfile
|
||||
sampling_rate, data = scipy.io.wavfile.read(full_path)
|
||||
if data.dtype == np.int32:
|
||||
norm_fix = 2 ** 31
|
||||
|
|
Loading…
Reference in New Issue
Block a user