forked from mrq/DL-Art-School

Move gen_similarities and rename

parent 8b376e63d9 · commit 735f6e4640
@@ -38,8 +38,10 @@ def process_subdir(subdir, options, clip_sz):
     global clip_model
     if clip_model is None:
         print('Loading CLIP model..')
-        clip_model = load_model_from_config(preloaded_options=options, model_name='clip', also_load_savepoint=True)
+        clip_model = load_model_from_config(preloaded_options=options, model_name='clip', also_load_savepoint=True).cuda()
+        clip_model.eval()
 
+    with torch.no_grad():
         root, paths = subdir
         if len(paths) == 0:
             return
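For context, this hunk is the standard lazy-load inference setup: the model is cached in a process-global, moved to the GPU, switched to eval mode, and everything after runs under no_grad. A minimal standalone sketch of the same pattern follows; get_clip_model, embed, and the nn.Linear stand-in are illustrative only (the real script goes through load_model_from_config and calls .cuda()):

    import torch
    import torch.nn as nn

    clip_model = None  # process-global cache, as in the script

    def get_clip_model():
        # Lazy one-time load: each worker pays the model-load cost once and
        # reuses the cached module for every subdir it processes.
        global clip_model
        if clip_model is None:
            print('Loading CLIP model..')
            # Stand-in for load_model_from_config(...); kept on CPU here for portability.
            clip_model = nn.Linear(80, 512)
            clip_model.eval()  # freeze dropout/batch-norm behavior for inference
        return clip_model

    def embed(batch):
        model = get_clip_model()
        # no_grad() skips autograd bookkeeping, cutting memory use for pure inference.
        with torch.no_grad():
            return model(batch)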
@@ -63,7 +65,7 @@ def process_subdir(subdir, options, clip_sz):
         while len(clips) > 0:
             stacked = torch.stack(clips[:256], dim=0).cuda()
             clips = clips[256:]
-            mels = wav_to_mel(stacked)
+            mels = wav_to_mel(stacked).cuda()
             outp = clip_model.inference(mels).cpu()
             if sims is None:
                 sims = outp
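The loop around this change is a plain fixed-size batching pattern: drain the clip list 256 at a time, run inference, and accumulate results on the CPU. A self-contained sketch, with hypothetical names; the final torch.cat accumulation is an assumption, since the else branch of `if sims is None` falls outside the hunk:

    import torch

    def batched_similarities(model, clips, batch_size=256):
        # Drain the clip list in fixed-size slices, mirroring the script's
        # clips[:256] / clips = clips[256:] pattern.
        sims = None
        while len(clips) > 0:
            stacked = torch.stack(clips[:batch_size], dim=0)
            clips = clips[batch_size:]
            outp = model(stacked).cpu()  # pull results off the device right away
            sims = outp if sims is None else torch.cat([sims, outp], dim=0)
        return sims

    # Usage: any callable stands in for the model here.
    sims = batched_similarities(torch.nn.Identity(), [torch.randn(80) for _ in range(600)])
    print(sims.shape)  # torch.Size([600, 80])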
@@ -94,12 +96,13 @@ if __name__ == '__main__':
     """
     This script iterates within a directory filled with subdirs. Each subdir contains a list of audio files from the same
     source. The script uses an speech-to-speech clip model to find the <n> most similar audio clips within each subdir for
-    each clip within that subdir.
+    each clip within that subdir. These similar files are recorded in a "similarities.pth" file in each subdirectory, which
+    is consumed during training when the dataset searches for conditioning clips.
     """
 
     parser = argparse.ArgumentParser()
     parser.add_argument('-o', type=str, help='Path to the options YAML file used to train the CLIP model', default='../options/train_voice_voice_clip.yml')
     parser.add_argument('--num_workers', type=int, help='Number concurrent processes to use', default=6)
-    parser.add_argument('--root_path', type=str, help='Root path to search for audio directories from', default='Y:\\bigasr_dataset\\tedlium')
+    parser.add_argument('--root_path', type=str, help='Root path to search for audio directories from', default='Y:\\filtered\\big_podcast')
     parser.add_argument('--clip_size', type=int, help='Amount of audio samples to pull from each file', default=22050)
     args = parser.parse_args()
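The updated docstring pins down the output contract: one "similarities.pth" per subdir, read back at train time when the dataset needs conditioning clips. The diff does not show the file's schema, so the consumer sketch below assumes a {basename: [similar basenames]} mapping, best match first; load_conditioning_clip is a hypothetical helper, not code from this repo:

    import os
    import random
    import torch

    def load_conditioning_clip(audio_path, top_n=3):
        # Assumed schema: similarities.pth maps each clip's basename to a list
        # of the most-similar basenames within the same subdir.
        subdir = os.path.dirname(audio_path)
        sims = torch.load(os.path.join(subdir, 'similarities.pth'))
        candidates = sims[os.path.basename(audio_path)][:top_n]
        # Pick one of the top-N neighbours as the conditioning source.
        return os.path.join(subdir, random.choice(candidates))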