This commit is contained in:
James Betker 2022-07-29 11:24:36 -06:00
parent 4ddd01a7fb
commit 3cb14123bc

View File

@ -14,9 +14,9 @@ import numpy as np
import torch import torch
from tqdm import tqdm from tqdm import tqdm
from trainer.injectors.audio_injectors import MusicCheaterLatentInjector from trainer.injectors.audio_injectors import MusicCheaterLatentInjector, normalize_torch_mel, denormalize_mel
from codes.models.audio.music.transformer_diffusion14 import get_cheater_encoder_v2 from models.audio.music.transformer_diffusion14 import get_cheater_encoder_v2
def report_progress(progress_file, file): def report_progress(progress_file, file):
@ -24,16 +24,14 @@ def report_progress(progress_file, file):
f.write(f'{file}\n') f.write(f'{file}\n')
model = get_cheater_encoder_v2().eval().cpu() def process_folder(file, model, base_path, output_path, progress_file):
model.load_state_dict(torch.load('../experiments/tfd14_cheater_encoder.pth', map_location=torch.device('cpu')))
def process_folder(file, base_path, output_path, progress_file):
outdir = os.path.join(output_path, f'{os.path.relpath(os.path.dirname(file), base_path)}') outdir = os.path.join(output_path, f'{os.path.relpath(os.path.dirname(file), base_path)}')
os.makedirs(outdir, exist_ok=True) os.makedirs(outdir, exist_ok=True)
with np.load(file) as npz_file: with np.load(file) as npz_file:
mel = torch.tensor(npz_file['arr_0']).cuda().unsqueeze(0) mel = torch.tensor(npz_file['arr_0']).cuda().unsqueeze(0)
global model # Fix the normalization issues with the old mels. This should get reverted when these mels are re-generated.
mel = normalize_torch_mel(denormalize_mel(mel))
assert mel.min() > -1.001 and mel.max() < 1.001
model = model.cuda() model = model.cuda()
with torch.no_grad(): with torch.no_grad():
cheater = model(mel) cheater = model(mel)
@ -46,7 +44,7 @@ if __name__ == '__main__':
parser.add_argument('--path', type=str, help='Path to search for files', default='Y:\\separated\\large_mels') parser.add_argument('--path', type=str, help='Path to search for files', default='Y:\\separated\\large_mels')
parser.add_argument('--progress_file', type=str, help='Place to store all files that have already been processed', default='Y:\\separated\\large_mel_cheaters\\already_processed.txt') parser.add_argument('--progress_file', type=str, help='Place to store all files that have already been processed', default='Y:\\separated\\large_mel_cheaters\\already_processed.txt')
parser.add_argument('--output_path', type=str, help='Path for output files', default='Y:\\separated\\large_mel_cheaters') parser.add_argument('--output_path', type=str, help='Path for output files', default='Y:\\separated\\large_mel_cheaters')
parser.add_argument('--num_threads', type=int, help='Number of concurrent workers processing files.', default=1) parser.add_argument('--num_threads', type=int, help='Number of concurrent workers processing files (there must be a GPU per-worker.)', default=1)
args = parser.parse_args() args = parser.parse_args()
os.makedirs(args.output_path, exist_ok=True) os.makedirs(args.output_path, exist_ok=True)
@ -65,9 +63,21 @@ if __name__ == '__main__':
torch.save(root_music_files, cache_path) torch.save(root_music_files, cache_path)
orig_len = len(root_music_files) orig_len = len(root_music_files)
folders = root_music_files - processed_files folders = list(root_music_files - processed_files)
print(f"Found {len(folders)} files to process. Total processing is {100 * (orig_len - len(folders)) / orig_len}% complete.") print(f"Found {len(folders)} files to process. Total processing is {100 * (orig_len - len(folders)) / orig_len}% complete.")
with ThreadPool(args.num_threads) as pool: k = 0
list(tqdm(pool.imap(functools.partial(process_folder, output_path=args.output_path, base_path=args.path, for k in range(args.num_threads-1):
progress_file=args.progress_file), folders), total=len(folders))) if os.fork() == 0:
break
# k is now the process number.
partition_len = (len(folders)//args.num_threads)+1
folders = folders[k*partition_len:(k+1)*partition_len]
model = get_cheater_encoder_v2().eval().cpu()
model.load_state_dict(torch.load('../experiments/tfd14_cheater_encoder.pth', map_location=torch.device('cpu')))
model = model.to(f'cuda:{k}')
for folder in tqdm(folders):
process_folder(folder, model=model, output_path=args.output_path, base_path=args.path, progress_file=args.progress_file)