From 4c3413d008d45a473a193fd2c8884c1f8ca1a394 Mon Sep 17 00:00:00 2001 From: James Betker Date: Fri, 1 Jul 2022 00:44:20 -0600 Subject: [PATCH] Support aac datatypes --- codes/data/util.py | 2 +- codes/scripts/audio/prep_music/phase_1_split_files.py | 11 ++++++++--- codes/utils/util.py | 10 ++++++++++ 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/codes/data/util.py b/codes/data/util.py index 529cba29..a73eecb6 100644 --- a/codes/data/util.py +++ b/codes/data/util.py @@ -45,7 +45,7 @@ def is_wav_file(filename): def is_audio_file(filename): - AUDIO_EXTENSIONS = ['.wav', '.mp3', '.wma', '.m4b', '.flac'] + AUDIO_EXTENSIONS = ['.wav', '.mp3', '.wma', '.m4b', '.flac', '.aac'] return any(filename.endswith(extension) for extension in AUDIO_EXTENSIONS) diff --git a/codes/scripts/audio/prep_music/phase_1_split_files.py b/codes/scripts/audio/prep_music/phase_1_split_files.py index fe7f1d25..e24195c3 100644 --- a/codes/scripts/audio/prep_music/phase_1_split_files.py +++ b/codes/scripts/audio/prep_music/phase_1_split_files.py @@ -23,6 +23,11 @@ def report_progress(progress_file, file): def process_file(file, base_path, output_path, progress_file, duration_per_clip, sampling_rate=22050): + lp = os.path.basename(file).lower() + if ' live' in lp or 'concert' in lp: + print(f"Skipping file {file} because likely a live performance") + report_progress(progress_file, file) + return try: audio = load_audio(file, sampling_rate) except: @@ -42,9 +47,9 @@ def process_file(file, base_path, output_path, progress_file, duration_per_clip, if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument('-path', type=str, help='Path to search for files', default='Y:\\sources\\manual_podcasts_music') - parser.add_argument('-progress_file', type=str, help='Place to store all files that have already been processed', default='Y:\\sources\\manual_podcasts_music\\already_processed.txt') - parser.add_argument('-output_path', type=str, help='Path for output files', default='Y:\\split\\manual_podcasts_music') + parser.add_argument('-path', type=str, help='Path to search for files', default='C:\\Users\\James\\Downloads\\soundcloud-dl\\sc2') + parser.add_argument('-progress_file', type=str, help='Place to store all files that have already been processed', default='C:\\Users\\James\\Downloads\\soundcloud-dl\\sc2\\already_processed.txt') + parser.add_argument('-output_path', type=str, help='Path for output files', default='Y:\\split\\soundcloud_mixes\\bigmix1') parser.add_argument('-num_threads', type=int, help='Number of concurrent workers processing files.', default=4) parser.add_argument('-duration', type=int, help='Duration per clip in seconds', default=30) args = parser.parse_args() diff --git a/codes/utils/util.py b/codes/utils/util.py index b3e52397..d2077701 100644 --- a/codes/utils/util.py +++ b/codes/utils/util.py @@ -574,6 +574,16 @@ def load_audio(audiopath, sampling_rate, raw_data=None): import soundfile as sf audio, lsr = sf.read(audiopath) audio = torch.FloatTensor(audio) + elif audiopath[-4:] == '.aac': + # Process AAC files using pydub. I'd use this for everything except I'm cornered into backwards compatibility. + from pydub import AudioSegment + asg = AudioSegment.from_file(audiopath) + dtype = getattr(np, "int{:d}".format(asg.sample_width * 8)) + arr = np.ndarray((int(asg.frame_count()), asg.channels), buffer=asg.raw_data, dtype=dtype) + arr = arr.astype('float') / (2 ** (asg.sample_width * 8 - 1)) + arr = arr[:,0] + audio = torch.FloatTensor(arr) + lsr = asg.frame_rate else: audio, lsr = open_audio(audiopath) audio = torch.FloatTensor(audio)