# DL-Art-School/dlas/scripts/do_to_files.py
#
# 44 lines
# 1.2 KiB
# Python
# Raw Normal View History

import argparse
import functools
import os
import pathlib
from multiprocessing.pool import ThreadPool
from tqdm import tqdm
'''
Helper function for scripts that iterate over large sets of files. Defines command-line arguments
for operating over a large set of files, then handles setting up a worker queue system to operate
on those files. You need to provide your own process_file_fn.
process_file_fn expected signature:
(path, output_path)
'''
def do_to_files(process_file_fn):
    """Apply ``process_file_fn`` to every file selected on the command line.

    Command-line arguments (parsed from ``sys.argv`` with argparse):
        --path         Root directory that is searched recursively (required).
        --glob         Pattern passed to ``pathlib.Path.rglob`` under --path (required).
        --out          Forwarded unchanged to ``process_file_fn`` as ``output_path``.
        --resume       Number of leading files, in sorted order, to skip. Default 0.
        --num_workers  Thread pool size. 0 (the default) processes the files one by
                       one in the calling thread.

    Args:
        process_file_fn: Callable invoked as ``process_file_fn(path, output_path=out)``,
            where ``path`` is the matched file path converted to ``str``. Its return
            value is ignored.

    Returns:
        None.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--path', required=True,
                        help='Root directory to search recursively.')
    parser.add_argument('--glob', required=True,
                        help='Glob pattern matched recursively under --path.')
    parser.add_argument('--out',
                        help='Passed to the processing function as output_path.')
    # Parsed as int: a raw argparse string makes the ``files[resume:]`` slice
    # below raise TypeError.
    parser.add_argument('--resume', type=int, default=0,
                        help='Number of files (in sorted order) to skip.')
    # Defaulting to 0 lets the flag be omitted; int(None) used to raise TypeError.
    parser.add_argument('--num_workers', type=int, default=0,
                        help='Worker threads; 0 runs serially.')
    args = parser.parse_args()
    if args.resume < 0:
        # A negative slice start would silently keep only the *last* N files.
        parser.error('--resume must be >= 0')

    # Sort so that a --resume offset refers to the same files on every run;
    # the order in which rglob walks directories is not something to rely on.
    files = sorted(str(f) for f in pathlib.Path(args.path).rglob(args.glob))
    files = files[args.resume:]

    pfn = functools.partial(process_file_fn, output_path=args.out)
    if args.num_workers > 0:
        with ThreadPool(args.num_workers) as pool:
            # imap is lazy: iterate it to drive the work and the progress bar,
            # without collecting the (unused) return values in a list.
            for _ in tqdm(pool.imap(pfn, files), total=len(files)):
                pass
    else:
        for f in tqdm(files):
            pfn(f)