extract_temporal_squares script

For extracting related patches across a video
2021-02-08 08:10:24 -07:00 · 2021-02-08 08:10:24 -07:00 · 543d459b4e
commit 543d459b4e
parent 39fd755baa
1 changed files with 201 additions and 0 deletions
--- a/codes/scripts/extract_temporal_squares.py
+++ b/codes/scripts/extract_temporal_squares.py
@ -0,0 +1,201 @@
 """A multi-thread tool to crop large images to sub-images for faster IO."""
 import os
 import os.path as osp
 import shutil
 import subprocess
 from time import sleep
 import munch
 import numpy as np
 import cv2
 import torchvision
 from PIL import Image
 import data.util as data_util  # noqa: E402
 import torch.utils.data as data
 from tqdm import tqdm
 import torch
 import random
 from models.flownet2.networks.resample2d_package.resample2d import Resample2d
 from models.optical_flow.PWCNet import pwc_dc_net
 def main():
    opt = {}
    opt['n_thread'] = 0
    opt['compression_level'] = 95  # JPEG compression quality rating.
    opt['dest'] = 'file'
    opt['input_folder'] = 'D:\\dlas\\codes\\scripts\\test'
    opt['save_folder'] = 'D:\\dlas\\codes\\scripts\\test_out'
    opt['imgsize'] = 256
    opt['bottom_crop'] = .1
    opt['keep_folder'] = False
    save_folder = opt['save_folder']
    if not osp.exists(save_folder):
        os.makedirs(save_folder)
        print('mkdir [{:s}] ...'.format(save_folder))
    go(opt)
 def is_video(filename):
    return any(filename.endswith(extension) for extension in ['.mp4', '.MP4', '.avi', '.AVI', '.mkv', '.MKV', '.wmv', '.WMV'])
 def get_videos_in_path(path):
    assert os.path.isdir(path), '{:s} is not a valid directory'.format(path)
    videos = []
    for dirpath, _, fnames in sorted(os.walk(path)):
        for fname in sorted(fnames):
            if is_video(fname):
                videos.append(os.path.join(dirpath, fname))
    return videos
 def get_time_for_secs(secs):
    mins = int(secs / 60)
    hours = int(mins / 60)
    secs = secs - (mins * 60) - (hours * 3600)
    mins = mins % 60
    return '%02d:%02d:%06.3f' % (hours, mins, secs)
 class VideoClipDataset(data.Dataset):
    def __init__(self, opt):
        self.opt = opt
        input_folder = opt['input_folder']
        self.videos = get_videos_in_path(input_folder)
        print("Found %i videos" % (len(self.videos),))
    def __getitem__(self, index):
        return self.get(index)
    def extract_n_frames(self, video_file, dest, time_seconds, n):
        ffmpeg_args = ['ffmpeg', '-y', '-ss', get_time_for_secs(time_seconds), '-i', video_file, '-vframes', str(n), f'{dest}/%d.jpg']
        process = subprocess.Popen(ffmpeg_args, stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)
        process.wait()
    def get_video_length(self, video_file):
        result = subprocess.run(["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of",
                             "default=noprint_wrappers=1", video_file],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)
        return float(result.stdout.decode('utf-8').strip().replace("duration=", ""))
    def get_image_tensor(self, path):
        img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
        # Access exceptions happen regularly, probably due to the subprocess not fully terminating.
        for tries in range(5):
            try:
                os.remove(path)
                break
            except:
                if tries >= 4:
                    assert False
                else:
                    sleep(.1)
        assert img is not None
        assert len(img.shape) > 2
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.
        # Crop off excess so image dimensions are a multiple of 64.
        h, w, _ = img.shape
        img = img[:(h//64)*64,:(w//64)*64,:]
        return torch.from_numpy(np.ascontiguousarray(np.transpose(img, (2, 0, 1)))).float()
    def get(self, index):
        path = self.videos[index]
        out_folder = self.opt['save_folder']
        vid_len = int(self.get_video_length(path))
        start = 2
        img_runs = []
        while start < vid_len:
            frames_out = os.path.join(out_folder, f'{index}_{start}')
            os.makedirs(frames_out, exist_ok=False)
            n = random.randint(5, 30)
            self.extract_n_frames(path, frames_out, start, n)
            frames = data_util.get_image_paths('img', frames_out)[0]
            assert len(frames) == n
            img_runs.append(([self.get_image_tensor(frame) for frame in frames], frames_out))
            start += random.randint(2,5)
        return img_runs
    def __len__(self):
        return len(self.videos)
 def compute_flow_and_cleanup(flownet, runs):
    resampler = Resample2d().cuda()
    for run in runs:
        run, path = run
        consolidated_flows = None
        a = run[0].unsqueeze(0).cuda()
        img = a
        dbg = a.clone()
        for i in range(1,len(run)):
            img2 = run[i].unsqueeze(0).cuda()
            flow = flownet(torch.cat([img2, img], dim=1))
            flow = torch.nn.functional.interpolate(flow, size=img.shape[2:], mode='bilinear')
            if consolidated_flows is None:
                consolidated_flows = flow
            else:
                consolidated_flows = resampler(flow, -consolidated_flows) + consolidated_flows
            img = img2
            dbg = resampler(dbg, flow)
        torchvision.utils.save_image(dbg, os.path.join(path, "debug.jpg"))
        consolidated_flows = torch.clamp(consolidated_flows / 255, -.5, .5)
        b = run[-1].unsqueeze(0).cuda()
        _, _, h, w = a.shape
        direct_flows = torch.nn.functional.interpolate(torch.clamp(flownet(torch.cat([a, b], dim=1).float()) / 255, -.5, .5), size=img.shape[2:], mode='bilinear')
        # TODO: Reshape image here.
        '''
        # Perform explicit crops first. These are generally used to get rid of watermarks so we dont even want to
        # consider these areas of the image.
        if 'bottom_crop' in self.opt.keys() and self.opt['bottom_crop'] > 0:
            bc = self.opt['bottom_crop']
            if bc > 0 and bc < 1:
                bc = int(bc * img.shape[0])
            img = img[:-bc, :, :]
        h, w, c = img.shape
        assert min(h,w) >= self.opt['imgsize']
        # We must convert the image into a square.
        dim = min(h, w)
        # Crop the image so that only the center is left, since this is often the most salient part of the image.
        img = img[(h - dim) // 2:dim + (h - dim) // 2, (w - dim) // 2:dim + (w - dim) // 2, :]
        img = cv2.resize(img, (self.opt['imgsize'], self.opt['imgsize']), interpolation=cv2.INTER_AREA)
        '''
        torchvision.utils.save_image(a, os.path.join(path, "a.jpg"))
        torchvision.utils.save_image(b, os.path.join(path, "b.jpg"))
        torch.save(consolidated_flows * 255, os.path.join(path, "consolidated_flow.pt"))
        torchvision.utils.save_image(torch.cat([consolidated_flows + .5, torch.zeros((1, 1, h, w), device='cuda')], dim=1), os.path.join(path, "consolidated_flow.png"))
        # For debugging
        torchvision.utils.save_image(resampler(a, consolidated_flows * 255), os.path.join(path, "b_flowed.jpg"))
        torchvision.utils.save_image(resampler(b, -consolidated_flows * 255), os.path.join(path, "a_flowed.jpg"))
        torchvision.utils.save_image(resampler(b, direct_flows * 255), os.path.join(path, "a_flowed_nonconsolidated.jpg"))
        torchvision.utils.save_image(torch.cat([direct_flows + .5, torch.zeros((1, 1, h, w), device='cuda')], dim=1), os.path.join(path, "direct_flow.png"))
 def identity(x):
    return x
 def go(opt):
    flownet = pwc_dc_net('../experiments/pwc_humanflow.pth')
    flownet.eval()
    flownet = flownet.cuda()
    dataset = VideoClipDataset(opt)
    dataloader = data.DataLoader(dataset, num_workers=opt['n_thread'], collate_fn=identity)
    with torch.no_grad():
        for batch in tqdm(dataloader):
            compute_flow_and_cleanup(flownet, batch[0])
 if __name__ == '__main__':
    main()