DL-Art-School/codes/scripts/extract_temporal_squares.py

"""Extract short runs of consecutive frames from videos and compute optical flow across each run."""
import os
import os.path as osp
import shutil
import subprocess
from time import sleep

import munch
import numpy as np
import cv2
import torchvision
from PIL import Image
import data.util as data_util  # noqa: E402
import torch.utils.data as data
from tqdm import tqdm
import torch
import random

from models.flownet2.networks.resample2d_package.resample2d import Resample2d
from models.optical_flow.PWCNet import pwc_dc_net


def main():
    """Assemble the hard-coded options, make sure the output folder exists, then run `go`.

    Of the options below, only 'n_thread', 'input_folder' and 'save_folder' are read by the
    live code visible in this file; the rest are carried along in the dict unchanged.
    """
    opt = {
        'n_thread': 0,
        'compression_level': 95,  # JPEG compression quality rating.
        'dest': 'file',
        'input_folder': 'D:\\dlas\\codes\\scripts\\test',
        'save_folder': 'D:\\dlas\\codes\\scripts\\test_out',
        'imgsize': 256,
        'bottom_crop': .1,
        'keep_folder': False,
    }

    # Create the destination directory up front so per-clip folders can be made inside it.
    out_dir = opt['save_folder']
    if not osp.exists(out_dir):
        os.makedirs(out_dir)
        print('mkdir [{:s}] ...'.format(out_dir))

    go(opt)


def is_video(filename):
    """Return True if `filename` ends with one of the recognised video extensions.

    Only the all-lowercase and all-uppercase spellings listed below match; the comparison is
    otherwise case-sensitive.
    """
    video_suffixes = ('.mp4', '.MP4', '.avi', '.AVI', '.mkv', '.MKV', '.wmv', '.WMV')
    return filename.endswith(video_suffixes)


def get_videos_in_path(path):
    """Recursively collect the paths of all video files below `path`.

    Directories are visited in sorted order and file names are sorted within each directory,
    so the returned list has a stable order. Files are selected with `is_video`.

    Raises AssertionError if `path` is not an existing directory.
    """
    assert os.path.isdir(path), '{:s} is not a valid directory'.format(path)
    return [
        os.path.join(folder, name)
        for folder, _, names in sorted(os.walk(path))
        for name in sorted(names)
        if is_video(name)
    ]

def get_time_for_secs(secs):
    """Format a duration given in seconds as an ``HH:MM:SS.mmm`` string.

    The result is used in this file as the value of ffmpeg's ``-ss`` seek argument.

    Args:
        secs: Non-negative number of seconds (int or float).

    Returns:
        A string such as ``'01:01:40.000'``.
    """
    total_mins = int(secs / 60)
    hours = int(total_mins / 60)
    mins = total_mins % 60
    # total_mins already includes the whole hours, so removing it alone leaves the
    # leftover seconds. Subtracting the hours again would make the field negative
    # for any input of one hour or more.
    secs = secs - total_mins * 60
    return '%02d:%02d:%06.3f' % (hours, mins, secs)

class VideoClipDataset(data.Dataset):
    """Dataset whose items are the frame runs extracted from one video each.

    Item `i` corresponds to the i-th video found under ``opt['input_folder']``. Fetching an
    item shells out to ffprobe/ffmpeg, writes frames into sub-folders of
    ``opt['save_folder']``, loads them back as tensors and deletes the frame files.
    """

    def __init__(self, opt):
        """Store the options and index every video file below ``opt['input_folder']``."""
        self.opt = opt
        input_folder = opt['input_folder']
        self.videos = get_videos_in_path(input_folder)
        print("Found %i videos" % (len(self.videos),))

    def __getitem__(self, index):
        return self.get(index)

    def extract_n_frames(self, video_file, dest, time_seconds, n):
        """Dump `n` frames of `video_file`, starting at `time_seconds`, into `dest` as 1.jpg, 2.jpg, ...

        ffmpeg's output is discarded and its exit status is not checked; callers detect
        failure by counting the files that were produced.
        """
        ffmpeg_args = ['ffmpeg', '-y', '-ss', get_time_for_secs(time_seconds), '-i', video_file,
                       '-vframes', str(n), f'{dest}/%d.jpg']
        subprocess.run(ffmpeg_args, stderr=subprocess.DEVNULL, stdout=subprocess.DEVNULL)

    def get_video_length(self, video_file):
        """Return the duration of `video_file` in seconds, as reported by ffprobe.

        stderr is merged into stdout, so if ffprobe prints an error the float conversion
        below raises ValueError.
        """
        result = subprocess.run(["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of",
                                 "default=noprint_wrappers=1", video_file],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        return float(result.stdout.decode('utf-8').strip().replace("duration=", ""))

    @staticmethod
    def _remove_with_retry(path, attempts=5, delay=.1):
        """Delete `path`, retrying on OSError up to `attempts` times with `delay` seconds between tries.

        Only OSError is retried, so KeyboardInterrupt and other non-I/O exceptions propagate
        immediately. When every attempt fails, the last OSError is re-raised so the real
        cause stays visible.
        """
        for attempt in range(attempts):
            try:
                os.remove(path)
                return
            except OSError:
                if attempt >= attempts - 1:
                    raise
                sleep(delay)

    def get_image_tensor(self, path):
        """Load the image at `path` as a float RGB tensor in CHW layout, then delete the file.

        Pixel values are scaled to [0, 1] and the image is cropped at the bottom/right so
        that height and width are multiples of 64.

        Raises:
            OSError: if the file could not be deleted after several attempts.
            AssertionError: if the image could not be read or has no channel axis.
        """
        img = cv2.imread(path, cv2.IMREAD_UNCHANGED)

        # Access exceptions happen regularly, probably due to the subprocess not fully terminating.
        self._remove_with_retry(path)

        assert img is not None, 'could not read image {}'.format(path)
        assert len(img.shape) > 2, 'expected a multi-channel image at {}'.format(path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.
        # Crop off excess so image dimensions are a multiple of 64.
        h, w, _ = img.shape
        img = img[:(h//64)*64, :(w//64)*64, :]
        return torch.from_numpy(np.ascontiguousarray(np.transpose(img, (2, 0, 1)))).float()

    def get(self, index):
        """Extract frame runs from the `index`-th video.

        Starting 2 seconds in, a run of 5-30 frames is extracted, then the start time
        advances by a random 2-5 seconds, until the (integer) video length is reached.

        Returns:
            A list of ``(frames, folder)`` tuples, where `frames` is the list of tensors for
            one run and `folder` is the directory the run was extracted into.

        Raises:
            FileExistsError: if a run folder already exists (e.g. left over from a previous run).
            AssertionError: if ffmpeg produced a different number of frames than requested.
        """
        path = self.videos[index]
        out_folder = self.opt['save_folder']

        vid_len = int(self.get_video_length(path))
        start = 2
        img_runs = []
        while start < vid_len:
            frames_out = os.path.join(out_folder, f'{index}_{start}')
            os.makedirs(frames_out, exist_ok=False)
            n = random.randint(5, 30)
            self.extract_n_frames(path, frames_out, start, n)
            # NOTE(review): presumably returns (paths, ...) for the image files in the
            # folder; the ordering of the returned paths is not visible from here.
            frames = data_util.find_files_of_type('img', frames_out)[0]
            assert len(frames) == n, 'expected {} frames in {}, found {}'.format(n, frames_out, len(frames))
            img_runs.append(([self.get_image_tensor(frame) for frame in frames], frames_out))
            start += random.randint(2, 5)

        return img_runs

    def __len__(self):
        return len(self.videos)


def compute_flow_and_cleanup(flownet, runs):
    """Compute optical flow across each frame run and save the results into the run's folder.

    For every run, flow is estimated between each pair of consecutive frames and accumulated
    into a single "consolidated" flow, and a "direct" flow is estimated from the first and
    last frame alone. The first/last frames, both flows and several warped debug images are
    written into the run's folder.

    Despite the name, the code visible here does not delete anything.

    Args:
        flownet: Callable that takes two images concatenated along dim 1 and returns a flow
            tensor (`go` passes the network returned by `pwc_dc_net`).
        runs: Iterable of ``(frames, folder)`` pairs as produced by `VideoClipDataset.get`;
            `frames` is a sequence of image tensors and `folder` is the output directory.

    Requires CUDA: every tensor and the resampler are moved to the GPU.
    """
    resampler = Resample2d().cuda()
    for run in runs:
        # Each entry is a (frames, folder) pair; `run` is rebound to the frame list.
        run, path = run
        consolidated_flows = None
        # `a` is the first frame with a batch dimension added.
        a = run[0].unsqueeze(0).cuda()
        img = a
        # `dbg` is the first frame, repeatedly warped by each per-step flow below.
        dbg = a.clone()
        for i in range(1,len(run)):
            img2 = run[i].unsqueeze(0).cuda()
            # The later frame is passed first, then the earlier one.
            flow = flownet(torch.cat([img2, img], dim=1))
            # Resize the network output to the spatial size of the frames.
            flow = torch.nn.functional.interpolate(flow, size=img.shape[2:], mode='bilinear')
            if consolidated_flows is None:
                consolidated_flows = flow
            else:
                # NOTE(review): presumably chains the new step onto the accumulated flow by
                # resampling it through the negated accumulated flow before adding —
                # confirm against Resample2d's sign convention.
                consolidated_flows = resampler(flow, -consolidated_flows) + consolidated_flows
            img = img2
            dbg = resampler(dbg, flow)
        torchvision.utils.save_image(dbg, os.path.join(path, "debug.jpg"))
        # Scale by 1/255 and clamp to [-.5, .5] so that adding .5 below yields values in [0, 1].
        # NOTE: if a run held a single frame, consolidated_flows would still be None here and
        # this line would fail.
        consolidated_flows = torch.clamp(consolidated_flows / 255, -.5, .5)
        # `b` is the last frame of the run.
        b = run[-1].unsqueeze(0).cuda()
        _, _, h, w = a.shape
        # Flow estimated from the first and last frame only, scaled and clamped the same way.
        # NOTE(review): the input order here is [a, b] (earlier frame first), the opposite of
        # the per-step calls above — confirm this is intended.
        direct_flows = torch.nn.functional.interpolate(torch.clamp(flownet(torch.cat([a, b], dim=1).float()) / 255, -.5, .5), size=img.shape[2:], mode='bilinear')
        # TODO: Reshape image here.
        '''
        # Perform explicit crops first. These are generally used to get rid of watermarks so we dont even want to
        # consider these areas of the image.
        if 'bottom_crop' in self.opt.keys() and self.opt['bottom_crop'] > 0:
            bc = self.opt['bottom_crop']
            if bc > 0 and bc < 1:
                bc = int(bc * img.shape[0])
            img = img[:-bc, :, :]

        h, w, c = img.shape
        assert min(h,w) >= self.opt['imgsize']

        # We must convert the image into a square.
        dim = min(h, w)
        # Crop the image so that only the center is left, since this is often the most salient part of the image.
        img = img[(h - dim) // 2:dim + (h - dim) // 2, (w - dim) // 2:dim + (w - dim) // 2, :]
        img = cv2.resize(img, (self.opt['imgsize'], self.opt['imgsize']), interpolation=cv2.INTER_AREA)
        '''

        # Save the first and last frame, then the consolidated flow: as a tensor (scaled back
        # up by 255, so it carries the clamp applied above) and as an image.
        torchvision.utils.save_image(a, os.path.join(path, "a.jpg"))
        torchvision.utils.save_image(b, os.path.join(path, "b.jpg"))
        torch.save(consolidated_flows * 255, os.path.join(path, "consolidated_flow.pt"))
        # A zero channel is appended before saving; presumably the flow has 2 channels and
        # this pads it to 3 for image output — TODO confirm.
        torchvision.utils.save_image(torch.cat([consolidated_flows + .5, torch.zeros((1, 1, h, w), device='cuda')], dim=1), os.path.join(path, "consolidated_flow.png"))

        # For debugging: frames warped by the estimated flows, plus the direct flow as an image.
        torchvision.utils.save_image(resampler(a, consolidated_flows * 255), os.path.join(path, "b_flowed.jpg"))
        torchvision.utils.save_image(resampler(b, -consolidated_flows * 255), os.path.join(path, "a_flowed.jpg"))
        torchvision.utils.save_image(resampler(b, direct_flows * 255), os.path.join(path, "a_flowed_nonconsolidated.jpg"))
        torchvision.utils.save_image(torch.cat([direct_flows + .5, torch.zeros((1, 1, h, w), device='cuda')], dim=1), os.path.join(path, "direct_flow.png"))


def identity(x):
    """Return `x` unchanged.

    Passed as the DataLoader's ``collate_fn`` in `go`, so batches are handed over as-is.
    """
    return x


def go(opt):
    """Load the flow network and run flow computation over every video in the dataset.

    Args:
        opt: Option dict; 'n_thread' sets the number of DataLoader workers and the whole
            dict is handed to `VideoClipDataset`.
    """
    # Flow network in inference mode on the GPU.
    model = pwc_dc_net('../experiments/pwc_humanflow.pth')
    model.eval()
    model = model.cuda()

    clips = VideoClipDataset(opt)
    loader = data.DataLoader(
        clips,
        num_workers=opt['n_thread'],
        collate_fn=identity,
    )
    with torch.no_grad():
        for video_batch in tqdm(loader):
            # With the default batch size of 1 and the identity collate function, each
            # batch is a one-element list holding the runs of a single video.
            compute_flow_and_cleanup(model, video_batch[0])


if __name__ == '__main__':
    main()