DL-Art-School/codes/data/Vimeo90K_dataset.py
XintaoWang 037933ba66 mmsr
2019-08-23 21:42:47 +08:00

168 lines
6.7 KiB
Python

'''
Vimeo90K dataset.
Supports reading images from lmdb, an image folder, and memcached.
'''
import os.path as osp
import random
import pickle
import logging
import numpy as np
import cv2
import lmdb
import torch
import torch.utils.data as data
import data.util as util
try:
import mc # import memcached
except ImportError:
pass
# Module-level logger; fetches the logger registered under the name 'base'.
logger = logging.getLogger('base')
class Vimeo90KDataset(data.Dataset):
    '''
    Reading the training Vimeo90K dataset
    key example: 00001_0001 (_1, ..., _7)
    GT (Ground-Truth): 4th frame;
    LQ (Low-Quality): support reading N LQ frames, N = 1, 3, 5, 7 centered with 4th frame

    Three storage backends are selected through opt['data_type']:
        'lmdb' - frames are read from the lmdb environments at dataroot_GT / dataroot_LQ
        'mc'   - encoded frames are fetched from memcached and decoded with OpenCV
        'img'  - frames are read from image files under dataroot_GT / dataroot_LQ

    Each item is a dict {'LQs': tensor, 'GT': tensor, 'key': str}.
    '''

    def __init__(self, opt):
        '''
        Args:
            opt: dict-like options. Keys read here: 'interval_list', 'random_reverse',
                'dataroot_GT', 'dataroot_LQ', 'data_type', 'GT_size', 'LQ_size', 'N_frames'
                and 'cache_keys'. __getitem__ additionally reads 'scale', 'phase',
                'use_flip' and 'use_rot'.

        Raises:
            ValueError: if no key list can be loaded or 'data_type' is not one of
                'lmdb', 'mc', 'img'.
            AssertionError: if the loaded key list is empty.
        '''
        super(Vimeo90KDataset, self).__init__()
        self.opt = opt
        # temporal augmentation
        # NOTE: these two settings are stored and logged here; no other method of this
        # class reads them.
        self.interval_list = opt['interval_list']
        self.random_reverse = opt['random_reverse']
        logger.info('Temporal augmentation interval list: [{}], with random reverse is {}.'.format(
            ','.join(str(x) for x in opt['interval_list']), self.random_reverse))

        self.GT_root, self.LQ_root = opt['dataroot_GT'], opt['dataroot_LQ']
        self.data_type = self.opt['data_type']
        # LQ frames are treated as low resolution whenever the two sizes differ
        self.LR_input = opt['GT_size'] != opt['LQ_size']

        #### determine the LQ frame list
        '''
        N | frames
        1 | 4
        3 | 3,4,5
        5 | 2,3,4,5,6
        7 | 1,2,3,4,5,6,7
        '''
        first_frame = (9 - opt['N_frames']) // 2
        self.LQ_frames_list = [first_frame + i for i in range(opt['N_frames'])]

        #### directly load image keys
        if self.data_type == 'lmdb':
            self.paths_GT, _ = util.get_image_paths(self.data_type, opt['dataroot_GT'])
            logger.info('Using lmdb meta info for cache keys.')
        elif opt['cache_keys']:
            logger.info('Using cache keys: {}'.format(opt['cache_keys']))
            # SECURITY: unpickling can execute arbitrary code; only point 'cache_keys'
            # at a trusted meta-info file.
            with open(opt['cache_keys'], 'rb') as f:
                self.paths_GT = pickle.load(f)['keys']
        else:
            raise ValueError(
                'Need to create cache keys (meta_info.pkl) by running [create_lmdb.py]')
        assert self.paths_GT, 'Error: GT path is empty.'

        # Backend handles are created lazily on the first __getitem__ call.
        if self.data_type == 'lmdb':
            self.GT_env, self.LQ_env = None, None
        elif self.data_type == 'mc':  # memcached
            self.mclient = None
        elif self.data_type == 'img':
            pass
        else:
            raise ValueError('Wrong data type: {}'.format(self.data_type))

    def _init_lmdb(self):
        ''' Open the GT and LQ lmdb environments read-only, without locking. '''
        # https://github.com/chainer/chainermn/issues/129
        self.GT_env = lmdb.open(self.opt['dataroot_GT'], readonly=True, lock=False,
                                readahead=False, meminit=False)
        self.LQ_env = lmdb.open(self.opt['dataroot_LQ'], readonly=True, lock=False,
                                readahead=False, meminit=False)

    def _ensure_memcached(self):
        ''' Create the memcached client on first use; do nothing if it already exists. '''
        if self.mclient is None:
            # specify the config files
            # (placeholders: both are None here and are passed to GetInstance as-is)
            server_list_config_file = None
            client_config_file = None
            self.mclient = mc.MemcachedClient.GetInstance(server_list_config_file,
                                                          client_config_file)

    def _read_img_mc(self, path):
        ''' Return BGR, HWC, [0, 255], uint8

        Raises:
            ValueError: if the buffer fetched for `path` cannot be decoded as an image.
        '''
        value = mc.pyvector()
        self.mclient.Get(path, value)
        value_buf = mc.ConvertBuffer(value)
        img_array = np.frombuffer(value_buf, np.uint8)
        img = cv2.imdecode(img_array, cv2.IMREAD_UNCHANGED)
        if img is None:
            # fail here with the offending path instead of a later AttributeError
            raise ValueError('Failed to decode memcached image: {}'.format(path))
        return img

    def _paired_random_crop(self, img_LQ_l, img_GT, LQ_hw, GT_size, scale):
        ''' Randomly crop the LQ frames and the GT frame at matching locations.

        Args:
            img_LQ_l: list of HWC LQ frames.
            img_GT: HWC GT frame.
            LQ_hw: (H, W) of the LQ frames, used to bound the random offsets.
            GT_size: side length of the square GT patch.
            scale: GT/LQ resolution ratio; only used when self.LR_input is true.

        Returns:
            (new list of cropped LQ frames, cropped GT frame).
        '''
        H, W = LQ_hw
        if self.LR_input:
            # offsets are drawn on the LQ grid and scaled up for the GT frame
            LQ_size = GT_size // scale
            rnd_h = random.randint(0, max(0, H - LQ_size))
            rnd_w = random.randint(0, max(0, W - LQ_size))
            img_LQ_l = [v[rnd_h:rnd_h + LQ_size, rnd_w:rnd_w + LQ_size, :] for v in img_LQ_l]
            rnd_h_HR, rnd_w_HR = int(rnd_h * scale), int(rnd_w * scale)
            img_GT = img_GT[rnd_h_HR:rnd_h_HR + GT_size, rnd_w_HR:rnd_w_HR + GT_size, :]
        else:
            # LQ and GT share one resolution, so the same window is used for both
            rnd_h = random.randint(0, max(0, H - GT_size))
            rnd_w = random.randint(0, max(0, W - GT_size))
            img_LQ_l = [v[rnd_h:rnd_h + GT_size, rnd_w:rnd_w + GT_size, :] for v in img_LQ_l]
            img_GT = img_GT[rnd_h:rnd_h + GT_size, rnd_w:rnd_w + GT_size, :]
        return img_LQ_l, img_GT

    def __getitem__(self, index):
        ''' Load the GT frame and the N LQ frames for the key at `index`.

        In the 'train' phase the frames are randomly cropped and passed through
        util.augment (flip / rotation options) before conversion.

        Returns:
            dict with
                'LQs': float tensor, frames stacked on axis 0 then channels (N, C, H, W)
                'GT': float tensor (C, H, W)
                'key': the dataset key string
        '''
        # lazily create the backend handle on first access
        if self.data_type == 'mc':
            self._ensure_memcached()
        elif self.data_type == 'lmdb' and (self.GT_env is None or self.LQ_env is None):
            self._init_lmdb()

        scale = self.opt['scale']
        GT_size = self.opt['GT_size']
        key = self.paths_GT[index]
        name_a, name_b = key.split('_')

        #### get the GT image (as the center frame)
        if self.data_type == 'mc':
            img_GT = self._read_img_mc(osp.join(self.GT_root, name_a, name_b, '4.png'))
            # decoded uint8 [0, 255] -> float32 [0, 1]
            img_GT = img_GT.astype(np.float32) / 255.
        elif self.data_type == 'lmdb':
            img_GT = util.read_img(self.GT_env, key + '_4', (3, 256, 448))
        else:
            img_GT = util.read_img(None, osp.join(self.GT_root, name_a, name_b, 'im4.png'))

        #### get LQ images
        # fixed frame sizes (C, H, W): 64x112 for low-resolution inputs, else 256x448
        LQ_size_tuple = (3, 64, 112) if self.LR_input else (3, 256, 448)
        img_LQ_l = []
        for v in self.LQ_frames_list:
            if self.data_type == 'mc':
                img_LQ = self._read_img_mc(
                    osp.join(self.LQ_root, name_a, name_b, '{}.png'.format(v)))
                img_LQ = img_LQ.astype(np.float32) / 255.
            elif self.data_type == 'lmdb':
                img_LQ = util.read_img(self.LQ_env, key + '_{}'.format(v), LQ_size_tuple)
            else:
                img_LQ = util.read_img(None,
                                       osp.join(self.LQ_root, name_a, name_b,
                                                'im{}.png'.format(v)))
            img_LQ_l.append(img_LQ)

        if self.opt['phase'] == 'train':
            _, H, W = LQ_size_tuple  # LQ size
            # randomly crop
            img_LQ_l, img_GT = self._paired_random_crop(img_LQ_l, img_GT, (H, W), GT_size,
                                                        scale)

            # augmentation - flip, rotate
            # GT is appended so that it goes through the same util.augment call as the LQs
            img_LQ_l.append(img_GT)
            rlt = util.augment(img_LQ_l, self.opt['use_flip'], self.opt['use_rot'])
            img_LQ_l = rlt[0:-1]
            img_GT = rlt[-1]

        # stack LQ images to NHWC, N is the frame number
        img_LQs = np.stack(img_LQ_l, axis=0)
        # BGR to RGB, HWC to CHW, numpy to tensor
        img_GT = img_GT[:, :, [2, 1, 0]]
        img_LQs = img_LQs[:, :, :, [2, 1, 0]]
        img_GT = torch.from_numpy(np.ascontiguousarray(np.transpose(img_GT, (2, 0, 1)))).float()
        img_LQs = torch.from_numpy(np.ascontiguousarray(np.transpose(img_LQs,
                                                                     (0, 3, 1, 2)))).float()
        return {'LQs': img_LQs, 'GT': img_GT, 'key': key}

    def __len__(self):
        ''' Number of keys (samples) in the dataset. '''
        return len(self.paths_GT)