From c47925ae348962a6d18b1f9903bf495ce12a6e1c Mon Sep 17 00:00:00 2001
From: James Betker <jbetker@gmail.com>
Date: Fri, 13 Nov 2020 11:04:03 -0700
Subject: [PATCH] New image extractor utility

---
 codes/scripts/extract_square_images.py | 80 ++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)
 create mode 100644 codes/scripts/extract_square_images.py

diff --git a/codes/scripts/extract_square_images.py b/codes/scripts/extract_square_images.py
new file mode 100644
index 00000000..b6ff8cb7
--- /dev/null
+++ b/codes/scripts/extract_square_images.py
@@ -0,0 +1,80 @@
+"""A multi-worker tool that center-crops images to squares, resizes them and saves them as JPEGs."""
+import os
+import os.path as osp
+import numpy as np
+import cv2
+from PIL import Image
+import data.util as data_util  # noqa: E402
+import torch.utils.data as data
+from tqdm import tqdm
+import torch
+
+
+def main():
+    """Configure the extraction job, create the output folder, and run it."""
+    opt = {
+        'n_thread': 20,  # Passed to the DataLoader as num_workers.
+        # OpenCV JPEG quality (0-100); higher means better quality and larger files.
+        'compression_level': 90,
+        'dest': 'file',
+        'input_folder': 'F:\\4k6k\\datasets\\ns_images\\imagesets\\imgset2',
+        'save_folder': 'F:\\4k6k\\datasets\\ns_images\\imagesets\\imgset_raw_2',
+        'imgsize': 1024,  # Side length, in pixels, of the square output images.
+    }
+
+    save_folder = opt['save_folder']
+    if not osp.exists(save_folder):
+        # exist_ok tolerates the folder being created between the check and this call.
+        os.makedirs(save_folder, exist_ok=True)
+        print('mkdir [{:s}] ...'.format(save_folder))
+
+    extract_single(opt)
+
+
+class TiledDataset(data.Dataset):
+    """Dataset whose items center-crop, resize and save one image each; items are always None."""
+    def __init__(self, opt):
+        self.opt = opt
+        self.images = data_util._get_paths_from_images(opt['input_folder'])
+
+    def __getitem__(self, index):
+        return self.get(index)
+
+    def get(self, index):
+        """Write the square JPEG for image `index` into opt['save_folder']; skip unusable images."""
+        path = self.images[index]
+        img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
+        # cv2.imread returns None (it does not raise) on unreadable files; greyscale is unsupported.
+        if img is None or img.ndim == 2:
+            return None
+        h, w = img.shape[:2]
+        # Skip images whose shorter side is under 1024 pixels.
+        dim = min(h, w)
+        if dim < 1024:
+            return None
+
+        # Keep only the central dim x dim square, since this is often the most salient part.
+        top, left = (h - dim) // 2, (w - dim) // 2
+        img = img[top:top + dim, left:left + dim, :]
+        img = cv2.resize(img, (self.opt['imgsize'], self.opt['imgsize']), interpolation=cv2.INTER_AREA)
+        out_path = osp.join(self.opt['save_folder'], osp.basename(path) + ".jpg")
+        cv2.imwrite(out_path, img, [cv2.IMWRITE_JPEG_QUALITY, self.opt['compression_level']])
+        return None
+
+    def __len__(self):
+        return len(self.images)
+
+
+def identity(x):  # No-op collate_fn: returns the batch as-is (presumably top-level so workers can pickle it).
+    return x
+
+def extract_single(opt):
+    """Process every image in opt['input_folder'] by draining a multi-worker DataLoader."""
+    loader = data.DataLoader(TiledDataset(opt), num_workers=opt['n_thread'], collate_fn=identity)
+    # Each item writes its output file as a side effect, so the batches themselves are discarded.
+    for _ in tqdm(loader):
+        pass
+
+
+if __name__ == '__main__':  # Run the extraction only when executed as a script.
+    main()