From 7938f9f50bf147c1204bf0f8c3daecdc0faef4bc Mon Sep 17 00:00:00 2001
From: James Betker
Date: Sat, 19 Dec 2020 15:13:46 -0700
Subject: [PATCH] Fix bug with single_image_dataset which prevented working
 on multiple directories

---
 codes/data/base_unsupervised_image_dataset.py | 2 +-
 codes/data/chunk_with_reference.py            | 4 ++++
 codes/data/single_image_dataset.py            | 1 +
 codes/scripts/extract_subimages_with_ref.py   | 8 ++++----
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/codes/data/base_unsupervised_image_dataset.py b/codes/data/base_unsupervised_image_dataset.py
index 499dc258..83ae0a3b 100644
--- a/codes/data/base_unsupervised_image_dataset.py
+++ b/codes/data/base_unsupervised_image_dataset.py
@@ -50,7 +50,7 @@ class BaseUnsupervisedImageDataset(data.Dataset):
         # Indexing this dataset is tricky. Aid it by having a list of starting indices for each chunk.
         start = 0
         self.starting_indices = []
-        for c in chunks:
+        for c in self.chunks:
             self.starting_indices.append(start)
             start += len(c)
         self.len = start
diff --git a/codes/data/chunk_with_reference.py b/codes/data/chunk_with_reference.py
index b5363edb..6c21f7a2 100644
--- a/codes/data/chunk_with_reference.py
+++ b/codes/data/chunk_with_reference.py
@@ -28,6 +28,10 @@ class ChunkWithReference:
             if tile_id in centers.keys():
                 center, tile_width = centers[tile_id]
             elif self.strict:
+                print("Could not find the given tile id in the accompanying centers.pt. This generally means that "
+                      "centers.pt was overwritten at some point, e.g. by duplicate data. If you don't care about tile "
+                      "centers, consider passing strict=false to the dataset options. (Note: you must re-build your "
+                      "caches for this setting change to take effect.)")
                 raise FileNotFoundError(tile_id, self.tiles[item])
             else:
                 center = torch.tensor([128, 128], dtype=torch.long)
diff --git a/codes/data/single_image_dataset.py b/codes/data/single_image_dataset.py
index 4048f197..d5ae7d88 100644
--- a/codes/data/single_image_dataset.py
+++ b/codes/data/single_image_dataset.py
@@ -50,6 +50,7 @@ if __name__ == '__main__':
         'force_multiple': 32,
         'scale': 2,
         'eval': False,
+        'strict': False,
         'fixed_corruptions': ['jpeg-broad', 'gaussian_blur'],
         'random_corruptions': ['noise-5', 'none'],
         'num_corrupts_per_image': 1,
diff --git a/codes/scripts/extract_subimages_with_ref.py b/codes/scripts/extract_subimages_with_ref.py
index 7fbaf998..1ca5abd1 100644
--- a/codes/scripts/extract_subimages_with_ref.py
+++ b/codes/scripts/extract_subimages_with_ref.py
@@ -19,12 +19,12 @@ def main():
     # compression time. If read raw images during training, use 0 for faster IO speed.
     opt['dest'] = 'file'
-    opt['input_folder'] = 'F:\\4k6k\\datasets\\images\youtube\\4k_quote_unquote\\images_1'
-    opt['save_folder'] = 'F:\\4k6k\\datasets\\images\\youtube_massive'
+    opt['input_folder'] = 'F:\\4k6k\\datasets\\images\\youtube\\images_cook'
+    opt['save_folder'] = 'F:\\4k6k\\datasets\\images\\youtube_massive_cook'
     opt['crop_sz'] = [512, 1024, 2048]  # the size of each sub-image
-    opt['step'] = [512, 1024, 2048]  # step of the sliding crop window
+    opt['step'] = [256, 512, 1024]  # step of the sliding crop window
     opt['exclusions'] = [[],[],[]]  # image names matching these terms wont be included in the processing.
-    opt['thres_sz'] = 256  # size threshold
+    opt['thres_sz'] = 128  # size threshold
     opt['resize_final_img'] = [.5, .25, .125]
     opt['only_resize'] = False
     opt['vertical_split'] = False
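
A minimal standalone sketch of the indexing scheme the first hunk repairs, for context. The class and variable names below are illustrative, not the repository's; it assumes only that chunks collected from several source directories are concatenated into self.chunks, which is what makes iterating the local per-directory `chunks` list (the pre-patch behavior) leave the starting-index table and reported length covering only the last directory scanned.

import bisect

class ChunkedDataset:
    """Illustrative stand-in for BaseUnsupervisedImageDataset's indexing."""
    def __init__(self, chunk_lists):
        # One chunk list per source directory, concatenated together.
        self.chunks = []
        for chunks in chunk_lists:
            self.chunks.extend(chunks)
        # Build the starting-index table over the *combined* list. Iterating
        # the local `chunks` here instead (the bug) would only cover the
        # chunks contributed by the final directory.
        start = 0
        self.starting_indices = []
        for c in self.chunks:
            self.starting_indices.append(start)
            start += len(c)
        self.len = start

    def __len__(self):
        return self.len

    def __getitem__(self, item):
        # Locate the chunk containing `item` via the starting-index table.
        chunk_ind = bisect.bisect_right(self.starting_indices, item) - 1
        return self.chunks[chunk_ind][item - self.starting_indices[chunk_ind]]

# Two directories contributing chunks of 3 and 2 tiles: the combined dataset
# must report length 5 and index across the directory boundary.
ds = ChunkedDataset([[['a', 'b', 'c']], [['d', 'e']]])
assert len(ds) == 5 and ds[3] == 'd'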