From 7938f9f50bf147c1204bf0f8c3daecdc0faef4bc Mon Sep 17 00:00:00 2001
From: James Betker
Date: Sat, 19 Dec 2020 15:13:46 -0700
Subject: [PATCH] Fix bug with single_image_dataset which prevented working
 on multiple directories

---
 codes/data/base_unsupervised_image_dataset.py | 2 +-
 codes/data/chunk_with_reference.py            | 4 ++++
 codes/data/single_image_dataset.py            | 1 +
 codes/scripts/extract_subimages_with_ref.py   | 8 ++++----
 4 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/codes/data/base_unsupervised_image_dataset.py b/codes/data/base_unsupervised_image_dataset.py
index 499dc258..83ae0a3b 100644
--- a/codes/data/base_unsupervised_image_dataset.py
+++ b/codes/data/base_unsupervised_image_dataset.py
@@ -50,7 +50,7 @@ class BaseUnsupervisedImageDataset(data.Dataset):
         # Indexing this dataset is tricky. Aid it by having a list of starting indices for each chunk.
         start = 0
         self.starting_indices = []
-        for c in chunks:
+        for c in self.chunks:
             self.starting_indices.append(start)
             start += len(c)
         self.len = start
diff --git a/codes/data/chunk_with_reference.py b/codes/data/chunk_with_reference.py
index b5363edb..6c21f7a2 100644
--- a/codes/data/chunk_with_reference.py
+++ b/codes/data/chunk_with_reference.py
@@ -28,6 +28,10 @@ class ChunkWithReference:
             if tile_id in centers.keys():
                 center, tile_width = centers[tile_id]
             elif self.strict:
+                print("Could not find the given tile id in the accompanying centers.pt. This generally means that "
+                      "centers.pt was overwritten at some point, e.g. by duplicate data. If you don't care about tile "
+                      "centers, consider passing strict=false to the dataset options. (Note: you must re-build your "
+                      "caches for this setting change to take effect.)")
                 raise FileNotFoundError(tile_id, self.tiles[item])
             else:
                 center = torch.tensor([128, 128], dtype=torch.long)
diff --git a/codes/data/single_image_dataset.py b/codes/data/single_image_dataset.py
index 4048f197..d5ae7d88 100644
--- a/codes/data/single_image_dataset.py
+++ b/codes/data/single_image_dataset.py
@@ -50,6 +50,7 @@ if __name__ == '__main__':
         'force_multiple': 32,
         'scale': 2,
         'eval': False,
+        'strict': False,
         'fixed_corruptions': ['jpeg-broad', 'gaussian_blur'],
         'random_corruptions': ['noise-5', 'none'],
         'num_corrupts_per_image': 1,
diff --git a/codes/scripts/extract_subimages_with_ref.py b/codes/scripts/extract_subimages_with_ref.py
index 7fbaf998..1ca5abd1 100644
--- a/codes/scripts/extract_subimages_with_ref.py
+++ b/codes/scripts/extract_subimages_with_ref.py
@@ -19,12 +19,12 @@ def main():
     # compression time. If read raw images during training, use 0 for faster IO speed.
     opt['dest'] = 'file'
-    opt['input_folder'] = 'F:\\4k6k\\datasets\\images\youtube\\4k_quote_unquote\\images_1'
-    opt['save_folder'] = 'F:\\4k6k\\datasets\\images\\youtube_massive'
+    opt['input_folder'] = 'F:\\4k6k\\datasets\\images\\youtube\\images_cook'
+    opt['save_folder'] = 'F:\\4k6k\\datasets\\images\\youtube_massive_cook'
     opt['crop_sz'] = [512, 1024, 2048]  # the size of each sub-image
-    opt['step'] = [512, 1024, 2048]  # step of the sliding crop window
+    opt['step'] = [256, 512, 1024]  # step of the sliding crop window
     opt['exclusions'] = [[],[],[]]  # image names matching these terms wont be included in the processing.
-    opt['thres_sz'] = 256  # size threshold
+    opt['thres_sz'] = 128  # size threshold
     opt['resize_final_img'] = [.5, .25, .125]
     opt['only_resize'] = False
     opt['vertical_split'] = False
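
A minimal standalone sketch of the indexing scheme the first hunk repairs, for context. The class and variable names below are illustrative, not the repository's; it assumes only that chunks collected from several source directories are concatenated into self.chunks, which is what makes iterating the local per-directory `chunks` list (the pre-patch behavior) leave the starting-index table and reported length covering only the last directory scanned.

import bisect

class ChunkedDataset:
    """Illustrative stand-in for BaseUnsupervisedImageDataset's indexing."""
    def __init__(self, chunk_lists):
        # One chunk list per source directory, concatenated together.
        self.chunks = []
        for chunks in chunk_lists:
            self.chunks.extend(chunks)
        # Build the starting-index table over the *combined* list. Iterating
        # the local `chunks` here instead (the bug) would only cover the
        # chunks contributed by the final directory.
        start = 0
        self.starting_indices = []
        for c in self.chunks:
            self.starting_indices.append(start)
            start += len(c)
        self.len = start

    def __len__(self):
        return self.len

    def __getitem__(self, item):
        # Locate the chunk containing `item` via the starting-index table.
        chunk_ind = bisect.bisect_right(self.starting_indices, item) - 1
        return self.chunks[chunk_ind][item - self.starting_indices[chunk_ind]]

# Two directories contributing chunks of 3 and 2 tiles: the combined dataset
# must report length 5 and index across the directory boundary.
ds = ChunkedDataset([[['a', 'b', 'c']], [['d', 'e']]])
assert len(ds) == 5 and ds[3] == 'd'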