added a cache folder to pull from when an image already exists (to avoid needless refetches in the future)

commit d1d5153874
parent 0d5bbfa465
Author: mrq
Date:   2023-02-08 19:21:24 +00:00

4 changed files with 21 additions and 0 deletions
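In short: both the Node and Python downloaders now check a local image cache before fetching, and mirror every fresh download back into that cache. A minimal sketch of the flow as standalone Python (the `retrieve` helper and `fetch_post` callback are illustrative names, not part of this repo):

import os
import shutil

def retrieve(post, config, fetch_post):
    # Cache-first retrieval: copy from the local image cache when possible,
    # otherwise download and mirror the result into the cache.
    out_path = os.path.join(config['output'], post['filename'])
    cache_path = os.path.join(config['images'], post['filename'])

    if os.path.exists(out_path):
        return                              # already downloaded, nothing to do
    if os.path.exists(cache_path):
        shutil.copy(cache_path, out_path)   # cache hit: no network request
        return
    fetch_post(post, out_path)              # cache miss: fetch for real
    if os.path.isdir(config['images']):
        shutil.copy(out_path, cache_path)   # populate the cache for next time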


@@ -1,6 +1,7 @@
{
    "booru": "e621",
    "output": "./images/downloaded/",
    "images": "./images/cache/",
    "limit": 320,
    "concurrency": 4,
    "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",

images/cache/.gitkeep: new empty executable file (vendored, 0 lines)


@@ -45,6 +45,7 @@ let config = {
    output: `./images/downloaded/`, // directory to save downloaded files
    cache: `./data/cache.json`, // JSON file of cached tags; speeds up processing when used with the renamer script
    images: `./images/cache/`, // persistent image cache; a file found here is copied instead of re-fetched
    limit: 320, // how many posts to pull in one go
    concurrency: 4, // how many file requests to keep in flight at the same time
@@ -139,6 +140,11 @@ let parse = async () => {
            console.log(`Skipping existing file: ${booru.urls.posts}${post.id}`)
            continue;
        }
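        // reuse a previously cached copy instead of hitting the booru again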
        if ( FS.existsSync(`${config.images}${post.filename}`) ) {
            console.log(`Copying cached file: ${booru.urls.posts}${post.id}`)
            FS.copyFileSync(`${config.images}${post.filename}`, `${config.output}${post.filename}`)
            continue;
        }
        if ( config.filter ) {
            let filtered = null;
@@ -173,6 +179,11 @@ let parse = async () => {
        res.body.pipe(dest);
        dest.on('close', () => {
            console.log(`Downloaded: ${booru.urls.posts}${post.id}`)
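            // mirror the finished download into the image cache for future runs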
            if ( FS.existsSync(config.images) ) {
                FS.copyFileSync(`${config.output}${post.filename}`, `${config.images}${post.filename}`)
            }
            resolve()
        });
        dest.on('error', reject);


@@ -50,6 +50,7 @@ config = {
    'output': './images/downloaded/', # directory to save downloaded files
    'cache': './data/cache.json', # JSON file of cached tags; speeds up processing when used with the renamer script
    'images': './images/cache/', # persistent image cache; a file found here is copied instead of re-fetched
    'limit': 320, # how many posts to pull in one go
@@ -127,6 +128,11 @@ def parse():
        if os.path.exists(f"{config['output']}{post['filename']}"):
            print(f"Skipping existing file: {booru['urls']['posts']}{post['id']}")
            continue
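        # reuse a previously cached copy instead of hitting the booru again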
        if os.path.exists(f"{config['images']}{post['filename']}"):
            print(f"Copying cached file: {booru['urls']['posts']}{post['id']}")
            shutil.copy(os.path.join(config['images'], post['filename']), os.path.join(config['output'], post['filename']))
            continue
        if post['url'] is None:
            print(f"Skipping file that requires logging in: {booru['urls']['posts']}{post['id']}")
@@ -151,6 +157,9 @@ def parse():
        if not config['skipSave']:
            urllib.request.urlretrieve(post['url'], f"{config['output']}{post['filename']}")
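            # mirror the finished download into the image cache for future runs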
            if os.path.exists(config['images']):
                shutil.copy(os.path.join(config['output'], post['filename']), os.path.join(config['images'], post['filename']))
        print(f"Downloaded: {booru['urls']['posts']}{post['id']}")
        if config['rateLimit']: