diff --git a/data/config/examples/fetch.json b/data/config/examples/fetch.json
index 4603e24..0a865b2 100755
--- a/data/config/examples/fetch.json
+++ b/data/config/examples/fetch.json
@@ -1,6 +1,7 @@
 {
 	"booru": "e621",
 	"output": "./images/downloaded/",
+	"images": "./images/cache/",
 	"limit": 320,
 	"concurrency": 4,
 	"userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
diff --git a/images/cache/.gitkeep b/images/cache/.gitkeep
new file mode 100755
index 0000000..e69de29
diff --git a/src/fetch.js b/src/fetch.js
index 13acd57..0b441d9 100755
--- a/src/fetch.js
+++ b/src/fetch.js
@@ -45,6 +45,7 @@ let config = {
 	output: `./images/downloaded/`, // directory to save your files
 	cache: `./data/cache.json`, // JSON file of cached tags, will speed up processing when used for the renamer script
+	images: `./images/cache/`, // total cache of images, will copy if file exists here
 
 	limit: 320, // how many posts to pull in one go
 	concurrency: 4, // how many file requests to keep in flight at the same time
@@ -139,6 +140,11 @@ let parse = async () => {
 			console.log(`Skipping existing file: ${booru.urls.posts}${post.id}`)
 			continue;
 		}
+		if ( FS.existsSync(`${config.images}${post.filename}`) ) {
+			console.log(`Copying cached file: ${booru.urls.posts}${post.id}`)
+			FS.copyFileSync(`${config.images}${post.filename}`, `${config.output}${post.filename}`)
+			continue;
+		}
 
 		if ( config.filter ) {
 			let filtered = null;
@@ -173,6 +179,11 @@ let parse = async () => {
 				res.body.pipe(dest);
 				dest.on('close', () => {
 					console.log(`Downloaded: ${booru.urls.posts}${post.id}`)
+
+					if ( FS.existsSync(`${config.images}`) ) {
+						FS.copyFileSync(`${config.output}${post.filename}`, `${config.images}${post.filename}`)
+					}
+
 					resolve()
 				});
 				dest.on('error', reject);
diff --git a/src/fetch.py b/src/fetch.py
index b87a5c6..56aaeb0 100755
--- a/src/fetch.py
+++ b/src/fetch.py
@@ -50,6 +50,7 @@ config = {
 	'output': './images/downloaded/', # directory to save your files
 	'cache': './data/cache.json', # JSON file of cached tags, will speed up processing when used for the renamer script
+	'images': './images/cache/', # total cache of images, will copy if file exists here
 
 	'limit': 320, # how many posts to pull in one go
 
@@ -127,6 +128,11 @@ def parse():
 		if os.path.exists(f"{config['output']}{post['filename']}"):
 			print(f"Skipping existing file: {booru['urls']['posts']}{post['id']}")
 			continue
+
+		if os.path.exists(f"{config['images']}{post['filename']}"):
+			print(f"Copying cached file: {booru['urls']['posts']}{post['id']}")
+			shutil.copy(os.path.join(config['images'], post['filename']), os.path.join(config['output'], post['filename']))
+			continue
 
 		if post['url'] is None:
 			print(f"Skipping file that requires logging in: {booru['urls']['posts']}{post['id']}")
@@ -151,6 +157,9 @@ def parse():
 		if not config['skipSave']:
 			urllib.request.urlretrieve(post['url'], f"{config['output']}{post['filename']}")
 
+			if os.path.exists(f"{config['images']}"):
+				shutil.copy(os.path.join(config['output'], post['filename']), os.path.join(config['images'], post['filename']))
+
 		print(f"Downloaded : {booru['urls']['posts']}{post['id']}")
 
 		if config['rateLimit']:
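
The patch above introduces a persistent image cache: before downloading, each fetcher checks ./images/cache/ for the file and copies a cached copy into the output directory instead of re-fetching it; after a successful download, the file is mirrored back into the cache for future runs. Below is a minimal standalone sketch of that flow in Python. Two assumptions are not visible in the diff: fetch.py must already contain "import shutil" (the diff adds shutil.copy calls but no import hunk), and the cache directory must exist on disk (the new .gitkeep keeps it tracked in the repository). The fetch_file helper is hypothetical, for illustration only.

import os
import shutil
import urllib.request

config = {
    'output': './images/downloaded/',  # final destination for downloaded files
    'images': './images/cache/',       # persistent image cache
}

def fetch_file(url, filename):
    # hypothetical helper illustrating the cache-first flow from the patch
    output_path = os.path.join(config['output'], filename)
    cache_path = os.path.join(config['images'], filename)

    if os.path.exists(output_path):
        return  # already downloaded, nothing to do

    if os.path.exists(cache_path):
        # cache hit: copy the cached file instead of re-downloading
        shutil.copy(cache_path, output_path)
        return

    # cache miss: download, then mirror into the cache if the directory exists
    urllib.request.urlretrieve(url, output_path)
    if os.path.exists(config['images']):
        shutil.copy(output_path, cache_path)

Because every download is copied back into the cache, the cache fills itself over repeated runs, so clearing images/downloaded/ never forces a re-download of files already seen.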