added a cache folder to pull from when an image already exists (to avoid needless refetches in the future)

commit d1d5153874
parent 0d5bbfa465
Author: mrq
Date:   2023-02-08 19:21:24 +00:00

4 changed files with 21 additions and 0 deletions
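In short: both the Node and Python downloaders now check a local image cache before fetching, and mirror every fresh download back into that cache. A minimal sketch of the flow as standalone Python (the `retrieve` helper and `fetch_post` callback are illustrative names, not part of this repo):

import os
import shutil

def retrieve(post, config, fetch_post):
    # Cache-first retrieval: copy from the local image cache when possible,
    # otherwise download and mirror the result into the cache.
    out_path = os.path.join(config['output'], post['filename'])
    cache_path = os.path.join(config['images'], post['filename'])

    if os.path.exists(out_path):
        return                              # already downloaded, nothing to do
    if os.path.exists(cache_path):
        shutil.copy(cache_path, out_path)   # cache hit: no network request
        return
    fetch_post(post, out_path)              # cache miss: fetch for real
    if os.path.isdir(config['images']):
        shutil.copy(out_path, cache_path)   # populate the cache for next time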


@@ -1,6 +1,7 @@
{
    "booru": "e621",
    "output": "./images/downloaded/",
    "images": "./images/cache/",
    "limit": 320,
    "concurrency": 4,
    "userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",

images/cache/.gitkeep: new empty executable file (vendored, 0 lines)


@@ -45,6 +45,7 @@ let config = {
    output: `./images/downloaded/`, // directory to save downloaded files
    cache: `./data/cache.json`, // JSON file of cached tags; speeds up processing when used with the renamer script
    images: `./images/cache/`, // persistent image cache; a file found here is copied instead of re-fetched
    limit: 320, // how many posts to pull in one go
    concurrency: 4, // how many file requests to keep in flight at the same time
@@ -139,6 +140,11 @@ let parse = async () => {
            console.log(`Skipping existing file: ${booru.urls.posts}${post.id}`)
            continue;
        }
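        // reuse a previously cached copy instead of hitting the booru again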
        if ( FS.existsSync(`${config.images}${post.filename}`) ) {
            console.log(`Copying cached file: ${booru.urls.posts}${post.id}`)
            FS.copyFileSync(`${config.images}${post.filename}`, `${config.output}${post.filename}`)
            continue;
        }
        if ( config.filter ) {
            let filtered = null;
@@ -173,6 +179,11 @@ let parse = async () => {
        res.body.pipe(dest);
        dest.on('close', () => {
            console.log(`Downloaded: ${booru.urls.posts}${post.id}`)
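            // mirror the finished download into the image cache for future runs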
            if ( FS.existsSync(config.images) ) {
                FS.copyFileSync(`${config.output}${post.filename}`, `${config.images}${post.filename}`)
            }
            resolve()
        });
        dest.on('error', reject);


@@ -50,6 +50,7 @@ config = {
    'output': './images/downloaded/', # directory to save downloaded files
    'cache': './data/cache.json', # JSON file of cached tags; speeds up processing when used with the renamer script
    'images': './images/cache/', # persistent image cache; a file found here is copied instead of re-fetched
    'limit': 320, # how many posts to pull in one go
@@ -127,6 +128,11 @@ def parse():
        if os.path.exists(f"{config['output']}{post['filename']}"):
            print(f"Skipping existing file: {booru['urls']['posts']}{post['id']}")
            continue
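        # reuse a previously cached copy instead of hitting the booru again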
        if os.path.exists(f"{config['images']}{post['filename']}"):
            print(f"Copying cached file: {booru['urls']['posts']}{post['id']}")
            shutil.copy(os.path.join(config['images'], post['filename']), os.path.join(config['output'], post['filename']))
            continue
        if post['url'] is None:
            print(f"Skipping file that requires logging in: {booru['urls']['posts']}{post['id']}")
@@ -151,6 +157,9 @@ def parse():
        if not config['skipSave']:
            urllib.request.urlretrieve(post['url'], f"{config['output']}{post['filename']}")
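            # mirror the finished download into the image cache for future runs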
            if os.path.exists(config['images']):
                shutil.copy(os.path.join(config['output'], post['filename']), os.path.join(config['images'], post['filename']))
        print(f"Downloaded: {booru['urls']['posts']}{post['id']}")
        if config['rateLimit']: