From caa1a1707bbcdc7a64e320f3723f687ecabd4e28 Mon Sep 17 00:00:00 2001 From: mrq Date: Wed, 12 Oct 2022 20:05:54 +0000 Subject: [PATCH] small flag added to skip saving in the fetch scripts, useful to grab tags before running the preprocess script on files already downloaded --- src/fetch.js | 34 ++++++++++++++++++---------------- src/fetch.py | 17 ++++++++++------- 2 files changed, 28 insertions(+), 23 deletions(-) diff --git a/src/fetch.js b/src/fetch.js index 1f3450b..75a929f 100755 --- a/src/fetch.js +++ b/src/fetch.js @@ -53,6 +53,8 @@ let config = { filters: [ // consult the preprocess.js script for examples "animated", // training only supports static images ], + + skipSave: false, // useful if you want to just cache your tags before running pre-process on files you already downloaded } let booru = boorus[config.booru]; @@ -112,22 +114,21 @@ let parse = async () => { } if ( config.filter ) { - let filtered = false; - + let filtered = null; // nasty nested loops, dying for a go-to for ( let j in post.tags ) { - let tag = post.tags[j]; + tag = post.tags[j]; for ( let k in config.filters ) { let filter = config.filters[k]; if ( filter === tag || ( filter instanceof RegExp && tag.match(filter) ) ) { - filtered = true; + filtered = tag; break; } } if ( filtered ) break; } if ( filtered ) { - console.log(`Skipping filtered post: ${booru.urls.posts}${post.id}`, tag) + console.log(`Skipping filtered post: ${booru.urls.posts}${post.id}`, filtered) break; } } @@ -139,17 +140,18 @@ let parse = async () => { - promises.push(Fetch(post.url, options).then(res => new Promise((resolve, reject) => { - const dest = FS.createWriteStream(`${config.output}${post.filename}`); - res.body.pipe(dest); - dest.on('close', () => { - console.log(`Downloaded: ${booru.urls.posts}${post.id}`) - resolve() - }); - dest.on('error', reject); - })).catch((err)=>{ - console.error(`Error while fetching: ${post.id}`, posts[i], err); - })); + if ( !config.skipSave) + promises.push(Fetch(post.url, options).then(res => new Promise((resolve, reject) => { + const dest = FS.createWriteStream(`${config.output}${post.filename}`); + res.body.pipe(dest); + dest.on('close', () => { + console.log(`Downloaded: ${booru.urls.posts}${post.id}`) + resolve() + }); + dest.on('error', reject); + })).catch((err)=>{ + console.error(`Error while fetching: ${post.id}`, posts[i], err); + })); } if ( config.rateLimit ) await new Promise( (resolve) => { diff --git a/src/fetch.py b/src/fetch.py index c6284ab..93738b5 100755 --- a/src/fetch.py +++ b/src/fetch.py @@ -58,6 +58,8 @@ config = { "animated", # training only supports static images ], 'filtersRegex': [], + + 'skipSave': False, # useful if you want to just cache your tags before running pre-process on files you already downloaded } booru = boorus[config['booru']] @@ -118,24 +120,25 @@ def parse(): continue if config["filter"]: - filtered = False + filtered = None for tag in post['tags']: if tag in config['filters']: - filtered = True + filtered = tag break for filter in config['filtersRegex']: if re.search(filter, tag): - filtered = True + filtered = tag break - if filtered: + if filtered is not None: break - if filtered: + if filtered is not None: print(f"Skipping filtered post: {booru['urls']['posts']}{post['id']} {tag}") continue - urllib.request.urlretrieve(post['url'], f"{config['output']}{post['filename']}") - print(f"Downloaded : {booru['urls']['posts']}{post['id']}") + if not config['skipSave']: + urllib.request.urlretrieve(post['url'], f"{config['output']}{post['filename']}") + print(f"Downloaded : {booru['urls']['posts']}{post['id']}") if config['rateLimit']: time.sleep(config['rateLimit'] / 1000.0)