Added a small `skipSave` flag to the fetch scripts that skips saving the downloaded files; useful for grabbing tags before running the preprocess script on files that have already been downloaded.

This commit is contained in:
mrq 2022-10-12 20:05:54 +00:00
parent f000aab55f
commit caa1a1707b
2 changed files with 28 additions and 23 deletions

View File

@@ -53,6 +53,8 @@ let config = {
filters: [ // consult the preprocess.js script for examples filters: [ // consult the preprocess.js script for examples
"animated", // training only supports static images "animated", // training only supports static images
], ],
skipSave: false, // useful if you want to just cache your tags before running pre-process on files you already downloaded
} }
let booru = boorus[config.booru]; let booru = boorus[config.booru];
@@ -112,22 +114,21 @@ let parse = async () => {
} }
if ( config.filter ) { if ( config.filter ) {
let filtered = false; let filtered = null;
// nasty nested loops, dying for a go-to // nasty nested loops, dying for a go-to
for ( let j in post.tags ) { for ( let j in post.tags ) {
let tag = post.tags[j]; tag = post.tags[j];
for ( let k in config.filters ) { for ( let k in config.filters ) {
let filter = config.filters[k]; let filter = config.filters[k];
if ( filter === tag || ( filter instanceof RegExp && tag.match(filter) ) ) { if ( filter === tag || ( filter instanceof RegExp && tag.match(filter) ) ) {
filtered = true; filtered = tag;
break; break;
} }
} }
if ( filtered ) break; if ( filtered ) break;
} }
if ( filtered ) { if ( filtered ) {
console.log(`Skipping filtered post: ${booru.urls.posts}${post.id}`, tag) console.log(`Skipping filtered post: ${booru.urls.posts}${post.id}`, filtered)
break; break;
} }
} }
@@ -139,6 +140,7 @@ let parse = async () => {
if ( !config.skipSave)
promises.push(Fetch(post.url, options).then(res => new Promise((resolve, reject) => { promises.push(Fetch(post.url, options).then(res => new Promise((resolve, reject) => {
const dest = FS.createWriteStream(`${config.output}${post.filename}`); const dest = FS.createWriteStream(`${config.output}${post.filename}`);
res.body.pipe(dest); res.body.pipe(dest);

View File

@@ -58,6 +58,8 @@ config = {
"animated", # training only supports static images "animated", # training only supports static images
], ],
'filtersRegex': [], 'filtersRegex': [],
'skipSave': False, # useful if you want to just cache your tags before running pre-process on files you already downloaded
} }
booru = boorus[config['booru']] booru = boorus[config['booru']]
@@ -118,22 +120,23 @@ def parse():
continue continue
if config["filter"]: if config["filter"]:
filtered = False filtered = None
for tag in post['tags']: for tag in post['tags']:
if tag in config['filters']: if tag in config['filters']:
filtered = True filtered = tag
break break
for filter in config['filtersRegex']: for filter in config['filtersRegex']:
if re.search(filter, tag): if re.search(filter, tag):
filtered = True filtered = tag
break break
if filtered: if filtered is not None:
break break
if filtered: if filtered is not None:
print(f"Skipping filtered post: {booru['urls']['posts']}{post['id']} {tag}") print(f"Skipping filtered post: {booru['urls']['posts']}{post['id']} {tag}")
continue continue
if not config['skipSave']:
urllib.request.urlretrieve(post['url'], f"{config['output']}{post['filename']}") urllib.request.urlretrieve(post['url'], f"{config['output']}{post['filename']}")
print(f"Downloaded : {booru['urls']['posts']}{post['id']}") print(f"Downloaded : {booru['urls']['posts']}{post['id']}")