Added a small `skipSave` flag to the fetch scripts that skips saving the fetched files; useful for grabbing (caching) tags before running the preprocess script on files that have already been downloaded.

This commit is contained in:
mrq 2022-10-12 20:05:54 +00:00
parent f000aab55f
commit caa1a1707b
2 changed files with 28 additions and 23 deletions

View File

@ -53,6 +53,8 @@ let config = {
filters: [ // consult the preprocess.js script for examples
"animated", // training only supports static images
],
skipSave: false, // useful if you want to just cache your tags before running pre-process on files you already downloaded
}
let booru = boorus[config.booru];
@ -112,22 +114,21 @@ let parse = async () => {
}
if ( config.filter ) {
let filtered = false;
let filtered = null;
// nasty nested loops, dying for a go-to
for ( let j in post.tags ) {
let tag = post.tags[j];
tag = post.tags[j];
for ( let k in config.filters ) {
let filter = config.filters[k];
if ( filter === tag || ( filter instanceof RegExp && tag.match(filter) ) ) {
filtered = true;
filtered = tag;
break;
}
}
if ( filtered ) break;
}
if ( filtered ) {
console.log(`Skipping filtered post: ${booru.urls.posts}${post.id}`, tag)
console.log(`Skipping filtered post: ${booru.urls.posts}${post.id}`, filtered)
break;
}
}
@ -139,6 +140,7 @@ let parse = async () => {
if ( !config.skipSave)
promises.push(Fetch(post.url, options).then(res => new Promise((resolve, reject) => {
const dest = FS.createWriteStream(`${config.output}${post.filename}`);
res.body.pipe(dest);

View File

@ -58,6 +58,8 @@ config = {
"animated", # training only supports static images
],
'filtersRegex': [],
'skipSave': False, # useful if you want to just cache your tags before running pre-process on files you already downloaded
}
booru = boorus[config['booru']]
@ -118,22 +120,23 @@ def parse():
continue
if config["filter"]:
filtered = False
filtered = None
for tag in post['tags']:
if tag in config['filters']:
filtered = True
filtered = tag
break
for filter in config['filtersRegex']:
if re.search(filter, tag):
filtered = True
filtered = tag
break
if filtered:
if filtered is not None:
break
if filtered:
if filtered is not None:
print(f"Skipping filtered post: {booru['urls']['posts']}{post['id']} {tag}")
continue
if not config['skipSave']:
urllib.request.urlretrieve(post['url'], f"{config['output']}{post['filename']}")
print(f"Downloaded : {booru['urls']['posts']}{post['id']}")