Added a small flag to skip saving in the fetch scripts; this is useful for grabbing tags before running the preprocess script on files that have already been downloaded.
This commit is contained in:
parent
f000aab55f
commit
caa1a1707b
12
src/fetch.js
12
src/fetch.js
|
@ -53,6 +53,8 @@ let config = {
|
||||||
filters: [ // consult the preprocess.js script for examples
|
filters: [ // consult the preprocess.js script for examples
|
||||||
"animated", // training only supports static images
|
"animated", // training only supports static images
|
||||||
],
|
],
|
||||||
|
|
||||||
|
skipSave: false, // useful if you want to just cache your tags before running pre-process on files you already downloaded
|
||||||
}
|
}
|
||||||
|
|
||||||
let booru = boorus[config.booru];
|
let booru = boorus[config.booru];
|
||||||
|
@ -112,22 +114,21 @@ let parse = async () => {
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( config.filter ) {
|
if ( config.filter ) {
|
||||||
let filtered = false;
|
let filtered = null;
|
||||||
|
|
||||||
// nasty nested loops, dying for a go-to
|
// nasty nested loops, dying for a go-to
|
||||||
for ( let j in post.tags ) {
|
for ( let j in post.tags ) {
|
||||||
let tag = post.tags[j];
|
tag = post.tags[j];
|
||||||
for ( let k in config.filters ) {
|
for ( let k in config.filters ) {
|
||||||
let filter = config.filters[k];
|
let filter = config.filters[k];
|
||||||
if ( filter === tag || ( filter instanceof RegExp && tag.match(filter) ) ) {
|
if ( filter === tag || ( filter instanceof RegExp && tag.match(filter) ) ) {
|
||||||
filtered = true;
|
filtered = tag;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ( filtered ) break;
|
if ( filtered ) break;
|
||||||
}
|
}
|
||||||
if ( filtered ) {
|
if ( filtered ) {
|
||||||
console.log(`Skipping filtered post: ${booru.urls.posts}${post.id}`, tag)
|
console.log(`Skipping filtered post: ${booru.urls.posts}${post.id}`, filtered)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -139,6 +140,7 @@ let parse = async () => {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if ( !config.skipSave)
|
||||||
promises.push(Fetch(post.url, options).then(res => new Promise((resolve, reject) => {
|
promises.push(Fetch(post.url, options).then(res => new Promise((resolve, reject) => {
|
||||||
const dest = FS.createWriteStream(`${config.output}${post.filename}`);
|
const dest = FS.createWriteStream(`${config.output}${post.filename}`);
|
||||||
res.body.pipe(dest);
|
res.body.pipe(dest);
|
||||||
|
|
13
src/fetch.py
13
src/fetch.py
|
@ -58,6 +58,8 @@ config = {
|
||||||
"animated", # training only supports static images
|
"animated", # training only supports static images
|
||||||
],
|
],
|
||||||
'filtersRegex': [],
|
'filtersRegex': [],
|
||||||
|
|
||||||
|
'skipSave': False, # useful if you want to just cache your tags before running pre-process on files you already downloaded
|
||||||
}
|
}
|
||||||
|
|
||||||
booru = boorus[config['booru']]
|
booru = boorus[config['booru']]
|
||||||
|
@ -118,22 +120,23 @@ def parse():
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if config["filter"]:
|
if config["filter"]:
|
||||||
filtered = False
|
filtered = None
|
||||||
for tag in post['tags']:
|
for tag in post['tags']:
|
||||||
if tag in config['filters']:
|
if tag in config['filters']:
|
||||||
filtered = True
|
filtered = tag
|
||||||
break
|
break
|
||||||
for filter in config['filtersRegex']:
|
for filter in config['filtersRegex']:
|
||||||
if re.search(filter, tag):
|
if re.search(filter, tag):
|
||||||
filtered = True
|
filtered = tag
|
||||||
break
|
break
|
||||||
if filtered:
|
if filtered is not None:
|
||||||
break
|
break
|
||||||
if filtered:
|
if filtered is not None:
|
||||||
print(f"Skipping filtered post: {booru['urls']['posts']}{post['id']} {tag}")
|
print(f"Skipping filtered post: {booru['urls']['posts']}{post['id']} {tag}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
||||||
|
if not config['skipSave']:
|
||||||
urllib.request.urlretrieve(post['url'], f"{config['output']}{post['filename']}")
|
urllib.request.urlretrieve(post['url'], f"{config['output']}{post['filename']}")
|
||||||
print(f"Downloaded : {booru['urls']['posts']}{post['id']}")
|
print(f"Downloaded : {booru['urls']['posts']}{post['id']}")
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user