Added a small flag to the fetch scripts to skip saving files; useful for grabbing tags before running the preprocess script on files that have already been downloaded.
This commit is contained in:
parent
f000aab55f
commit
caa1a1707b
34
src/fetch.js
34
src/fetch.js
|
@ -53,6 +53,8 @@ let config = {
|
||||||
filters: [ // consult the preprocess.js script for examples
|
filters: [ // consult the preprocess.js script for examples
|
||||||
"animated", // training only supports static images
|
"animated", // training only supports static images
|
||||||
],
|
],
|
||||||
|
|
||||||
|
skipSave: false, // useful if you want to just cache your tags before running pre-process on files you already downloaded
|
||||||
}
|
}
|
||||||
|
|
||||||
let booru = boorus[config.booru];
|
let booru = boorus[config.booru];
|
||||||
|
@ -112,22 +114,21 @@ let parse = async () => {
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( config.filter ) {
|
if ( config.filter ) {
|
||||||
let filtered = false;
|
let filtered = null;
|
||||||
|
|
||||||
// nasty nested loops, dying for a go-to
|
// nasty nested loops, dying for a go-to
|
||||||
for ( let j in post.tags ) {
|
for ( let j in post.tags ) {
|
||||||
let tag = post.tags[j];
|
tag = post.tags[j];
|
||||||
for ( let k in config.filters ) {
|
for ( let k in config.filters ) {
|
||||||
let filter = config.filters[k];
|
let filter = config.filters[k];
|
||||||
if ( filter === tag || ( filter instanceof RegExp && tag.match(filter) ) ) {
|
if ( filter === tag || ( filter instanceof RegExp && tag.match(filter) ) ) {
|
||||||
filtered = true;
|
filtered = tag;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ( filtered ) break;
|
if ( filtered ) break;
|
||||||
}
|
}
|
||||||
if ( filtered ) {
|
if ( filtered ) {
|
||||||
console.log(`Skipping filtered post: ${booru.urls.posts}${post.id}`, tag)
|
console.log(`Skipping filtered post: ${booru.urls.posts}${post.id}`, filtered)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -139,17 +140,18 @@ let parse = async () => {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
promises.push(Fetch(post.url, options).then(res => new Promise((resolve, reject) => {
|
if ( !config.skipSave)
|
||||||
const dest = FS.createWriteStream(`${config.output}${post.filename}`);
|
promises.push(Fetch(post.url, options).then(res => new Promise((resolve, reject) => {
|
||||||
res.body.pipe(dest);
|
const dest = FS.createWriteStream(`${config.output}${post.filename}`);
|
||||||
dest.on('close', () => {
|
res.body.pipe(dest);
|
||||||
console.log(`Downloaded: ${booru.urls.posts}${post.id}`)
|
dest.on('close', () => {
|
||||||
resolve()
|
console.log(`Downloaded: ${booru.urls.posts}${post.id}`)
|
||||||
});
|
resolve()
|
||||||
dest.on('error', reject);
|
});
|
||||||
})).catch((err)=>{
|
dest.on('error', reject);
|
||||||
console.error(`Error while fetching: ${post.id}`, posts[i], err);
|
})).catch((err)=>{
|
||||||
}));
|
console.error(`Error while fetching: ${post.id}`, posts[i], err);
|
||||||
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( config.rateLimit ) await new Promise( (resolve) => {
|
if ( config.rateLimit ) await new Promise( (resolve) => {
|
||||||
|
|
17
src/fetch.py
17
src/fetch.py
|
@ -58,6 +58,8 @@ config = {
|
||||||
"animated", # training only supports static images
|
"animated", # training only supports static images
|
||||||
],
|
],
|
||||||
'filtersRegex': [],
|
'filtersRegex': [],
|
||||||
|
|
||||||
|
'skipSave': False, # useful if you want to just cache your tags before running pre-process on files you already downloaded
|
||||||
}
|
}
|
||||||
|
|
||||||
booru = boorus[config['booru']]
|
booru = boorus[config['booru']]
|
||||||
|
@ -118,24 +120,25 @@ def parse():
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if config["filter"]:
|
if config["filter"]:
|
||||||
filtered = False
|
filtered = None
|
||||||
for tag in post['tags']:
|
for tag in post['tags']:
|
||||||
if tag in config['filters']:
|
if tag in config['filters']:
|
||||||
filtered = True
|
filtered = tag
|
||||||
break
|
break
|
||||||
for filter in config['filtersRegex']:
|
for filter in config['filtersRegex']:
|
||||||
if re.search(filter, tag):
|
if re.search(filter, tag):
|
||||||
filtered = True
|
filtered = tag
|
||||||
break
|
break
|
||||||
if filtered:
|
if filtered is not None:
|
||||||
break
|
break
|
||||||
if filtered:
|
if filtered is not None:
|
||||||
print(f"Skipping filtered post: {booru['urls']['posts']}{post['id']} {tag}")
|
print(f"Skipping filtered post: {booru['urls']['posts']}{post['id']} {tag}")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
||||||
urllib.request.urlretrieve(post['url'], f"{config['output']}{post['filename']}")
|
if not config['skipSave']:
|
||||||
print(f"Downloaded : {booru['urls']['posts']}{post['id']}")
|
urllib.request.urlretrieve(post['url'], f"{config['output']}{post['filename']}")
|
||||||
|
print(f"Downloaded : {booru['urls']['posts']}{post['id']}")
|
||||||
|
|
||||||
if config['rateLimit']:
|
if config['rateLimit']:
|
||||||
time.sleep(config['rateLimit'] / 1000.0)
|
time.sleep(config['rateLimit'] / 1000.0)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user