// fetch.js — downloads booru (e621) posts matching a tag query into a local directory and caches their tag data
let FS = require("fs")
|
|
let Fetch = require("node-fetch")
|
|
|
|
// Booru definitions: per-site endpoints plus adapters that normalize API JSON
// into the shape the rest of this script works with.
let boorus = {

	"e621": {

		urls: {
			api: "https://e621.net/posts.json", // endpoint to grab tag info from
			posts: "https://e621.net/posts/", // url to show post page, only for console logging
		},

		config: {
			rateLimit: 500, // time to wait between requests, in milliseconds, e621 imposes a rate limit of 2 requests per second
			cookie: null, // put your account cookie here if for whatever reason you need it
		},

		// Extract the array of posts from a raw API response.
		posts( json ) {
			return json.posts;
		},

		// Normalize a single raw post into the fields this script uses.
		post( json ) {
			// e621 groups tags by category; collapse all categories into one flat list
			let tags = Object.values(json.tags).flat();

			return {
				id: json.id,
				url: json.file.url,
				md5: json.file.md5,
				filename: `${json.file.md5}.${json.file.ext}`,
				tags,
			};
		},

	},

}
|
|
|
|
// User-tunable settings; booru-specific defaults (rateLimit, cookie) are
// merged in from the selected booru definition further down.
let config = {

	booru: "e621", // booru definition to use from the above object, currently only supports e621

	query: "", // fallback query when no CLI argument is passed; left empty so the script can refuse to run untargeted

	output: "./in/", // directory to save your files
	cache: "./cache.json", // JSON file of cached tags, will speed up processing when used for the renamer script

	limit: 10, // how many posts to pull in one go
	concurrency: 4, // how many file requests to keep in flight at the same time

	userAgent: 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',

	filter: true, // sort of redundant given you can "filter" with the query, but e621 has a query limit, so here you can bypass it
	filters: [ // entries are exact tag strings or RegExps; consult the preprocess.js script for examples
		"animated", // training only supports static images
	],

}
|
|
|
|
let booru = boorus[config.booru];

// merge the booru's defaults into config for any key the user did not set.
// BUGFIX: was `!config[k]`, which also clobbered deliberate falsy values —
// e.g. `rateLimit: 0` (disable waiting) or `cookie: ""` would have been
// silently replaced by the booru defaults.
for ( let k in booru.config ) if ( config[k] === undefined ) config[k] = booru.config[k];
|
|
|
|
// load tag cache to merge into later; start from an empty cache when the
// file is missing or not valid JSON
let cache = {};

try {
	cache = JSON.parse( FS.readFileSync(config.cache) );
} catch ( e ) {
	// first run, or a corrupt cache file — either way, begin fresh
}
|
|
|
|
// grab requested query from arguments
|
|
let args = process.argv;
|
|
args.shift();
|
|
args.shift();
|
|
if ( args.length ) config.query = args.join(" ");
|
|
// require a query, without it you effectively have a script to download the entirety of e621
|
|
if ( !config.query ) {
|
|
console.error("No arguments passed; example: `node fetch.js 'kemono -dog'")
|
|
return;
|
|
}
|
|
// clamp concurrency to at least one in-flight request
// (falsy, NaN, and sub-1 values all collapse to 1, same as before)
config.concurrency = Math.max( 1, config.concurrency || 1 );

// fetch options shared by every request (API pages and file downloads)
let options = {

	headers: {
		'user-agent': config.userAgent,
		'cookie': config.cookie,
	},

}
|
|
|
|
// Crawl the booru page by page: fetch post metadata, cache tags, and download
// each post's file (skipping existing files and filtered posts), honouring the
// configured rate limit and download concurrency. Resolves once every page has
// been processed and all downloads have finished.
let parse = async () => {

	let posts = [];

	let last = ''; // id of the last post seen, used as the `page=b<id>` pagination cursor

	do {

		// build this page's query string
		let query = [`tags=${config.query}`]

		if ( config.limit ) query.push(`limit=${config.limit}`)

		if ( last ) query.push(`page=b${last}`)

		query = encodeURI(query.join("&"));

		let r = await Fetch( `${booru.urls.api}?${query}`, options );

		posts = booru.posts(JSON.parse(await r.text()));

		let promises = []; // in-flight download batch, capped at config.concurrency

		for ( let i in posts ) {

			let post = booru.post(posts[i]);

			// advance the cursor even for skipped posts, otherwise pagination would stall
			last = `${post.id}`

			cache[post.md5] = posts[i];

			if ( FS.existsSync(`${config.output}${post.filename}`) ) {

				console.log(`Skipping existing file: ${booru.urls.posts}${post.id}`)

				continue;

			}

			if ( config.filter ) {

				// find the first tag matching any filter (exact string or RegExp)
				let matched = null;

				outer:
				for ( let j in post.tags ) {

					let tag = post.tags[j];

					for ( let k in config.filters ) {

						let filter = config.filters[k];

						if ( filter === tag || ( filter instanceof RegExp && tag.match(filter) ) ) {

							matched = tag;

							break outer;

						}

					}

				}

				if ( matched !== null ) {

					// BUGFIX: previously this logged `tag`, which is scoped to the inner
					// loop (ReferenceError — crashed on the first filtered post), and used
					// `break`, which abandoned the whole rest of the page instead of
					// skipping just this post.
					console.log(`Skipping filtered post: ${booru.urls.posts}${post.id}`, matched)

					continue;

				}

			}

			// NOTE(review): e621 appears to null out file.url for posts hidden by the
			// global blacklist when unauthenticated — skip rather than Fetch(null)
			if ( !post.url ) {

				console.log(`Skipping post with no file url: ${booru.urls.posts}${post.id}`)

				continue;

			}

			// drain the batch once the concurrency cap is reached
			if ( promises.length >= config.concurrency ) {

				await Promise.all(promises);

				promises = [];

			}

			promises.push(Fetch(post.url, options).then(res => new Promise((resolve, reject) => {

				const dest = FS.createWriteStream(`${config.output}${post.filename}`);

				res.body.pipe(dest);

				dest.on('close', () => {

					console.log(`Downloaded: ${booru.urls.posts}${post.id}`)

					resolve()

				});

				dest.on('error', reject);

			})).catch((err)=>{

				// per-file failures are logged and swallowed so one bad download
				// doesn't abort the whole crawl
				console.error(`Error while fetching: ${post.id}`, err);

			}));

		}

		// BUGFIX: await the final (partial) batch — previously any downloads still
		// in flight at the end of a page were never awaited, so the script could
		// exit before they finished writing
		await Promise.all(promises);

		// wait between page requests to respect the booru's rate limit
		if ( config.rateLimit ) await new Promise( (resolve) => {

			setTimeout(resolve, config.rateLimit)

		} )

		// persist the tag cache after every page so a crash loses at most one page
		FS.writeFileSync(config.cache, JSON.stringify( cache, null, "\t" ))

	} while ( posts.length );

}
|
|
|
|
// kick off the crawl; catch the promise so a failure is reported instead of
// surfacing as an unhandled rejection, and exit nonzero on error
parse().catch( (err) => {
	console.error("Fatal error:", err);
	process.exitCode = 1;
} );