177 lines
6.5 KiB
JavaScript
Executable File
177 lines
6.5 KiB
JavaScript
Executable File
let FS = require("fs")
|
|
let Fetch = require("node-fetch")
|
|
|
|
// User-tunable settings for the whole script. Paths are resolved relative to the
// directory the script is run from.
let config = {
	input: `./in/`, // directory of files to process
	output: `./out/`, // directory the renamed copies are written to
	tags: `./tags.csv`, // CSV of tags known to the model being trained against (swap in the tag list of whichever booru-flavoured model you use); replaced at startup by a tag -> score lookup table
	cache: `./cache.json`, // JSON file of cached post metadata; speeds up re-runs by skipping the network

	rateLimit: 500, // milliseconds to wait after each request; e621 allows at most 2 requests per second
	filenameLimit: 245, // maximum characters of joined tags placed in the filename; keeps within filesystem limits and bounds the token count seen by the prompt parser

	filter: true, // when true, tags matching any entry of `filters` are dropped
	// Tags that must never reach the filename. Strings match a tag exactly,
	// regular expressions are tested against the tag (underscores already replaced by spaces).
	// Very common tags such as "anthro", "male" or "female" are also reasonable candidates.
	filters: [
		// left disabled because keeping them helps hypernetworks
		// "anthro",
		// "fur",
		// "male",
		// "female",

		"animal genitalia", // redundant: posts usually carry a more specific tag as well

		"genitals", // uninformative: nearly every post that has it also has a more specific tag
		"video games", // too generic to be useful
		/clothing$/, // all the various verbose clothing tags
		/fluids$/, // bodily fluids, genital fluids, etc.
		/ (fe)?male$/, // "overweight male", "overweight female", ...

		// /^(fe)?male /, // "male penetrating", "female penetrating", etc.
	],

	// Tags (or whole e621 tag categories) that are treated as if they were already part of the model's tag list.
	// If you want to be cautious, list the species you care about, although that is rarely necessary.
	// Character and series names can be listed too when training hypernetworks.
	// Listing a tag the model already knows boosts it to maximum priority.
	tagsOverride: ["character", "species", "copyright"], // useful for hypernetwork training
	tagsOverrideCategories: true, // override categories
	tagsOverrideStart: 1000000, // score handed to the first overridden tag; it is decremented for each further one so overrides keep their relative order when sorting

	// Tags to always include in the list.
	// Putting such tags in your training template instead is strongly recommended.
	tagsAutoInclude: [],

	removeParentheses: true, // strips a trailing parenthesised qualifier, e.g. `blaidd_(elden_ring)` or `curt_(animal_crossing)`, without having to list each one above
	// recommended, because the qualifiers interfere with a lot of things

	onlyIncludeModelArtists: true, // true: keep the artist tag only when it is in the model's tag list; false: keep every artist
	// some artists are missing from the tag list yet are known to LAION (vanilla SD)

	reverseTags: false, // inverts sorting, prioritizing tags with little representation in the model

	tagDelimiter: ",", // separator between tags in the filename; the web UI accepts comma-separated filenames but inserts them without the commas
}
// Names of every entry in the input directory; entries whose name carries no md5 are skipped by parse().
let files = FS.readdirSync(config.input);

// Load the model's tag list. Each line is split on commas: first field is the tag, second its score.
let csv = FS.readFileSync(config.tags).toString().split(/\r?\n/);

// From here on `config.tags` is the tag -> score lookup table rather than a path.
// A prototype-less object keeps tag names such as "constructor" from resolving to inherited members.
config.tags = Object.create(null);
for ( const line of csv ) {
	const [name, count] = line.split(",");
	const score = parseInt(count, 10);
	// Blank lines, header rows and rows without a numeric score carry no usable information.
	if ( !name || Number.isNaN(score) ) continue;
	config.tags[name] = score;
}

// Seed the overrides with descending scores, starting at `tagsOverrideStart`.
for ( const name of config.tagsOverride ) {
	const override = name.replace(/_/g, " ");
	config.tags[override] = config.tagsOverrideStart--;
}
// Post metadata from earlier runs, keyed by md5. Starts empty when no usable cache file exists.
let cache = {};
try {
	const stored = JSON.parse( FS.readFileSync(config.cache, "utf8") );
	// parse() indexes the cache by md5, so anything but a plain object is unusable.
	if ( stored && typeof stored === "object" && !Array.isArray(stored) ) {
		cache = stored;
	} else {
		console.warn(`Ignoring cache ${config.cache}: expected a JSON object`);
	}
} catch ( e ) {
	// A missing file is the normal first-run case; anything else (corrupt JSON, permissions)
	// is reported so a silently discarded cache does not go unnoticed.
	if ( e.code !== "ENOENT" ) {
		console.warn(`Ignoring unreadable cache ${config.cache}: ${e.message}`);
	}
}
/**
 * Copies every md5-named file from `config.input` to `config.output`, replacing the
 * md5 in the filename with the post's tags (rating first, then artist, then the
 * remaining tags ordered by their score in `config.tags`).
 *
 * Post metadata is taken from `cache` when present and otherwise requested from
 * e621; the cache is written back to `config.cache` when the run ends, including
 * when it ends with an error.
 *
 * @returns {Promise<void>} resolves once every file has been handled and the cache saved
 */
let parse = async () => {
	const sleep = (ms) => new Promise( (resolve) => setTimeout(resolve, ms) );

	// Own-property lookup so tags such as "constructor" never resolve to Object.prototype members;
	// missing or non-numeric scores count as 0, which keeps the sort comparator free of NaN.
	const scoreOf = (tag) => {
		const score = Object.prototype.hasOwnProperty.call(config.tags, tag) ? config.tags[tag] : 0;
		return Number.isFinite(score) ? score : 0;
	};
	const isKnown = (tag) => scoreOf(tag) !== 0;

	// Override names are compared in the same underscore-free form the tags are converted to.
	const overrideNames = new Set( config.tagsOverride.map( (name) => name.replace(/_/g, " ") ) );
	// Category overrides stay on unless explicitly switched off in the config.
	const overrideCategories = config.tagsOverrideCategories !== false;

	// Requests one post by md5; resolves to undefined when the request fails or nothing matches.
	const fetchPost = async (md5) => {
		const response = await Fetch( `https://e621.net/posts.json?tags=md5:${md5}`, {
			headers: {
				'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
			}
		} );
		if ( !response.ok ) {
			console.warn(`Request for ${md5} failed with HTTP ${response.status}`);
			return undefined;
		}
		const body = JSON.parse(await response.text());
		return Array.isArray(body.posts) ? body.posts[0] : undefined;
	};

	// True when the tag matches an exact string filter or a regular-expression filter.
	const isFiltered = (tag) => config.filters.some(
		(filter) => filter === tag || ( filter instanceof RegExp && tag.match(filter) )
	);

	FS.mkdirSync(config.output, { recursive: true });

	try {
		for ( const [index, file] of files.entries() ) {
			// The md5 is expected at the start of the name, or directly before the image extension.
			const match = file.match(/^([a-f0-9]{32})/) || file.match(/([a-f0-9]{32})\.(jpe?g|png)$/);
			if ( !match ) continue;
			const md5 = match[1];

			console.log(`[${(100.0 * index / files.length).toFixed(3)}%]: ${md5}`);

			if ( !cache[md5] ) {
				cache[md5] = await fetchPost(md5);
				// Wait right after the request so that misses, which skip the rest of the
				// iteration, are rate limited as well.
				if ( config.rateLimit ) await sleep(config.rateLimit);
			}
			const json = cache[md5];
			if ( !json ) continue;

			const tags = [...config.tagsAutoInclude];

			let artist = "";
			let content = "";
			switch ( json.rating ) {
				case "s": content = "safe content"; break;
				case "q": content = "questionable content"; break;
				case "e": content = "explicit content"; break;
			}

			for ( const [cat, names] of Object.entries(json.tags || {}) ) {
				const list = Array.isArray(names) ? names : [];

				if ( cat === "artist" ) {
					if ( list.length === 0 ) continue;
					const tag = `by ${list.join(" and ")}`;
					if ( config.onlyIncludeModelArtists && !isKnown(tag) ) continue;
					artist = tag;
					continue;
				}

				const categoryIndex = overrideCategories ? config.tagsOverride.indexOf(cat) : -1;
				for ( const name of list ) {
					const tag = name.replace(/_/g, " ");
					// Decided per tag: one overridden tag must not turn the rest of its category into overrides.
					const override = categoryIndex >= 0 || overrideNames.has(tag);

					if ( override ) {
						if ( !isKnown(tag) ) {
							// Categories listed earlier in tagsOverride receive a larger multiplier, so they sort first.
							const scale = categoryIndex >= 0 ? Math.pow( 10, config.tagsOverride.length - categoryIndex + 1 ) : 1;
							config.tags[tag] = (config.tagsOverrideStart--) * scale;
						}
					} else if ( !isKnown(tag) ) continue;

					if ( tag.includes("/") ) continue; // illegal filename character
					if ( config.filter && isFiltered(tag) ) continue;

					tags.push(tag);
				}
			}

			const polarity = config.reverseTags ? -1 : 1;
			tags.sort( (a, b) => (scoreOf(b) - scoreOf(a)) * polarity );
			if ( artist ) tags.unshift(artist);
			if ( content ) tags.unshift(content);

			// Keep tags in priority order until the joined length would exceed the limit.
			// The length is measured before parentheses are stripped, so the result can only be shorter.
			let measured = "";
			const kept = [];
			for ( const tag of tags ) {
				const candidate = measured + config.tagDelimiter + tag;
				if ( candidate.length > config.filenameLimit ) break;
				measured = candidate;
				kept.push( config.removeParentheses ? tag.replace(/\(.+?\)$/, "").trim() : tag );
			}
			const joined = kept.join(config.tagDelimiter);

			// A replacer function keeps `$` sequences inside tags from being read as replacement patterns.
			// The copy is synchronous; files are handled strictly one after another anyway.
			FS.copyFileSync(`${config.input}/${file}`, `${config.output}/${file.replace(md5, () => joined)}`);
		}
	} finally {
		// Saved even when the loop throws, so posts already fetched are not requested again.
		// Pretty-printed with tabs to keep the cache readable.
		FS.writeFileSync(config.cache, JSON.stringify( cache, null, "\t" ));
	}
}
// Entry point. Report a failed run and exit non-zero instead of leaving the promise unhandled.
parse().catch( (error) => {
	console.error(error);
	process.exitCode = 1;
} );