From a31ff6ba1dc3958afb2243f1069c3b6317b389d1 Mon Sep 17 00:00:00 2001 From: mrq Date: Wed, 12 Oct 2022 03:45:16 +0000 Subject: [PATCH] bug fixes, more features, some starting notes on hypernetworks --- .gitignore | 3 +- README.md | 66 ++++++++++++++++++++++++++++----- utils/renamer/README.md | 2 + utils/renamer/fetch.js | 7 +++- utils/renamer/fetch.py | 8 ++++ utils/renamer/preprocess.js | 73 ++++++++++++++++++++++++++++--------- utils/renamer/preprocess.py | 65 ++++++++++++++++++++++++++------- 7 files changed, 182 insertions(+), 42 deletions(-) diff --git a/.gitignore b/.gitignore index e29f1f1..f4132cf 100755 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,11 @@ # ---> Node -# data/config/master.*.json utils/renamer/in/*.jpg utils/renamer/in/*.png +utils/renamer/in/*.gif utils/renamer/out/*.png utils/renamer/out/*.jpg +utils/renamer/out/*.gif utils/renamer/cache.json package-lock.json diff --git a/README.md b/README.md index a1e4c06..f09584d 100755 --- a/README.md +++ b/README.md @@ -24,6 +24,10 @@ Below is a list of terms clarified. I notice I'll use some terms interchangably * `style`: an artist's style. Textual Inversion can also incorporate subjects in a style. * `source content/material`: the images you're using to train against; pulled from e621 (or another booru) * `embedding`: the trained "model" of the subject or style in question. 
"Model" would be wrong to call the trained output, as Textual Inversion isn't true training +* `hypernetwork`: a different way to train custom content against a model, almost all of the same principles here apply for hypernetworks +* `epoch`: a term derived from typical neural network training + - normally, it's referred to as a full training cycle over your source material + - in this context, it's the above times the number of repeats per single image ## Preface @@ -116,7 +120,7 @@ There's some "bugs" with the script, be it limitations with interfacing with web The final piece of the puzzle is providing a decent template to train against. Under `./stable-diffusion-webui/textual_inversion_templates/` are text files for these templates. The Web UI provides rudimentary keywords (\[name\] and \[filewords\]) to help provide better crafted prompts used during training. The pre-processing script handles the \[filewords\] requirement, while \[name\] will be where you want the embedding's name to plop in the prompt. -An adequate starting point is simply: +The ~~adequate~~ ***recommended*** starting point is simply: ``` uploaded on e621, [name], [filewords] @@ -151,6 +155,8 @@ uploaded on e621, by [name], [filewords] Now that everything is set up, it's time to start training. For systems with adequate enough VRAM, you're free to run the web UI with `--no-half --precision full` (whatever "adequate entails"). You'll take a very slight performance hit, but quality improves barely enough I was able to notice. +Make sure you're using the correct model you want to train against, as training uses the currently selected model. + Run the Web UI, and click the `Textual Inversion` tab. Create your embedding to train on by providing: @@ -170,17 +176,43 @@ Click create, and the starting file will be created. 
Afterwards, you can pre-process your source material further by duplicating to flip (will remove the filenames if you preprocessed them already, so beware), or split (presumably will also eat your filenames). Next: -* select your embedding to train on in the dropdown -* if you're adventurous, adjust the learning rate. The default of `0.005` is fine enough, and shouldn't cause learning/loss problems, but if you're erring on the side of caution, you can set it to `0.0005`, but more training will be needed. -* pass in the path to the folder of your source material to train against -* put in the path to the prompt file you created earlier. if you put it in the same folder as the web UI's default prompts, just rename the filename there -* adjust how long you want the training to be done before terminating. Paperspace seems to let me do ~70000 on an A6000 before shutting down after 6 hours. An 80GB A100 will let me get shy of the full 100000 before auto-shutting down after 6 hours. -* the last two values are creature comforts and have no real effect on training, values are up to player preference. +* `embedding` or `hypernetwork`: select your embedding/hypernetwork to train on in the dropdown +* `learning rate`: if you're adventurous, adjust the learning rate. The default of `0.005` is fine enough, and shouldn't cause learning/loss problems, but if you're erring on the side of caution, you can set it to `0.0005`, but more training will be needed. + - If you're training a hypernetwork, use `0.000005` or `0.0000005` for a learning rate. +* `dataset directory`: pass in the path to the folder of your source material to train against +* `log directory`: player preference, the default is sane enough +* `prompt template file`: put in the path to the prompt file you created earlier. 
if you put it in the same folder as the web UI's default prompts, just rename the filename there +* `width` and `height`: I assume this determines the size of the image to generate when requested, I'd leave it to the default 512x512 for now +* `max steps`: adjust how long you want the training to be done before terminating. Paperspace seems to let me do ~70000 on an A6000 before shutting down after 6 hours. An 80GB A100 will let me get shy of the full 100000 before auto-shutting down after 6 hours. +* `epoch length`: this value governs the learning rate correction when training based on defining how long an epoch is. for larger training sets, you would want to decrease this. +* `save an image/copy`: the last two values are creature comforts and have no real effect on training, values are up to player preference. Afterwards, hit Train, and wait and watch your creation come to life. If you didn't pre-process your images with flipped copies, I suggest midway through to pause training, then use ImageMagick's `mogrify` to flip your images with `mogrify -flop *` in the directory of your source material. I feel I've gotten nicer quality pictures because of it over an embedding I trained without it (but with a different prompt template). +### For Training a Hypernetwork + +Please, please, ***please*** be aware that training a hypernetwork also uses any embeddings from textual inversion. You ***will*** get false results if you use a hypernetwork trained with a textual inversion embedding. This is very easy to do if you have your hypernetwork named the same as an embedding you have, especially if you're using the `[name]` keyword in your training template. 
+ +You're free to use an embedding in your hypernetwork training, but some caveats I've noticed: +* it is imperative to use a really low learning rate, or you'll fry the hypernetwork and get garbage output after 2200, 4400, or 5200 steps +* any image generation without your embedding will get terrible output +* using a hypernetwork + embedding of the same concept doesn't seem to give very much of a difference, although my test was with an embedding I didn't have very great results from anyways +* if you wish to share your hypernetwork, and you in fact did train it with an embedding, it's important the very same embedding is included +* hypernetwork files are orders of magnitude larger than an embedding, but don't *seem* to grow in size as you train them, unlike an embedding where it's still pretty small, but grows in size as you train it. +* like embeddings, hypernetworks are still bound to the model you trained against. unlike an embedding, using this on a different model will absolutely not work. + +Now that you understand the caveats, training a hypernetwork is (almost) the same as training an embedding through Textual Inversion. The only real difference in training seems to be needing a very much lower learning rate of either `0.000005` or `0.0000005`. As of 2022.10.11, it seems voldy's web UI also can adjust your learning rate based on how many epochs have passed (as a refresher, it's how many times you processed your source material, times whatever value you set in the web UI). I'm not too keen on how to adjust it, but there seems to be commits involving it being added in. + +I'm also not too keen whether you need to have a `[name]` token in your training template, as hypernetworks apply more on a model level than a token level. + +### Using the Hypernetwork + +To be discovered later. As of now, you just have to go into Settings, scroll at the bottom, and select your newly trained hypernetwork in the dropdown. 
+ +I can *assume* that you do not need to have any additional keywords if you trained with a template that did not include the `[name]` keyword. I also *feel* like you don't need to even if you did, but I'll come back and edit my findings after I re-train a hypernetwork. + ## Using the Embedding Using your newly trained embedding is as simple as putting in the name of the file in the prompt. Before, you would need to signal to the prompt parser with ``, but it seems now you do not. I don't know if still using `<>` has any bearings on output, but take note you do not need it anymore. @@ -201,8 +233,22 @@ Ordering ***really*** matters when it comes to your embedding, and additionally If you're using an embedding primarily focused on an artstyle, and you're also using an embedding trained on a subject, take great care in your weights on your additional embedding. Too much, even the smallest amount, and you'll destroy your style's embedding in the final output. +Lastly, when you do use your embedding, make sure you're using the same model you trained against. You *can* use embeddings on different models, as you'll definitely get usable results, but don't expect it to give stellar ones. + ## After Words -I've mentioned adding in a drop-in replacement for `dataset.py`, with fancier stuff, like an easier way to grab tags, and to shuffle during training, but so far I don't think it's necessary. It also messes with `git pull`s, as any future updates will need intervention if that file updates. -The initial need to "fix" it was just to not use commas, but it also updated to accept booru strings. -I will try later to see if the grandeur of shuffling tags has an effect, but I imagine it's minor at most. \ No newline at end of file +Despite being very wordy, I do hope that it's digestible and easy to process for even the most inexperienced of users. Everything in here is pretty much from my own observations and tests, so I can get (You), anon, closer to generating what you love. 
+ +Lastly, the following section has no bearing on training, but serves as a way to put down my observations: + +### The Nature of Textual Inversion embeddings + +I'm definitely no expert on this, and I could definitely just try and read the source code to confirm whether I'm right or wrong, but keep in mind this is just from my observations on training and using embeddings. + +Textual Inversion embeddings serve as mini-"models" to extend a current one. When the prompt is parsed, the keyword taps into the embedding to figure out which tokens to pull from and their associated weights. Training is just figuring out the right tokens necessary to represent the source material. This is evident through: +* "vectors per token" consumes how many tokens from the prompt +* subjects that are easy to describe in a prompt (vintage white fur, a certain shape and colored glasses, eye color, fur shagginess, three toes, etc.) give far better results +* subjects that are nigh impossible to describe in a prompt (four ears, half are shaped one way, the other half another, middle eye, tusks, neckbeard tufts, etc. // brown fur, vintage white muzzle and chest marking) are *very* hard for an embedding to output +* using an embedding trained on a different model will still give the concepts that it was trained against (using an embedding of a species of animal will generate something somewhat reminiscent of a real-life version of that species of animal) + +Contrarily, hypernetworks are another variation of extending the model with another mini-"model". They apply to the entire model as a whole, rather than tokens, allowing it to target a subsection of the model. 
\ No newline at end of file diff --git a/utils/renamer/README.md b/utils/renamer/README.md index 86b2b49..8e528ad 100755 --- a/utils/renamer/README.md +++ b/utils/renamer/README.md @@ -4,6 +4,8 @@ Included are the utilities provided for ~~scraping~~ acquiring your source conte If you're targeting another booru, the same principles apply, but you'll need to adjust your repo URL and processing your booru's JSON output. Doing so is left as an exercise to the reader. +Lastly, feature parity between the two scripts may not be up to par, as I'm a sepples programmer, not a Python dev. The initial `preprocess.py` was graciously written by an anon, and I've cobbled together the `fetch.py` one myself. The node.js version will definitely have more features, as I'm better at node.js + ## Dependencies The python scripts have no additional dependencies, while node.js scripts requires running `npm install node-fetch@2` (v2.x because I'm old and still using `require` for my includes). diff --git a/utils/renamer/fetch.js b/utils/renamer/fetch.js index addcada..11579c1 100755 --- a/utils/renamer/fetch.js +++ b/utils/renamer/fetch.js @@ -20,6 +20,11 @@ let boorus = { } } + // need to log in to get it, reconstruct + if ( !json.file.url ) { + json.file.url = `https://static1.e621.net/data/${json.file.md5.slice(0,2)}/${json.file.md5.slice(2,4)}/${json.file.md5}.${json.file.ext}` + } + return { id: json.id, url: json.file.url, @@ -143,7 +148,7 @@ let parse = async () => { }); dest.on('error', reject); })).catch((err)=>{ - console.error(`Error while fetching: ${post.id}`, err); + console.error(`Error while fetching: ${post.id}`, posts[i], err); })); } diff --git a/utils/renamer/fetch.py b/utils/renamer/fetch.py index fdd90f2..108bde5 100755 --- a/utils/renamer/fetch.py +++ b/utils/renamer/fetch.py @@ -13,6 +13,10 @@ def booru_e621_post( json ): for tag in json['tags'][cat]: tags.append(tag) + # need to log in to get it, reconstruct + if json['file']['url'] is None: + json['file']['url'] = 
f"https://static1.e621.net/data/{json['file']['md5'][0:2]}/{json['file']['md5'][2:4]}/{json['file']['md5']}.{json['file']['ext']}" + return { 'id': json['id'], 'url': json['file']['url'], @@ -109,6 +113,10 @@ def parse(): print(f"Skipping existing file: {booru['urls']['posts']}{post['id']}") continue + if post['url'] is None: + print(f"Skipping file that requires logging in: {booru['urls']['posts']}{post['id']}") + continue + if config["filter"]: filtered = False for tag in post['tags']: diff --git a/utils/renamer/preprocess.js b/utils/renamer/preprocess.js index 46d26c9..486a229 100755 --- a/utils/renamer/preprocess.js +++ b/utils/renamer/preprocess.js @@ -8,26 +8,50 @@ let config = { cache: `./cache.json`, // JSON file of cached tags, will speed up processing if re-running rateLimit: 500, // time to wait between requests, in milliseconds, e621 imposes a rate limit of 2 requests per second - filenameLimit: 192, // maximum characters to put in the filename, necessary to abide by filesystem limitations, and to "limit" token count for the prompt parser + filenameLimit: 245, // maximum characters to put in the filename, necessary to abide by filesystem limitations, and to "limit" token count for the prompt parser filter: true, // fill it with tags of whatever you don't want to make it into the filename // for starters, you can also add "anthro", "male", "female", as they're very common tags filters: [ - "anthro", - "fur", - "male", - "female", - "genitals", - "video games", - "animal genitalia", - /clothing$/, + // commented because it'll help hypernetworks + // "anthro", + // "fur", + // "male", + // "female", + + "animal genitalia", // redundant tag, usually anything will have the nasty dog dick tag or horse cock tag + + "genitals", // useless tag when everything will have penis or vagina + "video games", // you hear about VIDEOGAMES + /clothing$/, // all the various verbose clothing tags + /fluids$/, // bodily fluids, genital fluids, etc. 
+ / (fe)?male$/, // overweight male, overweight female + + // /^(fe)?male /, male penetrating, female penetrating, etc. ], + // treat these tags as already being included in the + // if you're cautious (paranoid), include species you want, but I found I don't really even need to include specis + // you can also include character names / series names if you're using this for hypernetworks + // you can also use this to boost a tag already defined to max priority + tagsOverride: ["character", "species", "copyright"], // useful for hypernetwork training + tagsOverrideCategories: true, // override categories + tagsOverrideStart: 1000000, // starting score that your overriden tags will start from, for sorting purposes + + // tags to always include in the list + // I HIGHLY suggest including these tags in your training template instead + tagsAutoInclude: [], + + removeParentheses: true, // removes shit like `blaidd_(elden_ring)` or `curt_(animal_crossing)` without needing to specify it all in the above + // good because it messes with a lot of shit + onlyIncludeModelArtists: true, // if true, only include the artist's tag if in the model's taglist, if false, add all artists // i've noticed some artists that weren't included in the taglist, but is available in LAION's (vanilla SD) reverseTags: false, // inverts sorting, prioritizing tags with little representation in the model + + tagDelimiter: ",", // what separates each tag in the filename, web UI will accept comma separated filenames, but will insert it without commas } let files = FS.readdirSync(config.input); @@ -39,6 +63,11 @@ for ( let i in csv ) { config.tags[k] = parseInt(v); } +for ( let i in config.tagsOverride ) { + let override = config.tagsOverride[i].replace(/_/g, " "); + config.tags[override] = config.tagsOverrideStart--; +} + let cache; try { cache = JSON.parse( FS.readFileSync(config.cache) ) @@ -71,7 +100,7 @@ let parse = async () => { } let json = cache[md5]; if ( !json ) continue; - tags = []; + tags = 
[...config.tagsAutoInclude]; let artist = ""; let content = ""; @@ -82,15 +111,23 @@ let parse = async () => { } for ( let cat in json.tags ) { + let override = config.tagsOverride.includes(cat); if ( cat === "artist" ) { - let tag = "by " + json.tags["artist"].join(" and ") + let tag = `by ${json.tags["artist"].join(" and ")}` if ( config.onlyIncludeModelArtists && !config.tags[tag] ) continue; artist = tag; } else for ( let k in json.tags[cat] ) { let tag = json.tags[cat][k].replace(/_/g, " "); - if ( !config.tags[tag] ) continue; - if ( tag.indexOf("/") >= 0 ) continue; // illegal filename character + if ( !override ) override = config.tagsOverride.includes(tag) + if ( override ) { + if ( !config.tags[tag] ) { + let idx = config.tagsOverride.indexOf( cat ); + let scale = idx >= 0 ? Math.pow( 10, config.tagsOverride.length - idx + 1 ) : 1; + config.tags[tag] = (config.tagsOverrideStart--) * scale; + } + } else if ( !config.tags[tag] ) continue; + if ( tag.indexOf("/") >= 0 ) continue; // illegal filename character if ( config.filter ) { let should = false; @@ -117,14 +154,16 @@ let parse = async () => { let jointmp = ""; let filtered = []; for ( let i in tags ) { - if ( (jointmp + " " + tags[i]).length > config.filenameLimit ) break; - jointmp += " " + tags[i]; + if ( (jointmp + config.tagDelimiter + tags[i]).length > config.filenameLimit ) break; + jointmp += config.tagDelimiter + tags[i]; + if ( config.removeParentheses ) + tags[i] = tags[i].replace(/\(.+?\)$/, "").trim() filtered.push(tags[i]) } - let joined = filtered.join(" ") + let joined = filtered.join(config.tagDelimiter) // NOOOOOO YOU'RE SUPPOSE TO DO IT ASYNCHRONOUSLY SO IT'S NOT BLOCKING - require("fs").copyFileSync(`${config.input}/${file}`, `${config.output}/${file.replace(md5, joined)}`) + FS.copyFileSync(`${config.input}/${file}`, `${config.output}/${file.replace(md5, joined)}`) if ( rateLimit && config.rateLimit ) await new Promise( (resolve) => { setTimeout(resolve, config.rateLimit) diff --git 
a/utils/renamer/preprocess.py b/utils/renamer/preprocess.py index e09819a..cf8845e 100755 --- a/utils/renamer/preprocess.py +++ b/utils/renamer/preprocess.py @@ -5,6 +5,7 @@ import re import json import time import shutil +import math import urllib.request config = { @@ -14,29 +15,48 @@ config = { 'cache': './cache.json', # JSON file of cached tags, will speed up processing if re-running 'rateLimit': 500, # time to wait between requests, in milliseconds, e621 imposes a rate limit of 2 requests per second - 'filenameLimit': 192, # maximum characters to put in the filename, necessary to abide by filesystem limitations - # you can set this to 250, as the web UI has uncapped the prompt limit, but I have yet to test this if this limit was also lifted for textual inversion + 'filenameLimit': 245, # maximum characters to put in the filename, necessary to abide by filesystem limitations + # you can set this to 245, as the web UI has uncapped the prompt limit, but I have yet to test this if this limit was also lifted for textual inversion 'filter': True, # fill it with tags of whatever you don't want to make it into the filename # for starters, you can also add "anthro", "male", "female", as they're very common tags 'filters': [ - "anthro", - "fur", - "male", - "female", + # "anthro", + # "fur", + # "male", + # "female", + + "animal genitalia", + "genitals", "video games", - "animal genitalia", ], 'filtersRegex': [ r"clothing$", + r"fluids$", + r" (fe)?male$", ], + # treat these tags as already being included in the + # if you're cautious (paranoid), include species you want, but I found I don't really even need to include specis + # you can also include character names / series names if you're using this for hypernetworks + 'tagsOverride': ["species", "character", "copyright"], # useful for hypernetwork training + 'tagsOverrideStart': 1000000, # starting score that your overriden tags will start from, for sorting purposes + + # tags to always include in the list + # I 
HIGHLY suggest including these tags in your training template instead + 'tagsAutoInclude': [], + + 'removeParentheses': True, # removes shit like `blaidd_(elden_ring)` or `curt_(animal_crossing)` without needing to specify it all in the above + # good because it messes with a lot of shit + 'onlyIncludeModelArtists': True, # if True, only include the artist's tag if in the model's taglist, if false, add all artists # i've noticed some artists that weren't included in the taglist, but is available in LAION's (vanilla SD) 'reverseTags': False, # inverts sorting, prioritizing tags with little representation in the model + + 'tagDelimiter': ",", # what separates each tag in the filename, web UI will accept comma separated filenames, but will insert it without commas } with open(config['tags'], 'rb') as f: @@ -46,6 +66,11 @@ for i in csv: k, v = i.split(',') config['tags'][k] = int(v) +for i in range(len(config['tagsOverride'])): + override = config['tagsOverride'][i].replace("_", " ") + config['tags'][override] = config['tagsOverrideStart'] + config['tagsOverrideStart'] = config['tagsOverrideStart'] - 1 + cache = {} try: with open(config['cache'], 'rb') as f: @@ -84,7 +109,7 @@ def parse(): json_meta = cache[md5] if not json_meta: continue - tags = [] + tags = config['tagsAutoInclude'].copy() artist = "" content = { @@ -94,6 +119,7 @@ def parse(): }.get(json_meta["rating"], "") for cat in json_meta["tags"]: + override = cat in config['tagsOverride'] if cat == "artist": tag = "by " + " and ".join(json_meta["tags"]["artist"]) if config['onlyIncludeModelArtists'] and not tag in config['tags']: @@ -102,12 +128,23 @@ def parse(): else: for tag in json_meta["tags"][cat]: tag = tag.replace("_", " ") - if tag not in config['tags']: + if not override: + override = tag in config['tagsOverride'] + + if override: + if tag not in config['tags']: + idx = config['tagsOverride'].index( cat ) + if idx >= 0: + scale = math.pow(10, len(config['tagsOverride']) - idx + 1) + else: + scale = 
1 + config['tags'][tag] = config['tagsOverrideStart'] * scale + config['tagsOverrideStart'] = config['tagsOverrideStart'] - 1 + elif tag not in config['tags']: continue if "/" in tag or ":" in tag: continue # illegal filename character - if config['filter']: should = False if tag in config['filters']: @@ -129,11 +166,13 @@ def parse(): jointmp = "" filtered = [] for i in tags: - if len(jointmp + " " + i) > config['filenameLimit']: + if len(jointmp + config['tagDelimiter'] + i) > config['filenameLimit']: break - jointmp += " " + i + jointmp += config['tagDelimiter'] + i + if config['removeParentheses']: + i = re.sub(r"\(.+?\)$", "", i).strip() filtered.append(i) - joined = " ".join(filtered) + joined = config['tagDelimiter'].join(filtered) shutil.copy(os.path.join(config['input'], file), os.path.join(config['output'], file.replace(md5, joined)))