Add sanitizer for captions in Textual inversion

This commit is contained in:
Raphael Stoeckli 2022-10-05 21:57:18 +02:00 committed by AUTOMATIC1111
parent 0e92c36707
commit 2499fb4e19

View File

@ -1,5 +1,8 @@
from cmath import log
import os import os
from PIL import Image, ImageOps from PIL import Image, ImageOps
import platform
import sys
import tqdm import tqdm
from modules import shared, images from modules import shared, images
@ -25,6 +28,7 @@ def preprocess(process_src, process_dst, process_flip, process_split, process_ca
def save_pic_with_caption(image, index): def save_pic_with_caption(image, index):
if process_caption: if process_caption:
caption = "-" + shared.interrogator.generate_caption(image) caption = "-" + shared.interrogator.generate_caption(image)
caption = sanitize_caption(os.path.join(dst, f"{index:05}-{subindex[0]}"), caption, ".png")
else: else:
caption = filename caption = filename
caption = os.path.splitext(caption)[0] caption = os.path.splitext(caption)[0]
@ -75,3 +79,27 @@ def preprocess(process_src, process_dst, process_flip, process_split, process_ca
if process_caption: if process_caption:
shared.interrogator.send_blip_to_ram() shared.interrogator.send_blip_to_ram()
def sanitize_caption(base_path, original_caption, suffix):
operating_system = platform.system().lower()
if (operating_system == "windows"):
invalid_path_characters = "\\/:*?\"<>|"
max_path_length = 259
else:
invalid_path_characters = "/" #linux/macos
max_path_length = 1023
caption = original_caption
for invalid_character in invalid_path_characters:
caption = caption.replace(invalid_character, "")
fixed_path_length = len(base_path) + len(suffix)
if fixed_path_length + len(caption) <= max_path_length:
return caption
caption_tokens = caption.split()
new_caption = ""
for token in caption_tokens:
last_caption = new_caption
new_caption = new_caption + token + " "
if (len(new_caption) + fixed_path_length - 1 > max_path_length):
break
print(f"\nPath will be too long. Truncated caption: {original_caption}\nto: {last_caption}", file=sys.stderr)
return last_caption.strip()