add BREAK keyword to end current text chunk and start the next

2023-01-15 22:29:53 +03:00 · 2023-01-15 22:29:53 +03:00 · 8e2aeee4a1
commit 8e2aeee4a1
parent 205991df78
2 changed files with 19 additions and 5 deletions
--- a/modules/prompt_parser.py
+++ b/modules/prompt_parser.py
@@ -274,6 +274,7 @@ re_attention = re.compile(r"""
 :
 """, re.X)
+re_break = re.compile(r"\s*\bBREAK\b\s*", re.S)
 def parse_prompt_attention(text):
    """
@@ -339,7 +340,11 @@ def parse_prompt_attention(text):
        elif text == ']' and len(square_brackets) > 0:
            multiply_range(square_brackets.pop(), square_bracket_multiplier)
        else:
-            res.append([text, 1.0])
+            parts = re.split(re_break, text)
+            for i, part in enumerate(parts):
+                if i > 0:
+                    res.append(["BREAK", -1])
+                res.append([part, 1.0])
    for pos in round_brackets:
        multiply_range(pos, round_bracket_multiplier)
--- a/modules/sd_hijack_clip.py
+++ b/modules/sd_hijack_clip.py
@@ -96,13 +96,18 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
        token_count = 0
        last_comma = -1
-        def next_chunk():
+        def next_chunk(is_last=False):
-            """puts current chunk into the list of results and produces the next one - empty"""
+            """puts current chunk into the list of results and produces the next one - empty;
+            if is_last is true, tokens <end-of-text> tokens at the end won't add to token_count"""
            nonlocal token_count
            nonlocal last_comma
            nonlocal chunk
-            token_count += len(chunk.tokens)
+            if is_last:
+                token_count += len(chunk.tokens)
+            else:
+                token_count += self.chunk_length
            to_add = self.chunk_length - len(chunk.tokens)
            if to_add > 0:
                chunk.tokens += [self.id_end] * to_add
@@ -116,6 +121,10 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
            chunk = PromptChunk()
        for tokens, (text, weight) in zip(tokenized, parsed):
+            if text == 'BREAK' and weight == -1:
+                next_chunk()
+                continue
            position = 0
            while position < len(tokens):
                token = tokens[position]
@@ -159,7 +168,7 @@ class FrozenCLIPEmbedderWithCustomWordsBase(torch.nn.Module):
                position += embedding_length_in_tokens
        if len(chunk.tokens) > 0 or len(chunks) == 0:
-            next_chunk()
+            next_chunk(is_last=True)
        return chunks, token_count