deduce whether to preprocess text by checking the vocab JSON itself instead of the filename suffix

This commit is contained in:
mrq 2023-03-16 14:41:04 +00:00
parent e201746eeb
commit af78e3978a

View File

@ -1,5 +1,6 @@
import os import os
import re import re
import json
import inflect import inflect
import torch import torch
@ -172,7 +173,9 @@ DEFAULT_VOCAB_FILE = os.path.join(os.path.dirname(os.path.realpath(__file__)), '
class VoiceBpeTokenizer: class VoiceBpeTokenizer:
def __init__(self, vocab_file=DEFAULT_VOCAB_FILE, preprocess=None): def __init__(self, vocab_file=DEFAULT_VOCAB_FILE, preprocess=None):
if preprocess is None: if preprocess is None:
self.preprocess = vocab_file[-8:] != "ipa.json" with open(vocab_file, 'r', encoding='utf-8') as f:
vocab = json.load(f)
self.preprocess = 'pre_tokenizer' in vocab and vocab['pre_tokenizer']
else: else:
self.preprocess = preprocess self.preprocess = preprocess
if vocab_file is not None: if vocab_file is not None: