diff --git a/data/demo/index.template.html b/data/demo/index.template.html index dcb1e64..e5efdff 100644 --- a/data/demo/index.template.html +++ b/data/demo/index.template.html @@ -11,6 +11,7 @@ Text WER↓ + CER↓ SIM-O↑ Prompt Our VALL-E @@ -27,6 +28,7 @@ Text WER↓ + CER↓ SIM-O↑ Prompt Our VALL-E diff --git a/vall_e/demo.py b/vall_e/demo.py index e0e0323..e676cf0 100644 --- a/vall_e/demo.py +++ b/vall_e/demo.py @@ -155,9 +155,9 @@ def main(): 'Below are some samples from my VALL-E implementation: https://git.ecker.tech/mrq/vall-e/.', 'Unlike the original VALL-E demo page, I\'m placing emphasis on the input prompt, as the model adheres to it stronger than others.', f'Objective metrics are computed by transcribing ({args.transcription_model}) then comparing the word error rate on transcriptions (WER/CER), and computing the cosine similarities on embeddings through a speaker feature extraction model ({args.speaker_similarity_model}) (SIM-O)', - 'Total WER: ${WER}' - 'Total CER: ${CER}' - 'Total SIM-O: ${SIM-O}' + 'Total WER: ${WER}
' + 'Total CER: ${CER}
' + 'Total SIM-O: ${SIM-O}
' ]) # comparison kwargs diff --git a/vall_e/emb/transcribe.py b/vall_e/emb/transcribe.py index ef4d56a..774a479 100644 --- a/vall_e/emb/transcribe.py +++ b/vall_e/emb/transcribe.py @@ -9,12 +9,14 @@ import argparse import torch import torchaudio +""" try: import whisperx except Exception as e: whisperx = None print(f"Error while querying for whisperx: {str(e)}") pass +""" from transformers import pipeline @@ -193,6 +195,7 @@ def transcribe( return metadata # for backwards compat since it also handles some other things for me +""" def transcribe_whisperx( audio, language = "auto", @@ -248,6 +251,7 @@ def transcribe_whisperx( metadata["end"] = end return metadata +""" def transcribe_batch( input_audio = "voices", @@ -315,7 +319,7 @@ def transcribe_batch( if os.path.isdir(inpath): continue - metadata[filename] = transcribe_whisperx( inpath, model_name=model_name, diarize=diarize, device=device, dtype=dtype ) + metadata[filename] = transcribe( inpath, model_name=model_name, diarize=diarize, device=device, dtype=dtype ) open(outpath, 'w', encoding='utf-8').write(json.dumps(metadata))