diff --git a/data/demo/index.template.html b/data/demo/index.template.html
index dcb1e64..e5efdff 100644
--- a/data/demo/index.template.html
+++ b/data/demo/index.template.html
@@ -11,6 +11,7 @@
Text |
WER↓ |
+ CER↓ |
SIM-O↑ |
Prompt |
Our VALL-E |
@@ -27,6 +28,7 @@
Text |
WER↓ |
+ CER↓ |
SIM-O↑ |
Prompt |
Our VALL-E |
diff --git a/vall_e/demo.py b/vall_e/demo.py
index e0e0323..e676cf0 100644
--- a/vall_e/demo.py
+++ b/vall_e/demo.py
@@ -155,9 +155,9 @@ def main():
'Below are some samples from my VALL-E implementation: https://git.ecker.tech/mrq/vall-e/.',
'Unlike the original VALL-E demo page, I\'m placing emphasis on the input prompt, as the model adheres to it stronger than others.',
f'Objective metrics are computed by transcribing ({args.transcription_model}) then comparing the word error rate on transcriptions (WER/CER), and computing the cosine similarities on embeddings through a speaker feature extraction model ({args.speaker_similarity_model}) (SIM-O)',
- 'Total WER: ${WER}'
- 'Total CER: ${CER}'
- 'Total SIM-O: ${SIM-O}'
+ 'Total WER: ${WER}<br>'
+ 'Total CER: ${CER}<br>'
+ 'Total SIM-O: ${SIM-O}<br>'
])
# comparison kwargs
diff --git a/vall_e/emb/transcribe.py b/vall_e/emb/transcribe.py
index ef4d56a..774a479 100644
--- a/vall_e/emb/transcribe.py
+++ b/vall_e/emb/transcribe.py
@@ -9,12 +9,14 @@ import argparse
import torch
import torchaudio
+"""
try:
import whisperx
except Exception as e:
whisperx = None
print(f"Error while querying for whisperx: {str(e)}")
pass
+"""
from transformers import pipeline
@@ -193,6 +195,7 @@ def transcribe(
return metadata
# for backwards compat since it also handles some other things for me
+"""
def transcribe_whisperx(
audio,
language = "auto",
@@ -248,6 +251,7 @@ def transcribe_whisperx(
metadata["end"] = end
return metadata
+"""
def transcribe_batch(
input_audio = "voices",
@@ -315,7 +319,7 @@ def transcribe_batch(
if os.path.isdir(inpath):
continue
- metadata[filename] = transcribe_whisperx( inpath, model_name=model_name, diarize=diarize, device=device, dtype=dtype )
+ metadata[filename] = transcribe( inpath, model_name=model_name, diarize=diarize, device=device, dtype=dtype )
open(outpath, 'w', encoding='utf-8').write(json.dumps(metadata))