template cleanup

parent 7e54e897f7
commit 0c69e798f7
@@ -11,6 +11,7 @@
 	<tr>
 		<th>Text</th>
 		<th>WER↓</th>
+		<th>CER↓</th>
 		<th>SIM-O↑</th>
 		<th>Prompt</th>
 		<th>Our VALL-E</th>
@@ -27,6 +28,7 @@
 	<tr>
 		<th>Text</th>
 		<th>WER↓</th>
+		<th>CER↓</th>
 		<th>SIM-O↑</th>
 		<th>Prompt</th>
 		<th>Our VALL-E</th>
@@ -155,9 +155,9 @@ def main():
 		'Below are some samples from my VALL-E implementation: <a href="https://git.ecker.tech/mrq/vall-e/">https://git.ecker.tech/mrq/vall-e/</a>.',
 		'Unlike the original VALL-E demo page, I\'m placing emphasis on the input prompt, as the model adheres to it stronger than others.',
 		f'Objective metrics are computed by transcribing ({args.transcription_model}) then comparing the word error rate on transcriptions (WER/CER), and computing the cosine similarities on embeddings through a speaker feature extraction model ({args.speaker_similarity_model}) (SIM-O)',
-		'<b>Total WER:</b> ${WER}'
-		'<b>Total CER:</b> ${CER}'
-		'<b>Total SIM-O:</b> ${SIM-O}'
+		'<b>Total WER:</b> ${WER}<br>'
+		'<b>Total CER:</b> ${CER}<br>'
+		'<b>Total SIM-O:</b> ${SIM-O}<br>'
 	])
 
 	# comparison kwargs
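For reference, the objective metrics described in the summary string above follow a standard recipe: WER/CER from comparing transcriptions, SIM-O from cosine similarity of speaker embeddings. Below is a minimal sketch, assuming jiwer for the error rates and pre-extracted speaker embeddings; the function and argument names are illustrative, not this repo's actual helpers.

# Sketch only: how WER/CER and SIM-O are typically computed for a demo page.
# jiwer and the pre-extracted speaker embeddings are assumptions, not this repo's code.
from jiwer import wer, cer
import torch.nn.functional as F

def objective_metrics(reference_text, hypothesis_text, reference_embedding, generated_embedding):
	return {
		# word / character error rate between the reference text and the transcription
		"WER": wer(reference_text, hypothesis_text),
		"CER": cer(reference_text, hypothesis_text),
		# SIM-O: cosine similarity between speaker embeddings of reference and generated audio
		"SIM-O": F.cosine_similarity(reference_embedding, generated_embedding, dim=-1).item(),
	}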
@@ -9,12 +9,14 @@ import argparse
 import torch
 import torchaudio
 
+"""
 try:
 	import whisperx
 except Exception as e:
 	whisperx = None
 	print(f"Error while querying for whisperx: {str(e)}")
 	pass
+"""
 
 from transformers import pipeline
 
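With the whisperx import block commented out, transcription goes through the transformers pipeline imported just below it. A minimal sketch of that path; the checkpoint name is a placeholder, since the script selects its own model via model_name.

# Sketch of the transformers-pipeline transcription path this commit keeps.
# The checkpoint is an assumption for illustration, not the script's default.
from transformers import pipeline

transcriber = pipeline(
	"automatic-speech-recognition",
	model="openai/whisper-large-v3",  # placeholder checkpoint
)
result = transcriber("path/to/audio.wav", return_timestamps=True)
print(result["text"])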
@@ -193,6 +195,7 @@ def transcribe(
 	return metadata
 
 # for backwards compat since it also handles some other things for me
+"""
 def transcribe_whisperx(
 	audio,
 	language = "auto",
@@ -248,6 +251,7 @@ def transcribe_whisperx(
 		metadata["end"] = end
 
 	return metadata
+"""
 
 def transcribe_batch(
 	input_audio = "voices",
@@ -315,7 +319,7 @@ def transcribe_batch(
 		if os.path.isdir(inpath):
 			continue
 
-		metadata[filename] = transcribe_whisperx( inpath, model_name=model_name, diarize=diarize, device=device, dtype=dtype )
+		metadata[filename] = transcribe( inpath, model_name=model_name, diarize=diarize, device=device, dtype=dtype )
 
 		open(outpath, 'w', encoding='utf-8').write(json.dumps(metadata))
 
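With transcribe_whisperx() wrapped in a docstring, the batch path now routes every file through transcribe(), i.e. the transformers-pipeline path above. A hedged usage sketch; keyword names other than input_audio are inferred from the call in the hunk and may not match the script's actual signature.

# Illustrative call only: keyword names beyond input_audio are assumptions
# inferred from how transcribe() is invoked inside transcribe_batch().
transcribe_batch(
	input_audio="voices",
	model_name="openai/whisper-large-v3",  # placeholder
	device="cuda",
)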