forked from camenduru/ai-voice-cloning
Now whisperx should output JSON that aligns with what's expected.
This commit is contained in:
parent
8877960062
commit
13605f980c
|
@ -1150,6 +1150,7 @@ def whisper_transcribe( file, language=None ):
|
||||||
segments = whisper_model.extract_text_and_timestamps( res )
|
segments = whisper_model.extract_text_and_timestamps( res )
|
||||||
|
|
||||||
result = {
|
result = {
|
||||||
|
'text': []
|
||||||
'segments': []
|
'segments': []
|
||||||
}
|
}
|
||||||
for segment in segments:
|
for segment in segments:
|
||||||
|
@ -1157,8 +1158,12 @@ def whisper_transcribe( file, language=None ):
|
||||||
'start': segment[0] / 100.0,
|
'start': segment[0] / 100.0,
|
||||||
'end': segment[1] / 100.0,
|
'end': segment[1] / 100.0,
|
||||||
'text': segment[2],
|
'text': segment[2],
|
||||||
|
'id': len(result['segments'])
|
||||||
}
|
}
|
||||||
|
result['text'].append( segment[2] )
|
||||||
result['segments'].append(reparsed)
|
result['segments'].append(reparsed)
|
||||||
|
|
||||||
|
result['text'] = " ".join(result['text'])
|
||||||
return result
|
return result
|
||||||
|
|
||||||
if args.whisper_backend == "m-bain/whisperx":
|
if args.whisper_backend == "m-bain/whisperx":
|
||||||
|
@ -1194,6 +1199,7 @@ def whisper_transcribe( file, language=None ):
|
||||||
result['segments'] = result_aligned['segments']
|
result['segments'] = result_aligned['segments']
|
||||||
result['text'] = []
|
result['text'] = []
|
||||||
for segment in result['segments']:
|
for segment in result['segments']:
|
||||||
|
segment['id'] = len(result['text'])
|
||||||
result['text'].append(segment['text'].strip())
|
result['text'].append(segment['text'].strip())
|
||||||
result['text'] = " ".join(result['text'])
|
result['text'] = " ".join(result['text'])
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user