2023-02-17 00:08:27 +00:00
import os
import argparse
import time
import json
import base64
import re
2023-03-09 00:26:47 +00:00
import inspect
2023-02-17 00:08:27 +00:00
import urllib . request
import torch
import torchaudio
import music_tag
import gradio as gr
import gradio . utils
from datetime import datetime
import tortoise . api
2023-02-17 05:42:55 +00:00
from tortoise . utils . audio import get_voice_dir , get_voices
2023-03-04 20:42:54 +00:00
from tortoise . utils . device import get_device_count
2023-02-17 00:08:27 +00:00
from utils import *
# Runtime options object used throughout this module (setup_gradio reads
# attributes such as args.share and args.listen from it). setup_args is not
# defined in this file; presumably it is supplied by the star-import from
# utils -- TODO confirm.
args = setup_args()
2023-03-09 00:26:47 +00:00
# Registries mapping a setting name to the Gradio component that edits it.
# They start out empty; setup_gradio() fills GENERATE_SETTINGS, EXEC_SETTINGS
# and TRAINING_SETTINGS while it builds the UI.
GENERATE_SETTINGS = {}
TRANSCRIBE_SETTINGS = {}
EXEC_SETTINGS = {}
TRAINING_SETTINGS = {}

# Ordered parameter names of generate_proxy; setup_gradio() assigns the real
# list by inspecting that function's signature.
GENERATE_SETTINGS_ARGS = []

# Named generation presets. update_presets() reads the
# 'num_autoregressive_samples' and 'diffusion_iterations' entries.
PRESETS = {
    'Ultra Fast': {'num_autoregressive_samples': 16, 'diffusion_iterations': 30, 'cond_free': False},
    'Fast': {'num_autoregressive_samples': 96, 'diffusion_iterations': 80},
    'Standard': {'num_autoregressive_samples': 256, 'diffusion_iterations': 200},
    'High Quality': {'num_autoregressive_samples': 256, 'diffusion_iterations': 400},
}

# Column title of the history table -> key looked up in a result's metadata.
# "Name" has no metadata key: history_view_results() fills that column with
# the file name instead.
HISTORY_HEADERS = {
    "Name": "",
    "Samples": "num_autoregressive_samples",
    "Iterations": "diffusion_iterations",
    "Temp.": "temperature",
    "Sampler": "diffusion_sampler",
    "CVVP": "cvvp_weight",
    "Top P": "top_p",
    "Diff. Temp.": "diffusion_temperature",
    "Len Pen": "length_penalty",
    "Rep Pen": "repetition_penalty",
    "Cond-Free K": "cond_free_k",
    "Time": "time",
    "Datetime": "datetime",
    "Model": "model",
    "Model Hash": "model_hash",
}
# can't use *args OR **kwargs if I want to retain the ability to use progress
def generate_proxy(
    text,
    delimiter,
    emotion,
    prompt,
    voice,
    mic_audio,
    voice_latents_chunks,
    candidates,
    seed,
    num_autoregressive_samples,
    diffusion_iterations,
    temperature,
    diffusion_sampler,
    breathing_room,
    cvvp_weight,
    top_p,
    diffusion_temperature,
    length_penalty,
    repetition_penalty,
    cond_free_k,
    experimentals,
    progress=gr.Progress(track_tqdm=True)
):
    """Forward the generation form values to generate() and shape its result for the UI.

    The parameter list is load-bearing: setup_gradio() builds
    GENERATE_SETTINGS_ARGS from this signature and drops the final entry,
    so `progress` has to stay last and the other names must stay unchanged.

    Returns:
        A 4-tuple of: the first output, an update carrying `sample` (visible
        only when a sample exists), an update listing every output as a choice
        (visible only when there is more than one), and an update carrying
        `stats`.

    Raises:
        Whatever generate() raises. When the message is exactly
        "Kill signal detected", unload_tts() is called before re-raising.
    """
    # Snapshot the arguments before any other local is bound, so the dict
    # holds exactly the parameters above (including `progress`).
    kwargs = locals()

    try:
        sample, outputs, stats = generate(**kwargs)
    except Exception as e:
        if str(e) == "Kill signal detected":
            unload_tts()
        # Bare raise re-raises the active exception with its traceback intact.
        raise

    return (
        outputs[0],
        gr.update(value=sample, visible=sample is not None),
        gr.update(choices=outputs, value=outputs[0], visible=len(outputs) > 1, interactive=True),
        gr.update(value=stats, visible=True),
    )
2023-03-09 00:26:47 +00:00
2023-02-17 00:08:27 +00:00
def update_presets(value):
    """Turn a preset name into updates for the samples and iterations sliders.

    Args:
        value: Name of the selected preset.

    Returns:
        A pair of gr.update objects. For a name found in PRESETS they carry
        that preset's 'num_autoregressive_samples' and 'diffusion_iterations';
        for any other value both updates are empty and change nothing.
    """
    if value not in PRESETS:
        return (gr.update(), gr.update())

    chosen = PRESETS[value]
    samples_update = gr.update(value=chosen['num_autoregressive_samples'])
    iterations_update = gr.update(value=chosen['diffusion_iterations'])
    return (samples_update, iterations_update)
2023-02-17 19:06:05 +00:00
def get_training_configs():
    """List the YAML training configurations stored directly in ./training/.

    Only entries whose name ends in ".yaml" and does not start with "." are
    kept. Paths are returned as "./training/<name>", sorted by file name.

    Returns:
        list[str]: The matching paths; empty when ./training/ does not exist.
    """
    try:
        entries = sorted(os.listdir("./training/"))
    except FileNotFoundError:
        # No training directory yet simply means there are no configs.
        return []

    return [
        f"./training/{name}"
        for name in entries
        if name.endswith(".yaml") and not name.startswith(".")
    ]
def update_training_configs():
    """Return a component update whose choices are the current get_training_list() result."""
    available = get_training_list()
    return gr.update(choices=available)
2023-02-17 19:06:05 +00:00
def history_view_results(voice):
    """Build the history table for the .wav files stored under ./results/<voice>/.

    Each .wav file is passed to read_generate_settings(); files for which it
    returns no metadata are skipped. A table row follows HISTORY_HEADERS:
    the "Name" column holds the file name, every other column holds the
    matching metadata entry, or '?' when that entry is absent.

    Args:
        voice: Name of the sub-directory of ./results/ to scan.

    Returns:
        tuple: (rows, dropdown_update) where rows is a list of row lists and
        dropdown_update offers the listed file names as choices. Both are
        empty when the directory does not exist.
    """
    outdir = f"./results/{voice}/"
    rows = []
    files = []

    # A voice without any generated output has no directory to list.
    if not os.path.isdir(outdir):
        return (rows, gr.Dropdown.update(choices=files))

    for file in sorted(os.listdir(outdir)):
        if not file.endswith(".wav"):
            continue

        metadata, _ = read_generate_settings(os.path.join(outdir, file), read_latents=False)
        if metadata is None:
            continue

        rows.append([
            file if header == "Name" else metadata.get(key, '?')
            for header, key in HISTORY_HEADERS.items()
        ])
        files.append(file)

    # files was filled from a sorted listing, so it is already in order.
    return (rows, gr.Dropdown.update(choices=files))
2023-03-09 18:34:52 +00:00
def import_generate_settings_proxy(file=None):
    """Load generation settings and lay them out in GENERATE_SETTINGS_ARGS order.

    Args:
        file: Passed straight through to import_generate_settings().

    Returns:
        tuple: One entry per name in GENERATE_SETTINGS_ARGS; None stands in
        for any name the loaded settings do not contain.
    """
    loaded = import_generate_settings(file)
    return tuple(
        loaded[name] if name in loaded else None
        for name in GENERATE_SETTINGS_ARGS
    )
2023-03-07 03:55:35 +00:00
def compute_latents_proxy(voice, voice_latents_chunks, progress=gr.Progress(track_tqdm=True)):
    """Call compute_latents() for `voice` and return the voice name unchanged.

    The return value of compute_latents() itself is discarded.
    """
    request = dict(
        voice=voice,
        voice_latents_chunks=voice_latents_chunks,
        progress=progress,
    )
    compute_latents(**request)
    return voice
2023-02-18 02:07:22 +00:00
def import_voices_proxy(files, name, progress=gr.Progress(track_tqdm=True)):
    """Call import_voices() with the given files and name, then return an empty update.

    The empty gr.update() leaves whatever component receives it unchanged.
    """
    import_voices(files, name, progress)
    no_change = gr.update()
    return no_change
2023-02-17 19:06:05 +00:00
def read_generate_settings_proxy(file, saveAs='.temp'):
    """Read the settings (and latents, if any) from `file` and prepare UI updates.

    When read_generate_settings() returns latents, they are written to
    <voice dir>/<saveAs>/cond_latents.pth and that path is reported instead
    of the raw bytes.

    Args:
        file: Passed straight through to read_generate_settings().
        saveAs: Sub-directory of the voice directory that receives the latents.

    Returns:
        A 4-tuple of: an update carrying the metadata (visible only when
        metadata exists), an update carrying the latents path (visible only
        when latents is not None), the metadata's 'voice' entry (None when
        there is no metadata or no such entry), and a visibility update that
        is on only when metadata exists.
    """
    j, latents = read_generate_settings(file)

    if latents:
        outdir = f'{get_voice_dir()}/{saveAs}/'
        os.makedirs(outdir, exist_ok=True)
        # outdir already ends with "/", so the file name is appended directly.
        latents_path = f'{outdir}cond_latents.pth'
        with open(latents_path, 'wb') as f:
            f.write(latents)
        latents = latents_path

    has_metadata = j is not None
    return (
        gr.update(value=j, visible=has_metadata),
        gr.update(value=latents, visible=latents is not None),
        # Metadata is not guaranteed to carry a voice name; fall back to None.
        j.get('voice') if has_metadata else None,
        gr.update(visible=has_metadata),
    )
2023-03-11 16:32:35 +00:00
def prepare_dataset_proxy(voice, language, validation_text_length, validation_audio_length, skip_existings, slice_audio, progress=gr.Progress(track_tqdm=True)):
    """Run the dataset preparation steps for `voice` and return their messages.

    Steps, in order:
      1. prepare_dataset() on the voice's entry from get_voices(), writing
         to ./training/<voice>/.
      2. slice_dataset(), only when `slice_audio` is truthy.
      3. prepare_validation_dataset(), only when either validation threshold
         is greater than zero.

    Returns:
        str: The message of every step that ran, joined with newlines.
    """
    voice_entry = get_voices(load_latents=False)[voice]
    log = [
        prepare_dataset(
            voice_entry,
            outdir=f"./training/{voice}/",
            language=language,
            skip_existings=skip_existings,
            progress=progress,
        )
    ]

    if slice_audio:
        log.append(slice_dataset(voice))

    wants_validation = validation_text_length > 0 or validation_audio_length > 0
    if wants_validation:
        log.append(
            prepare_validation_dataset(
                voice,
                text_length=validation_text_length,
                audio_length=validation_audio_length,
            )
        )

    return "\n".join(log)
2023-02-17 20:43:12 +00:00
2023-03-09 00:26:47 +00:00
def update_args_proxy(*args):
    """Pair positional values with the EXEC_SETTINGS keys and pass them to update_args().

    The i-th value is given the name of the i-th EXEC_SETTINGS key.
    """
    names = list(EXEC_SETTINGS.keys())
    # Indexing (not zip) is deliberate: surplus values still raise IndexError.
    named_values = {names[position]: value for position, value in enumerate(args)}
    update_args(**named_values)
def optimize_training_settings_proxy(*args):
    """Name the positional values after TRAINING_SETTINGS keys and run optimize_training_settings().

    Returns:
        list: Every value of the returned settings except the last one,
        followed by the returned messages joined with newlines.
    """
    names = list(TRAINING_SETTINGS.keys())
    named_values = {names[position]: value for position, value in enumerate(args)}

    settings, messages = optimize_training_settings(**named_values)

    values = list(settings.values())
    return [*values[:-1], "\n".join(messages)]
2023-02-19 20:22:03 +00:00
2023-02-26 01:57:56 +00:00
def import_training_settings_proxy(voice):
    """Load ./training/<voice>/train.json and return values for the training form.

    If ./training/<voice>/finetune/training_state/ holds numbered "*.state"
    files, the highest number is written into the 'resume_state' setting.

    Args:
        voice: Name of the sub-directory of ./training/ to import from.

    Returns:
        list: One value per TRAINING_SETTINGS key except the last, in
        TRAINING_SETTINGS order, followed by the status messages joined with
        newlines. A key missing from train.json, or every key when the file
        cannot be read, takes the current `.value` of its component, so the
        list always has the same length and order.
    """
    messages = []
    injson = f'./training/{voice}/train.json'
    statedir = f'./training/{voice}/finetune/training_state/'

    try:
        with open(injson, 'r', encoding="utf-8") as f:
            settings = json.load(f)
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError covers both invalid
        # JSON (JSONDecodeError) and undecodable bytes (UnicodeDecodeError).
        messages.append(f"Error import /{voice}/train.json")
        output = [component.value for component in TRAINING_SETTINGS.values()]
        return output[:-1] + ["\n".join(messages)]

    if os.path.isdir(statedir):
        suffix = ".state"
        resumes = []
        for entry in os.listdir(statedir):
            if not entry.endswith(suffix):
                continue
            try:
                resumes.append(int(entry[:-len(suffix)]))
            except ValueError:
                # Not a numbered state file; it cannot be ranked, so skip it.
                continue

        if resumes:
            # statedir already ends with "/", so no extra separator is needed.
            settings['resume_state'] = f'{statedir}{max(resumes)}.state'
            messages.append(f"Found most recent training state: {settings['resume_state']}")

    # Keep one slot per key so values never shift onto a neighbouring position.
    output = [
        settings[k] if k in settings else TRAINING_SETTINGS[k].value
        for k in TRAINING_SETTINGS
    ]

    messages.append(f"Imported training settings: {injson}")

    return output[:-1] + ["\n".join(messages)]
2023-03-07 20:38:31 +00:00
2023-03-09 00:26:47 +00:00
def save_training_settings_proxy(*args):
    """Name the positional values after TRAINING_SETTINGS keys and run save_training_settings().

    Returns:
        str: The returned messages joined with newlines; the returned
        settings are not used.
    """
    names = list(TRAINING_SETTINGS.keys())
    named_values = {names[position]: value for position, value in enumerate(args)}

    _, messages = save_training_settings(**named_values)
    return "\n".join(messages)
2023-02-18 14:51:00 +00:00
2023-02-17 19:06:05 +00:00
def update_voices():
    """Re-query the voice lists and return one dropdown update per list.

    Returns:
        tuple: Three gr.Dropdown.update objects, in this order: the voice
        list with defaults appended, the plain voice list, and the list
        obtained for "./results/".
    """
    with_defaults = get_voice_list(append_defaults=True)
    plain = get_voice_list()
    from_results = get_voice_list("./results/")

    return (
        gr.Dropdown.update(choices=with_defaults),
        gr.Dropdown.update(choices=plain),
        gr.Dropdown.update(choices=from_results),
    )
def history_copy_settings(voice, file):
    """Return import_generate_settings() for the file ./results/<voice>/<file>."""
    result_path = f"./results/{voice}/{file}"
    return import_generate_settings(result_path)
2023-02-17 00:08:27 +00:00
def setup_gradio ( ) :
global args
global ui
2023-03-09 18:34:52 +00:00
2023-02-17 00:08:27 +00:00
if not args . share :
def noop ( function , return_value = None ) :
def wrapped ( * args , * * kwargs ) :
return return_value
return wrapped
gradio . utils . version_check = noop ( gradio . utils . version_check )
gradio . utils . initiated_analytics = noop ( gradio . utils . initiated_analytics )
gradio . utils . launch_analytics = noop ( gradio . utils . launch_analytics )
gradio . utils . integration_analytics = noop ( gradio . utils . integration_analytics )
gradio . utils . error_analytics = noop ( gradio . utils . error_analytics )
gradio . utils . log_feature_analytics = noop ( gradio . utils . log_feature_analytics )
#gradio.utils.get_local_ip_address = noop(gradio.utils.get_local_ip_address, 'localhost')
if args . models_from_local_only :
os . environ [ ' TRANSFORMERS_OFFLINE ' ] = ' 1 '
2023-03-01 01:17:38 +00:00
voice_list_with_defaults = get_voice_list ( append_defaults = True )
voice_list = get_voice_list ( )
r esult_voices = get_voice_list ( " ./results/ " )
autoregressive_models = get_autoregressive_models ( )
dataset_list = get_dataset_list ( )
2023-03-09 18:34:52 +00:00
global GENERATE_SETTINGS_ARGS
2023-03-09 00:26:47 +00:00
GENERATE_SETTINGS_ARGS = list ( inspect . signature ( generate_proxy ) . parameters . keys ( ) ) [ : - 1 ]
for i in range ( len ( GENERATE_SETTINGS_ARGS ) ) :
arg = GENERATE_SETTINGS_ARGS [ i ]
GENERATE_SETTINGS [ arg ] = None
2023-02-17 00:08:27 +00:00
with gr . Blocks ( ) as ui :
with gr . Tab ( " Generate " ) :
with gr . Row ( ) :
with gr . Column ( ) :
2023-03-09 00:26:47 +00:00
GENERATE_SETTINGS [ " text " ] = gr . Textbox ( lines = 4 , label = " Input Prompt " )
2023-02-17 00:08:27 +00:00
with gr . Row ( ) :
with gr . Column ( ) :
2023-03-09 00:26:47 +00:00
GENERATE_SETTINGS [ " delimiter " ] = gr . Textbox ( lines = 1 , label = " Line Delimiter " , placeholder = " \\ n " )
2023-02-17 00:08:27 +00:00
2023-03-09 00:26:47 +00:00
GENERATE_SETTINGS [ " emotion " ] = gr . Radio ( [ " Happy " , " Sad " , " Angry " , " Disgusted " , " Arrogant " , " Custom " , " None " ] , value = " None " , label = " Emotion " , type = " value " , interactive = True )
GENERATE_SETTINGS [ " prompt " ] = gr . Textbox ( lines = 1 , label = " Custom Emotion " , visible = False )
GENERATE_SETTINGS [ " voice " ] = gr . Dropdown ( choices = voice_list_with_defaults , label = " Voice " , type = " value " , value = voice_list_with_defaults [ 0 ] ) # it'd be very cash money if gradio was able to default to the first value in the list without this shit
GENERATE_SETTINGS [ " mic_audio " ] = gr . Audio ( label = " Microphone Source " , source = " microphone " , type = " filepath " , visible = False )
GENERATE_SETTINGS [ " voice_latents_chunks " ] = gr . Number ( label = " Voice Chunks " , precision = 0 , value = 0 )
2023-02-24 12:58:41 +00:00
with gr . Row ( ) :
refresh_voices = gr . Button ( value = " Refresh Voice List " )
recompute_voice_latents = gr . Button ( value = " (Re)Compute Voice Latents " )
2023-02-22 03:31:46 +00:00
2023-03-09 00:26:47 +00:00
GENERATE_SETTINGS [ " voice " ] . change (
2023-02-22 03:31:46 +00:00
fn = update_baseline_for_latents_chunks ,
2023-03-09 00:26:47 +00:00
inputs = GENERATE_SETTINGS [ " voice " ] ,
outputs = GENERATE_SETTINGS [ " voice_latents_chunks " ]
2023-02-22 03:31:46 +00:00
)
2023-03-09 00:26:47 +00:00
GENERATE_SETTINGS [ " voice " ] . change (
2023-03-05 23:55:27 +00:00
fn = lambda value : gr . update ( visible = value == " microphone " ) ,
2023-03-09 00:26:47 +00:00
inputs = GENERATE_SETTINGS [ " voice " ] ,
outputs = GENERATE_SETTINGS [ " mic_audio " ] ,
2023-03-05 23:55:27 +00:00
)
2023-02-17 00:08:27 +00:00
with gr . Column ( ) :
2023-03-09 00:26:47 +00:00
GENERATE_SETTINGS [ " candidates " ] = gr . Slider ( value = 1 , minimum = 1 , maximum = 6 , step = 1 , label = " Candidates " )
GENERATE_SETTINGS [ " seed " ] = gr . Number ( value = 0 , precision = 0 , label = " Seed " )
2023-02-17 00:08:27 +00:00
2023-02-18 02:07:22 +00:00
preset = gr . Radio ( [ " Ultra Fast " , " Fast " , " Standard " , " High Quality " ] , label = " Preset " , type = " value " )
2023-02-17 00:08:27 +00:00
2023-03-09 00:26:47 +00:00
GENERATE_SETTINGS [ " num_autoregressive_samples " ] = gr . Slider ( value = 128 , minimum = 2 , maximum = 512 , step = 1 , label = " Samples " )
GENERATE_SETTINGS [ " diffusion_iterations " ] = gr . Slider ( value = 128 , minimum = 0 , maximum = 512 , step = 1 , label = " Iterations " )
GENERATE_SETTINGS [ " temperature " ] = gr . Slider ( value = 0.2 , minimum = 0 , maximum = 1 , step = 0.1 , label = " Temperature " )
2023-02-17 00:08:27 +00:00
show_experimental_settings = gr . Checkbox ( label = " Show Experimental Settings " )
reset_generation_settings_button = gr . Button ( value = " Reset to Default " )
with gr . Column ( visible = False ) as col :
experimental_column = col
2023-03-09 00:26:47 +00:00
GENERATE_SETTINGS [ " experimentals " ] = gr . CheckboxGroup ( [ " Half Precision " , " Conditioning-Free " ] , value = [ " Conditioning-Free " ] , label = " Experimental Flags " )
GENERATE_SETTINGS [ " breathing_room " ] = gr . Slider ( value = 8 , minimum = 1 , maximum = 32 , step = 1 , label = " Pause Size " )
GENERATE_SETTINGS [ " diffusion_sampler " ] = gr . Radio (
2023-03-05 23:55:27 +00:00
[ " P " , " DDIM " ] , # + ["K_Euler_A", "DPM++2M"],
value = " DDIM " , label = " Diffusion Samplers " , type = " value "
)
2023-03-09 00:26:47 +00:00
GENERATE_SETTINGS [ " cvvp_weight " ] = gr . Slider ( value = 0 , minimum = 0 , maximum = 1 , label = " CVVP Weight " )
GENERATE_SETTINGS [ " top_p " ] = gr . Slider ( value = 0.8 , minimum = 0 , maximum = 1 , label = " Top P " )
GENERATE_SETTINGS [ " diffusion_temperature " ] = gr . Slider ( value = 1.0 , minimum = 0 , maximum = 1 , label = " Diffusion Temperature " )
GENERATE_SETTINGS [ " length_penalty " ] = gr . Slider ( value = 1.0 , minimum = 0 , maximum = 8 , label = " Length Penalty " )
GENERATE_SETTINGS [ " repetition_penalty " ] = gr . Slider ( value = 2.0 , minimum = 0 , maximum = 8 , label = " Repetition Penalty " )
GENERATE_SETTINGS [ " cond_free_k " ] = gr . Slider ( value = 2.0 , minimum = 0 , maximum = 4 , label = " Conditioning-Free K " )
2023-02-17 00:08:27 +00:00
with gr . Column ( ) :
2023-02-23 23:22:23 +00:00
with gr . Row ( ) :
submit = gr . Button ( value = " Generate " )
stop = gr . Button ( value = " Stop " )
2023-02-17 00:08:27 +00:00
generation_results = gr . Dataframe ( label = " Results " , headers = [ " Seed " , " Time " ] , visible = False )
source_sample = gr . Audio ( label = " Source Sample " , visible = False )
output_audio = gr . Audio ( label = " Output " )
2023-02-21 03:00:45 +00:00
candidates_list = gr . Dropdown ( label = " Candidates " , type = " value " , visible = False , choices = [ " " ] , value = " " )
def change_candidate ( val ) :
if not val :
return
return val
candidates_list . change (
fn = change_candidate ,
inputs = candidates_list ,
outputs = output_audio ,
)
2023-02-17 00:08:27 +00:00
with gr . Tab ( " History " ) :
with gr . Row ( ) :
with gr . Column ( ) :
2023-03-09 00:26:47 +00:00
history_info = gr . Dataframe ( label = " Results " , headers = list ( HISTORY_HEADERS . keys ( ) ) )
2023-02-17 00:08:27 +00:00
with gr . Row ( ) :
with gr . Column ( ) :
2023-02-22 03:27:28 +00:00
history_voices = gr . Dropdown ( choices = result_voices , label = " Voice " , type = " value " , value = result_voices [ 0 ] if len ( result_voices ) > 0 else " " )
2023-02-17 00:08:27 +00:00
with gr . Column ( ) :
2023-02-21 03:00:45 +00:00
history_results_list = gr . Dropdown ( label = " Results " , type = " value " , interactive = True , value = " " )
2023-02-17 00:08:27 +00:00
with gr . Column ( ) :
history_audio = gr . Audio ( )
history_copy_settings_button = gr . Button ( value = " Copy Settings " )
with gr . Tab ( " Utilities " ) :
with gr . Row ( ) :
with gr . Column ( ) :
2023-02-18 02:07:22 +00:00
audio_in = gr . Files ( type = " file " , label = " Audio Input " , file_types = [ " audio " ] )
2023-02-17 00:08:27 +00:00
import_voice_name = gr . Textbox ( label = " Voice Name " )
import_voice_button = gr . Button ( value = " Import Voice " )
2023-03-05 23:55:27 +00:00
with gr . Column ( visible = False ) as col :
utilities_metadata_column = col
metadata_out = gr . JSON ( label = " Audio Metadata " )
copy_button = gr . Button ( value = " Copy Settings " )
latents_out = gr . File ( type = " binary " , label = " Voice Latents " )
2023-02-17 03:05:27 +00:00
with gr . Tab ( " Training " ) :
2023-02-17 06:01:14 +00:00
with gr . Tab ( " Prepare Dataset " ) :
2023-02-17 03:05:27 +00:00
with gr . Row ( ) :
2023-02-17 05:42:55 +00:00
with gr . Column ( ) :
2023-03-09 06:20:05 +00:00
DATASET_SETTINGS = { }
DATASET_SETTINGS [ ' voice ' ] = gr . Dropdown ( choices = voice_list , label = " Dataset Source " , type = " value " , value = voice_list [ 0 ] if len ( voice_list ) > 0 else " " )
with gr . Row ( ) :
DATASET_SETTINGS [ ' language ' ] = gr . Textbox ( label = " Language " , value = " en " )
2023-03-11 16:32:35 +00:00
DATASET_SETTINGS [ ' validation_text_length ' ] = gr . Number ( label = " Validation Text Length Threshold " , value = 12 , precision = 0 )
DATASET_SETTINGS [ ' validation_audio_length ' ] = gr . Number ( label = " Validation Audio Length Threshold " , value = 1 )
with gr . Row ( ) :
DATASET_SETTINGS [ ' skip ' ] = gr . Checkbox ( label = " Skip Already Transcribed " , value = False )
DATASET_SETTINGS [ ' slice ' ] = gr . Checkbox ( label = " Slice Segments " , value = False )
2023-03-09 06:20:05 +00:00
with gr . Row ( ) :
transcribe_button = gr . Button ( value = " Transcribe " )
2023-03-11 17:27:01 +00:00
prepare_validation_button = gr . Button ( value = " (Re)Create Validation Dataset " )
slice_dataset_button = gr . Button ( value = " (Re)Slice Audio " )
2023-03-09 06:20:05 +00:00
2023-03-11 16:32:35 +00:00
with gr . Row ( ) :
EXEC_SETTINGS [ ' whisper_backend ' ] = gr . Dropdown ( WHISPER_BACKENDS , label = " Whisper Backends " , value = args . whisper_backend )
EXEC_SETTINGS [ ' whisper_model ' ] = gr . Dropdown ( WHISPER_MODELS , label = " Whisper Model " , value = args . whisper_model )
2023-03-09 06:20:05 +00:00
dataset_settings = list ( DATASET_SETTINGS . values ( ) )
2023-02-18 02:07:22 +00:00
with gr . Column ( ) :
prepare_dataset_output = gr . TextArea ( label = " Console Output " , interactive = False , max_lines = 8 )
2023-02-17 06:01:14 +00:00
with gr . Tab ( " Generate Configuration " ) :
with gr . Row ( ) :
2023-02-17 03:05:27 +00:00
with gr . Column ( ) :
2023-03-09 00:26:47 +00:00
TRAINING_SETTINGS [ " epochs " ] = gr . Number ( label = " Epochs " , value = 500 , precision = 0 )
2023-02-23 23:22:23 +00:00
with gr . Row ( ) :
2023-03-09 14:17:01 +00:00
TRAINING_SETTINGS [ " learning_rate " ] = gr . Slider ( label = " Learning Rate " , value = 1e-5 , minimum = 0 , maximum = 1e-4 , step = 1e-6 )
TRAINING_SETTINGS [ " text_ce_lr_weight " ] = gr . Slider ( label = " Text_CE LR Ratio " , value = 0.01 , minimum = 0 , maximum = 1 )
2023-03-09 00:26:47 +00:00
2023-03-09 14:17:01 +00:00
with gr . Row ( ) :
lr_schemes = list ( LEARNING_RATE_SCHEMES . keys ( ) )
TRAINING_SETTINGS [ " learning_rate_scheme " ] = gr . Radio ( lr_schemes , label = " Learning Rate Scheme " , value = lr_schemes [ 0 ] , type = " value " )
TRAINING_SETTINGS [ " learning_rate_schedule " ] = gr . Textbox ( label = " Learning Rate Schedule " , placeholder = str ( LEARNING_RATE_SCHEDULE ) , visible = True )
TRAINING_SETTINGS [ " learning_rate_restarts " ] = gr . Number ( label = " Learning Rate Restarts " , value = 4 , precision = 0 , visible = False )
TRAINING_SETTINGS [ " learning_rate_scheme " ] . change (
fn = lambda x : ( gr . update ( visible = x == lr_schemes [ 0 ] ) , gr . update ( visible = x == lr_schemes [ 1 ] ) ) ,
inputs = TRAINING_SETTINGS [ " learning_rate_scheme " ] ,
outputs = [
TRAINING_SETTINGS [ " learning_rate_schedule " ] ,
TRAINING_SETTINGS [ " learning_rate_restarts " ] ,
]
)
2023-02-23 23:22:23 +00:00
with gr . Row ( ) :
2023-03-09 00:26:47 +00:00
TRAINING_SETTINGS [ " batch_size " ] = gr . Number ( label = " Batch Size " , value = 128 , precision = 0 )
TRAINING_SETTINGS [ " gradient_accumulation_size " ] = gr . Number ( label = " Gradient Accumulation Size " , value = 4 , precision = 0 )
2023-02-23 23:22:23 +00:00
with gr . Row ( ) :
2023-03-09 00:26:47 +00:00
TRAINING_SETTINGS [ " save_rate " ] = gr . Number ( label = " Save Frequency (in epochs) " , value = 5 , precision = 0 )
TRAINING_SETTINGS [ " validation_rate " ] = gr . Number ( label = " Validation Frequency (in epochs) " , value = 5 , precision = 0 )
2023-03-05 05:17:19 +00:00
with gr . Row ( ) :
2023-03-09 00:26:47 +00:00
TRAINING_SETTINGS [ " half_p " ] = gr . Checkbox ( label = " Half Precision " , value = args . training_default_halfp )
TRAINING_SETTINGS [ " bitsandbytes " ] = gr . Checkbox ( label = " BitsAndBytes " , value = args . training_default_bnb )
2023-03-05 05:17:19 +00:00
2023-03-09 02:08:06 +00:00
with gr . Row ( ) :
TRAINING_SETTINGS [ " workers " ] = gr . Number ( label = " Worker Processes " , value = 2 , precision = 0 )
TRAINING_SETTINGS [ " gpus " ] = gr . Number ( label = " GPUs " , value = get_device_count ( ) , precision = 0 )
2023-03-09 02:25:32 +00:00
2023-03-09 00:26:47 +00:00
TRAINING_SETTINGS [ " source_model " ] = gr . Dropdown ( choices = autoregressive_models , label = " Source Model " , type = " value " , value = autoregressive_models [ 0 ] )
2023-03-09 02:27:20 +00:00
TRAINING_SETTINGS [ " resume_state " ] = gr . Textbox ( label = " Resume State Path " , placeholder = " ./training/$ {voice} /finetune/training_state/$ {last_state} .state " )
2023-03-09 00:26:47 +00:00
TRAINING_SETTINGS [ " voice " ] = gr . Dropdown ( choices = dataset_list , label = " Dataset " , type = " value " , value = dataset_list [ 0 ] if len ( dataset_list ) else " " )
2023-02-26 01:57:56 +00:00
2023-02-23 23:22:23 +00:00
with gr . Row ( ) :
2023-03-09 00:26:47 +00:00
training_refresh_dataset = gr . Button ( value = " Refresh Dataset List " )
training_import_settings = gr . Button ( value = " Reuse/Import Dataset " )
2023-02-18 02:07:22 +00:00
with gr . Column ( ) :
2023-03-09 00:26:47 +00:00
training_configuration_output = gr . TextArea ( label = " Console Output " , interactive = False , max_lines = 8 )
2023-02-23 23:22:23 +00:00
with gr . Row ( ) :
2023-03-09 00:26:47 +00:00
training_optimize_configuration = gr . Button ( value = " Validate Training Configuration " )
training_save_configuration = gr . Button ( value = " Save Training Configuration " )
2023-02-18 02:07:22 +00:00
with gr . Tab ( " Run Training " ) :
2023-02-17 16:29:27 +00:00
with gr . Row ( ) :
with gr . Column ( ) :
2023-02-18 14:51:00 +00:00
training_configs = gr . Dropdown ( label = " Training Configuration " , choices = get_training_list ( ) )
2023-03-09 05:54:08 +00:00
refresh_configs = gr . Button ( value = " Refresh Configurations " )
training_output = gr . TextArea ( label = " Console Output " , interactive = False , max_lines = 8 )
verbose_training = gr . Checkbox ( label = " Verbose Console Output " , value = True )
training_keep_x_past_datasets = gr . Slider ( label = " Keep X Previous States " , minimum = 0 , maximum = 8 , value = 0 , step = 1 )
2023-02-23 23:22:23 +00:00
with gr . Row ( ) :
2023-03-09 05:54:08 +00:00
start_training_button = gr . Button ( value = " Train " )
stop_training_button = gr . Button ( value = " Stop " )
reconnect_training_button = gr . Button ( value = " Reconnect " )
2023-03-02 01:35:12 +00:00
2023-03-09 05:54:08 +00:00
with gr . Column ( ) :
2023-03-01 19:32:11 +00:00
training_loss_graph = gr . LinePlot ( label = " Training Metrics " ,
x = " step " ,
y = " value " ,
title = " Training Metrics " ,
2023-02-28 06:18:18 +00:00
color = " type " ,
2023-03-01 19:32:11 +00:00
tooltip = [ ' step ' , ' value ' , ' type ' ] ,
2023-03-09 05:54:08 +00:00
width = 500 ,
height = 350 ,
)
training_lr_graph = gr . LinePlot ( label = " Training Metrics " ,
x = " step " ,
y = " value " ,
title = " Training Metrics " ,
color = " type " ,
tooltip = [ ' step ' , ' value ' , ' type ' ] ,
width = 500 ,
2023-03-02 01:35:12 +00:00
height = 350 ,
2023-02-28 01:01:50 +00:00
)
2023-03-02 01:35:12 +00:00
view_losses = gr . Button ( value = " View Losses " )
2023-02-17 00:08:27 +00:00
with gr . Tab ( " Settings " ) :
with gr . Row ( ) :
exec_inputs = [ ]
with gr . Column ( ) :
2023-03-09 00:26:47 +00:00
EXEC_SETTINGS [ ' listen ' ] = gr . Textbox ( label = " Listen " , value = args . listen , placeholder = " 127.0.0.1:7860/ " )
EXEC_SETTINGS [ ' share ' ] = gr . Checkbox ( label = " Public Share Gradio " , value = args . share )
EXEC_SETTINGS [ ' check_for_updates ' ] = gr . Checkbox ( label = " Check For Updates " , value = args . check_for_updates )
EXEC_SETTINGS [ ' models_from_local_only ' ] = gr . Checkbox ( label = " Only Load Models Locally " , value = args . models_from_local_only )
EXEC_SETTINGS [ ' low_vram ' ] = gr . Checkbox ( label = " Low VRAM " , value = args . low_vram )
EXEC_SETTINGS [ ' embed_output_metadata ' ] = gr . Checkbox ( label = " Embed Output Metadata " , value = args . embed_output_metadata )
EXEC_SETTINGS [ ' latents_lean_and_mean ' ] = gr . Checkbox ( label = " Slimmer Computed Latents " , value = args . latents_lean_and_mean )
EXEC_SETTINGS [ ' voice_fixer ' ] = gr . Checkbox ( label = " Use Voice Fixer on Generated Output " , value = args . voice_fixer )
EXEC_SETTINGS [ ' voice_fixer_use_cuda ' ] = gr . Checkbox ( label = " Use CUDA for Voice Fixer " , value = args . voice_fixer_use_cuda )
EXEC_SETTINGS [ ' force_cpu_for_conditioning_latents ' ] = gr . Checkbox ( label = " Force CPU for Conditioning Latents " , value = args . force_cpu_for_conditioning_latents )
EXEC_SETTINGS [ ' defer_tts_load ' ] = gr . Checkbox ( label = " Do Not Load TTS On Startup " , value = args . defer_tts_load )
EXEC_SETTINGS [ ' prune_nonfinal_outputs ' ] = gr . Checkbox ( label = " Delete Non-Final Output " , value = args . prune_nonfinal_outputs )
EXEC_SETTINGS [ ' device_override ' ] = gr . Textbox ( label = " Device Override " , value = args . device_override )
2023-02-17 00:08:27 +00:00
with gr . Column ( ) :
2023-03-09 00:26:47 +00:00
EXEC_SETTINGS [ ' sample_batch_size ' ] = gr . Number ( label = " Sample Batch Size " , precision = 0 , value = args . sample_batch_size )
EXEC_SETTINGS [ ' concurrency_count ' ] = gr . Number ( label = " Gradio Concurrency Count " , precision = 0 , value = args . concurrency_count )
EXEC_SETTINGS [ ' autocalculate_voice_chunk_duration_size ' ] = gr . Number ( label = " Auto-Calculate Voice Chunk Duration (in seconds) " , precision = 0 , value = args . autocalculate_voice_chunk_duration_size )
EXEC_SETTINGS [ ' output_volume ' ] = gr . Slider ( label = " Output Volume " , minimum = 0 , maximum = 2 , value = args . output_volume )
2023-02-18 14:10:26 +00:00
2023-03-09 00:26:47 +00:00
EXEC_SETTINGS [ ' autoregressive_model ' ] = gr . Dropdown ( choices = autoregressive_models , label = " Autoregressive Model " , value = args . autoregressive_model if args . autoregressive_model else autoregressive_models [ 0 ] )
2023-02-27 19:20:06 +00:00
2023-03-09 00:26:47 +00:00
EXEC_SETTINGS [ ' vocoder_model ' ] = gr . Dropdown ( VOCODERS , label = " Vocoder " , value = args . vocoder_model if args . vocoder_model else VOCODERS [ - 1 ] )
2023-03-11 16:32:35 +00:00
2023-02-27 19:20:06 +00:00
2023-03-09 00:26:47 +00:00
EXEC_SETTINGS [ ' training_default_halfp ' ] = TRAINING_SETTINGS [ ' half_p ' ]
EXEC_SETTINGS [ ' training_default_bnb ' ] = TRAINING_SETTINGS [ ' bitsandbytes ' ]
2023-02-27 19:20:06 +00:00
2023-02-24 12:58:41 +00:00
with gr . Row ( ) :
autoregressive_models_update_button = gr . Button ( value = " Refresh Model List " )
gr . Button ( value = " Check for Updates " ) . click ( check_for_updates )
gr . Button ( value = " (Re)Load TTS " ) . click (
reload_tts ,
2023-03-09 00:26:47 +00:00
inputs = EXEC_SETTINGS [ ' autoregressive_model ' ] ,
2023-02-24 12:58:41 +00:00
outputs = None
)
2023-03-03 21:13:48 +00:00
# kill_button = gr.Button(value="Close UI")
2023-02-24 12:58:41 +00:00
def update_model_list_proxy ( val ) :
autoregressive_models = get_autoregressive_models ( )
if val not in autoregressive_models :
val = autoregressive_models [ 0 ]
return gr . update ( choices = autoregressive_models , value = val )
autoregressive_models_update_button . click (
update_model_list_proxy ,
2023-03-09 00:26:47 +00:00
inputs = EXEC_SETTINGS [ ' autoregressive_model ' ] ,
outputs = EXEC_SETTINGS [ ' autoregressive_model ' ] ,
2023-02-24 12:58:41 +00:00
)
2023-02-21 03:00:45 +00:00
2023-03-09 00:26:47 +00:00
exec_inputs = list ( EXEC_SETTINGS . values ( ) )
for k in EXEC_SETTINGS :
EXEC_SETTINGS [ k ] . change ( fn = update_args_proxy , inputs = exec_inputs )
2023-02-27 19:20:06 +00:00
2023-03-09 00:26:47 +00:00
EXEC_SETTINGS [ ' autoregressive_model ' ] . change (
2023-02-27 19:20:06 +00:00
fn = update_autoregressive_model ,
2023-03-09 00:26:47 +00:00
inputs = EXEC_SETTINGS [ ' autoregressive_model ' ] ,
2023-02-27 19:20:06 +00:00
outputs = None
)
2023-02-17 00:08:27 +00:00
2023-03-09 00:26:47 +00:00
EXEC_SETTINGS [ ' vocoder_model ' ] . change (
2023-03-07 02:45:22 +00:00
fn = update_vocoder_model ,
2023-03-09 00:26:47 +00:00
inputs = EXEC_SETTINGS [ ' vocoder_model ' ] ,
2023-03-07 02:45:22 +00:00
outputs = None
)
2023-02-21 03:00:45 +00:00
history_voices . change (
2023-02-18 02:07:22 +00:00
fn = history_view_results ,
inputs = history_voices ,
outputs = [
history_info ,
history_results_list ,
]
)
2023-02-21 03:00:45 +00:00
history_results_list . change (
2023-02-18 02:07:22 +00:00
fn = lambda voice , file : f " ./results/ { voice } / { file } " ,
inputs = [
history_voices ,
history_results_list ,
] ,
outputs = history_audio
)
audio_in . upload (
fn = read_generate_settings_proxy ,
inputs = audio_in ,
outputs = [
metadata_out ,
latents_out ,
2023-03-05 23:55:27 +00:00
import_voice_name ,
utilities_metadata_column ,
2023-02-18 02:07:22 +00:00
]
)
import_voice_button . click (
fn = import_voices_proxy ,
inputs = [
audio_in ,
import_voice_name ,
] ,
outputs = import_voice_name #console_output
)
show_experimental_settings . change (
fn = lambda x : gr . update ( visible = x ) ,
inputs = show_experimental_settings ,
outputs = experimental_column
)
preset . change ( fn = update_presets ,
inputs = preset ,
outputs = [
2023-03-09 00:26:47 +00:00
GENERATE_SETTINGS [ ' num_autoregressive_samples ' ] ,
GENERATE_SETTINGS [ ' diffusion_iterations ' ] ,
2023-02-18 02:07:22 +00:00
] ,
)
2023-03-07 03:55:35 +00:00
recompute_voice_latents . click ( compute_latents_proxy ,
2023-02-18 02:07:22 +00:00
inputs = [
2023-03-09 00:26:47 +00:00
GENERATE_SETTINGS [ ' voice ' ] ,
GENERATE_SETTINGS [ ' voice_latents_chunks ' ] ,
2023-02-18 02:07:22 +00:00
] ,
2023-03-09 00:26:47 +00:00
outputs = GENERATE_SETTINGS [ ' voice ' ] ,
2023-02-18 02:07:22 +00:00
)
2023-03-09 00:26:47 +00:00
GENERATE_SETTINGS [ ' emotion ' ] . change (
2023-03-05 23:55:27 +00:00
fn = lambda value : gr . update ( visible = value == " Custom " ) ,
2023-03-09 00:26:47 +00:00
inputs = GENERATE_SETTINGS [ ' emotion ' ] ,
outputs = GENERATE_SETTINGS [ ' prompt ' ]
2023-02-18 02:07:22 +00:00
)
2023-03-09 00:26:47 +00:00
GENERATE_SETTINGS [ ' mic_audio ' ] . change ( fn = lambda value : gr . update ( value = " microphone " ) ,
inputs = GENERATE_SETTINGS [ ' mic_audio ' ] ,
outputs = GENERATE_SETTINGS [ ' voice ' ]
2023-02-18 02:07:22 +00:00
)
2023-02-17 00:08:27 +00:00
refresh_voices . click ( update_voices ,
inputs = None ,
outputs = [
2023-03-09 00:26:47 +00:00
GENERATE_SETTINGS [ ' voice ' ] ,
2023-02-18 02:07:22 +00:00
dataset_settings [ 0 ] ,
2023-02-17 00:08:27 +00:00
history_voices
]
)
2023-03-09 00:26:47 +00:00
generate_settings = list ( GENERATE_SETTINGS . values ( ) )
2023-02-17 00:08:27 +00:00
submit . click (
2023-02-21 03:00:45 +00:00
lambda : ( gr . update ( visible = False ) , gr . update ( visible = False ) , gr . update ( visible = False ) ) ,
outputs = [ source_sample , candidates_list , generation_results ] ,
2023-02-17 00:08:27 +00:00
)
2023-03-09 00:26:47 +00:00
submit_event = submit . click ( generate_proxy ,
inputs = generate_settings ,
2023-02-21 03:00:45 +00:00
outputs = [ output_audio , source_sample , candidates_list , generation_results ] ,
2023-03-06 05:21:33 +00:00
api_name = " generate " ,
2023-02-17 00:08:27 +00:00
)
2023-03-09 18:34:52 +00:00
copy_button . click ( import_generate_settings_proxy ,
2023-02-17 00:08:27 +00:00
inputs = audio_in , # JSON elements cannot be used as inputs
2023-03-09 00:26:47 +00:00
outputs = generate_settings
2023-02-17 00:08:27 +00:00
)
reset_generation_settings_button . click (
2023-03-10 22:35:32 +00:00
fn = reset_generation_settings ,
2023-02-17 00:08:27 +00:00
inputs = None ,
2023-03-09 00:26:47 +00:00
outputs = generate_settings
2023-02-17 00:08:27 +00:00
)
history_copy_settings_button . click ( history_copy_settings ,
inputs = [
history_voices ,
history_results_list ,
] ,
2023-03-09 00:26:47 +00:00
outputs = generate_settings
2023-02-17 00:08:27 +00:00
)
2023-02-18 14:51:00 +00:00
refresh_configs . click (
lambda : gr . update ( choices = get_training_list ( ) ) ,
inputs = None ,
outputs = training_configs
)
2023-02-18 02:07:22 +00:00
start_training_button . click ( run_training ,
2023-02-19 05:05:30 +00:00
inputs = [
training_configs ,
verbose_training ,
2023-03-07 20:38:31 +00:00
training_keep_x_past_datasets ,
2023-02-19 05:05:30 +00:00
] ,
2023-02-28 01:01:50 +00:00
outputs = [
training_output ,
] ,
)
training_output . change (
fn = update_training_dataplot ,
inputs = None ,
outputs = [
training_loss_graph ,
2023-03-09 05:54:08 +00:00
training_lr_graph ,
2023-02-28 01:01:50 +00:00
] ,
show_progress = False ,
2023-02-18 02:07:22 +00:00
)
2023-03-02 01:35:12 +00:00
view_losses . click (
fn = update_training_dataplot ,
inputs = [
training_configs
] ,
outputs = [
training_loss_graph ,
2023-03-09 05:54:08 +00:00
training_lr_graph ,
2023-03-02 01:35:12 +00:00
] ,
)
2023-02-18 02:07:22 +00:00
stop_training_button . click ( stop_training ,
inputs = None ,
outputs = training_output #console_output
)
2023-02-23 06:24:54 +00:00
reconnect_training_button . click ( reconnect_training ,
inputs = [
verbose_training ,
] ,
outputs = training_output #console_output
)
2023-03-08 02:58:00 +00:00
transcribe_button . click (
2023-02-18 02:07:22 +00:00
prepare_dataset_proxy ,
inputs = dataset_settings ,
outputs = prepare_dataset_output #console_output
)
2023-03-08 02:58:00 +00:00
prepare_validation_button . click (
prepare_validation_dataset ,
inputs = [
dataset_settings [ 0 ] ,
2023-03-11 16:32:35 +00:00
DATASET_SETTINGS [ ' validation_text_length ' ] ,
DATASET_SETTINGS [ ' validation_audio_length ' ] ,
2023-03-08 02:58:00 +00:00
] ,
outputs = prepare_dataset_output #console_output
)
2023-03-11 17:27:01 +00:00
slice_dataset_button . click (
slice_dataset ,
inputs = [
dataset_settings [ 0 ]
] ,
outputs = prepare_dataset_output
)
2023-03-09 00:26:47 +00:00
training_refresh_dataset . click (
2023-02-18 14:51:00 +00:00
lambda : gr . update ( choices = get_dataset_list ( ) ) ,
inputs = None ,
2023-03-09 00:26:47 +00:00
outputs = TRAINING_SETTINGS [ " voice " ] ,
2023-02-18 14:51:00 +00:00
)
2023-03-09 00:26:47 +00:00
training_settings = list ( TRAINING_SETTINGS . values ( ) )
training_optimize_configuration . click ( optimize_training_settings_proxy ,
2023-02-19 20:22:03 +00:00
inputs = training_settings ,
2023-03-09 00:26:47 +00:00
outputs = training_settings [ : - 1 ] + [ training_configuration_output ] #console_output
2023-02-19 20:22:03 +00:00
)
2023-03-09 00:26:47 +00:00
training_import_settings . click ( import_training_settings_proxy ,
inputs = TRAINING_SETTINGS [ ' voice ' ] ,
outputs = training_settings [ : - 1 ] + [ training_configuration_output ] #console_output
2023-02-23 23:22:23 +00:00
)
2023-03-09 00:26:47 +00:00
training_save_configuration . click ( save_training_settings_proxy ,
2023-02-18 02:07:22 +00:00
inputs = training_settings ,
2023-03-09 00:26:47 +00:00
outputs = training_configuration_output #console_output
2023-03-03 21:13:48 +00:00
)
2023-02-17 00:08:27 +00:00
if os . path . isfile ( ' ./config/generate.json ' ) :
2023-03-09 18:34:52 +00:00
ui . load ( import_generate_settings_proxy , inputs = None , outputs = generate_settings )
2023-02-17 00:08:27 +00:00
if args . check_for_updates :
ui . load ( check_for_updates )
2023-02-24 23:13:13 +00:00
stop . click ( fn = cancel_generate , inputs = None , outputs = None )
2023-02-17 00:08:27 +00:00
ui . queue ( concurrency_count = args . concurrency_count )
webui = ui
return webui