forked from mrq/tortoise-tts
commit
aad67d0e78
|
@ -121,7 +121,7 @@ For those in the ML space: this is created by projecting a random vector ont
|
||||||
|
|
||||||
This repo comes with several pre-packaged voices. Voices prepended with "train_" came from the training set and perform
|
This repo comes with several pre-packaged voices. Voices prepended with "train_" came from the training set and perform
|
||||||
far better than the others. If your goal is high quality speech, I recommend you pick one of them. If you want to see
|
far better than the others. If your goal is high quality speech, I recommend you pick one of them. If you want to see
|
||||||
what Tortoise can do for zero-shot mimicing, take a look at the others.
|
what Tortoise can do for zero-shot mimicking, take a look at the others.
|
||||||
|
|
||||||
### Adding a new voice
|
### Adding a new voice
|
||||||
|
|
||||||
|
|
|
@ -110,7 +110,7 @@ tuning_group.add_argument(
|
||||||
tuning_group.add_argument(
|
tuning_group.add_argument(
|
||||||
'--cvvp-amount', type=float, default=None,
|
'--cvvp-amount', type=float, default=None,
|
||||||
help='How much the CVVP model should influence the output.'
|
help='How much the CVVP model should influence the output.'
|
||||||
'Increasing this can in some cases reduce the likelyhood of multiple speakers.')
|
'Increasing this can in some cases reduce the likelihood of multiple speakers.')
|
||||||
tuning_group.add_argument(
|
tuning_group.add_argument(
|
||||||
'--diffusion-iterations', type=int, default=None,
|
'--diffusion-iterations', type=int, default=None,
|
||||||
help='Number of diffusion steps to perform. More steps means the network has more chances to iteratively'
|
help='Number of diffusion steps to perform. More steps means the network has more chances to iteratively'
|
||||||
|
|
|
@ -20,7 +20,7 @@ if __name__ == '__main__':
|
||||||
parser.add_argument('--seed', type=int, help='Random seed which can be used to reproduce results.', default=None)
|
parser.add_argument('--seed', type=int, help='Random seed which can be used to reproduce results.', default=None)
|
||||||
parser.add_argument('--produce_debug_state', type=bool, help='Whether or not to produce debug_state.pth, which can aid in reproducing problems. Defaults to true.', default=True)
|
parser.add_argument('--produce_debug_state', type=bool, help='Whether or not to produce debug_state.pth, which can aid in reproducing problems. Defaults to true.', default=True)
|
||||||
parser.add_argument('--cvvp_amount', type=float, help='How much the CVVP model should influence the output.'
|
parser.add_argument('--cvvp_amount', type=float, help='How much the CVVP model should influence the output.'
|
||||||
'Increasing this can in some cases reduce the likelyhood of multiple speakers. Defaults to 0 (disabled)', default=.0)
|
'Increasing this can in some cases reduce the likelihood of multiple speakers. Defaults to 0 (disabled)', default=.0)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
os.makedirs(args.output_path, exist_ok=True)
|
os.makedirs(args.output_path, exist_ok=True)
|
||||||
|
|
||||||
|
|
|
@ -43,7 +43,7 @@ def normalization(channels):
|
||||||
|
|
||||||
class QKVAttentionLegacy(nn.Module):
|
class QKVAttentionLegacy(nn.Module):
|
||||||
"""
|
"""
|
||||||
A module which performs QKV attention. Matches legacy QKVAttention + input/ouput heads shaping
|
A module which performs QKV attention. Matches legacy QKVAttention + input/output heads shaping
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, n_heads):
|
def __init__(self, n_heads):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user