diff --git a/README.md b/README.md index 2159293..ec993fa 100644 --- a/README.md +++ b/README.md @@ -271,6 +271,7 @@ credit a few of the amazing folks in the community that have helped make this ha - [Ramesh et al](https://arxiv.org/pdf/2102.12092.pdf) who authored the DALLE paper, which is the inspiration behind Tortoise. - [Nichol and Dhariwal](https://arxiv.org/pdf/2102.09672.pdf) who authored the (revision of) the code that drives the diffusion model. - [Jang et al](https://arxiv.org/pdf/2106.07889.pdf) who developed and open-sourced univnet, the vocoder this repo uses. +- [Kim and Jung](https://github.com/mindslab-ai/univnet) who implemented the univnet pytorch model. - [lucidrains](https://github.com/lucidrains) who writes awesome open source pytorch models, many of which are used here. - [Patrick von Platen](https://huggingface.co/patrickvonplaten) whose guides on setting up wav2vec were invaluable to building my dataset. diff --git a/tortoise/models/vocoder.py b/tortoise/models/vocoder.py index 346f381..8b60dbd 100644 --- a/tortoise/models/vocoder.py +++ b/tortoise/models/vocoder.py @@ -223,7 +223,11 @@ class LVCBlock(torch.nn.Module): class UnivNetGenerator(nn.Module): - """UnivNet Generator""" + """ + UnivNet Generator + + Originally from https://github.com/mindslab-ai/univnet/blob/master/model/generator.py. + """ def __init__(self, noise_dim=64, channel_size=32, dilations=[1,3,9,27], strides=[8,8,4], lReLU_slope=.2, kpnet_conv_size=3, # Below are MEL configurations options that this generator requires.