From 215800484dbfce434cbe2f1393a69eb9f0d0fc78 Mon Sep 17 00:00:00 2001 From: mrq Date: Sat, 4 May 2024 23:49:15 -0500 Subject: [PATCH] correcting my wrong of assuming I could just use raw 24Khz audio in the 44Khz DAC without too much of an issue (there are issues) --- data/qnt.dac | Bin 4123 -> 5593 bytes vall_e/config.py | 17 ++++++++++++++--- vall_e/emb/qnt.py | 24 +++++++++++++----------- 3 files changed, 27 insertions(+), 14 deletions(-) diff --git a/data/qnt.dac b/data/qnt.dac index f4594c11180ac931430a4365ed000ff2b966f6a1..20ba61a81d017dfae928e97494bb72db6c3cca66 100644 GIT binary patch literal 5593 zcmbtYcYMxw*S^jEx2<~yKaJk7~&JN(W&|M%rO+#=(W zW8+f-A_pWm0%DV6lKO@xB!s89h9x*62gD^NB_u~CxjOOpyIkX<{=Vy!K#X>3ONnsW z7-y~y&fLC%;`iS-MJ1)gJ6xT4LKMIk4V-z$jUD53>EO)g8)zZIYiUl`;NalT-tp_h zOJK7kXa0zBp-#7<&H}z6x%rmJ0nCQeJsQy;Rw@|HWeRn07UnYib_uE{W1K~z|6U@w zmb2*lC5m-$7XN37CeDxk_Yz2QmWUYFqE}JiqOL$Q?I9P<-?VY#C2xsBJAgtXpGQi+C=+Vf@bl)zpa-h8Ymmp zTl!glZH+)FDwE{0Y}XuK$+1(spJR71N#6454BMt{7||u%hd(0 zt~_DP)$rJQNM4y{hcs0N@%4me$pLG`INMr?t(8!Gk6T!y53!RmO-3Gjs5!_h+hnoi zu|>$O=Oon{+jKc@=~$+R(ao0OK3~z0e9_4dGwG)=% zDGswr9@c`9&ybN=t)9}BeG?(unc=a3`QtifN(QRPEi^$j{;bwYaugfnPu(d_9f*T^ z-}cEv%hI0M0WXWjR8+)bJ8pe+p?V?2j!Gfiu#%{*@rcyItjJWG2QQ>qcjh8m+d(Xg z5#-ihwwRgiXESkG_Tv(gZN8l0T3uL)Wc!L)eP7LrM2L%2ksJD0u44%{*%51|3y^`M z+D#qUq2F3N9M>=_U~v{By)0Y4*1{5lXV%9WzypOO9DSuFcAEn?w4TODAxqcqHO&gk zYjd*{S%@CCQ^&{~xo(AZzSd+-U8S}PQqU4v{M5zjB70@Gd_#o$$z8P4y4nMW^@#1l zTQtKP&BS%>g}>AjuVtV;l2J&+WXzK=X6Z-0ix*tso|+cZwHRsZ`Pr2%3f(k_8Ozk` z8ifvgT`tv`r4Z)yx)qhdI+uI>!Rkn5dx_3mp%~ssDchm7^(C6f4e4QRWVSxW5WS}B zrHqZ0YF5TFnT<6XVYN`m{?gg#tAA*H*`qaZ#yk-(5!Rk@_0k|5gu7JJz3h@9_8a@+ zXB);0F1NPq1rLmoAg;7R`r(O8)Thk+A6nchX*I^_k3Te2ylg!7YALNC z+LkEq^(7Yoxxg_xmo7wlV{rSWa|y)H^t7FLV`iq}VJmvX2liNnx`%a7V{be)7a1pmaR9l{(r(KTU8OJO z6pqTr{A-&0WjEvid({)3x?3A*E;(f!H4WQ0iBh{G~T zm$MVIF;`Y%k(R|vcG4uH)x1Ix4l?5B42lkQ~ybR(;Mtks!^L;A6`z;Z-lkT$iBmL~2pSiGdTB@?&V z2(Sjy93xSW{FZ9ovPfFv6O7U8vJ06gBYoI?gQcm2OJ^M@6}1%+zJeJzK%|bBi?Rio zx|CHK&2=isELJ-nFC@SGrc-Q$m9QpO(7d=4hZYr2J*!o;Ga0?MoU;ddKqCaaKSSSI zJtRtRs>CaOCF7X=PjwfjF}K%`jXvSipnFhdAgmWW@Xxb>!#UgPUeq8Z+V9c_Nx?- z2{PF_6RokjQSu}YJ%nNAXR9<_Cy*oV zN_XaXqn@e>lh>?wZZN{?t&*Bi&@Ps>Vwr^7UGjAT!6>uNIF|GEW0^jxAy= zm#HWl?4@Pf4b5-MEKVZWlW)|EY%rM^D{duF(cYoFB+6tmMmq$<&yMMM@uiM0kcsGI z1vOP8$s-MM!W>r1mT4`nHe9ReFfE9wauwfMZ@Gf;U>~6#dn&CtAfL}jRGYs||o zvLDOoD7#NocuNa8Di8I(#M&8DBA&WiVT+JXHq9=R?_KSHTx9iCy!EF1%KwX582R9| zWs(6(YIm*9zqZ+T^a#a~fM{keQ(KEayK184V1~|R<+o71t5A)**gU((+|9*lGE-&7 z<0=1=d^Sl7+CpYvv<6ElRpcR)ZITrlMHQJvJ{(|PvQj^?8oG%cRu~;n&@$2At|3AH zu!pvpYWIVzFtvZ_YIP-MZ(<(1s5rKuG8S2|w9z7Dq{~E2P3prJ%*rd7u6Kxc2X$Zz zM9S+zTZ3)#Sg+bKJQjy_!Bsg+W_zVkcFS@}uuhh2tB6rH60>!?HD_185g$4n58H`@ z*2^a9Dl4EzrM(5&6vi|}e5@;C$^PEb&@$L(Ypf`J#u*~yM|SraI;k`I0&#kqeYH_n z6II*P-=5oWyuvB6wlCE!2axkfWv@&nf?DhEvO`{J8yrIq`j)Qxn#y-sW@vFL`5<(r z2U;QvrI41!N-WTdIE2f=?`iiozgto~T` z(^zYZh1$@%Nf7(Mjj!8?_aTg~fPGBO`W?U6Re4OG;-z1pn)y(rT9YekXc0!cLL1o* zqPLl@#%Q|QAyN}7?49JpK20Mx6+kIXm*M(E()4p_FE8}298*{Q$&SfEq+3<8XCzk% zu$wrhbMPD1P)AZ^x#pvC-X<3I&`AZ7HDmCQY<5BJVX=rt>aQBe9-cvd86tPIH?Kr6XJu`( zq)Au#i(EWIcgtq>^ld3fZ2d%q&mqIb*g~x+<@6UVfk|91lx*Wkr&toD=@Ac-AyVP0 zN7V&=t-U;i1Ano;pR!M$viH8UU!)J+Qvi38CF%G|4~RRSp}s^}d*(dJ_MxHek_5j6P&BpM7ql z=@w4NaqW%_iKTl>LSa17!|e8z^d#P7kXA%RoLnML4Mz*RL}qHt-t8{6xQjewL3ZTeT;g6F0x9jd>X4>nOdioAa$?@kE|D!a1!d1-zMRVpZ2XzwN-Km zak5j}az=5d$2nqkt%ROJN0}?T*?GI9h&|!l@lsMSoKwqUGR+bDM4rlcGG}$f5U2a3 zsu+FuF5AKkFSF0IuqJZv-HFo8+Jzle4Cl43rC5H;r%kn__(~;7z*jbaF3h2+^bMsD zPS$&4(N+#+r3iiePHK28w%S~B(OfHQZLB5p(%M4sP@Bn9P6)s01@3r@?!%WDON4vK zW1K_*{LAj^R;os-6kyK{!B)D?ff8u{rZd<_MSjc)VS|+Bv~UY)T1I-%4^GxxnncF~ zPBI?$GyGXECwVu9s*+Cr8A8@=Y;|QKy+&v1VnfM^zV@|&`jiOlVqGyEXLYtV zLu2}d^>W1uV>o``Ueno)6Ra3rPY;|~BbzO?uas+p+6VZ`}lLxrB zG5XxTVmyIJu$|<0cWcHvpCd{ea1L9|JG@g~=TWP8Ok-E{H_kX)G=V$1L0@s2+)w`Gq_dn3VY*DTcErRo z>qkcM(DssvPwg~SeJF9%iD*jYL>#Q!WEu7LqEw)VE`=m!!-w-!b4L6XxqgBTmk`^{ zt_;E=ES5wva})aQpWp&-$)_ey@m8*~o3gb9VwtT(bx|En&~7bN%f<^d?{Gchs0|a3O#0llyv_9g|L< zo{PVO&`e&*b9&xOWJnjfzv*&KZeoT7*yr@u{%9;ah{f}A5+0mKt5ZMPA&X4+z{=9U zdy{_;Vx}cieY)VZzNWHo;DonYL-i3|`gS{~DV!BH%1yF#C0QV|Ig_`bjz`h~SLeA# zBX-Xen{RKiNs3y2Otd-LMWbyLr^w@E%jtY#k`$3W@U>(aX^FTk#q>FS`EuN0Ocj~+ zlAKlD$wSAf((9x@XZ1KbQKy_G_A;1@A2gU;Hc(s10;__XMd@v$q3I1b~b**l;ZP;dYtSobI-xQnZ2z$!}o@jVb&+4NpO3|M!lG{WXo%bUlg8-e zxc>jV*^dW{A9fiT9Ud3wh)Hz%2TDj@zAZjHDynZ>A7_=ZA3iE7JTh#cBO$TxfH-H> X4=0v218M}+a8^rB4o`9hL{W#b6^V{#Wxh~dT@0y-_TJs`a>-WkaViYn1 z;@oMmsR14R0>heSN!~Iq zjB#{|;}hwW2n=V?pPk~oV^Z9{PRS?%(7(M?YTDE(PU%ReOklW1(b~p2z7Y`-VNdMy z>`!<%uTwT=+EAz5c&B_|R0-=6n{21yR7gO=vzaO;*ffwFu{4qm% zE2rwy8LCA()&H8Ivs2^0XMoqK88fZts`9`bT`Y4n69X|$8exd$VySo;D7ol~VfM?{ zoWi5bgugV@6ZlBKMMI6&;ks8ZNLM*6^RNR4B}9X;96^|Y?I_ejovm+3z62s)YNIwj zX9pWEPhMvvw@N$YSZ_Bg@(W(DK7lAg3R8I8`i3GI=e4qZpSB0$xD?p?PoM3mj4!O$ zaQ(Zj##a*1n7w3(T-S~q$O*cZZwT2>mP<9+W~2U{LAW3#G>5I(Th_{GTjLxpYvX^x zhT2v32$+CwGL6OZ0KTZ8H}Ijo@v`{p49TMpKfz2rEU&Vw6w4*)PJgWAV)?>m&tslE z)Ny!#5IeaX9VffFgQK(C;dXL?u11rfz zESJU7T*Db7Z}Br!l`8V1KwIftA+9h(3-qC$lkRd#8cSarwSmskMfBq|G}jv(tE)82 z_UfmX1;xaEj|?rS0`gOw)7x#!kGY zc;z~p*!Ug|*ShEhKk1B{%+|XyQqSr>2IH>0COZL*mkIJReXLgX=_di0FVl6mh`fq; zcwRgKxDm=NW@0v5}ptTOUb=4&-NC#v9nl zZ#7%uuoxRy-DY{fdiasyhRx2B#2O4!vaDe&gpD*j@D4SPFR9`ca{;T1h8 zVfwZquLrM7hU7>-kIJtIlsDwQgrYx06J#VRvrvCR08|IF4S&_X$L^gAjgz=#~lvj9CuJ9Ck=^bf- z-nN1Yh%)xgl8LfP8);vR)K13w2(7J$^{RB{DoxN?(%b44EY0K|U*?xstpQ9y3aT5v z+vuzE3(m?3wBQ;;b_4lAZ{n0L!v?fNCSvpok7?kR0#;JTB zO>C{#^?TjI$$W<&%XyuTZJcE~^Q)A`ePg6?0Qp*zZDk!D6TkLI*I<;uhYUk?tuO6O zapp^u<`@q%v>OLVsCivGlZY$W}KH8R={pKpz~xSD#;qnr75M-7`2v1un7M|E)Mf7 z-Zicr<32veX#J-wl)lnao=6`Qnij5;Qik#dau;EEO?&7@Et1=Mibq%ytC*#iQ4;5n z&n^17j+I>5$=0U8dv&()s3jN42xiD2%$AutR9dL7EJQU{;$^L&o6t}5WH6TK0Zx!` zb$H2i>NYpAkz`@4t!1KI!)@F|kZI?Srb$cS!dtv#Rr(np=rtLkHLPl@ zrC7$z=t$V9-IOZE(@+%9YxT>=VAJxQ#=NSWt)X1W$NT`Ru|#}KBb&<(nWqC~n7%3Z zOv#U7n(0-xt>Q2{$}GHOJ1d2&;+X!Az;MjvZ`uOUSi*l8PBLU7FX?6NB?t8#sf;Vw zD#K(2Z)u{f>xg8s3|F87+aUr`SjY0Zh8v}j@0%7ll7l2exW7H^jZtn%GHrQ!N-v_i zG}SHAQ3b8E$n<70no0{!M}K)B5faQ#t?sK$0fy_R@`g^qW%&#f=+%STPwHzEbI_yk zN>lU7G;`fPtj8^KLiS60xvN?7zgixfP#|mUL?7F)1*mWAy`|Z_t&=brYY<{+xXxPa zh(e}lPd?GNq^)icA9e9-v@nGV(gXT77orMp7}vU^lFg6 zj$wBV7U&_`isTIYYBCqdc4SMSv@oQu#S=NiV{F0kIuDy{$N4fTq`3mWH!jqb8E^?a}wTpabSUqj7dWl)`r7?;Uk9m@T$L1J~ z<#)YrHL8cPrXTI)bE{i+u|NjNU30j-TxGm0#@o!tT-$fJ zW}B-1YI;@)?d6Q@$MbT>ywPnK8iQ|G17SRf{(6@+WGP*`0Yf;)Ru-nQvQ$c2#yFz` zv=dA7IgP+A4L8LR^h#1Rrt9dw9s-Ex#nn3+5gEvW4R~)c9aph`UzT z-O@o9V4*4AFgDS_IK%zATkqgy^P`_Etqia%l`o}rjMbow%-8++RtBJ*Y2rW@8Io(F zq8!KfmOlD1+fvB}+qaLNgCBRXj3k-{d@uE7xt+;POKTS-Sj&kA<0U~xTN+$vs_VfS z*^Ml#a~ZbRdZznlxY+PEpKZ;7bJ-lXI2MWAEx$;f`3j_`?Xn07sH2PUK!+F(hFF^Y zjwg*{xzbsGkc*b`wpuC~Z3(1_{J`%`#TRRcz4aJ*rhS%oCdS0B%609q6=QsR_Jaiww89~6Sg+>>&rY{F5S#8ezLuHwUECLzQ(A& z@*)OlORm>irb3O`m3{a+mNH6;%z3-8j-h)K8nP2t%Olj2N~SOG@sJ@q%JeJEl2VdB zqQCBCGM7jjIf8JxYO~&xN4lAAOEY!#J@dmN<7J_oGgnE}d2FR0o9Db`9^a0`bia9A zgl3wr=1Xh7q&pC*p?X_FINGY0q9t+2(y!=n&a_l~klm!Jj7JwkOa`*e$u??=T;gDD zi+2t2)AhKy>sre>d!?2f)kvk`su0(h!ONDfV=Z5fkbFFl{)Wnv)^{aanRokH2HIgM zWFL-Vh2^c4T*M)=0pZ+-P|G;~gLam9e>W8^qswtvwqcp2)Q08&%~95z>|JhVIm<-l zrJZ^#Lw>|9mc<^SiTU^$86bP*0RLv%{cn6_e*75MBwkKfg~B*oYSULwXs*QTbe6Tm zR98>yMdVomt-vOB(+RPvE!1wh0=q0VZ-Rp$Q!j_7B#J#OQxAm?8(XHODPaR?E3=Vi zUKG#rxT0a|g9SFi7G`Q6c^6rxV598r6vsYj#`%`9ZfYbBnfokcyk&q8JTx~uE%!MG zzsLncb)x)>5yr5YvKs4UjOpGCd~I4A#j3K_(&$wAmp)*QIDhjxwPK=5TNoPY_PXL+ zUYApQyi+IeSyQ4XCEXhx7vt3Z>wZ}8>HV;Nq|@Np{V>b%`}1x%tc~;B)4O5ANXOsq zh6(@oR@f+JS}&i1z=%SpvDbM%CaR)MnVgy!pXhNVMJKsE@!kX{AS`Y&5mnZHN^^}& zNpeT0y1Z^Du;0$1QRS>LHX+?J;;+s@;k%wKGB&~G@wk)HoF?InDrLP=Tyb%Uo_MF} m)Mq2bxniS7xl_{;lRZweXLsI~!6CsRPV@A1m)8l73H}d&3M=LS diff --git a/vall_e/config.py b/vall_e/config.py index 3a06730..38f775e 100755 --- a/vall_e/config.py +++ b/vall_e/config.py @@ -157,13 +157,24 @@ class Dataset: tasks_list: list[str] = field(default_factory=lambda: ["tts"]) - _frames_per_second: int = 0 # in encodec, each frame is 75 codes, in dac, each frame is 41 + _frames_per_second: int = 0 # allows setting your own hint @cached_property def frames_per_second(self): if self._frames_per_second > 0: return self._frames_per_second - return 41 if cfg.inference.audio_backend == "dac" else 75 + + if cfg.inference.audio_backend == "dac": + # using the 44KHz model with 24KHz sources has a frame rate of 41Hz + if cfg.variable_sample_rate and cfg.sample_rate == 24_000: + return 41 + if cfg.sample_rate == 44_000: + return 86 + if cfg.sample_rate == 16_000: + return 50 + + # 24Khz Encodec / Vocos and incidentally DAC are all at 75Hz + return 75 @property def min_phones(self): @@ -562,7 +573,7 @@ class Config(_Config): tokenizer: str = "./tokenizer.json" sample_rate: int = 24_000 - variable_sample_rate: bool = True # for DAC, this will override the model automatically resampling to 44KHz. + variable_sample_rate: bool = False # NOT recommended, as running directly 24Khz audio in the 44Khz DAC model will have detrimental quality loss @property def distributed(self): diff --git a/vall_e/emb/qnt.py b/vall_e/emb/qnt.py index 2983879..5b43f6c 100755 --- a/vall_e/emb/qnt.py +++ b/vall_e/emb/qnt.py @@ -143,17 +143,19 @@ def _load_vocos_model(device="cuda", levels=cfg.model.max_levels): @cache def _load_dac_model(device="cuda", levels=cfg.model.max_levels): - kwargs = dict(model_type="44khz",model_bitrate="8kbps",tag="latest") - - # yes there's a better way, something like f'{cfg.sample.rate//1000}hz' - if cfg.sample_rate == 44_000: - kwargs["model_type"] = "44kz" - elif cfg.sample_rate == 24_000: - kwargs["model_type"] = "24khz" - elif cfg.sample_rate == 16_000: - kwargs["model_type"] = "16khz" - else: - raise Exception(f'unsupported sample rate: {cfg.sample_rate}') + kwargs = dict(model_type="24khz",model_bitrate="8kbps",tag="latest") + """ + if not cfg.variable_sample_rate: + # yes there's a better way, something like f'{cfg.sample.rate//1000}hz' + if cfg.sample_rate == 44_000: + kwargs["model_type"] = "44kz" + elif cfg.sample_rate == 24_000: + kwargs["model_type"] = "24khz" + elif cfg.sample_rate == 16_000: + kwargs["model_type"] = "16khz" + else: + raise Exception(f'unsupported sample rate: {cfg.sample_rate}') + """ model = __load_dac_model(**kwargs) model = model.to(device)