more windows specific fixes, limit gradio to <5.0.0 on linux (it works on windows, but not on my linux machine tm)

This commit is contained in:
mrq 2024-11-04 18:00:33 -06:00
parent c83670c38c
commit 9e65e05e83
4 changed files with 11 additions and 4 deletions

View File

@ -12,8 +12,12 @@ Besides a working PyTorch environment, the only hard requirement is [`espeak-ng`
- Linux users can consult their package managers on installing `espeak`/`espeak-ng`.
- Windows users are required to install [`espeak-ng`](https://github.com/espeak-ng/espeak-ng/releases/tag/1.51#Assets).
+ additionally, you may be required to set the `PHONEMIZER_ESPEAK_LIBRARY` environment variable to specify the path to `libespeak-ng.dll`.
+ Simply running `set PHONEMIZER_ESPEAK_LIBRARY="C:\Program Files\eSpeak NG\libespeak-ng.dll"` beforehand should fix this.
- In the future, an internal homebrew to replace this would be fantastic.
AMD systems with ROCm are *mostly* supported, but performance ***will*** vary.
- ROCm is simply too inconsistent with outputs.
## Install
Simply run `pip install git+https://git.ecker.tech/mrq/vall-e` or `pip install git+https://github.com/e-c-k-e-r/vall-e`.

View File

@ -72,8 +72,8 @@ setup(
"vocos",
"descript-audio-codec",
# gradio web UI
"gradio"
# gradio web UI (my linux install doesn't like 5.x, windows is fine)
f"gradio{'<5.0.0' if not sys.platform.startswith('win') else ''}"
],
extras_require = {

View File

@ -289,12 +289,12 @@ def main():
# generate demo output
for dir in tqdm(speakers, desc=f"Generating demo for {k}"):
text = open(dir / "prompt.txt").read()
text = open(dir / "prompt.txt", encoding="utf-8").read()
language = open(dir / "language.txt").read() if (dir / "language.txt").exists() else "en"
prompt = dir / "prompt.wav"
reference = dir / "reference.wav"
out_path = dir / "out" / "ours.wav"
out_path_comparison = dir / "out" / f"ours_{comparison_kwargs["suffix"]}.wav"
out_path_comparison = dir / "out" / f"ours_{comparison_kwargs['suffix']}.wav"
external_sources = [ dir / "out" / f"{source}.wav" for source in sources ]
audio_samples = [ prompt, out_path ]

View File

@ -539,10 +539,13 @@ class Base(nn.Module):
self.len_emb = Embedding(11, d_model) if "len" in self.capabilities else None
if attention_backend == "auto":
attention_backend = "sdpa"
"""
if AVAILABLE_ATTENTIONS:
attention_backend = AVAILABLE_ATTENTIONS[0]
else:
attention_backend = "default"
"""
hf_attention = attention_backend
HF_ATTENTIONS = ["eager", "sdpa", "flash_attention_2"]