Merge 7a67b709b1
into a5f05418bd
This commit is contained in:
commit
1d3ec4bacc
1
.gitignore
vendored
1
.gitignore
vendored
@ -7,3 +7,4 @@ __pycache__
|
|||||||
/*.egg-info
|
/*.egg-info
|
||||||
/vall_e/version.py
|
/vall_e/version.py
|
||||||
/build
|
/build
|
||||||
|
/venv
|
22
README.md
22
README.md
@ -6,21 +6,23 @@ An unofficial PyTorch implementation of [VALL-E](https://valle-demo.github.io/),
|
|||||||
|
|
||||||
[](https://www.buymeacoffee.com/enhuiz)
|
[](https://www.buymeacoffee.com/enhuiz)
|
||||||
|
|
||||||
## Install
|
|
||||||
|
|
||||||
### Install with pip
|
## Installation
|
||||||
|
|
||||||
```
|
Note that the code is only tested under `Python 3.10.7`.
|
||||||
|
|
||||||
|
### Install with `pip` (remote)
|
||||||
|
```bash
|
||||||
pip install git+https://github.com/enhuiz/vall-e
|
pip install git+https://github.com/enhuiz/vall-e
|
||||||
```
|
```
|
||||||
|
|
||||||
### Clone
|
### Install with `pip` (locally)
|
||||||
|
```bash
|
||||||
```
|
|
||||||
git clone --recurse-submodules https://github.com/enhuiz/vall-e.git
|
git clone --recurse-submodules https://github.com/enhuiz/vall-e.git
|
||||||
|
cd vall-e
|
||||||
|
pip install --editable .
|
||||||
```
|
```
|
||||||
|
|
||||||
Note that the code is only tested under `Python 3.10.7`.
|
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
@ -29,13 +31,13 @@ Note that the code is only tested under `Python 3.10.7`.
|
|||||||
2. Quantize the data:
|
2. Quantize the data:
|
||||||
|
|
||||||
```
|
```
|
||||||
python -m vall_e.emb.qnt data/your_data
|
valle-quantize data/your_data
|
||||||
```
|
```
|
||||||
|
|
||||||
3. Generate phonemes based on the text:
|
3. Generate phonemes based on the text:
|
||||||
|
|
||||||
```
|
```
|
||||||
python -m vall_e.emb.g2p data/your_data
|
valle-phonemes data/your_data
|
||||||
```
|
```
|
||||||
|
|
||||||
4. Customize your configuration by creating `config/your_data/ar.yml` and `config/your_data/nar.yml`. Refer to the example configs in `config/test` and `vall_e/config.py` for details. You may choose different model presets, check `vall_e/vall_e/__init__.py`.
|
4. Customize your configuration by creating `config/your_data/ar.yml` and `config/your_data/nar.yml`. Refer to the example configs in `config/test` and `vall_e/config.py` for details. You may choose different model presets, check `vall_e/vall_e/__init__.py`.
|
||||||
@ -43,7 +45,7 @@ python -m vall_e.emb.g2p data/your_data
|
|||||||
5. Train the AR or NAR model using the following scripts:
|
5. Train the AR or NAR model using the following scripts:
|
||||||
|
|
||||||
```
|
```
|
||||||
python -m vall_e.train yaml=config/your_data/ar_or_nar.yml
|
valle-train yaml=config/your_data/ar_or_nar.yml
|
||||||
```
|
```
|
||||||
|
|
||||||
## TODO
|
## TODO
|
||||||
|
48
pyproject.toml
Normal file
48
pyproject.toml
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
[build-system]
|
||||||
|
build-backend = "hatchling.build"
|
||||||
|
requires = [
|
||||||
|
"hatchling >= 1.11.0",
|
||||||
|
"pip >= 22.0.0"
|
||||||
|
]
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
valle-quantize = "vall_e.emb.qnt:main"
|
||||||
|
valle-phonemes = "vall_e.emb.g2p:main"
|
||||||
|
valle-train = "vall_e.train:main"
|
||||||
|
|
||||||
|
|
||||||
|
[project]
|
||||||
|
name = "vall-e"
|
||||||
|
version = "0.0.1"
|
||||||
|
description = "An unofficial toy implementation of the audio LM VALL-E."
|
||||||
|
readme = "README.md"
|
||||||
|
license = "MIT"
|
||||||
|
requires-python = ">=3.10"
|
||||||
|
|
||||||
|
authors = [
|
||||||
|
{ name = "Zhe Niu", email = "niuzhe.nz@outlook.com" },
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
dependencies = [
|
||||||
|
"coloredlogs==15.0.1",
|
||||||
|
"deepspeed==0.7.7",
|
||||||
|
"diskcache==5.4.0",
|
||||||
|
"einops==0.6.0",
|
||||||
|
"encodec==0.1.1",
|
||||||
|
"g2p_en==2.1.0",
|
||||||
|
"humanize==4.4.0",
|
||||||
|
"matplotlib==3.6.0",
|
||||||
|
"numpy==1.23.3",
|
||||||
|
"omegaconf==2.2.3",
|
||||||
|
"openTSNE==0.6.2",
|
||||||
|
"pandas==1.5.0",
|
||||||
|
"soundfile==0.11.0",
|
||||||
|
"torch==1.13.0",
|
||||||
|
"torchaudio==0.13.0",
|
||||||
|
"tqdm==4.64.1",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
[project.urls]
|
||||||
|
GitHub = "https://github.com/enhuiz/vall-e"
|
@ -1,16 +0,0 @@
|
|||||||
coloredlogs==15.0.1
|
|
||||||
deepspeed==0.7.7
|
|
||||||
diskcache==5.4.0
|
|
||||||
einops==0.6.0
|
|
||||||
encodec==0.1.1
|
|
||||||
g2p_en==2.1.0
|
|
||||||
humanize==4.4.0
|
|
||||||
matplotlib==3.6.0
|
|
||||||
numpy==1.23.3
|
|
||||||
omegaconf==2.2.3
|
|
||||||
openTSNE==0.6.2
|
|
||||||
pandas==1.5.0
|
|
||||||
soundfile==0.11.0
|
|
||||||
torch==1.13.0+cu116
|
|
||||||
torchaudio==0.13.0+cu116
|
|
||||||
tqdm==4.64.1
|
|
59
setup.py
59
setup.py
@ -1,59 +0,0 @@
|
|||||||
import subprocess
|
|
||||||
from pathlib import Path
|
|
||||||
from datetime import datetime
|
|
||||||
from setuptools import setup, find_packages
|
|
||||||
|
|
||||||
|
|
||||||
def shell(*args):
|
|
||||||
out = subprocess.check_output(args)
|
|
||||||
return out.decode("ascii").strip()
|
|
||||||
|
|
||||||
|
|
||||||
def write_version(version_core, pre_release=True):
|
|
||||||
if pre_release:
|
|
||||||
time = shell("git", "log", "-1", "--format=%cd", "--date=iso")
|
|
||||||
time = datetime.strptime(time, "%Y-%m-%d %H:%M:%S %z")
|
|
||||||
time = time.strftime("%Y%m%d%H%M%S")
|
|
||||||
version = f"{version_core}-dev{time}"
|
|
||||||
else:
|
|
||||||
version = version_core
|
|
||||||
|
|
||||||
with open(Path("vall_e", "version.py"), "w") as f:
|
|
||||||
f.write('__version__ = "{}"\n'.format(version))
|
|
||||||
|
|
||||||
return version
|
|
||||||
|
|
||||||
|
|
||||||
with open("README.md", "r") as f:
|
|
||||||
long_description = f.read()
|
|
||||||
|
|
||||||
setup(
|
|
||||||
name="vall-e",
|
|
||||||
python_requires=">=3.10.0",
|
|
||||||
version=write_version("0.0.1"),
|
|
||||||
description="An unofficial toy implementation of the audio LM VALL-E",
|
|
||||||
author="enhuiz",
|
|
||||||
author_email="niuzhe.nz@outlook.com",
|
|
||||||
long_description=long_description,
|
|
||||||
long_description_content_type="text/markdown",
|
|
||||||
packages=find_packages(),
|
|
||||||
install_requires=[
|
|
||||||
"coloredlogs>=15.0.1",
|
|
||||||
"deepspeed>=0.7.7",
|
|
||||||
"diskcache>=5.4.0",
|
|
||||||
"einops>=0.6.0",
|
|
||||||
"encodec>=0.1.1",
|
|
||||||
"g2p_en>=2.1.0",
|
|
||||||
"humanize>=4.4.0",
|
|
||||||
"matplotlib>=3.6.0",
|
|
||||||
"numpy>=1.23.3",
|
|
||||||
"omegaconf>=2.2.3",
|
|
||||||
"openTSNE>=0.6.2",
|
|
||||||
"pandas>=1.5.0",
|
|
||||||
"soundfile>=0.11.0",
|
|
||||||
"torch>=1.13.0",
|
|
||||||
"torchaudio>=0.13.0",
|
|
||||||
"tqdm>=4.64.1",
|
|
||||||
],
|
|
||||||
url="https://github.com/enhuiz/vall-e",
|
|
||||||
)
|
|
Loading…
Reference in New Issue
Block a user