additional cruft, added a note in documentation to be aware of NUMA node topology when running vall_e.emb.process with more than one process

This commit is contained in:
mrq 2025-02-18 19:56:30 -06:00
parent 596c2df11c
commit 92139b6da9
2 changed files with 9 additions and 3 deletions
docs
vall_e/emb

View File

@ -98,6 +98,9 @@ This process can utilize sliced segments within the transcription metadata, or u
Refer to the `__main__`'s arguments for usage details.
> [!NOTE]
> If you're using this to try and split your workload over multiple processes / GPUs, it is *imperative* to keep each process within its own NUMA node by prefixing with `numactl -N0 -m0`, or you'll experience bottlenecks that make processing worse off compared to just doing it with one GPU.
## `similar.py`
This script handles taking either raw input audio, or processed encoded audio, and determines the top-K similar utterances for each sample for a given speaker (or dataset).

View File

@ -171,6 +171,7 @@ def process(
input_voice=None,
input_metadata="metadata",
output_dataset="training",
transcription_filename="whisper.json",
raise_exceptions=False,
stride=0,
stride_offset=0,
@ -266,17 +267,17 @@ def process(
continue
metadata_path = Path(f'./{input_metadata}/{group_name}/{speaker_id}/whisper.json')
metadata_path = Path(f'./{input_metadata}/{group_name}/{speaker_id}/{transcription_filename}')
if not metadata_path.exists():
missing["transcription"].append(str(metadata_path))
_logger.warning(f'Missing transcription metadata: ./{input_audio}/{group_name}/{speaker_id}/whisper.json')
_logger.warning(f'Missing transcription metadata: ./{input_audio}/{group_name}/{speaker_id}/{transcription_filename}')
continue
try:
metadata = json.loads(open(metadata_path, "r", encoding="utf-8").read())
except Exception as e:
missing["transcription"].append(str(metadata_path))
_logger.warning(f'Failed to open transcription metadata: ./{input_audio}/{group_name}/{speaker_id}/whisper.json: {e}')
_logger.warning(f'Failed to open transcription metadata: ./{input_audio}/{group_name}/{speaker_id}/{transcription_filename}: {e}')
continue
if f'{group_name}/{speaker_id}' not in dataset:
@ -404,6 +405,7 @@ def main():
parser.add_argument("--input-voice", type=str, default=None)
parser.add_argument("--input-metadata", type=str, default="training/metadata")
parser.add_argument("--output-dataset", type=str, default="training/dataset")
parser.add_argument("--transcription-filename", type=str, default="whisper.json")
parser.add_argument("--raise-exceptions", action="store_true")
parser.add_argument("--low-memory", action="store_true")
parser.add_argument("--skip-existing-folders", action="store_true")
@ -440,6 +442,7 @@ def main():
input_voice=args.input_voice,
input_metadata=args.input_metadata,
output_dataset=args.output_dataset,
transcription_filename=args.transcription_filename,
raise_exceptions=args.raise_exceptions,
stride=args.stride,
stride_offset=args.stride_offset,