diff --git a/whispercpp.pxd b/whispercpp.pxd
index 6e4f5dc..ab8c3ad 100644
--- a/whispercpp.pxd
+++ b/whispercpp.pxd
@@ -80,6 +80,8 @@ cdef extern from "whisper.h" nogil:
 	cdef whisper_token_data whisper_sample_best(whisper_context*)
 	cdef whisper_token whisper_sample_timestamp(whisper_context*)
 	cdef int whisper_lang_id(char*)
+	cdef int whisper_lang_max_id()
+	const char* whisper_lang_str(int)
 	cdef int whisper_n_len(whisper_context*)
 	cdef int whisper_n_vocab(whisper_context*)
 	cdef int whisper_n_text_ctx(whisper_context*)
diff --git a/whispercpp.pyx b/whispercpp.pyx
index cc9d153..ce99b2a 100644
--- a/whispercpp.pyx
+++ b/whispercpp.pyx
@@ -61,6 +61,22 @@ def download_model(model, models_dir=MODELS_DIR):
 			f.write(r.read())
 
 
+def list_languages():
+	"""Return the languages known to whisper.cpp as (id, code) tuples.
+
+	Returns:
+	    list of (int, str), e.g. [(0, "en"), (1, "zh"), ...]
+	"""
+	cdef int lang_id
+	cdef const char* code
+	cdef list results = []
+	for lang_id in range(whisper_lang_max_id() + 1):  # ids run 0..max inclusive
+		code = whisper_lang_str(lang_id)
+		if code != NULL:  # NULL means unknown id; decoding it would crash
+			results.append((lang_id, code.decode()))
+	return results
+
+
 cdef cnp.ndarray[cnp.float32_t, ndim=1, mode="c"] load_audio(bytes file, int sr = SAMPLE_RATE):
 	try:
 		out = (