# Install required packages
!pip install --quiet gradio ipywidgets kokoro-onnx onnxruntime requests numpy soundfile
print('✓ Packages installed!')
We'll start by trying out the Kokoro text-to-speech engine. It comes with a lot of options! You can see all the voices here.
from pathlib import Path
import requests
import soundfile as sf
from kokoro_onnx import Kokoro
print("Downloading Kokoro files...")

# Local cache directory for the model and voice data.
model_dir = Path("data/models/kokoro-onnx")
model_dir.mkdir(parents=True, exist_ok=True)

model_file = model_dir / "kokoro-v1.0.int8.onnx"
voices_file = model_dir / "voices-v1.0.bin"

model_url = "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/kokoro-v1.0.int8.onnx"
voices_url = "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin"

# Fetch only the files that are not cached yet.
for url, target in ((model_url, model_file), (voices_url, voices_file)):
    if target.exists():
        continue
    # Download into a sibling ".part" file and rename it once complete, so an
    # interrupted download can never be mistaken for a finished file on the
    # next run.
    partial = target.with_name(target.name + ".part")
    try:
        # stream=True avoids holding the whole file in memory; the timeout
        # keeps a stalled connection from hanging the notebook forever.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()
            with open(partial, "wb") as handle:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    handle.write(chunk)
        partial.replace(target)
    finally:
        # No-op after a successful rename; removes leftovers after a failure.
        partial.unlink(missing_ok=True)
print("Initializing text-to-speech...")
# Deliberately not named `kokoro`: a later cell defines a `kokoro()` function
# that `speak()` looks up at call time. Binding the engine instance to the
# same name would break the Gradio app whenever this cell is re-run after it.
tts_engine = Kokoro(str(model_file), str(voices_file))

# Synthesis settings for this one-off demo.
text = "This is a small local text to speech demo."
voice = "af_heart"
language = "en-us"
speed = 1.0

print("Generating audio...")
# create() hands back the audio samples together with their sample rate.
audio, sample_rate = tts_engine.create(
    text,
    voice=voice,
    speed=speed,
    lang=language,
)

# Persist the result so it can be played back or opened outside the notebook.
output_file = "output.wav"
sf.write(output_file, audio, sample_rate)
print(f"Saved audio to {output_file}")
Want to listen? I suppose you could open the file, but we can also play it in the notebook itself.
# Play back "output.wav", the file written by the synthesis cell above, using
# IPython's Audio display object.
# NOTE(review): the player presumably only renders when this expression is the
# last statement of a notebook cell — confirm when restructuring cells.
from IPython.display import Audio
Audio("output.wav")
from functools import lru_cache
from pathlib import Path
import gradio as gr
import requests
# Text pre-filled in the demo's input box.
DEFAULT_TEXT = "This is a small local text to speech demo."

# Local cache locations for the Kokoro model and voice data.
MODEL_DIR = Path("data") / "models" / "kokoro-onnx"
MODEL_FILE = MODEL_DIR / "kokoro-v1.0.int8.onnx"
VOICES_FILE = MODEL_DIR / "voices-v1.0.bin"

# Both files are published under the same GitHub release, using the same file
# names as the local copies.
_RELEASE_BASE_URL = "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0"
MODEL_URL = f"{_RELEASE_BASE_URL}/{MODEL_FILE.name}"
VOICES_URL = f"{_RELEASE_BASE_URL}/{VOICES_FILE.name}"
# Voice catalogue, grouped as (label prefix, voice-id prefix, language code,
# speaker names). Voice ids follow the "<prefix>_<lowercase name>" pattern.
_VOICE_GROUPS = (
    ("US female", "af", "en-us", ("Heart", "Bella", "Nicole", "Sarah", "Sky")),
    ("US male", "am", "en-us", ("Adam", "Fenrir", "Michael", "Puck")),
    ("UK female", "bf", "en-gb", ("Alice", "Emma", "Isabella", "Lily")),
    ("UK male", "bm", "en-gb", ("Daniel", "Fable", "George", "Lewis")),
)

# Maps the label shown in the UI to a (voice id, language code) pair.
# Insertion order is the order in which the voices are listed.
VOICES = {
    f"{group} - {speaker}": (f"{prefix}_{speaker.lower()}", lang)
    for group, prefix, lang, speakers in _VOICE_GROUPS
    for speaker in speakers
}
def download_file(url, path, *, timeout=30):
    """Download *url* to *path* unless the file already exists.

    Args:
        url: Address of the file to fetch.
        path: Destination file, as a ``Path`` or a string. Its parent
            directory is created when missing.
        timeout: Seconds passed to ``requests.get`` as the connection/read
            timeout, so a stalled server cannot hang the caller forever.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    path = Path(path)
    if path.exists():
        return

    # Create the directory the file actually lives in, not a fixed global one.
    path.parent.mkdir(parents=True, exist_ok=True)
    print(f"Downloading {path.name}...")

    # Write to a sibling ".part" file and rename on success: an interrupted
    # download must never leave a truncated file that later calls would
    # mistake for a complete one.
    partial = path.with_name(path.name + ".part")
    try:
        # Stream in chunks so large model files are not held in memory.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(partial, "wb") as handle:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    handle.write(chunk)
        partial.replace(path)
    finally:
        # No-op after a successful rename; removes leftovers after a failure.
        partial.unlink(missing_ok=True)
def download_models():
    """Fetch the model file and then the voices file if they are missing."""
    required = (
        (MODEL_URL, MODEL_FILE),
        (VOICES_URL, VOICES_FILE),
    )
    for source_url, destination in required:
        download_file(source_url, destination)
@lru_cache
def kokoro():
    """Return the Kokoro engine, building it on the first call.

    The result is memoized by ``lru_cache``, so later calls reuse the same
    instance. The first call makes sure the model files are present before
    constructing the engine.
    """
    # The import runs on first use rather than at module import time.
    from kokoro_onnx import Kokoro

    download_models()
    model_path, voices_path = str(MODEL_FILE), str(VOICES_FILE)
    return Kokoro(model_path, voices_path)
def speak(text, voice_name, speed):
    """Synthesize *text* with the chosen voice for the audio output component.

    Args:
        text: Text to speak. Surrounding whitespace is ignored; ``None`` is
            treated like an empty string.
        voice_name: Key of ``VOICES`` selecting the voice and its language.
        speed: Speed value forwarded to the engine's ``create`` call.

    Returns:
        A ``(sample_rate, audio)`` tuple.

    Raises:
        gr.Error: If no text was entered or the voice name is not in
            ``VOICES``, so the UI shows a readable message instead of an
            unhandled exception.
    """
    # A cleared textbox or a direct API call may deliver None instead of "".
    text = (text or "").strip()
    if not text:
        raise gr.Error("Enter some text first.")

    try:
        voice, language = VOICES[voice_name]
    except (KeyError, TypeError):
        # KeyError: unknown label; TypeError: unhashable value such as a list.
        raise gr.Error(f"Unknown voice: {voice_name!r}") from None

    audio, sample_rate = kokoro().create(text, voice=voice, speed=speed, lang=language)
    return sample_rate, audio
# Two-column layout: inputs and the trigger button on the left, the generated
# audio on the right.
with gr.Blocks(title="Local TTS demo") as demo:
    gr.Markdown("# Local TTS demo")

    with gr.Row():
        with gr.Column():
            text = gr.Textbox(value=DEFAULT_TEXT, label="Text", lines=4)
            voice = gr.Dropdown(
                choices=list(VOICES.keys()),
                value="US female - Heart",
                label="Voice",
            )
            speed = gr.Slider(
                minimum=0.7,
                maximum=1.3,
                value=1.0,
                step=0.05,
                label="Speed",
            )
            button = gr.Button("Generate", variant="primary")

        with gr.Column():
            output = gr.Audio(label="Output")

    # Clicking the button runs speak() on the three inputs and sends its
    # result to the audio component.
    button.click(fn=speak, inputs=[text, voice, speed], outputs=output)
# Start the Gradio app built in the `demo` Blocks object.
# The environment variable must be set before launch() is called.
# NOTE(review): GRADIO_DEBUG=1 presumably switches launch() into debug mode
# (blocking the cell and surfacing errors in its output) — confirm against the
# Gradio documentation for the installed version.
import os
os.environ['GRADIO_DEBUG'] = '1'
demo.launch()