# Download a video's audio with yt-dlp and transcribe it with whisper-cli.
#
# Globals:
#   WHISPER_MODEL (read) - passed to whisper-cli as --model; must be non-empty.
# Arguments:
#   $1 - URL handed to yt-dlp
#   $2 - output base name: the audio is written to "$2.wav" (removed again
#        before returning) and "$2" is passed to whisper-cli as --output-file
#   $3 - language passed to whisper-cli (default: en)
# Returns:
#   0 on success, 2 when a required argument is missing, 1 when WHISPER_MODEL
#   is empty, otherwise the exit status of the failing yt-dlp / whisper-cli.
#
# NOTE(review): despite the "_to_srt" name the transcript is requested with
# --output-vtt. Left unchanged so existing callers keep getting the same
# output; confirm which format is actually intended.
yt_to_srt() {
  local url="${1:-}"
  local output_base="${2:-}"
  local language="${3:-en}"
  # "rc" rather than "status": the latter is a read-only special in zsh.
  local audio_file rc

  if [ -z "$url" ] || [ -z "$output_base" ]; then
    echo "usage: yt_to_srt <url> <output_base> [language]" >&2
    return 2
  fi
  # Fail before downloading anything if transcription cannot work anyway.
  if [ -z "${WHISPER_MODEL:-}" ]; then
    echo "yt_to_srt: WHISPER_MODEL is not set" >&2
    return 1
  fi

  audio_file="$output_base.wav"

  # Without this check a failed download would still be fed to whisper-cli.
  yt-dlp -x --audio-format wav --postprocessor-args "-ar 16000" -o "$audio_file" "$url" || {
    rc=$?
    rm -f -- "$audio_file"
    return "$rc"
  }

  rc=0
  whisper-cli --language "$language" --model "$WHISPER_MODEL" --split-on-word --max-len 65 --output-vtt --output-file "$output_base" --file "$audio_file" || rc=$?

  # Always drop the intermediate audio, but report the transcription status
  # rather than the status of rm.
  rm -f -- "$audio_file"
  return "$rc"
}
# Extract 16 kHz mono PCM audio from a media file with ffmpeg and transcribe
# it with whisper-cli.
#
# Globals:
#   WHISPER_MODEL (read) - passed to whisper-cli as --model; must be non-empty.
#   TMPDIR (read)        - parent of the scratch directory (default: /tmp).
# Arguments:
#   $1 - path of the media file given to ffmpeg
#   $2 - language passed to whisper-cli (default: en)
# Outputs:
#   whisper-cli is given the input's base name without its last extension as
#   --output-file, so the transcript lands relative to the current directory.
# Returns:
#   0 on success, 2 when the path is missing, 1 when WHISPER_MODEL is empty,
#   otherwise the exit status of the failing basename/mktemp/ffmpeg/whisper-cli.
#
# NOTE(review): despite the "_to_srt" name the transcript is requested with
# --output-vtt. Left unchanged so existing callers keep getting the same
# output; confirm which format is actually intended.
file_to_srt() {
  local filepath="${1:-}"
  local language="${2:-en}"
  # "rc" rather than "status": the latter is a read-only special in zsh.
  local filename output_base tmp_dir temp_wav rc

  if [ -z "$filepath" ]; then
    echo "usage: file_to_srt <file> [language]" >&2
    return 2
  fi
  if [ -z "${WHISPER_MODEL:-}" ]; then
    echo "file_to_srt: WHISPER_MODEL is not set" >&2
    return 1
  fi

  # Declared separately from the assignment so a basename failure is not
  # masked by the exit status of "local".
  filename=$(basename -- "$filepath") || return
  output_base="${filename%.*}"
  # A name such as ".hidden" would otherwise yield an empty output base.
  [ -n "$output_base" ] || output_base="$filename"

  # The intermediate WAV lives in a private scratch directory. Writing it as
  # "<stem>.wav" in the current directory would collide with (and afterwards
  # delete) an input or unrelated file of that name.
  tmp_dir=$(mktemp -d "${TMPDIR:-/tmp}/file_to_srt.XXXXXX") || return
  temp_wav="$tmp_dir/audio.wav"

  rc=0
  if ffmpeg -i "$filepath" -vn -acodec pcm_s16le -ar 16000 -ac 1 "$temp_wav"; then
    whisper-cli --language "$language" --model "$WHISPER_MODEL" --split-on-word --max-len 65 --output-vtt --output-file "$output_base" --file "$temp_wav" || rc=$?
  else
    # Conversion failed: skip transcription and report ffmpeg's status.
    rc=$?
  fi

  rm -rf -- "$tmp_dir"
  return "$rc"
}
Plus an additional bootstrap script for the large-v3-turbo model, from my chezmoi dotfiles:
#!/bin/bash
# Download whisper.cpp models from Hugging Face (runs once per machine).
#
# Every entry of MODELS is fetched from BASE_URL into MODELS_DIR unless a file
# of that name is already present there. The script aborts with curl's exit
# status on the first failed download.
set -euo pipefail

MODELS_DIR="$HOME/whisper-models"
BASE_URL="https://huggingface.co/ggerganov/whisper.cpp/resolve/main"
MODELS=("ggml-large-v3-turbo.bin" "ggml-tiny.bin")

mkdir -p "$MODELS_DIR"

for model in "${MODELS[@]}"; do
  if [ ! -f "$MODELS_DIR/$model" ]; then
    echo "Downloading $model..."
    # Download to a ".part" file and rename only on success. The existence
    # check above would otherwise treat an interrupted download as a finished
    # model and skip it on every later run.
    partial="$MODELS_DIR/$model.part"
    # --fail makes curl exit non-zero on HTTP errors instead of saving the
    # server's error page as the model file.
    if curl --fail -L --progress-bar -o "$partial" "$BASE_URL/$model"; then
      mv -- "$partial" "$MODELS_DIR/$model"
    else
      rc=$?
      rm -f -- "$partial"
      echo "Failed to download $model (curl exit $rc)" >&2
      exit "$rc"
    fi
  else
    echo "$model already exists, skipping."
  fi
done
echo "Whisper models ready at $MODELS_DIR"

I guess if it encourages you to install and figure out how to use ffmpeg, yt-dlp, kroko, numpy, and onnx, that's a good thing. Sometimes just knowing a thing is possible is a huge benefit.
This repo is now a good way to centralize hacks around the sure-to-come blockers those platforms will add to prevent downloads.
Just like uBlock Origin was a way to centralize all the "just run this Greasemonkey script" comments, I can see this getting a huge following among people who really value transcriptions.
NPUs - definitely a good use case for at least part of it; there are ports of whisper that use Core ML/ANE, drawing less power at 3x the speed of CPU-only inference.