Files
speech_piper/speak_direct.sh
2026-01-09 10:33:49 +11:00

41 lines
1005 B
Bash
Executable File

#!/bin/bash
# Synthesise TEXT with Piper and stream the audio to a TCP endpoint.
#
# Usage: speak_direct.sh TEXT [VOICE] [SPEED]
#   TEXT   text to speak (required)
#   VOICE  model file name inside MODEL_DIR (default: DEFAULT_VOICE)
#   SPEED  value handed to Piper's --length-scale option (default: 1.0)
#
# Exit status: 0 on success; 1 if TEXT is missing, the temp directory cannot
# be created, Piper fails, or the sox | nc streaming stage fails.
# All diagnostics are written to stderr.

# -u: catch typos in variable names; pipefail: a failure in any pipeline
# stage (not just the last one) makes the whole pipeline fail.
set -u -o pipefail

# --- CONFIGURATION ---
# Use HAL as default since we know it exists
DEFAULT_VOICE="en_US-hal_6409-medium.onnx"
# Paths
PIPER_BIN="/home/sam/speech_piper/piper/piper"
MODEL_DIR="/home/sam/speech_piper/data"
SNAP_IP="127.0.0.1"
SNAP_PORT="4953"

# --- INPUTS ---
# ${N:-} keeps missing arguments from tripping `set -u`.
TEXT="${1:-}"
VOICE="${2:-$DEFAULT_VOICE}"
SPEED="${3:-1.0}"

if [[ -z "$TEXT" ]]; then
  echo "Missing text" >&2
  exit 1
fi

# --- TEMP WORKSPACE ---
# A private mktemp directory avoids name collisions between concurrent runs
# and the symlink risks of a predictable file name in a shared /tmp. The WAV
# keeps its .wav extension inside that directory.
TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/piper_tts.XXXXXX")" || {
  echo "Error: could not create temporary directory." >&2
  exit 1
}
TMP_WAV="$TMP_DIR/speech.wav"

# Remove the workspace on every exit path (success, error, or signal-driven
# exit), not only when the script reaches its last line.
cleanup() {
  rm -rf -- "${TMP_DIR:?}"
}
trap cleanup EXIT

# 1. Generate WAV (Piper handles the voice's native rate)
# printf instead of echo so text such as "-n" or "-e" is passed through
# verbatim rather than being interpreted as an echo option.
printf '%s\n' "$TEXT" |
  "$PIPER_BIN" --model "$MODEL_DIR/$VOICE" --output_file "$TMP_WAV" --length-scale "$SPEED"
piper_status=$?

# Treat a non-zero exit status OR a missing/empty output file as failure.
if ((piper_status != 0)) || [[ ! -s "$TMP_WAV" ]]; then
  echo "Error: Piper failed." >&2
  exit 1
fi

# 2. Convert to 48k Stereo (Standard Snapcast Format)
# -r 48000 : Resample to 48k
# -c 2 : Convert Mono to Stereo (fixes "one ear" scratchiness)
# -b 16 : Force 16-bit signed integer
# vol 0.8 / pad 0 1.0 : sox effects applied to the output stream
# nc -q 1 : sends the raw stream to SNAP_IP:SNAP_PORT
if ! sox "$TMP_WAV" \
  -t raw -r 48000 -c 2 -b 16 -e signed-integer - \
  vol 0.8 pad 0 1.0 |
  nc -q 1 "$SNAP_IP" "$SNAP_PORT"; then
  echo "Error: streaming to $SNAP_IP:$SNAP_PORT failed." >&2
  exit 1
fi