#!/bin/bash
#
# Synthesize speech with Piper and stream it to a Snapcast TCP source.
#
# Usage:   <this script> TEXT [VOICE] [SPEED]
#   TEXT   Text to speak (required).
#   VOICE  Model file name inside MODEL_DIR (default: DEFAULT_VOICE).
#   SPEED  Value passed to Piper's --length-scale option (default: 1.0).
#          NOTE(review): presumably larger values mean slower speech, i.e. this
#          is a duration scale rather than a speed -- confirm against Piper docs.
#
# Exit status: 0 on success; 1 if TEXT is missing, Piper fails, or the audio
# could not be streamed.
# Requires: piper (at PIPER_BIN), sox, nc (a variant that supports -q).

# Fail on unset variables, and let a pipeline fail if any stage fails, so that
# Piper/sox errors are not hidden by the last command in the pipe.
set -u
set -o pipefail

# --- CONFIGURATION ---
# Use HAL as default since we know it exists
readonly DEFAULT_VOICE="en_US-hal_6409-medium.onnx"

# Paths
readonly PIPER_BIN="/home/sam/speech_piper/piper/piper"
readonly MODEL_DIR="/home/sam/speech_piper/data"
readonly SNAP_IP="127.0.0.1"
readonly SNAP_PORT="4953"

# --- INPUTS ---
TEXT="${1:-}"
VOICE="${2:-$DEFAULT_VOICE}"
SPEED="${3:-1.0}"

# Print a message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

[[ -n "$TEXT" ]] || die "Missing text"

# Work in a private, unpredictable temp directory (instead of a timestamped
# name in /tmp, which collides for calls within the same second) and remove it
# on every exit path, including failures.
TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/piper_tts.XXXXXX")" \
  || die "Error: could not create temporary directory."
cleanup() {
  rm -rf -- "${TMP_DIR:?}"
}
trap cleanup EXIT

# Keep the .wav extension so sox can detect the input format from the name.
TMP_WAV="$TMP_DIR/speech.wav"

# 1. Generate WAV (Piper handles the voice's native rate)
# printf rather than echo, so text such as "-n" or "-e" is spoken, not parsed
# as an echo option.
printf '%s\n' "$TEXT" \
  | "$PIPER_BIN" --model "$MODEL_DIR/$VOICE" --output_file "$TMP_WAV" --length-scale "$SPEED"
piper_status=$?

# Treat a non-zero exit or a missing/empty output file as failure.
if (( piper_status != 0 )) || [[ ! -s "$TMP_WAV" ]]; then
  die "Error: Piper failed."
fi

# 2. Convert to 48k Stereo (Standard Snapcast Format)
#    -r 48000 : Resample to 48k
#    -c 2     : Convert Mono to Stereo (fixes "one ear" scratchiness)
#    -b 16    : Force 16-bit signed integer
#    vol 0.8      : scale the volume to 0.8
#    pad 0 1.0    : no padding at the start, 1.0 s of silence at the end
#    nc -q 1      : quit 1 s after EOF on stdin
sox "$TMP_WAV" \
    -t raw -r 48000 -c 2 -b 16 -e signed-integer - \
    vol 0.8 pad 0 1.0 \
  | nc -q 1 "$SNAP_IP" "$SNAP_PORT" \
  || die "Error: streaming to Snapcast at $SNAP_IP:$SNAP_PORT failed."