speech_piper/speak_direct.sh

#!/bin/bash
#
# speak_direct.sh - synthesize speech with Piper and stream it to a TCP sink.
#
# Usage: speak_direct.sh TEXT [VOICE] [SPEED]
#   TEXT   text to speak (required)
#   VOICE  model file name inside MODEL_DIR (default: $DEFAULT_VOICE)
#   SPEED  value handed to Piper's --length-scale (default: 1.0)
#          NOTE(review): per Piper's docs a larger length scale means
#          slower speech, so this is not a plain speed multiplier - confirm.
#
# Environment overrides (each falls back to the built-in default below):
#   DEFAULT_VOICE, PIPER_BIN, MODEL_DIR, SNAP_IP, SNAP_PORT
#
# Exit status: 0 on success, 1 on any failure (missing text, missing model,
# Piper failure, or a failure while converting/streaming the audio).

# -u: catch typos in variable names; pipefail: a failing sox or nc stage
# must not be hidden by the other stage of the pipeline succeeding.
set -u -o pipefail

# --- CONFIGURATION ---
# Use HAL as default since we know it exists
DEFAULT_VOICE="${DEFAULT_VOICE:-en_US-hal_6409-medium.onnx}"

# Paths
PIPER_BIN="${PIPER_BIN:-/home/sam/speech_piper/piper/piper}"
MODEL_DIR="${MODEL_DIR:-/home/sam/speech_piper/data}"
SNAP_IP="${SNAP_IP:-127.0.0.1}"
SNAP_PORT="${SNAP_PORT:-4953}"

# Print a message to stderr and terminate with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# --- INPUTS ---
TEXT="${1:-}"
VOICE="${2:-$DEFAULT_VOICE}"
SPEED="${3:-1.0}"

[[ -n "$TEXT" ]] || die "Missing text"

MODEL_PATH="$MODEL_DIR/$VOICE"
[[ -f "$MODEL_PATH" ]] || die "Error: voice model not found: $MODEL_PATH"

# 1. Generate WAV (Piper handles the voice's native rate)
# A private mktemp directory replaces the old timestamp-based file name:
# no collisions between calls in the same second, and nobody else can
# pre-create or symlink the output path.
TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/piper_tts_XXXXXX")" \
  || die "Error: could not create temporary directory."

# Remove the scratch directory on every exit path, including errors.
cleanup() {
  if [[ -n "${TMP_DIR:-}" ]]; then
    rm -rf -- "$TMP_DIR"
  fi
}
trap cleanup EXIT

TMP_WAV="$TMP_DIR/speech.wav"

# printf instead of echo: text such as "-n" or "-e" must be spoken, not
# interpreted as an echo option.
if ! printf '%s\n' "$TEXT" \
  | "$PIPER_BIN" --model "$MODEL_PATH" --output_file "$TMP_WAV" \
      --length-scale "$SPEED"; then
  die "Error: Piper failed."
fi

# Piper can exit cleanly without producing usable audio; require a
# non-empty file before handing it to sox.
[[ -s "$TMP_WAV" ]] || die "Error: Piper failed."

# 2. Convert to 48k Stereo (Standard Snapcast Format)
# -r 48000 : Resample to 48k
# -c 2     : Convert Mono to Stereo (fixes "one ear" scratchiness)
# -b 16    : Force 16-bit signed integer
# vol 0.8  : Scale amplitude to 80%
# pad 0 1.0: Append one second of silence after the speech
# nc -q 1  : Quit one second after stdin reaches EOF
if ! sox "$TMP_WAV" \
    -t raw -r 48000 -c 2 -b 16 -e signed-integer - \
    vol 0.8 pad 0 1.0 \
  | nc -q 1 "$SNAP_IP" "$SNAP_PORT"; then
  die "Error: failed to stream audio to $SNAP_IP:$SNAP_PORT."
fi