41 lines
1005 B
Bash
Executable File
41 lines
1005 B
Bash
Executable File
#!/bin/bash
#
# Synthesize a line of text with Piper and stream it to a Snapcast TCP source.
#
# Usage: <this-script> TEXT [VOICE] [SPEED]
#   TEXT   Text to synthesize (required).
#   VOICE  Model file name inside MODEL_DIR (default: DEFAULT_VOICE).
#   SPEED  Value passed verbatim to Piper's --length-scale (default: 1.0).
#          NOTE(review): Piper describes length scale as phoneme length, so
#          larger values presumably mean SLOWER speech -- confirm that callers
#          expect this despite the name "SPEED".
#
# Exit status: 0 on success, 1 on any error (message on stderr).

# -u: catch typos in variable names; pipefail: a failing piper/sox stage must
# not be hidden by a successful last stage of the pipeline.
set -u -o pipefail

# --- CONFIGURATION ---
# Use HAL as default since we know it exists
readonly DEFAULT_VOICE="en_US-hal_6409-medium.onnx"

# Paths
readonly PIPER_BIN="/home/sam/speech_piper/piper/piper"
readonly MODEL_DIR="/home/sam/speech_piper/data"
readonly SNAP_IP="127.0.0.1"
readonly SNAP_PORT="4953"

# Print a message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# --- INPUTS ---
TEXT="${1:-}"
VOICE="${2:-$DEFAULT_VOICE}"
SPEED="${3:-1.0}"

[[ -n "$TEXT" ]] || die "Missing text"
[[ -f "$MODEL_DIR/$VOICE" ]] || die "Error: voice model not found: $MODEL_DIR/$VOICE"

# 1. Generate WAV (Piper handles the voice's native rate)
# A private mktemp directory replaces the old timestamp-based name, which
# collided when two requests arrived within the same second and was guessable
# in a world-writable directory.
TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/piper_tts.XXXXXX")" \
  || die "Error: could not create temporary directory."
readonly TMP_DIR

# Remove the scratch directory on every exit path (success, error, signal).
cleanup() {
  rm -rf -- "${TMP_DIR:?}"
}
trap cleanup EXIT

readonly TMP_WAV="$TMP_DIR/speech.wav"

# printf instead of echo so text such as "-n" or "-e" is passed through intact.
printf '%s\n' "$TEXT" \
  | "$PIPER_BIN" --model "$MODEL_DIR/$VOICE" --output_file "$TMP_WAV" --length-scale "$SPEED" \
  || die "Error: Piper failed."

# Piper can exit 0 yet leave nothing usable; require a non-empty file.
[[ -s "$TMP_WAV" ]] || die "Error: Piper failed."

# 2. Convert to 48k Stereo (Standard Snapcast Format)
# -r 48000 : Resample to 48k
# -c 2     : Convert Mono to Stereo (fixes "one ear" scratchiness)
# -b 16    : Force 16-bit signed integer
# vol 0.8  : Scale the volume to 0.8
# pad 0 1.0: Append one second of silence after the speech
sox "$TMP_WAV" \
  -t raw -r 48000 -c 2 -b 16 -e signed-integer - \
  vol 0.8 pad 0 1.0 \
  | nc -q 1 "$SNAP_IP" "$SNAP_PORT" \
  || die "Error: streaming to Snapcast at $SNAP_IP:$SNAP_PORT failed."