# Source: voice_bridge/mqtt_audio_bridge.py
# Snapshot: 2026-01-09 10:28:44 +11:00 (125 lines, 3.8 KiB, Python)
import paho.mqtt.client as mqtt
import requests
import wave
import io
import numpy as np
import openwakeword
from openwakeword.model import Model
import collections
import threading
# --- CONFIG ---
# 1. VOICE SERVER (Audio IO) — broker the ESP32 streams raw audio to.
VOICE_BROKER = "192.168.20.13"
VOICE_TOPIC_AUDIO = "voice/audio_stream"
VOICE_TOPIC_STATUS = "voice/status"
# 2. HOME ASSISTANT (Text Output) — broker that receives the transcript.
HA_BROKER = "192.168.20.30"
HA_TOPIC_TEXT = "homeassistant/voice/text"
MQTT_USER = "mqtt-user"
# SECURITY NOTE(review): credential is hard-coded in source; consider moving
# to an environment variable or secrets file.
MQTT_PASS = "sam4jo"
# Whisper speech-to-text HTTP endpoint (expects a multipart WAV upload).
WHISPER_URL = "http://whisper-api:5000/transcribe"
# Key into the OpenWakeWord prediction dict checked in on_voice_message.
WAKE_WORD = "hey_jarvis"
# --- STATE ---
BUFFER_SIZE = 1280 * 4
# NOTE(review): audio_buffer appears unused in this file — candidate for
# removal if no other module imports it; verify before deleting.
audio_buffer = collections.deque(maxlen=BUFFER_SIZE)
# Raw PCM accumulated between wake-word detection and "processing" status.
command_buffer = bytearray()
# False = listening for the wake word; True = recording a command for Whisper.
is_command_mode = False
print("Loading Models...")
# Default Model() loads all bundled wake-word models, including WAKE_WORD.
owwModel = Model()
# --- HELPER FUNCTIONS ---
def transcribe_and_send(raw_data):
    """Wrap raw PCM audio in a WAV container, POST it to Whisper, and
    forward any non-empty transcript to Home Assistant via MQTT.

    Args:
        raw_data: raw 16-bit mono PCM bytes at 16 kHz (bytes/bytearray),
            as accumulated by on_voice_message while in command mode.

    Side effects:
        Publishes the transcript to HA_TOPIC_TEXT on the HA broker and an
        "OK" acknowledgement to VOICE_TOPIC_STATUS on the voice broker.
        All failures are printed and swallowed (best-effort).
    """
    # Build an in-memory WAV file: mono, 16-bit samples, 16 kHz.
    wav_io = io.BytesIO()
    with wave.open(wav_io, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(16000)
        wav_file.writeframes(raw_data)
    wav_io.seek(0)
    try:
        files = {'audio': ('cmd.wav', wav_io, 'audio/wav')}
        # FIX: timeout added — without it a stalled Whisper API would hang
        # this MQTT callback thread forever and freeze all message handling.
        res = requests.post(WHISPER_URL, files=files, timeout=30)
        if res.status_code == 200:
            text = res.json().get("transcript", "").strip()
            print(f"TEXT: {text}")
            if text:
                # PUBLISH TO HOME ASSISTANT BROKER
                ha_client.publish(HA_TOPIC_TEXT, text)
                # FEEDBACK TO ESP32 (Via Voice Broker)
                voice_client.publish(VOICE_TOPIC_STATUS, "OK")
    except Exception as e:
        # Broad on purpose: a transcription failure must not kill the bridge.
        print(f"API Error: {e}")
# --- CALLBACKS ---
def on_voice_message(client, userdata, msg):
    """paho-mqtt on_message callback for the voice broker.

    Handles two topics:
      * VOICE_TOPIC_AUDIO — raw 16-bit PCM chunks. In command mode the
        chunk is appended to command_buffer for later transcription;
        otherwise it is fed to OpenWakeWord, and a detection score above
        0.5 switches the bridge into command mode.
      * VOICE_TOPIC_STATUS with payload "processing" — the ESP32 finished
        recording, so the buffered command is transcribed and state resets.
    """
    global is_command_mode, command_buffer
    if msg.topic == VOICE_TOPIC_AUDIO:
        payload = msg.payload
        # 1. COMMAND MODE (Recording for Whisper)
        if is_command_mode:
            # Safety Check: Prevent Memory Overflow (cap at ~3MB)
            if len(command_buffer) > 3000000:
                print("Error: Buffer overflow. Resetting.")
                is_command_mode = False
                command_buffer = bytearray()
                owwModel.reset()
                return
            # Append the audio chunk
            command_buffer.extend(payload)
            return
        # 2. LISTENING MODE (Wake Word Detection)
        # Convert bytes to numpy int16 for OpenWakeWord.
        # NOTE(review): frombuffer raises ValueError on odd-length payloads —
        # assumes the publisher always sends whole 16-bit samples; confirm.
        audio_int16 = np.frombuffer(payload, dtype=np.int16)
        # Feed the model
        prediction = owwModel.predict(audio_int16)
        if prediction[WAKE_WORD] > 0.5:
            print(f"--- WAKE WORD DETECTED ---")
            is_command_mode = True
            command_buffer = bytearray()
            client.publish(VOICE_TOPIC_STATUS, "WAKE")
    # FIX(consistency): use the VOICE_TOPIC_STATUS constant instead of the
    # hard-coded "voice/status" literal (identical value).
    elif msg.topic == VOICE_TOPIC_STATUS and msg.payload.decode() == "processing":
        # ESP32 stopped recording
        if is_command_mode:
            print("Processing...")
            transcribe_and_send(command_buffer)
            is_command_mode = False
            command_buffer = bytearray()
            owwModel.reset()
# --- MAIN ---
def _on_voice_connect(client, userdata, flags, rc):
    """(Re)subscribe on every CONNACK so subscriptions survive reconnects.

    FIX: subscribing once after connect() loses the subscriptions whenever
    paho auto-reconnects after a broker drop; subscribing in on_connect is
    the documented pattern for durable subscriptions.
    """
    client.subscribe([(VOICE_TOPIC_AUDIO, 0), (VOICE_TOPIC_STATUS, 0)])

# Client 1: Voice Server (The Listener)
voice_client = mqtt.Client(client_id="Bridge_Voice")
voice_client.username_pw_set(MQTT_USER, MQTT_PASS)
voice_client.on_message = on_voice_message
voice_client.on_connect = _on_voice_connect
voice_client.connect(VOICE_BROKER, 1883)
# Client 2: Home Assistant (The Sender)
ha_client = mqtt.Client(client_id="Bridge_HA")
ha_client.username_pw_set(MQTT_USER, MQTT_PASS)
ha_client.connect(HA_BROKER, 1883)
print("Bridge Running (Dual Broker)...")
# loop_forever blocks, so the HA client's network loop runs in a daemon
# thread while the voice client's loop owns the main thread.
ha_thread = threading.Thread(target=ha_client.loop_forever)
ha_thread.daemon = True
ha_thread.start()
voice_client.loop_forever()