# voice_bridge/mqtt_audio_bridge.py

import collections
import io
import os
import threading
import wave

import numpy as np
import openwakeword
import paho.mqtt.client as mqtt
import requests
from openwakeword.model import Model

# --- CONFIG ---
# Deployment-specific values (hosts, credentials, service URL) can be
# overridden with VOICE_BRIDGE_* environment variables. The literals below are
# only the fallbacks used when the corresponding variable is unset.

# 1. VOICE SERVER (Audio IO)
VOICE_BROKER = os.environ.get("VOICE_BRIDGE_VOICE_BROKER", "192.168.20.13")
VOICE_TOPIC_AUDIO = "voice/audio_stream"
VOICE_TOPIC_STATUS = "voice/status"

# 2. HOME ASSISTANT (Text Output)
HA_BROKER = os.environ.get("VOICE_BRIDGE_HA_BROKER", "192.168.20.30")
HA_TOPIC_TEXT = "homeassistant/voice/text"

# The same credentials are used for both brokers.
# NOTE(review): a plaintext password is committed as the default here; prefer
# supplying VOICE_BRIDGE_MQTT_PASS from the environment and rotating this one.
MQTT_USER = os.environ.get("VOICE_BRIDGE_MQTT_USER", "mqtt-user")
MQTT_PASS = os.environ.get("VOICE_BRIDGE_MQTT_PASS", "sam4jo")

WHISPER_URL = os.environ.get(
    "VOICE_BRIDGE_WHISPER_URL", "http://whisper-api:5000/transcribe"
)
# Key looked up in the wake-word model's prediction dict.
WAKE_WORD = "hey_jarvis"

# --- STATE ---
# Module-level state read and rebound by the MQTT message callback.

# Capacity of the rolling buffer below, counted in deque entries.
BUFFER_SIZE = 4 * 1280
# Bounded deque: once full, appending discards the oldest entry.
# NOTE(review): nothing in the visible file reads or writes this buffer.
audio_buffer = collections.deque([], BUFFER_SIZE)

# True while incoming audio is being recorded as a command rather than being
# scanned for the wake word.
is_command_mode = False
# Raw audio bytes accumulated for the command currently being recorded.
command_buffer = bytearray()

# Build the wake-word model once at import time; the message callback reuses
# this single instance for every audio chunk (predict) and after every command
# (reset).
# NOTE(review): Model() is called with no arguments — presumably this loads
# every bundled pre-trained model, not only WAKE_WORD; confirm against the
# installed openwakeword version.
print("Loading Models...")
owwModel = Model()

# --- HELPER FUNCTIONS ---
def transcribe_and_send(raw_data, *, timeout=30.0):
    """Transcribe recorded audio via the Whisper API and publish the text.

    The raw bytes are wrapped in an in-memory WAV container whose header
    declares 1 channel, 2-byte samples and a 16000 Hz frame rate, then POSTed
    to ``WHISPER_URL`` as the multipart field ``audio``. A non-empty
    transcript is published to ``HA_TOPIC_TEXT`` on the Home Assistant broker
    and an ``"OK"`` status is published on the voice broker.

    Args:
        raw_data: Bytes-like object holding the raw audio frames.
        timeout: Seconds to wait for the Whisper API before giving up. Without
            a timeout ``requests`` waits forever, which would stall the MQTT
            callback thread that calls this function.

    Returns:
        None. Request, decoding and publish errors are reported on stdout
        rather than raised, so a failed transcription never kills the bridge.
    """
    wav_io = io.BytesIO()
    with wave.open(wav_io, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(16000)
        wav_file.writeframes(raw_data)
    # Rewind so the upload starts at the WAV header.
    wav_io.seek(0)

    try:
        files = {'audio': ('cmd.wav', wav_io, 'audio/wav')}
        res = requests.post(WHISPER_URL, files=files, timeout=timeout)
        if res.status_code != 200:
            # Previously dropped silently; surface it so failures are visible.
            print(f"API Error: HTTP {res.status_code}")
            return

        # `or ""` guards against an explicit null transcript in the JSON.
        text = (res.json().get("transcript") or "").strip()
        print(f"TEXT: {text}")
        if text:
            # PUBLISH TO HOME ASSISTANT BROKER
            ha_client.publish(HA_TOPIC_TEXT, text)

            # FEEDBACK TO ESP32 (Via Voice Broker)
            voice_client.publish(VOICE_TOPIC_STATUS, "OK")
    except Exception as e:
        # Deliberate catch-all: this runs inside an MQTT callback, where an
        # escaping exception would terminate the network loop.
        print(f"API Error: {e}")

# --- CALLBACKS ---
# Cap on one recorded command (~3 MB) so that a missed end-of-recording
# signal cannot grow the buffer without bound.
_MAX_COMMAND_BYTES = 3000000


def _reset_to_listening():
    """Discard any recorded audio and go back to wake-word detection."""
    global is_command_mode, command_buffer
    is_command_mode = False
    command_buffer = bytearray()
    owwModel.reset()


def on_voice_message(client, userdata, msg):
    """Handle one message from the voice broker.

    Audio messages (``VOICE_TOPIC_AUDIO``) are either appended to the command
    recording (command mode) or fed to the wake-word model (listening mode).
    When the model's score for ``WAKE_WORD`` exceeds 0.5 the bridge switches
    to command mode and publishes ``"WAKE"`` on ``VOICE_TOPIC_STATUS``.

    A ``"processing"`` payload on ``VOICE_TOPIC_STATUS`` ends the recording:
    the buffered audio is transcribed and the bridge returns to listening.

    Args:
        client: The MQTT client that received the message; used to publish
            the wake notification.
        userdata: Unused.
        msg: Received message; only ``topic`` and ``payload`` are read.
    """
    global is_command_mode, command_buffer

    if msg.topic == VOICE_TOPIC_AUDIO:
        payload = msg.payload

        # 1. COMMAND MODE (Recording for Whisper)
        if is_command_mode:
            # Safety Check: Prevent Memory Overflow
            if len(command_buffer) > _MAX_COMMAND_BYTES:
                print("Error: Buffer overflow. Resetting.")
                _reset_to_listening()
                return

            # Append the audio chunk
            command_buffer.extend(payload)
            return

        # 2. LISTENING MODE (Wake Word Detection)
        # int16 samples are two bytes each. np.frombuffer raises ValueError
        # when the buffer length is not a multiple of the item size, and an
        # exception here would escape the callback, so only whole samples are
        # read and empty chunks are skipped.
        sample_count = len(payload) // 2
        if sample_count == 0:
            return
        audio_int16 = np.frombuffer(payload, dtype=np.int16, count=sample_count)

        # Feed the model
        prediction = owwModel.predict(audio_int16)

        if prediction[WAKE_WORD] > 0.5:
            print("--- WAKE WORD DETECTED ---")
            is_command_mode = True
            command_buffer = bytearray()
            client.publish(VOICE_TOPIC_STATUS, "WAKE")

    # Compare raw bytes: decoding first would raise UnicodeDecodeError on a
    # non-UTF-8 status payload.
    elif msg.topic == VOICE_TOPIC_STATUS and msg.payload == b"processing":
        # ESP32 stopped recording
        if is_command_mode:
            print("Processing...")
            try:
                # Nothing to transcribe if no audio arrived after the wake word.
                if command_buffer:
                    transcribe_and_send(command_buffer)
            finally:
                # Always return to listening, even if transcription fails.
                _reset_to_listening()
# --- MAIN ---
def _on_voice_connect(client, userdata, flags, rc, *extra):
    """Subscribe to the voice topics each time the broker accepts a connection.

    Subscribing here rather than once after ``connect()`` means the
    subscriptions are re-established after an automatic reconnect; otherwise
    the bridge would stay connected but stop receiving audio.

    Args:
        client: The connecting MQTT client.
        userdata: Unused.
        flags: Unused.
        rc: Connection result; 0 means the connection was accepted.
        *extra: Absorbs additional arguments passed by other callback
            signatures (e.g. MQTT v5 properties).
    """
    if rc != 0:
        print(f"Voice broker connection failed: {rc}")
        return
    client.subscribe([(VOICE_TOPIC_AUDIO, 0), (VOICE_TOPIC_STATUS, 0)])


# Client 1: Voice Server (The Listener)
voice_client = mqtt.Client(client_id="Bridge_Voice")
voice_client.username_pw_set(MQTT_USER, MQTT_PASS)
# Callbacks must be registered before connect() so the first CONNACK is seen.
voice_client.on_connect = _on_voice_connect
voice_client.on_message = on_voice_message
voice_client.connect(VOICE_BROKER, 1883)

# Client 2: Home Assistant (The Sender)
ha_client = mqtt.Client(client_id="Bridge_HA")
ha_client.username_pw_set(MQTT_USER, MQTT_PASS)
ha_client.connect(HA_BROKER, 1883)

print("Bridge Running (Dual Broker)...")

# loop_forever() blocks, so the HA client's network loop runs in a background
# daemon thread while the voice client's loop occupies the main thread.
ha_thread = threading.Thread(target=ha_client.loop_forever, daemon=True)
ha_thread.start()

voice_client.loop_forever()