#!/usr/bin/env python3
"""Dual-broker MQTT voice bridge.

Pipeline:
  1. An ESP32 streams raw 16 kHz / 16-bit mono PCM chunks to
     ``voice/audio_stream`` on the voice broker.
  2. While idle, each chunk is fed to openWakeWord; when the wake-word
     score exceeds the threshold the bridge enters command mode and
     publishes "WAKE" on ``voice/status`` as device feedback.
  3. In command mode, chunks are appended to a byte buffer until the
     device publishes "processing" on ``voice/status``.
  4. The buffered audio is wrapped in a WAV container, POSTed to a
     Whisper HTTP endpoint, and the transcript is published to a second
     (Home Assistant) MQTT broker.
"""

import collections
import io
import os
import threading
import wave

import numpy as np
import paho.mqtt.client as mqtt
import requests
import openwakeword  # noqa: F401 -- kept from original; package-level import
from openwakeword.model import Model

# --- CONFIG ---------------------------------------------------------------
# All values can be overridden via environment variables; the defaults
# reproduce the original hard-coded setup exactly.

# 1. Voice server (audio in / status feedback out)
VOICE_BROKER = os.environ.get("VOICE_BROKER", "192.168.20.13")
VOICE_TOPIC_AUDIO = "voice/audio_stream"
VOICE_TOPIC_STATUS = "voice/status"

# 2. Home Assistant (transcript out)
HA_BROKER = os.environ.get("HA_BROKER", "192.168.20.30")
HA_TOPIC_TEXT = "homeassistant/voice/text"

# NOTE(review): credentials were hard-coded in source; prefer setting
# MQTT_USER / MQTT_PASS in the environment instead of relying on defaults.
MQTT_USER = os.environ.get("MQTT_USER", "mqtt-user")
MQTT_PASS = os.environ.get("MQTT_PASS", "sam4jo")

WHISPER_URL = os.environ.get("WHISPER_URL", "http://whisper-api:5000/transcribe")
WHISPER_TIMEOUT_S = 30        # a hung HTTP call must not wedge the MQTT loop
WAKE_WORD = "hey_jarvis"
WAKE_THRESHOLD = 0.5          # openWakeWord score above which we trigger
MAX_COMMAND_BYTES = 3_000_000  # ~94 s of 16 kHz 16-bit mono; overflow guard

# --- STATE ----------------------------------------------------------------
# NOTE(review): audio_buffer / BUFFER_SIZE are never read or written below;
# kept for backward compatibility, candidates for removal.
BUFFER_SIZE = 1280 * 4
audio_buffer = collections.deque(maxlen=BUFFER_SIZE)

command_buffer = bytearray()   # PCM accumulated while in command mode
is_command_mode = False        # True between wake-word hit and "processing"

print("Loading Models...")
owwModel = Model()


# --- HELPER FUNCTIONS ------------------------------------------------------
def transcribe_and_send(raw_data):
    """Wrap raw PCM in a WAV container, transcribe it, forward the text.

    Args:
        raw_data: 16 kHz / 16-bit / mono PCM bytes (or bytearray).

    Side effects:
        Publishes the transcript to the Home Assistant broker and an "OK"
        acknowledgement to the ESP32 via the voice broker. Never raises:
        all errors are logged so the MQTT loop stays alive.
    """
    wav_io = io.BytesIO()
    with wave.open(wav_io, "wb") as wav_file:
        wav_file.setnchannels(1)        # mono
        wav_file.setsampwidth(2)        # 16-bit samples
        wav_file.setframerate(16000)
        wav_file.writeframes(raw_data)
    wav_io.seek(0)

    try:
        files = {'audio': ('cmd.wav', wav_io, 'audio/wav')}
        # timeout added: without it a stalled Whisper API blocks the
        # voice client's network loop indefinitely.
        res = requests.post(WHISPER_URL, files=files, timeout=WHISPER_TIMEOUT_S)
        if res.status_code == 200:
            text = res.json().get("transcript", "").strip()
            print(f"TEXT: {text}")
            if text:
                # PUBLISH TO HOME ASSISTANT BROKER
                ha_client.publish(HA_TOPIC_TEXT, text)
                # FEEDBACK TO ESP32 (Via Voice Broker)
                voice_client.publish(VOICE_TOPIC_STATUS, "OK")
        else:
            # Previously silent; at least log the failure.
            print(f"API Error: HTTP {res.status_code}")
    except Exception as e:
        # Broad by design: this is the top-level boundary of the callback
        # chain and must not kill the MQTT loop.
        print(f"API Error: {e}")


# --- CALLBACKS --------------------------------------------------------------
def on_voice_connect(client, userdata, flags, rc):
    """(Re)subscribe on every connect so subscriptions survive broker
    reconnects -- subscribing once after connect() loses them on reconnect."""
    print(f"Voice broker connected (rc={rc})")
    client.subscribe([(VOICE_TOPIC_AUDIO, 0), (VOICE_TOPIC_STATUS, 0)])


def on_voice_message(client, userdata, msg):
    """Dispatch voice-broker messages: audio chunks and device status.

    State machine:
        listening  --wake word-->  command mode (buffering)
        command    --"processing" status-->  transcribe, back to listening
    """
    global is_command_mode, command_buffer

    if msg.topic == VOICE_TOPIC_AUDIO:
        payload = msg.payload

        # 1. COMMAND MODE (recording for Whisper)
        if is_command_mode:
            # Safety check: cap the buffer (~3 MB) to prevent unbounded
            # growth if the "processing" status never arrives.
            if len(command_buffer) > MAX_COMMAND_BYTES:
                print("Error: Buffer overflow. Resetting.")
                is_command_mode = False
                command_buffer = bytearray()
                owwModel.reset()
                return
            command_buffer.extend(payload)
            return

        # 2. LISTENING MODE (wake-word detection)
        # Reinterpret raw bytes as int16 samples for openWakeWord.
        audio_int16 = np.frombuffer(payload, dtype=np.int16)
        prediction = owwModel.predict(audio_int16)
        if prediction[WAKE_WORD] > WAKE_THRESHOLD:
            print(f"--- WAKE WORD DETECTED ---")
            is_command_mode = True
            command_buffer = bytearray()
            client.publish(VOICE_TOPIC_STATUS, "WAKE")

    elif msg.topic == VOICE_TOPIC_STATUS and msg.payload.decode() == "processing":
        # ESP32 stopped recording -> transcribe what we buffered.
        if is_command_mode:
            print("Processing...")
            transcribe_and_send(bytes(command_buffer))
            is_command_mode = False
            command_buffer = bytearray()
            owwModel.reset()


# --- MAIN -------------------------------------------------------------------
# Client 1: voice server (the listener).
voice_client = mqtt.Client(client_id="Bridge_Voice")
voice_client.username_pw_set(MQTT_USER, MQTT_PASS)
voice_client.on_message = on_voice_message
voice_client.on_connect = on_voice_connect  # resubscribes on reconnect
voice_client.connect(VOICE_BROKER, 1883)

# Client 2: Home Assistant (the sender).
ha_client = mqtt.Client(client_id="Bridge_HA")
ha_client.username_pw_set(MQTT_USER, MQTT_PASS)
ha_client.connect(HA_BROKER, 1883)

print("Bridge Running (Dual Broker)...")

# loop_forever() blocks, so run the HA client's loop in a background daemon
# thread and keep the voice client's loop in the main thread.
ha_thread = threading.Thread(target=ha_client.loop_forever, daemon=True)
ha_thread.start()
voice_client.loop_forever()