"""MQTT voice-assistant bridge.

Receives raw 16 kHz / 16-bit mono PCM audio chunks over MQTT from an ESP32,
runs them through OpenWakeWord, and — once the wake word fires — records the
spoken command and POSTs it (as a WAV file) to a local Whisper transcription
service. The transcript and an ACK are published back over MQTT.
"""
import io
import wave

import numpy as np
import openwakeword  # noqa: F401 — kept from original; may trigger package init side effects
import paho.mqtt.client as mqtt
import requests
from openwakeword.model import Model

# --- CONFIG ---
MQTT_BROKER = "192.168.20.30"
MQTT_USER = "mqtt-user"
MQTT_PASS = "sam4jo"  # NOTE(review): hard-coded credential — move to env var / secrets file
TOPIC_AUDIO = "homeassistant/voice/audio"
TOPIC_ACK = "homeassistant/voice/ack"
TOPIC_TEXT = "homeassistant/voice/text"
TOPIC_STATUS = "homeassistant/voice/status"
WHISPER_URL = "http://localhost:5000/transcribe"
WAKE_WORD_MODEL = "hey_jarvis"  # or 'alexa', 'hey_mycroft', 'timer', 'weather'

# Audio format constants (must match what the ESP32 streams).
SAMPLE_RATE = 16000
SAMPLE_WIDTH = 2  # bytes per sample (16-bit PCM)
CHUNK_BYTES = 1280 * SAMPLE_WIDTH  # OWW consumes 1280-sample frames
MAX_COMMAND_BYTES = SAMPLE_RATE * SAMPLE_WIDTH * 5  # hard cap: ~5 s of command audio
WHISPER_TIMEOUT_S = 30  # don't let a hung Whisper server block the MQTT loop forever

# --- STATE ---
# We buffer audio here.
# 1. We feed it to WakeWord.
# 2. If WakeWord triggers, we KEEP recording for the command.
audio_buffer = bytearray()    # raw PCM awaiting wake-word scoring
is_command_mode = False       # True while we are recording the spoken command
command_buffer = bytearray()  # PCM captured after the wake word fired

print("Loading OpenWakeWord...")
owwModel = Model(wakeword_models=[WAKE_WORD_MODEL])


def send_to_whisper(raw_data):
    """Wrap raw 16 kHz mono 16-bit PCM in a WAV container and POST to Whisper.

    On success, publishes the transcript to TOPIC_TEXT and an "OK" ack to
    TOPIC_ACK. Errors are logged and swallowed (best-effort by design).
    """
    print(f"Sending {len(raw_data)} bytes to Whisper...")
    wav_io = io.BytesIO()
    with wave.open(wav_io, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(SAMPLE_WIDTH)
        wav_file.setframerate(SAMPLE_RATE)
        wav_file.writeframes(raw_data)
    wav_io.seek(0)
    try:
        files = {'audio': ('cmd.wav', wav_io, 'audio/wav')}
        # timeout added: without it a stalled server hangs this thread,
        # which is the same thread that services the MQTT network loop.
        res = requests.post(WHISPER_URL, files=files, timeout=WHISPER_TIMEOUT_S)
        if res.status_code == 200:
            text = res.json().get("transcript", "")
            print(f"COMMAND: {text}")
            client.publish(TOPIC_TEXT, text)
            client.publish(TOPIC_ACK, "OK")  # Rainbow LED
        else:
            print("Whisper Error")
    except Exception as e:
        # Best-effort: log and keep the bridge alive.
        print(f"Error: {e}")


def on_message(client, userdata, msg):
    """Route incoming MQTT messages.

    TOPIC_AUDIO carries raw PCM chunks; TOPIC_STATUS with payload
    "processing" signals that the ESP32's VAD stream ended.
    """
    global audio_buffer, is_command_mode, command_buffer

    if msg.topic == TOPIC_AUDIO:
        payload = msg.payload

        # 1. Already listening for a command: just accumulate.
        if is_command_mode:
            command_buffer.extend(payload)
            # Safety valve: if the end-of-stream status message is lost,
            # flush after ~5 s instead of growing forever.
            if len(command_buffer) > MAX_COMMAND_BYTES:
                print("Timeout. Sending...")
                send_to_whisper(command_buffer)
                is_command_mode = False
                command_buffer = bytearray()
            return

        # 2. Not in command mode: feed OpenWakeWord in 1280-sample chunks.
        audio_buffer.extend(payload)
        while len(audio_buffer) >= CHUNK_BYTES:
            chunk = audio_buffer[:CHUNK_BYTES]
            audio_buffer = audio_buffer[CHUNK_BYTES:]

            # Convert to numpy for OWW.
            audio_int16 = np.frombuffer(chunk, dtype=np.int16)
            prediction = owwModel.predict(audio_int16)

            # Check score (0.0 to 1.0).
            if prediction[WAKE_WORD_MODEL] > 0.5:
                print(f"WAKE WORD DETECTED: {WAKE_WORD_MODEL}")
                is_command_mode = True
                # Seed the command with audio already buffered so the first
                # syllables spoken right after the wake word are not lost,
                # and stop scoring — leftover chunks must not re-trigger.
                command_buffer = bytearray(audio_buffer)
                audio_buffer = bytearray()
                owwModel.reset()  # clear internal model state before next listen
                # Optional: Send "Awake" LED command to ESP32 here
                break

    elif msg.topic == TOPIC_STATUS and msg.payload.decode() == "processing":
        # ESP32 finished its VAD stream — flush whatever we captured.
        if is_command_mode:
            send_to_whisper(command_buffer)
            is_command_mode = False
            command_buffer = bytearray()


def on_connect(client, userdata, flags, rc):
    """(Re)subscribe on every connect so a broker reconnect restores topics."""
    print(f"MQTT connected (rc={rc})")
    client.subscribe([(TOPIC_AUDIO, 0), (TOPIC_STATUS, 0)])


client = mqtt.Client()
client.username_pw_set(MQTT_USER, MQTT_PASS)
client.on_connect = on_connect
# BUG FIX: the original never registered the message callback, so no audio
# was ever processed. Without this line the whole pipeline is dead.
client.on_message = on_message
client.connect(MQTT_BROKER, 1883)
client.loop_forever()