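"""Dual-broker MQTT voice bridge.

Listens to an audio stream on the voice broker, runs openWakeWord on it,
records the audio that follows a wake word, sends that recording to a
Whisper HTTP endpoint for transcription and publishes the resulting text
to the Home Assistant broker.
"""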
import collections
import io
import os
import threading
import wave

import numpy as np
import openwakeword
import paho.mqtt.client as mqtt
import requests
from openwakeword.model import Model

# --- CONFIG ---
# Deployment-specific values (hosts, credentials, service URL) can be
# overridden with an environment variable of the same name; the literals
# below are only the fallback defaults.

# 1. VOICE SERVER (Audio IO)
VOICE_BROKER = os.environ.get("VOICE_BROKER", "192.168.20.13")
VOICE_TOPIC_AUDIO = "voice/audio_stream"
VOICE_TOPIC_STATUS = "voice/status"

# 2. HOME ASSISTANT (Text Output)
HA_BROKER = os.environ.get("HA_BROKER", "192.168.20.30")
HA_TOPIC_TEXT = "homeassistant/voice/text"

# The same credentials are used for both broker connections.
# NOTE(review): a password is committed here as the default value. Prefer
# supplying MQTT_PASS through the environment and rotating this one.
MQTT_USER = os.environ.get("MQTT_USER", "mqtt-user")
MQTT_PASS = os.environ.get("MQTT_PASS", "sam4jo")

WHISPER_URL = os.environ.get("WHISPER_URL", "http://whisper-api:5000/transcribe")
# Key looked up in the openWakeWord prediction dict.
WAKE_WORD = "hey_jarvis"

# --- STATE ---
# NOTE(review): BUFFER_SIZE and audio_buffer are not referenced by any other
# code visible in this file; kept in case something external relies on them.
BUFFER_SIZE = 1280 * 4
audio_buffer = collections.deque(maxlen=BUFFER_SIZE)
# Raw audio bytes accumulated while is_command_mode is True.
command_buffer = bytearray()
# True from wake-word detection until the recording is processed or dropped.
is_command_mode = False

print("Loading Models...")
owwModel = Model()

# --- HELPER FUNCTIONS ---
def transcribe_and_send(raw_data, timeout=60):
    """Transcribe recorded audio via Whisper and forward the text.

    The raw bytes are wrapped in an in-memory WAV container (1 channel,
    2-byte samples, 16000 Hz) and POSTed to ``WHISPER_URL`` as the ``audio``
    form file. When the service answers 200 with a non-empty ``transcript``,
    the text is published to ``HA_TOPIC_TEXT`` on the Home Assistant client
    and ``"OK"`` is published to ``VOICE_TOPIC_STATUS`` on the voice client.

    Args:
        raw_data: Bytes-like audio payload; assumed to already be PCM in the
            format described above (no conversion is performed here).
        timeout: Seconds passed to ``requests.post``. Without a timeout a
            stalled Whisper server would block the calling MQTT callback,
            and therefore the whole network loop, indefinitely.

    Returns:
        None. Failures are reported with ``print`` and never raised, so a
        bad request cannot take down the MQTT loop that calls this.
    """
    # Nothing was recorded between the wake word and the stop signal.
    if not raw_data:
        print("API Error: no audio captured")
        return

    wav_io = io.BytesIO()
    with wave.open(wav_io, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(16000)
        wav_file.writeframes(raw_data)
    # Rewind so requests uploads the container from its first byte.
    wav_io.seek(0)

    try:
        files = {"audio": ("cmd.wav", wav_io, "audio/wav")}
        res = requests.post(WHISPER_URL, files=files, timeout=timeout)
        if res.status_code != 200:
            print(f"API Error: HTTP {res.status_code}")
            return

        # `or ""` guards against a JSON null transcript.
        text = (res.json().get("transcript", "") or "").strip()
        print(f"TEXT: {text}")
        if text:
            # PUBLISH TO HOME ASSISTANT BROKER
            ha_client.publish(HA_TOPIC_TEXT, text)

            # FEEDBACK TO ESP32 (Via Voice Broker)
            voice_client.publish(VOICE_TOPIC_STATUS, "OK")
    except Exception as e:
        # Deliberately broad: this is a best-effort boundary covering network
        # errors, timeouts and malformed JSON alike.
        print(f"API Error: {e}")

# --- CALLBACKS ---
def on_voice_message(client, userdata, msg):
    """Handle a message from the voice broker.

    Audio topic (``VOICE_TOPIC_AUDIO``):
      * In command mode the payload is appended to ``command_buffer``; if
        the buffer has already grown past 3,000,000 bytes the recording is
        dropped and the bridge returns to listening.
      * Otherwise the payload is interpreted as int16 samples and fed to the
        wake-word model; a score above 0.5 for ``WAKE_WORD`` switches to
        command mode and publishes ``"WAKE"`` on ``VOICE_TOPIC_STATUS``.

    Status topic (``VOICE_TOPIC_STATUS``) with payload ``processing``:
      * If a recording is in progress it is transcribed and forwarded, then
        the state and the wake-word model are reset.

    Args:
        client: The MQTT client that received the message; used to publish
            the wake notification.
        userdata: Unused.
        msg: Received message; ``topic`` and ``payload`` are read.
    """
    global is_command_mode, command_buffer

    if msg.topic == VOICE_TOPIC_AUDIO:
        payload = msg.payload

        # 1. COMMAND MODE (Recording for Whisper)
        if is_command_mode:
            # Safety Check: Prevent Memory Overflow (cap at ~3MB)
            if len(command_buffer) > 3000000:
                print("Error: Buffer overflow. Resetting.")
                is_command_mode = False
                command_buffer = bytearray()
                owwModel.reset()
                return

            # Append the audio chunk
            command_buffer.extend(payload)
            return

        # 2. LISTENING MODE (Wake Word Detection)
        # Read only whole 16-bit samples: without `count`, np.frombuffer
        # raises ValueError on an odd-length payload, and an exception here
        # would escape into the MQTT network loop.
        sample_count = len(payload) // 2
        if sample_count == 0:
            return
        audio_int16 = np.frombuffer(payload, dtype=np.int16, count=sample_count)

        # Feed the model
        prediction = owwModel.predict(audio_int16)

        if prediction[WAKE_WORD] > 0.5:
            print(f"--- WAKE WORD DETECTED ---")
            is_command_mode = True
            command_buffer = bytearray()
            client.publish(VOICE_TOPIC_STATUS, "WAKE")

    # Compare as bytes: decoding an arbitrary status payload could raise
    # UnicodeDecodeError, while the match itself is identical.
    elif msg.topic == VOICE_TOPIC_STATUS and msg.payload == b"processing":
        # ESP32 stopped recording
        if is_command_mode:
            print("Processing...")
            try:
                transcribe_and_send(command_buffer)
            finally:
                # Always return to listening, even if transcription blew up,
                # so the bridge cannot get stuck in command mode.
                is_command_mode = False
                command_buffer = bytearray()
                owwModel.reset()

# --- MAIN ---
def _on_voice_connect(client, userdata, flags, rc, properties=None):
    """Subscribe to the voice topics every time the connection is established.

    Subscribing here rather than once after ``connect()`` means the
    subscriptions are renewed when ``loop_forever`` reconnects after a
    dropped connection; otherwise the bridge would stop receiving messages.
    ``properties`` is accepted so the callback also fits the MQTT v5
    signature.
    """
    if rc != 0:
        print(f"Voice broker connection failed: rc={rc}")
        return
    client.subscribe([(VOICE_TOPIC_AUDIO, 0), (VOICE_TOPIC_STATUS, 0)])


# Client 1: Voice Server (The Listener)
voice_client = mqtt.Client(client_id="Bridge_Voice")
voice_client.username_pw_set(MQTT_USER, MQTT_PASS)
voice_client.on_connect = _on_voice_connect
voice_client.on_message = on_voice_message
voice_client.connect(VOICE_BROKER, 1883)

# Client 2: Home Assistant (The Sender)
ha_client = mqtt.Client(client_id="Bridge_HA")
ha_client.username_pw_set(MQTT_USER, MQTT_PASS)
ha_client.connect(HA_BROKER, 1883)

print("Bridge Running (Dual Broker)...")

# loop_forever blocks, so the HA client's network loop runs in a daemon
# thread while the voice client's loop occupies the main thread.
ha_thread = threading.Thread(target=ha_client.loop_forever, daemon=True)
ha_thread.start()

voice_client.loop_forever()