106 lines
3.6 KiB
Python
106 lines
3.6 KiB
Python
import paho.mqtt.client as mqtt
|
|
import requests
|
|
import wave
|
|
import io
|
|
import numpy as np
|
|
import openwakeword
|
|
from openwakeword.model import Model
|
|
|
|
# --- CONFIG ---
MQTT_BROKER = "192.168.20.30"
MQTT_USER = "mqtt-user"
# NOTE(review): hardcoded credential committed to source — consider loading
# from an environment variable or a secrets store instead.
MQTT_PASS = "sam4jo"
TOPIC_AUDIO = "homeassistant/voice/audio"  # raw 16 kHz mono 16-bit PCM chunks from the device
TOPIC_ACK = "homeassistant/voice/ack"      # acknowledgement back to the device (LED feedback)
TOPIC_TEXT = "homeassistant/voice/text"    # transcribed command text for Home Assistant

WHISPER_URL = "http://localhost:5000/transcribe"
WAKE_WORD_MODEL = "hey_jarvis" # or 'alexa', 'hey_mycroft', 'timer', 'weather'

# --- STATE ---
# We buffer audio here.
# 1. We feed it to WakeWord.
# 2. If WakeWord triggers, we KEEP recording for the command.
audio_buffer = bytearray()      # PCM awaiting wake-word analysis (pre-trigger)
is_command_mode = False         # True between wake-word trigger and command dispatch
command_buffer = bytearray()    # PCM of the spoken command (post-trigger)

print("Loading OpenWakeWord...")
owwModel = Model(wakeword_models=[WAKE_WORD_MODEL])
|
def send_to_whisper(raw_data):
    """Wrap raw 16 kHz mono 16-bit PCM in a WAV container and POST it to Whisper.

    On HTTP 200, publishes the transcript to TOPIC_TEXT and an "OK" ack to
    TOPIC_ACK (the device uses the ack for LED feedback). Network/JSON errors
    are logged and swallowed so a failed transcription never kills the MQTT
    loop.

    Args:
        raw_data: bytes-like buffer of raw PCM samples (s16le, mono, 16 kHz).
    """
    if not raw_data:
        # Nothing captured (e.g. instant timeout) — don't post an empty WAV.
        print("No audio captured; skipping Whisper call.")
        return

    print(f"Sending {len(raw_data)} bytes to Whisper...")
    wav_io = io.BytesIO()
    with wave.open(wav_io, "wb") as wav_file:
        wav_file.setnchannels(1)      # mono
        wav_file.setsampwidth(2)      # 16-bit samples
        wav_file.setframerate(16000)  # 16 kHz
        wav_file.writeframes(raw_data)
    wav_io.seek(0)

    try:
        files = {'audio': ('cmd.wav', wav_io, 'audio/wav')}
        # FIX: a timeout so a hung Whisper server cannot block the MQTT
        # network thread forever (the original post had no timeout).
        res = requests.post(WHISPER_URL, files=files, timeout=30)
        if res.status_code == 200:
            text = res.json().get("transcript", "")
            print(f"COMMAND: {text}")
            client.publish(TOPIC_TEXT, text)
            client.publish(TOPIC_ACK, "OK") # Rainbow LED
        else:
            print("Whisper Error")
    except Exception as e:
        print(f"Error: {e}")
|
def on_message(client, userdata, msg):
    """MQTT callback: wake-word detection and command capture state machine.

    Idle: PCM arriving on TOPIC_AUDIO is cut into 1280-sample frames and fed
    to OpenWakeWord. On a detection we switch to command mode.

    Command mode: PCM accumulates in command_buffer until either the ~5 s
    size cap is hit or the device publishes "processing" on the status topic
    (its VAD decided the utterance ended); the buffer is then sent to Whisper.
    """
    global audio_buffer, is_command_mode, command_buffer

    if msg.topic == TOPIC_AUDIO:
        payload = msg.payload

        # 1. If we are already listening for a command, just accumulate.
        if is_command_mode:
            command_buffer.extend(payload)
            # Hard cap (~5 s of 16 kHz / 16-bit audio) so a missed status
            # message can't grow the buffer forever.
            if len(command_buffer) > 16000 * 2 * 5:
                print("Timeout. Sending...")
                send_to_whisper(command_buffer)
                is_command_mode = False
                command_buffer = bytearray()
            return

        # 2. Not in command mode: feed OpenWakeWord.
        # OWW expects 1280-sample (2560-byte) chunks.
        audio_buffer.extend(payload)

        chunk_size = 1280 * 2  # 1280 samples * 2 bytes each
        while len(audio_buffer) >= chunk_size:
            chunk = audio_buffer[:chunk_size]
            audio_buffer = audio_buffer[chunk_size:]

            # OWW takes a 1-D int16 array of samples.
            audio_int16 = np.frombuffer(chunk, dtype=np.int16)
            prediction = owwModel.predict(audio_int16)

            # Score is in [0.0, 1.0]; 0.5 is the detection threshold.
            if prediction[WAKE_WORD_MODEL] > 0.5:
                print(f"WAKE WORD DETECTED: {WAKE_WORD_MODEL}")
                is_command_mode = True
                # BUGFIX: audio already buffered *after* the wake word is the
                # start of the command. The original discarded it (and kept
                # feeding it to OWW); carry it over and stop scanning.
                command_buffer = bytearray(audio_buffer)
                audio_buffer = bytearray()
                # Optional: Send "Awake" LED command to ESP32 here
                break

    elif msg.topic == "homeassistant/voice/status" and msg.payload.decode() == "processing":
        # ESP32 finished its VAD stream — flush whatever command audio we have.
        if is_command_mode:
            send_to_whisper(command_buffer)
            is_command_mode = False
            command_buffer = bytearray()
|
# --- MQTT SETUP ---
client = mqtt.Client()
client.username_pw_set(MQTT_USER, MQTT_PASS)
# BUGFIX: the message callback was never registered, so the subscriptions
# below would deliver nothing and no audio would ever be processed.
client.on_message = on_message
client.connect(MQTT_BROKER, 1883)
client.subscribe([(TOPIC_AUDIO, 0), ("homeassistant/voice/status", 0)])
client.loop_forever()