#!/usr/bin/env python3
"""
Hotword Detector

Detects wake words: "Hey Osiris" / "你好 Osiris"

Supports:
- Porcupine (PicoVoice) for accurate hotword detection
- Custom keyword spotting
- Bilingual support (English/Mandarin)
"""

import os
import json
import logging
import struct
import time
import wave
from typing import Optional, Callable, List
from pathlib import Path

# Defined before the optional imports so the warnings below go through the
# module logger.  Calling ``logging.warning`` on the root logger at import
# time implicitly runs ``basicConfig()``, which would turn the explicit
# ``basicConfig(level=INFO)`` in the ``__main__`` guard into a no-op.
logger = logging.getLogger(__name__)

try:
    import pvporcupine
    import pyaudio
    HAS_PORCUPINE = True
except ImportError:
    HAS_PORCUPINE = False
    logger.warning("Porcupine not installed. Install with: pip install pvporcupine")

try:
    import webrtcvad
    HAS_VAD = True
except ImportError:
    HAS_VAD = False
    logger.warning("WebRTC VAD not installed")


class HotwordDetector:
    """
    Hotword detection with support for "Hey Osiris" in English and Mandarin.

    Uses Porcupine when it is importable and initializes successfully;
    otherwise ``detect`` falls back to a placeholder that returns ``None``.
    """

    def __init__(self, config_path: str = "hotword_config.json"):
        """Load configuration and set up the optional Porcupine / VAD backends.

        Args:
            config_path: Path to a JSON configuration file. Built-in defaults
                are used when the file does not exist.
        """
        self.config = self._load_config(config_path)
        self.audio_config = self.config.get("audio", {
            "sample_rate": 16000,
            "frame_length": 512,
        })
        self.hotwords = self.config.get("hotwords", [])

        self.is_running = False
        self.callback: Optional[Callable[[], None]] = None

        # Porcupine setup
        self.porcupine = None
        self.keyword_index = -1

        if HAS_PORCUPINE:
            self._init_porcupine()

        # VAD setup
        self.vad = None
        if HAS_VAD:
            self.vad = webrtcvad.Vad(2)  # Aggressiveness level 2

        logger.info(f"HotwordDetector initialized (Porcupine: {HAS_PORCUPINE})")

    def _load_config(self, config_path: str) -> dict:
        """Load configuration.

        Args:
            config_path: Path to the JSON configuration file.

        Returns:
            The parsed configuration, or the built-in defaults when the file
            is missing.

        Raises:
            json.JSONDecodeError: If the file exists but is not valid JSON.
        """
        try:
            # Explicit UTF-8: the config may carry non-ASCII keywords
            # (e.g. "keyword_zh"), which the platform default encoding can
            # fail to decode.
            with open(config_path, 'r', encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            return {
                "hotwords": [
                    {
                        "keyword": "hey osiris",
                        "keyword_zh": "你好 osiris",
                        "sensitivity": 0.5,
                    }
                ],
                "audio": {
                    "sample_rate": 16000,
                    "frame_length": 512,
                },
            }

    def _init_porcupine(self):
        """Initialize Porcupine hotword detection.

        On any failure the error is logged and ``self.porcupine`` stays
        ``None`` so that ``detect`` uses the fallback path.
        """
        if not HAS_PORCUPINE:
            return

        try:
            # NOTE(review): ``keywords=`` presumably accepts only Picovoice's
            # built-in keywords, and recent pvporcupine releases appear to
            # require an ``access_key``; a custom "hey osiris" model would
            # likely have to be passed via ``keyword_paths`` - confirm against
            # the installed pvporcupine version.
            self.porcupine = pvporcupine.create(
                keywords=["hey osiris"],
                sensitivities=[0.5],
            )
            self.keyword_index = 0
            logger.info("Porcupine initialized with 'Hey Osiris'")
        except Exception as e:
            logger.warning(f"Porcupine initialization failed: {e}")
            self.porcupine = None

    def set_callback(self, callback: Callable[[], None]):
        """Set callback function for when hotword is detected."""
        self.callback = callback

    def detect(self, timeout: Optional[float] = None) -> Optional[str]:
        """
        Start detection and wait for hotword.

        Args:
            timeout: Maximum time to wait in seconds (None = infinite)

        Returns:
            Detected hotword or None
        """
        if not self.porcupine:
            logger.warning("Porcupine not available, using simple detection")
            return self._simple_detect(timeout)

        return self._porcupine_detect(timeout)

    @staticmethod
    def _unpack_frame(pcm_bytes: bytes, frame_length: int) -> tuple:
        """Decode ``frame_length`` native-endian signed 16-bit samples.

        Args:
            pcm_bytes: Raw audio buffer holding at least
                ``2 * frame_length`` bytes.
            frame_length: Number of samples to decode.

        Returns:
            A tuple of ``frame_length`` ints.

        Raises:
            struct.error: If the buffer is shorter than required.
        """
        # The repeat count must come BEFORE the format character ("512h");
        # "h512" is rejected by ``struct``.
        return struct.unpack_from(f"{frame_length}h", pcm_bytes)

    def _porcupine_detect(self, timeout: Optional[float] = None) -> Optional[str]:
        """Detect using Porcupine.

        Reads microphone frames and feeds them to Porcupine until the keyword
        fires, ``stop`` is called, or ``timeout`` elapses.

        Args:
            timeout: Maximum time to wait in seconds. ``None`` (or any falsy
                value) disables the timeout.

        Returns:
            ``"hey osiris"`` on detection, otherwise ``None``.
        """
        if not self.porcupine:
            return None

        import pyaudio

        pa = pyaudio.PyAudio()
        # Bound before the ``try`` so the ``finally`` clause cannot hit a
        # NameError when opening the stream fails.
        stream = None

        try:
            frame_length = self.porcupine.frame_length

            # Open audio stream
            stream = pa.open(
                rate=self.porcupine.sample_rate,
                channels=1,
                format=pyaudio.paInt16,
                input=True,
                frames_per_buffer=frame_length,
            )

            logger.info("Listening for 'Hey Osiris'...")
            self.is_running = True

            # Monotonic clock: immune to wall-clock adjustments.
            deadline = time.monotonic() + timeout if timeout else None

            while self.is_running:
                # Check timeout
                if deadline is not None and time.monotonic() > deadline:
                    logger.info("Hotword detection timeout")
                    break

                # Read audio frame
                raw = stream.read(frame_length, exception_on_overflow=False)
                pcm = self._unpack_frame(raw, frame_length)

                # Process frame
                keyword_index = self.porcupine.process(pcm)

                if keyword_index >= 0:
                    logger.info("Hotword detected!")
                    if self.callback:
                        self.callback()
                    return "hey osiris"

        except KeyboardInterrupt:
            logger.info("Detection interrupted")
        except Exception as e:
            logger.error(f"Detection error: {e}")
        finally:
            if stream is not None:
                stream.close()
            pa.terminate()
            self.is_running = False

        return None

    def _simple_detect(self, timeout: Optional[float] = None) -> Optional[str]:
        """
        Simple voice activity detection (fallback).

        Currently a placeholder: it logs a warning and always returns None.
        """
        logger.warning("Using simple voice detection (not recommended)")

        # This is a placeholder - in production you'd use:
        # - Snowboy
        # - Custom trained model
        # - Or just use Porcupine

        return None

    def stop(self):
        """Stop detection."""
        self.is_running = False
        logger.info("Hotword detection stopped")

    def close(self) -> None:
        """Stop detection and release the native Porcupine handle, if any.

        Safe to call more than once.
        """
        self.is_running = False
        if self.porcupine is not None:
            self.porcupine.delete()
            self.porcupine = None

    def create_custom_hotword(self, keyword: str, output_path: str):
        """
        Create custom hotword model (requires Porcupine training).

        This is a placeholder - actual implementation requires:
        1. Recording multiple samples of the keyword
        2. Training with Porcupine Console
        3. Exporting the model
        """
        logger.info(f"Custom hotword creation not implemented: {keyword}")
        logger.info("Use Porcupine Console to train custom keywords")


class SimpleHotwordDetector:
    """
    Simple hotword detection using audio level threshold.
    Fallback when Porcupine is not available.

    Currently a stub: ``detect`` performs no audio processing.
    """

    def __init__(self, keyword: str = "hey osiris"):
        """Store the keyword and default settings.

        Args:
            keyword: The wake phrase this detector is meant to represent.
        """
        self.keyword = keyword
        self.threshold = 0.5
        self.is_running = False

    def detect(self, timeout: Optional[float] = None) -> Optional[str]:
        """Simple energy-based detection.

        Args:
            timeout: Accepted for interface parity; currently unused.

        Returns:
            Always ``None`` (not implemented).
        """
        logger.warning("Simple detection is not reliable. Install Porcupine for best results.")
        return None


def main():
    """Test hotword detection."""
    print("\n" + "="*60)
    print(" 🔍 Hotword Detector Test")
    print(" Say 'Hey Osiris' or '你好 Osiris'")
    print("="*60)

    detector = HotwordDetector()

    def on_hotword():
        print("\n🎉 HOTWORD DETECTED!")

    detector.set_callback(on_hotword)

    try:
        result = detector.detect(timeout=30)
        if result:
            print(f"Detected: {result}")
        else:
            print("No hotword detected")
    except KeyboardInterrupt:
        print("\nTest stopped")
        detector.stop()


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    main()