#!/usr/bin/env python3
"""
Bilingual Speech Recognizer

Supports English and Mandarin Chinese with automatic language detection.
"""

import os
import json
import logging
from typing import Optional, Tuple
from pathlib import Path

# Every backend is optional: the module must stay importable when a package
# is missing, so each import only records whether the backend is available.
try:
    import aiy.voice
    from aiy import speech
    HAS_AIY = True
except ImportError:
    HAS_AIY = False

try:
    from google.cloud import speech as speech_service
    HAS_GOOGLE_CLOUD = True
except ImportError:
    HAS_GOOGLE_CLOUD = False

try:
    from langdetect import detect
    HAS_LANG_DETECT = True
except ImportError:
    HAS_LANG_DETECT = False

logger = logging.getLogger(__name__)


class BilingualSpeechRecognizer:
    """
    Speech recognizer with automatic English/Mandarin detection.

    Recognition is attempted with Google Cloud Speech-to-Text first (when the
    package is importable and offline mode is not enabled in the config), then
    with the AIY recognizer (when the package is importable).
    """

    # Fraction of characters that must fall in U+4E00..U+9FFF for a text to be
    # classified as Chinese without consulting langdetect.
    _CHINESE_RATIO_THRESHOLD = 0.3

    # Language codes returned by langdetect that are reported as "zh".
    _CHINESE_CODES = frozenset({"zh-cn", "zh-tw", "zh"})

    def __init__(self, config_path: str = "config.json"):
        """
        Create a recognizer.

        Args:
            config_path: Path of the JSON configuration file. A missing file
                is not an error; built-in defaults are used instead.
        """
        self.config = self._load_config(config_path)
        self.language_cache = {}

        if HAS_AIY:
            self.aiy_recognizer = speech.Recognizer()
        else:
            self.aiy_recognizer = None

        logger.info("BilingualSpeechRecognizer initialized")

    def _load_config(self, config_path: str) -> dict:
        """
        Load configuration from JSON file.

        Args:
            config_path: Path of the JSON configuration file.

        Returns:
            The parsed JSON document, or the default configuration when the
            file does not exist.

        Raises:
            json.JSONDecodeError: If the file exists but is not valid JSON.
        """
        try:
            # Explicit encoding: the platform default is locale-dependent.
            with open(config_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            logger.warning(
                "Config file %s not found, using defaults", config_path
            )
            return {
                "speech": {
                    "language": "auto",
                    "recognition_timeout": 5
                }
            }

    def recognize(self, audio_data: bytes,
                  timeout: Optional[int] = None) -> Tuple[Optional[str], str]:
        """
        Recognize speech from audio data.

        Args:
            audio_data: Raw audio bytes
            timeout: Recognition timeout in seconds. When omitted, the
                "recognition_timeout" config value is used (default 5). It is
                forwarded to the Google Cloud request.

        Returns:
            Tuple of (recognized_text, detected_language). When no backend
            produces a transcript the result is (None, "unknown").
        """
        speech_config = self.config.get("speech", {})
        if timeout is None:
            timeout = speech_config.get("recognition_timeout", 5)

        # Nothing to transcribe; avoid a pointless backend round trip.
        if not audio_data:
            return None, "unknown"

        # Try Google Cloud Speech first (if available). Truthiness is used so
        # that a config value of 0 or null also counts as "not offline".
        if HAS_GOOGLE_CLOUD and not speech_config.get("offline_mode", False):
            try:
                text = self._google_cloud_recognize(audio_data, timeout)
                if text:
                    return text, self._detect_language(text)
            except Exception as e:
                # Best effort: any cloud failure falls through to the next
                # backend instead of aborting recognition.
                logger.warning("Google Cloud recognition failed: %s", e)

        # Fall back to AIY/local recognition
        if self.aiy_recognizer:
            try:
                text = self._aiy_recognize(audio_data)
                if text:
                    return text, self._detect_language(text)
            except Exception as e:
                logger.warning("AIY recognition failed: %s", e)

        # No backend produced a transcript.
        return None, "unknown"

    def _google_cloud_recognize(self, audio_data: bytes,
                                timeout: Optional[float] = None) -> Optional[str]:
        """
        Use Google Cloud Speech-to-Text for recognition.

        Args:
            audio_data: Audio bytes, sent as LINEAR16 at 16000 Hz.
            timeout: Optional request timeout in seconds. When None the
                client's own default is used.

        Returns:
            The top transcript of the first result, or None when the package
            is unavailable or the response carries no transcript.
        """
        if not HAS_GOOGLE_CLOUD:
            return None

        client = speech_service.SpeechClient()

        # Bilingual recognition: the v1 RecognitionConfig takes one primary
        # language_code plus alternative_language_codes; it has no
        # "language_codes" field.
        config = speech_service.RecognitionConfig(
            encoding=speech_service.RecognitionConfig.AudioEncoding.LINEAR16,
            sample_rate_hertz=16000,
            language_code="en-US",
            alternative_language_codes=["zh-CN", "zh-TW"],
            enable_automatic_punctuation=True,
        )
        audio = speech_service.RecognitionAudio(content=audio_data)

        # Only pass a timeout when one was given, so the client default
        # applies otherwise.
        call_options = {}
        if timeout is not None:
            call_options["timeout"] = timeout

        response = client.recognize(config=config, audio=audio, **call_options)

        if response.results:
            result = response.results[0]
            if result.alternatives:
                return result.alternatives[0].transcript

        return None

    def _aiy_recognize(self, audio_data: bytes) -> Optional[str]:
        """
        Use AIY Voice Kit for recognition.

        This is a placeholder: it currently never produces a transcript.

        Args:
            audio_data: Raw audio bytes (currently unused).

        Returns:
            Always None.
        """
        if not self.aiy_recognizer:
            return None

        # TODO: actual implementation depends on the AIY version in use.
        return None

    def _detect_language(self, text: str) -> str:
        """
        Detect if text is English or Chinese.

        Args:
            text: Text to classify.

        Returns:
            "unknown" for empty or whitespace-only text, "zh" when more than
            30% of the characters are in U+4E00..U+9FFF or langdetect reports
            a Chinese code, and "en" in every other case (the default).
        """
        if not text or not text.strip():
            return "unknown"

        # Simple heuristic: check for Chinese characters
        chinese_chars = sum(1 for char in text if '\u4e00' <= char <= '\u9fff')
        if chinese_chars > len(text) * self._CHINESE_RATIO_THRESHOLD:
            return "zh"

        # Use langdetect if available
        if HAS_LANG_DETECT:
            try:
                if detect(text) in self._CHINESE_CODES:
                    return "zh"
            except Exception:
                # Detection is best effort; fall through to the default
                # without swallowing KeyboardInterrupt/SystemExit.
                logger.debug("langdetect could not classify text",
                             exc_info=True)

        # Default to English
        return "en"

    def listen_for_hotword(self, callback) -> None:
        """
        Listen for hotword activation.

        This is a placeholder: it only logs and never invokes the callback.

        Args:
            callback: Function to call when hotword detected
        """
        if not HAS_AIY:
            logger.warning("AIY not available, hotword detection disabled")
            return

        # Implementation depends on AIY version
        # This is a placeholder for the actual hotword detection
        logger.info("Hotword detection enabled")


def main():
    """Test the speech recognizer."""
    recognizer = BilingualSpeechRecognizer()

    # Test language detection
    test_texts = [
        "Hello, how are you?",
        "你好,你好吗?",
        "Play some music",
        "播放音乐",
    ]

    for text in test_texts:
        lang = recognizer._detect_language(text)
        print(f"'{text}' -> Language: {lang}")


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    main()