#!/usr/bin/env python3
"""
Bilingual Voice Assistant Core

Main logic for processing voice commands and generating responses.
"""

import os
import json
import logging
import random
import re
from typing import Optional, Dict, List, Tuple
from pathlib import Path
from datetime import datetime

from speech_recognizer import BilingualSpeechRecognizer
from music_player import MusicPlayer
from openclaw_client import OpenClawClient

logger = logging.getLogger(__name__)


class VoiceAssistant:
    """
    Main assistant class coordinating speech recognition, command
    processing, and responses.

    Every handler returns a ``(response_text, response_language)`` tuple
    where the language is ``"en"`` or ``"zh"``.
    """

    # English greeting words are matched as whole words; the Chinese ones
    # are matched as substrings (see ``_contains_word`` for the reason).
    _GREETINGS_EN = ("hello", "hi", "hey")
    _GREETINGS_ZH = ("你好", "您好")

    # Phrases that trigger the time query (substring match on lowered text).
    _TIME_PHRASES = ("what time", "time is it", "几点", "时间")

    # Words following "play"/"播放" that mean "anything", not a track title.
    _GENERIC_SONG_WORDS = frozenset({
        "music", "some music", "a song", "song", "songs",
        "something", "anything",
        "音乐", "歌", "歌曲", "一首歌",
    })

    # Localised replies for music commands, keyed by message id then
    # language, so the text always matches the language tag returned with it.
    _MUSIC_MESSAGES = {
        "playing_track": {"en": "Playing {name}", "zh": "正在播放 {name}"},
        "not_found": {"en": "Song not found", "zh": "未找到歌曲"},
        "playing": {"en": "Playing music", "zh": "正在播放音乐"},
        "no_music": {"en": "No music available", "zh": "音乐库为空"},
        "paused": {"en": "Paused", "zh": "已暂停"},
        "resumed": {"en": "Resumed", "zh": "已继续播放"},
        "stopped": {"en": "Stopped", "zh": "已停止"},
        "next": {"en": "Next track", "zh": "下一首"},
        "volume": {"en": "Volume adjusted", "zh": "音量已调整"},
        "unknown": {"en": "Command not recognized", "zh": "无法识别的指令"},
    }

    def __init__(self, config_path: str = "config.json"):
        """
        Load the configuration and build the assistant's components.

        Args:
            config_path: Path of the JSON configuration file. The same path
                is handed to the speech recognizer, the music player and
                the OpenClaw client.
        """
        self.config_path = config_path
        self.config = self._load_config(config_path)

        # Initialize components
        self.speech_recognizer = BilingualSpeechRecognizer(config_path)
        self.music_player = MusicPlayer(config_path)
        self.openclaw_client = OpenClawClient(config_path)

        # Command patterns
        self.music_commands = [
            "play", "pause", "resume", "stop", "next", "previous",
            "volume", "shuffle", "repeat"
        ]
        self.chinese_music_commands = [
            "播放", "暂停", "继续", "停止", "下一首", "上一首",
            "音量", "随机", "重复"
        ]

        logger.info("VoiceAssistant initialized")

    def _load_config(self, config_path: str) -> dict:
        """
        Load configuration.

        Args:
            config_path: Path of the JSON configuration file.

        Returns:
            The parsed configuration, or an empty dict when the file does
            not exist.

        Raises:
            json.JSONDecodeError: If the file exists but is not valid JSON.
        """
        try:
            # Explicit UTF-8: the platform default encoding may not be able
            # to decode non-ASCII (e.g. Chinese) values in the file.
            with open(config_path, 'r', encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            logger.warning(
                "Config file %s not found; using empty config", config_path
            )
            return {}

    @staticmethod
    def _contains_word(text: str, words) -> bool:
        """
        Return True if any of *words* occurs in *text* as a whole word.

        Whole-word matching keeps "hi" from matching inside "this" and
        "play" from matching inside "display". It is only suitable for
        space-delimited (English) keywords: Chinese characters count as
        word characters, so ``\\b`` would not separate them.
        """
        pattern = r"\b(?:" + "|".join(re.escape(w) for w in words) + r")\b"
        return re.search(pattern, text) is not None

    def _mentions(self, text: str, english: str, chinese: str) -> bool:
        """Return True if *text* has the English word or Chinese keyword."""
        return self._contains_word(text, [english]) or chinese in text

    def _music_reply(self, key: str, language: str, **fmt) -> Tuple[str, str]:
        """
        Build a localised music reply.

        Args:
            key: Message id in ``_MUSIC_MESSAGES``.
            language: Detected language; anything other than ``"en"`` is
                answered in Chinese.
            **fmt: Values substituted into the message template.
        """
        lang = "en" if language == "en" else "zh"
        return (self._MUSIC_MESSAGES[key][lang].format(**fmt), lang)

    def process_command(self, text: str, language: str = "en") -> Tuple[str, str]:
        """
        Process a voice command and return response.

        Commands are tried in this order: music control, time query,
        greeting, explicit OpenClaw query ("ask claw ..."), and finally a
        fallback that forwards the whole text to OpenClaw.

        Args:
            text: Recognized text
            language: Detected language ('en' or 'zh')

        Returns:
            Tuple of (response_text, response_language)
        """
        text_lower = text.lower()

        # Music commands
        if self._is_music_command(text_lower, language):
            return self._handle_music_command(text_lower, language)

        # Time query
        if any(phrase in text_lower for phrase in self._TIME_PHRASES):
            return self._get_time(language)

        # Greeting (whole-word match for English so that "this", "which"
        # or "they" are not mistaken for "hi"/"hey")
        if (self._contains_word(text_lower, self._GREETINGS_EN)
                or any(word in text_lower for word in self._GREETINGS_ZH)):
            return self._get_greeting(language)

        # OpenClaw query
        if "ask claw" in text_lower or "问 claw" in text_lower:
            # Strip the trigger phrase from the original text so the
            # question keeps the user's casing.
            question = re.sub(
                r"ask claw|问 claw", "", text, flags=re.IGNORECASE
            ).strip()
            return self._ask_openclaw(question, language)

        # Default: ask OpenClaw
        return self._ask_openclaw(text, language)

    def _is_music_command(self, text: str, language: str) -> bool:
        """
        Check if text is a music command.

        Args:
            text: Lower-cased command text.
            language: ``"en"`` checks the English keywords (as whole
                words); any other value checks the Chinese keywords (as
                substrings).
        """
        if language == "en":
            return self._contains_word(text, self.music_commands)
        return any(cmd in text for cmd in self.chinese_music_commands)

    def _handle_music_command(self, text: str, language: str) -> Tuple[str, str]:
        """
        Handle music playback commands.

        Args:
            text: Lower-cased command text.
            language: Detected language; selects the reply language.

        Returns:
            Tuple of (response_text, response_language).

        NOTE: "previous"/"shuffle"/"repeat" (and their Chinese equivalents)
        are accepted by ``_is_music_command`` but have no branch here, so
        they currently produce the "not recognized" reply.
        """
        # Play command
        if self._mentions(text, "play", "播放"):
            # Extract song name if specified
            song_name = self._extract_song_name(text)
            if song_name:
                matches = self.music_player.search_tracks(song_name)
                if not matches:
                    return self._music_reply("not_found", language)
                self.music_player.play(matches[0])
                return self._music_reply(
                    "playing_track", language, name=matches[0].name
                )

            # No specific song requested: play the first library entry.
            library = self.music_player.music_library
            if not library:
                return self._music_reply("no_music", language)
            first_track = next(iter(library.values()))
            self.music_player.play(first_track)
            return self._music_reply("playing", language)

        # Pause
        if self._mentions(text, "pause", "暂停"):
            self.music_player.pause()
            return self._music_reply("paused", language)

        # Resume
        if self._mentions(text, "resume", "继续"):
            self.music_player.resume()
            return self._music_reply("resumed", language)

        # Stop
        if self._mentions(text, "stop", "停止"):
            self.music_player.stop()
            return self._music_reply("stopped", language)

        # Next
        if self._mentions(text, "next", "下一首"):
            self.music_player.next()
            return self._music_reply("next", language)

        # Volume
        if self._mentions(text, "volume", "音量"):
            # When no direction is given the volume is left untouched.
            if self._mentions(text, "up", "大"):
                self.music_player.set_volume(self.music_player.volume + 0.1)
            elif self._mentions(text, "down", "小"):
                self.music_player.set_volume(self.music_player.volume - 0.1)
            return self._music_reply("volume", language)

        return self._music_reply("unknown", language)

    def _extract_song_name(self, text: str) -> Optional[str]:
        """
        Extract song name from command.

        Takes the text following the first "play" (whole word) or, failing
        that, the first "播放".

        Returns:
            The requested title, or None when no title was given or only a
            generic word such as "music"/"音乐" followed the keyword.
        """
        match = re.search(r"\bplay\b(.*)", text, flags=re.DOTALL)
        if match:
            remainder = match.group(1)
        elif "播放" in text:
            remainder = text.split("播放", 1)[1]
        else:
            return None

        remainder = remainder.strip()
        if not remainder or remainder in self._GENERIC_SONG_WORDS:
            return None
        return remainder

    def _get_time(self, language: str) -> Tuple[str, str]:
        """
        Get current time response.

        Returns:
            The local time as 24-hour "HH点MM分" for ``"zh"``, otherwise as
            12-hour "HH:MM AM/PM" in English.
        """
        now = datetime.now()
        if language == "zh":
            # Built from the integer fields rather than strftime: passing
            # non-ASCII characters in a strftime format is locale dependent.
            return (f"现在时间是 {now.hour:02d}点{now.minute:02d}分", "zh")
        return (f"The current time is {now.strftime('%I:%M %p')}", "en")

    def _get_greeting(self, language: str) -> Tuple[str, str]:
        """Get greeting response, picked at random for the given language."""
        greetings_en = [
            "Hello! How can I help you?",
            "Hi there! What can I do for you?",
            "Hey! Ready to assist you."
        ]
        greetings_zh = [
            "你好!有什么可以帮你的吗?",
            "您好!需要什么帮助?",
            "嗨!随时为您服务。"
        ]

        if language == "zh":
            return (random.choice(greetings_zh), "zh")
        return (random.choice(greetings_en), "en")

    def _ask_openclaw(self, question: str, language: str) -> Tuple[str, str]:
        """
        Send question to OpenClaw and get response.

        Args:
            question: Text forwarded to the OpenClaw client.
            language: Detected language; sent as ``preferred_language`` and
                used for the fallback messages.

        Returns:
            Tuple of (response_text, response_language). The language is
            ``"zh"`` whenever the response contains a CJK ideograph,
            otherwise it is the input language.
        """
        if not self.openclaw_client.enabled:
            if language == "zh":
                return ("OpenClaw 未启用", "zh")
            return ("OpenClaw is not enabled", "en")

        # Add context about language preference
        context = {"preferred_language": language}
        response = self.openclaw_client.send_request(question, context)

        if "error" in response:
            if language == "zh":
                return ("抱歉,暂时无法回答", "zh")
            return ("Sorry, I can't answer that right now", "en")

        # Extract response text; coerce to str so the character scan below
        # and the declared return type hold for non-string payloads too.
        response_text = str(response.get("response", response))

        # Detect response language
        response_lang = language  # Assume same language
        if any('\u4e00' <= char <= '\u9fff' for char in response_text):
            response_lang = "zh"

        return (response_text, response_lang)

    def get_status(self) -> Dict:
        """Get assistant status, including music player and OpenClaw status."""
        return {
            "speech_recognizer": "active",
            "music_player": self.music_player.get_status(),
            "openclaw": self.openclaw_client.get_status()
        }


def main():
    """Test the assistant."""
    assistant = VoiceAssistant()

    # Test commands
    test_commands = [
        ("hello", "en"),
        ("what time is it", "en"),
        ("play music", "en"),
        ("你好", "zh"),
        ("现在几点", "zh"),
        ("播放音乐", "zh")
    ]

    for text, lang in test_commands:
        response, resp_lang = assistant.process_command(text, lang)
        print(f"Input: {text} ({lang})")
        print(f"Output: {response} ({resp_lang})")
        print("-" * 40)


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    main()