273 lines
9.4 KiB
Python
Executable File
273 lines
9.4 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Bilingual Voice Assistant Core
|
|
Main logic for processing voice commands and generating responses.
|
|
"""
|
|
|
|
import json
import logging
import os
import random
import re
from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, List, Tuple

from speech_recognizer import BilingualSpeechRecognizer
from music_player import MusicPlayer
from openclaw_client import OpenClawClient
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Emoji prefix for each kind of assistant response, keyed by response type.
# Several values carry a trailing space after the emoji; it is part of the
# value and is kept as-is.
EMOJIS: Dict[str, str] = {
    # Conversation
    "greeting": "👋",
    # Music playback
    "music_play": "🎵",
    "music_pause": "⏸️ ",
    "music_resume": "▶️ ",
    "music_stop": "⏹️ ",
    "music_next": "⏭️ ",
    "music_volume": "🔊",
    # Information and Q&A
    "time": "🕐",
    "question": "🤔",
    "answer": "💬",
    # Status
    "error": "⚠️ ",
    "success": "✅",
    "thinking": "💭",
    # OpenClaw backend
    "openclaw": "🏭",
}
|
|
|
|
|
|
class VoiceAssistant:
    """
    Main assistant class coordinating speech recognition,
    command processing, and responses.

    Recognized text is routed, in order, to: music control, time query,
    greeting, explicit OpenClaw query ("ask claw ..."), and finally OpenClaw
    as the default handler. Every handler returns a
    ``(response_text, response_language)`` tuple.
    """

    # Phrases after "play"/"播放" that ask for music in general rather than
    # naming a track; they select the library fallback instead of a search.
    _GENERIC_SONG_REQUESTS = frozenset({
        "music", "some music", "the music", "a song", "some songs",
        "songs", "something", "anything",
        "音乐", "歌", "歌曲", "一首歌", "点音乐", "一些音乐",
    })

    # Whitespace and sentence punctuation trimmed from an extracted song name.
    _SONG_NAME_TRIM = " \t\r\n.,!?。,!?"

    def __init__(self, config_path: str = "config.json"):
        """Load the configuration and build the collaborating components.

        Args:
            config_path: Path to the JSON configuration file. The same path
                is passed to the speech recognizer, music player and
                OpenClaw client.
        """
        self.config_path = config_path
        self.config = self._load_config(config_path)

        # Initialize components
        self.speech_recognizer = BilingualSpeechRecognizer(config_path)
        self.music_player = MusicPlayer(config_path)
        self.openclaw_client = OpenClawClient(config_path)

        # Command patterns
        self.music_commands = [
            "play", "pause", "resume", "stop", "next", "previous",
            "volume", "shuffle", "repeat"
        ]

        self.chinese_music_commands = [
            "播放", "暂停", "继续", "停止", "下一首", "上一首",
            "音量", "随机", "重复"
        ]

        logger.info("VoiceAssistant initialized")

    def _load_config(self, config_path: str) -> dict:
        """Load configuration from a JSON file.

        Args:
            config_path: Path to the JSON configuration file.

        Returns:
            The parsed configuration, or an empty dict when the file does
            not exist.

        Raises:
            json.JSONDecodeError: If the file exists but is not valid JSON.
        """
        try:
            # Explicit UTF-8 so non-ASCII settings load the same way on
            # every platform instead of depending on the locale encoding.
            with open(config_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            logger.warning(
                "Config file %s not found; using empty configuration",
                config_path,
            )
            return {}

    @staticmethod
    def _keyword_regex(keyword: str) -> str:
        """Return a regex source string that matches *keyword* in free text.

        ASCII keywords must not be embedded in a longer ASCII word, so "hi"
        does not match inside "this" and "play" does not match "playing".
        Keywords containing non-ASCII characters (the Chinese ones) are
        matched as plain substrings because Chinese has no word separators.
        """
        escaped = re.escape(keyword)
        if keyword.isascii():
            return rf"(?<![A-Za-z0-9]){escaped}(?![A-Za-z0-9])"
        return escaped

    @classmethod
    def _contains_keyword(cls, text: str, keywords) -> bool:
        """Return True if any of *keywords* occurs in *text* (case-insensitive)."""
        return any(
            re.search(cls._keyword_regex(keyword), text, re.IGNORECASE)
            for keyword in keywords
        )

    @staticmethod
    def _reply(language: str, english: str, chinese: str) -> Tuple[str, str]:
        """Pick the reply matching *language*.

        Anything other than ``"en"`` is answered in Chinese, which matches
        the language tag the music handlers have always returned.
        """
        if language == "en":
            return (english, "en")
        return (chinese, "zh")

    def process_command(self, text: str, language: str = "en") -> Tuple[str, str]:
        """
        Process a voice command and return response.

        Args:
            text: Recognized text
            language: Detected language ('en' or 'zh')

        Returns:
            Tuple of (response_text, response_language)
        """
        text_lower = text.lower()

        # Music commands
        if self._is_music_command(text_lower, language):
            return self._handle_music_command(text_lower, language)

        # Time query
        if self._contains_keyword(text_lower, ["what time", "time is it", "几点", "时间"]):
            return self._get_time(language)

        # Greeting
        if self._contains_keyword(text_lower, ["hello", "hi", "hey", "你好", "您好"]):
            return self._get_greeting(language)

        # OpenClaw query
        claw_triggers = ["ask claw", "问 claw"]
        if self._contains_keyword(text_lower, claw_triggers):
            # Cut the trigger out of the ORIGINAL text so the question keeps
            # its casing (names, acronyms) when forwarded.
            trigger_pattern = "|".join(
                self._keyword_regex(trigger) for trigger in claw_triggers
            )
            question = re.sub(
                trigger_pattern, "", text, flags=re.IGNORECASE
            ).strip()
            return self._ask_openclaw(question, language)

        # Default: ask OpenClaw
        return self._ask_openclaw(text, language)

    def _is_music_command(self, text: str, language: str) -> bool:
        """Check if text is a music command.

        English keywords are used when ``language == "en"``; the Chinese
        keywords are used for every other language value.
        """
        if language == "en":
            return self._contains_keyword(text, self.music_commands)
        return self._contains_keyword(text, self.chinese_music_commands)

    def _handle_music_command(self, text: str, language: str) -> Tuple[str, str]:
        """Handle music playback commands.

        Args:
            text: Lower-cased command text.
            language: Detected language; selects the reply language.

        Returns:
            Tuple of (response_text, response_language).
        """
        player = self.music_player

        def mentions(*keywords: str) -> bool:
            return self._contains_keyword(text, keywords)

        # Play command
        if mentions("play", "播放"):
            # Extract song name if specified
            song_name = self._extract_song_name(text)
            if song_name:
                matches = player.search_tracks(song_name)
                if not matches:
                    return self._reply(language, "Song not found", "未找到歌曲")
                track = matches[0]
                player.play(track)
                return self._reply(
                    language,
                    f"Playing {track.name}",
                    f"正在播放 {track.name}",
                )

            # No specific song requested: start with the first library track.
            if player.music_library:
                first_track = next(iter(player.music_library.values()))
                player.play(first_track)
                return self._reply(
                    language,
                    f"{EMOJIS['music_play']} Playing music",
                    f"{EMOJIS['music_play']} 正在播放音乐",
                )

            # Empty library: say so instead of "Command not recognized".
            return self._reply(language, "No music available", "没有可播放的音乐")

        # Pause
        if mentions("pause", "暂停"):
            player.pause()
            return self._reply(
                language,
                f"{EMOJIS['music_pause']} Paused",
                f"{EMOJIS['music_pause']} 已暂停",
            )

        # Resume
        if mentions("resume", "继续"):
            player.resume()
            return self._reply(
                language,
                f"{EMOJIS['music_resume']} Resumed",
                f"{EMOJIS['music_resume']} 已继续播放",
            )

        # Stop
        if mentions("stop", "停止"):
            player.stop()
            return self._reply(
                language,
                f"{EMOJIS['music_stop']} Stopped",
                f"{EMOJIS['music_stop']} 已停止",
            )

        # Next
        if mentions("next", "下一首"):
            player.next()
            return self._reply(
                language,
                f"{EMOJIS['music_next']} Next track",
                f"{EMOJIS['music_next']} 下一首",
            )

        # Volume
        if mentions("volume", "音量"):
            if mentions("up", "大"):
                player.set_volume(player.volume + 0.1)
            elif mentions("down", "小"):
                player.set_volume(player.volume - 0.1)
            # NOTE(review): the reply is sent even when neither direction was
            # recognized, and the new value is not clamped here — presumably
            # set_volume() validates its argument; confirm.
            return self._reply(
                language,
                f"{EMOJIS['music_volume']} Volume adjusted",
                f"{EMOJIS['music_volume']} 音量已调整",
            )

        # "previous", "shuffle" and "repeat" (and their Chinese equivalents)
        # are recognized as music commands but have no handler yet.
        return self._reply(language, "Command not recognized", "无法识别的命令")

    def _extract_song_name(self, text: str) -> Optional[str]:
        """Extract song name from command.

        Takes the text following the first "play" (checked first) or "播放"
        trigger.

        Returns:
            The requested song name, or None when no trigger is present,
            nothing follows it, or what follows is a generic request such as
            "music" / "音乐" that does not name a track.
        """
        for trigger in ("play", "播放"):
            match = re.search(self._keyword_regex(trigger), text, re.IGNORECASE)
            if match is None:
                continue
            candidate = text[match.end():].strip(self._SONG_NAME_TRIM)
            if not candidate or candidate.lower() in self._GENERIC_SONG_REQUESTS:
                return None
            return candidate
        return None

    def _get_time(self, language: str) -> Tuple[str, str]:
        """Get current time response (local time, Chinese only for 'zh')."""
        now = datetime.now()
        emoji = EMOJIS['time']
        if language == "zh":
            # Format the numbers separately so no non-ASCII text goes through
            # strftime, which is locale-dependent on some platforms.
            return (f"{emoji} 现在时间是 {now:%H}点{now:%M}分", "zh")
        return (f"{emoji} The current time is {now.strftime('%I:%M %p')}", "en")

    def _get_greeting(self, language: str) -> Tuple[str, str]:
        """Get greeting response, chosen at random for the given language."""
        greetings_en = [
            "Hello! How can I help you?",
            "Hi there! What can I do for you?",
            "Hey! Ready to assist you."
        ]
        greetings_zh = [
            "你好!有什么可以帮你的吗?",
            "您好!需要什么帮助?",
            "嗨!随时为您服务。"
        ]

        if language == "zh":
            return (random.choice(greetings_zh), "zh")
        return (random.choice(greetings_en), "en")

    def _ask_openclaw(self, question: str, language: str) -> Tuple[str, str]:
        """Send question to OpenClaw and get response.

        Args:
            question: Text forwarded to the OpenClaw client.
            language: Preferred reply language, sent as request context.

        Returns:
            Tuple of (response_text, response_language). The language is
            "zh" whenever the reply contains CJK ideographs, otherwise the
            requested language.
        """
        if not self.openclaw_client.enabled:
            if language == "zh":
                return (f"{EMOJIS['openclaw']} OpenClaw 未启用", "zh")
            return (f"{EMOJIS['openclaw']} OpenClaw is not enabled", "en")

        # Add context about language preference
        context = {"preferred_language": language}

        response = self.openclaw_client.send_request(question, context)

        # A missing/empty response is treated like an explicit error rather
        # than crashing on the membership test below.
        if not response or "error" in response:
            if language == "zh":
                return (f"{EMOJIS['error']} 抱歉,暂时无法回答", "zh")
            return (f"{EMOJIS['error']} Sorry, I can't answer that right now", "en")

        # Extract response text; coerce so the language scan below always
        # iterates over characters.
        response_text = response.get("response", str(response))
        if not isinstance(response_text, str):
            response_text = str(response_text)

        # Detect response language
        response_lang = language  # Assume same language
        if any('\u4e00' <= char <= '\u9fff' for char in response_text):
            response_lang = "zh"

        return (response_text, response_lang)

    def get_status(self) -> Dict:
        """Get assistant status.

        Returns:
            Dict with a fixed "active" marker for the speech recognizer and
            the status objects reported by the music player and OpenClaw
            client.
        """
        return {
            "speech_recognizer": "active",
            "music_player": self.music_player.get_status(),
            "openclaw": self.openclaw_client.get_status()
        }
|
|
|
|
|
|
def main():
    """Run a fixed set of English and Chinese sample commands and print the replies."""
    assistant = VoiceAssistant()

    # (utterance, language) pairs covering greeting, time and music paths.
    samples = (
        ("hello", "en"),
        ("what time is it", "en"),
        ("play music", "en"),
        ("你好", "zh"),
        ("现在几点", "zh"),
        ("播放音乐", "zh"),
    )

    separator = "-" * 40
    for utterance, lang in samples:
        reply, reply_lang = assistant.process_command(utterance, lang)
        print(f"Input: {utterance} ({lang})")
        print(f"Output: {reply} ({reply_lang})")
        print(separator)


# Script entry point: enable INFO logging, then run the sample commands.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    main()
|