Claw - AI Now Inc 1662bc141a Initial commit: Bilingual Voice Assistant for Google AIY Voice Kit V1
Features:
- Bilingual support (English/Mandarin Chinese)
- Hotword detection: 'Hey Osiris' / '你好 Osiris'
- Music playback control (MP3, WAV, OGG, FLAC)
- OpenClaw integration for AI responses
- Google AIY Voice Kit V1 compatible
- Text-to-speech in both languages
- Voice command recognition
- Raspberry Pi ready with installation script

AI Now Inc - Del Mar Demo Unit 🏭
2026-03-01 00:02:49 -08:00

254 lines
8.7 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Bilingual Voice Assistant Core
Main logic for processing voice commands and generating responses.
"""
import os
import json
import logging
import random
import re
from typing import Optional, Dict, List, Tuple
from pathlib import Path
from datetime import datetime

from speech_recognizer import BilingualSpeechRecognizer
from music_player import MusicPlayer
from openclaw_client import OpenClawClient
# Module-level logger named after this module; used by VoiceAssistant for status messages.
logger = logging.getLogger(__name__)
class VoiceAssistant:
    """
    Main assistant class coordinating speech recognition,
    command processing, and responses.

    Attributes:
        config_path: Path of the JSON configuration file.
        config: Parsed configuration ({} when the file is missing or invalid).
        speech_recognizer: BilingualSpeechRecognizer built from config_path.
        music_player: MusicPlayer built from config_path.
        openclaw_client: OpenClawClient built from config_path.
        music_commands: English keywords that mark a music command.
        chinese_music_commands: Chinese keywords that mark a music command.
    """

    # Keywords that route a command to the time / greeting handlers.
    _TIME_KEYWORDS = ("what time", "time is it", "几点", "时间")
    _GREETING_KEYWORDS = ("hello", "hi", "hey", "你好", "您好")

    # Explicit "ask claw ..." / "问 claw ..." prefix (space after 问 optional).
    _ASK_CLAW_RE = re.compile(
        r"(?<![a-z0-9])ask\s+claw(?![a-z0-9])|问\s*claw", re.IGNORECASE
    )

    # Direction words for volume commands, in both languages.
    _VOLUME_UP = ("up", "louder", "increase", "raise", "大", "高", "增", "加")
    _VOLUME_DOWN = ("down", "quieter", "lower", "decrease", "小", "低", "减")

    # Words after "play" / "播放" that mean "anything", not a song title.
    _GENERIC_SONG_WORDS = frozenset({
        "music", "some music", "song", "a song", "songs", "something",
        "anything", "音乐", "歌", "歌曲", "首歌", "一首歌",
    })

    # (English, Chinese) reply templates for music commands.
    _RESPONSES = {
        "playing_track": ("Playing {name}", "正在播放 {name}"),
        "song_not_found": ("Song not found", "没有找到这首歌"),
        "playing_music": ("Playing music", "正在播放音乐"),
        "no_music": ("No music available", "音乐库是空的"),
        "paused": ("Paused", "已暂停"),
        "resumed": ("Resumed", "已继续播放"),
        "stopped": ("Stopped", "已停止"),
        "next": ("Next track", "下一首"),
        "volume": ("Volume adjusted", "音量已调整"),
        "unknown": ("Command not recognized", "无法识别该指令"),
    }

    def __init__(self, config_path: str = "config.json"):
        """Load the configuration and create the helper components.

        Args:
            config_path: Path to the JSON configuration file; it is also
                handed to each component's constructor.
        """
        self.config_path = config_path
        self.config = self._load_config(config_path)

        # Initialize components
        self.speech_recognizer = BilingualSpeechRecognizer(config_path)
        self.music_player = MusicPlayer(config_path)
        self.openclaw_client = OpenClawClient(config_path)

        # Command patterns
        self.music_commands = [
            "play", "pause", "resume", "stop", "next", "previous",
            "volume", "shuffle", "repeat"
        ]
        self.chinese_music_commands = [
            "播放", "暂停", "继续", "停止", "下一首", "上一首",
            "音量", "随机", "重复"
        ]

        logger.info("VoiceAssistant initialized")

    def _load_config(self, config_path: str) -> dict:
        """Load the JSON configuration.

        Args:
            config_path: Path to the JSON file.

        Returns:
            The parsed JSON content, or {} when the file does not exist or
            does not contain valid JSON.
        """
        try:
            # Explicit UTF-8 so non-ASCII settings do not depend on the
            # platform's default encoding.
            with open(config_path, "r", encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            return {}
        except json.JSONDecodeError as err:
            logger.warning("Ignoring invalid JSON in %s: %s", config_path, err)
            return {}

    @staticmethod
    def _has_keyword(text: str, keywords) -> bool:
        """Return True if any keyword occurs in text.

        ASCII keywords must match as whole words (not flanked by ASCII
        letters or digits), so "hi" does not fire on "this" and "play" does
        not fire on "playing". Non-ASCII keywords use plain substring
        matching because Chinese text has no word separators. Empty keywords
        are skipped, since "" is a substring of every string.
        """
        for keyword in keywords:
            if not keyword:
                continue
            if all(ord(ch) < 128 for ch in keyword):
                pattern = (
                    r"(?<![a-z0-9])" + re.escape(keyword) + r"(?![a-z0-9])"
                )
                if re.search(pattern, text, re.IGNORECASE):
                    return True
            elif keyword in text:
                return True
        return False

    def _reply(self, key: str, language: str, **fields) -> Tuple[str, str]:
        """Build a music reply in the request language.

        "en" yields the English template tagged "en"; any other language
        yields the Chinese template tagged "zh".
        """
        english, chinese = self._RESPONSES[key]
        if language == "en":
            return (english.format(**fields), "en")
        return (chinese.format(**fields), "zh")

    def process_command(self, text: str, language: str = "en") -> Tuple[str, str]:
        """
        Process a voice command and return response.

        Routing order: music command, time query, greeting, explicit
        "ask claw" question, and finally OpenClaw as the default.

        Args:
            text: Recognized text
            language: Detected language ('en' or 'zh')

        Returns:
            Tuple of (response_text, response_language)
        """
        text = text or ""
        text_lower = text.lower()

        # Music commands
        if self._is_music_command(text_lower, language):
            return self._handle_music_command(text_lower, language)

        # Time query
        if self._has_keyword(text_lower, self._TIME_KEYWORDS):
            return self._get_time(language)

        # Greeting
        if self._has_keyword(text_lower, self._GREETING_KEYWORDS):
            return self._get_greeting(language)

        # OpenClaw query
        if self._ASK_CLAW_RE.search(text):
            # Strip the prefix from the original text so the question keeps
            # its casing (proper nouns etc.).
            question = self._ASK_CLAW_RE.sub("", text).strip(" \t,，:：")
            return self._ask_openclaw(question, language)

        # Default: ask OpenClaw
        return self._ask_openclaw(text, language)

    def _is_music_command(self, text: str, language: str) -> bool:
        """Check if text is a music command.

        English requests are checked against music_commands, all other
        languages against chinese_music_commands.
        """
        if language == "en":
            return self._has_keyword(text, self.music_commands)
        return self._has_keyword(text, self.chinese_music_commands)

    def _handle_music_command(self, text: str, language: str) -> Tuple[str, str]:
        """Handle music playback commands.

        Handles play, pause, resume, stop, next and volume up/down.
        Keywords that are listed as music commands but have no handler here
        (previous, shuffle, repeat) produce the "not recognized" reply.

        Args:
            text: Lower-cased command text.
            language: Request language; replies use the same language.

        Returns:
            Tuple of (response_text, response_language)
        """
        player = self.music_player
        has = self._has_keyword

        # Play command
        if has(text, ("play", "播放")):
            song_name = self._extract_song_name(text)
            if song_name:
                matches = player.search_tracks(song_name)
                if not matches:
                    return self._reply("song_not_found", language)
                player.play(matches[0])
                return self._reply(
                    "playing_track", language, name=matches[0].name
                )
            # No specific song requested: play the first track of the library.
            library = player.music_library
            if not library:
                return self._reply("no_music", language)
            player.play(next(iter(library.values())))
            return self._reply("playing_music", language)

        # Pause
        if has(text, ("pause", "暂停")):
            player.pause()
            return self._reply("paused", language)

        # Resume
        if has(text, ("resume", "继续")):
            player.resume()
            return self._reply("resumed", language)

        # Stop
        if has(text, ("stop", "停止")):
            player.stop()
            return self._reply("stopped", language)

        # Next
        if has(text, ("next", "下一首")):
            player.next()
            return self._reply("next", language)

        # Volume: only report an adjustment when a direction was recognized.
        if has(text, ("volume", "音量")):
            if has(text, self._VOLUME_UP):
                player.set_volume(player.volume + 0.1)
                return self._reply("volume", language)
            if has(text, self._VOLUME_DOWN):
                player.set_volume(player.volume - 0.1)
                return self._reply("volume", language)

        return self._reply("unknown", language)

    def _extract_song_name(self, text: str) -> Optional[str]:
        """Extract song name from command.

        Returns the text following the word "play" (or "播放" when "play" is
        absent). Returns None when there is no such keyword, nothing follows
        it, or what follows is only a generic word such as "music" / "音乐".
        """
        found = re.search(r"(?<![a-z0-9])play(?![a-z0-9])", text, re.IGNORECASE)
        if found:
            remainder = text[found.end():]
        elif "播放" in text:
            remainder = text.split("播放", 1)[1]
        else:
            return None

        song = remainder.strip()
        if not song or song.lower() in self._GENERIC_SONG_WORDS:
            return None
        return song

    def _get_time(self, language: str) -> Tuple[str, str]:
        """Get current time response.

        Chinese uses a 24-hour "HH点MM分" form; every other language gets the
        English 12-hour form.
        """
        now = datetime.now()
        if language == "zh":
            # Formatted without strftime so the CJK characters never pass
            # through the platform's strftime implementation.
            return (f"现在时间是 {now.hour:02d}点{now.minute:02d}分", "zh")
        return (f"The current time is {now.strftime('%I:%M %p')}", "en")

    def _get_greeting(self, language: str) -> Tuple[str, str]:
        """Get greeting response.

        Picks one greeting at random: Chinese for "zh", English otherwise.
        """
        greetings_en = [
            "Hello! How can I help you?",
            "Hi there! What can I do for you?",
            "Hey! Ready to assist you."
        ]
        greetings_zh = [
            "你好!有什么可以帮你的吗?",
            "您好!需要什么帮助?",
            "嗨!随时为您服务。"
        ]
        if language == "zh":
            return (random.choice(greetings_zh), "zh")
        return (random.choice(greetings_en), "en")

    def _ask_openclaw(self, question: str, language: str) -> Tuple[str, str]:
        """Send question to OpenClaw and get response.

        Args:
            question: Question text to forward.
            language: Request language, passed on as the preferred language.

        Returns:
            Tuple of (response_text, response_language). A fixed message is
            returned when OpenClaw is disabled, the question is empty, or the
            reply is missing, not a dict, or contains an "error" key.
        """
        is_zh = language == "zh"

        if not self.openclaw_client.enabled:
            if is_zh:
                return ("OpenClaw 未启用", "zh")
            return ("OpenClaw is not enabled", "en")

        question = (question or "").strip()
        if not question:
            # Nothing to forward (e.g. the user only said "ask claw").
            if is_zh:
                return ("你想问什么?", "zh")
            return ("What would you like to ask?", "en")

        # Add context about language preference
        context = {"preferred_language": language}
        response = self.openclaw_client.send_request(question, context)

        failed = not isinstance(response, dict) or "error" in response
        response_text = None
        if not failed:
            # Fall back to the stringified reply when there is no
            # "response" key.
            response_text = response.get("response", str(response))
        if failed or response_text is None:
            if is_zh:
                return ("抱歉,暂时无法回答", "zh")
            return ("Sorry, I can't answer that right now", "en")
        response_text = str(response_text)

        # Detect response language: any CJK ideograph marks the reply as
        # Chinese, otherwise assume the request language.
        response_lang = language
        if any('\u4e00' <= char <= '\u9fff' for char in response_text):
            response_lang = "zh"
        return (response_text, response_lang)

    def get_status(self) -> Dict:
        """Get assistant status.

        Returns:
            Dict with a fixed "active" marker for the speech recognizer and
            the status reported by the music player and OpenClaw client.
        """
        return {
            "speech_recognizer": "active",
            "music_player": self.music_player.get_status(),
            "openclaw": self.openclaw_client.get_status()
        }
def main():
    """Run a fixed set of sample commands through the assistant and print each reply."""
    assistant = VoiceAssistant()

    # (utterance, language) pairs covering greeting, time and music in both languages.
    samples = (
        ("hello", "en"),
        ("what time is it", "en"),
        ("play music", "en"),
        ("你好", "zh"),
        ("现在几点", "zh"),
        ("播放音乐", "zh"),
    )
    divider = "-" * 40

    for utterance, lang_code in samples:
        reply, reply_lang = assistant.process_command(utterance, lang_code)
        print(f"Input: {utterance} ({lang_code})")
        print(f"Output: {reply} ({reply_lang})")
        print(divider)
# Script entry point: enable INFO-level logging, then run the demo commands.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    main()