#!/usr/bin/env python3
"""
Text-to-Speech Engine

Supports English and Mandarin Chinese with Google Cloud TTS and offline
alternatives.
"""

import contextlib
import json
import logging
import os
import subprocess
import tempfile
from pathlib import Path
from typing import Callable, List, Optional

try:
    from google.cloud import texttospeech
    HAS_GOOGLE_CLOUD = True
except ImportError:
    HAS_GOOGLE_CLOUD = False

try:
    import pygame
    HAS_PYGAME = True
except ImportError:
    HAS_PYGAME = False

logger = logging.getLogger(__name__)

# Upper bound for a single espeak invocation so a hung process cannot block
# the caller forever.
_ESPEAK_TIMEOUT_SECONDS = 60


class TTSEngine:
    """
    Bilingual TTS engine supporting English and Mandarin Chinese.

    Synthesis uses Google Cloud TTS when a client could be created and falls
    back to the ``espeak`` command-line tool otherwise. Playback uses pygame.
    """

    def __init__(self, config_path: str = "config.json"):
        """
        Load configuration and set up the synthesis and playback backends.

        Args:
            config_path: Path to a JSON config file. Missing or unreadable
                files fall back to defaults.
        """
        self.config = self._load_config(config_path)

        # TTS configuration
        tts_config = self.config.get("tts", {})
        self.english_voice = tts_config.get("english_voice", "en-US-Standard-A")
        self.chinese_voice = tts_config.get("chinese_voice", "zh-CN-Standard-A")
        self.speed = tts_config.get("speed", 1.0)
        self.pitch = tts_config.get("pitch", 0)

        # Initialize Google Cloud client if available.
        # NOTE(review): the cloud backend is gated on the "openclaw.enabled"
        # config key (default True) - confirm this is the intended switch.
        self.client = None
        if HAS_GOOGLE_CLOUD and self.config.get("openclaw", {}).get("enabled", True):
            try:
                self.client = texttospeech.TextToSpeechClient()
                logger.info("Google Cloud TTS initialized")
            except Exception as e:
                logger.warning("Google Cloud TTS not available: %s", e)

        # Initialize audio output. A missing audio device must not prevent
        # the engine from being constructed; playback will simply fail later.
        if HAS_PYGAME:
            try:
                pygame.mixer.init()
            except pygame.error as e:
                logger.warning("Audio output not available: %s", e)

        logger.info("TTSEngine initialized")

    def _load_config(self, config_path: str) -> dict:
        """
        Load configuration.

        Args:
            config_path: Path to a UTF-8 encoded JSON file.

        Returns:
            The parsed JSON object, or ``{"tts": {}}`` when the file is
            missing, is not valid JSON, or does not contain a JSON object.
        """
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except FileNotFoundError:
            return {"tts": {}}
        except ValueError as e:
            # Covers json.JSONDecodeError and UnicodeDecodeError.
            logger.warning("Ignoring unreadable config %s: %s", config_path, e)
            return {"tts": {}}

        if not isinstance(loaded, dict):
            logger.warning("Ignoring config %s: expected a JSON object", config_path)
            return {"tts": {}}
        return loaded

    @staticmethod
    def _is_chinese(language: Optional[str]) -> bool:
        """Return True when *language* is a Chinese tag such as 'zh' or 'zh-CN'."""
        if not isinstance(language, str):
            return False
        primary = language.strip().lower().replace("_", "-").split("-")[0]
        return primary == "zh"

    def speak(self, text: str, language: str = "en") -> bool:
        """
        Speak text in the specified language.

        Args:
            text: Text to speak
            language: 'en' for English, 'zh' for Chinese (region-qualified
                tags such as 'zh-CN' are accepted as Chinese)

        Returns:
            True if speech succeeded
        """
        if not isinstance(text, str) or not text.strip():
            logger.warning("Nothing to speak: empty text")
            return False

        try:
            # Generate speech audio
            audio_data = self._synthesize(text, language)
            if audio_data:
                # Play audio
                return self._play_audio(audio_data)
            return False
        except Exception as e:
            logger.error("TTS error: %s", e)
            return False

    def _synthesize(self, text: str, language: str) -> Optional[bytes]:
        """
        Synthesize speech from text.

        Args:
            text: Text to synthesize
            language: Language code

        Returns:
            Audio data or None
        """
        if self.client and HAS_GOOGLE_CLOUD:
            return self._google_synthesize(text, language)
        return self._offline_synthesize(text, language)

    def _google_synthesize(self, text: str, language: str) -> Optional[bytes]:
        """
        Use Google Cloud TTS.

        Args:
            text: Text to synthesize
            language: Language code

        Returns:
            MP3-encoded audio, or None when no client is configured
        """
        if not self.client:
            return None

        # Select voice based on language
        if self._is_chinese(language):
            voice_name = self.chinese_voice
            lang_code = "zh-CN"
        else:
            voice_name = self.english_voice
            lang_code = "en-US"

        # Configure synthesis
        voice = texttospeech.VoiceSelectionParams(
            language_code=lang_code,
            name=voice_name,
        )
        audio_config = texttospeech.AudioConfig(
            audio_encoding=texttospeech.AudioEncoding.MP3,
            speaking_rate=self.speed,
            pitch=self.pitch,
        )
        synthesis_input = texttospeech.SynthesisInput(text=text)

        # Perform synthesis
        response = self.client.synthesize_speech(
            request=texttospeech.SynthesizeSpeechRequest(
                input=synthesis_input,
                voice=voice,
                audio_config=audio_config,
            )
        )
        return response.audio_content

    def _offline_synthesize(self, text: str, language: str) -> Optional[bytes]:
        """
        Offline TTS fallback (basic system TTS).

        This is a placeholder - in production, you'd use:
        - espeak for English
        - A Chinese TTS engine for Mandarin

        Args:
            text: Text to synthesize
            language: Language code

        Returns:
            WAV audio produced by espeak, or None when espeak is missing,
            fails, times out, or produces no output
        """
        logger.warning("Using offline TTS (limited quality)")

        # The text is fed on stdin and the command is an argument list, so
        # no shell ever interprets user-supplied text.
        command = ["espeak"]
        if self._is_chinese(language):
            command += ["-v", "zh"]
        command += ["--stdin", "--stdout"]

        try:
            completed = subprocess.run(
                command,
                input=text.encode("utf-8"),
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                timeout=_ESPEAK_TIMEOUT_SECONDS,
                check=False,
            )
        except (OSError, subprocess.SubprocessError) as e:
            logger.error("Offline TTS failed: %s", e)
            return None

        if completed.returncode != 0 or not completed.stdout:
            logger.error(
                "Offline TTS failed: espeak exited with status %s",
                completed.returncode,
            )
            return None
        return completed.stdout

    def _play_audio(self, audio_data: bytes) -> bool:
        """
        Play audio data.

        Args:
            audio_data: Audio bytes (MP3 or WAV)

        Returns:
            True if playback succeeded
        """
        if not HAS_PYGAME:
            logger.warning("Pygame not available for audio playback")
            return False
        if not audio_data:
            return False

        # Name the temp file after the actual container (WAV files start
        # with "RIFF") so the loader is not handed a misleading extension.
        suffix = ".wav" if audio_data[:4] == b"RIFF" else ".mp3"
        temp_path = None
        try:
            # Unique per call: avoids clobbering between concurrent engines.
            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as f:
                temp_path = f.name
                f.write(audio_data)

            # Load and play
            pygame.mixer.music.load(temp_path)
            pygame.mixer.music.play()

            # Wait for completion
            while pygame.mixer.music.get_busy():
                pygame.time.wait(100)
            return True
        except Exception as e:
            logger.error("Audio playback error: %s", e)
            return False
        finally:
            if temp_path is not None:
                # Release the file handle first where pygame supports it
                # (needed on platforms that refuse to delete open files).
                unload = getattr(pygame.mixer.music, "unload", None)
                if unload is not None:
                    with contextlib.suppress(Exception):
                        unload()
                with contextlib.suppress(OSError):
                    os.unlink(temp_path)

    def speak_sync(
        self,
        text: str,
        language: str = "en",
        on_complete: Optional[Callable[[bool], None]] = None,
    ) -> bool:
        """
        Synchronous speech with optional callback.

        Args:
            text: Text to speak
            language: Language code
            on_complete: Callback function when done; receives the result

        Returns:
            True if speech succeeded
        """
        result = self.speak(text, language)
        if on_complete is not None:
            on_complete(result)
        return result

    def get_voices(self) -> List[dict]:
        """
        Get list of available voices.

        Returns:
            One dict per voice with "name", "language" (list of language
            codes) and "gender"; empty when no cloud client is available or
            the listing fails
        """
        if not (self.client and HAS_GOOGLE_CLOUD):
            return []

        try:
            response = self.client.list_voices()
            return [
                {
                    "name": voice.name,
                    "language": list(voice.language_codes),
                    "gender": voice.ssml_gender,
                }
                for voice in response.voices
            ]
        except Exception as e:
            logger.error("Error listing voices: %s", e)
            return []


def main():
    """Test the TTS engine."""
    tts = TTSEngine()

    # Test English
    print("Testing English TTS...")
    tts.speak("Hello! I am your voice assistant.", "en")

    # Test Chinese
    print("Testing Chinese TTS...")
    tts.speak("你好!我是你的语音助手。", "zh")

    # List available voices
    voices = tts.get_voices()
    print(f"\nAvailable voices: {len(voices)}")
    for voice in voices[:5]:  # Show first 5
        print(f"  - {voice['name']} ({', '.join(voice['language'])})")


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    main()