Package videosdk.plugins.smallestai

Sub-modules

videosdk.plugins.smallestai.tts

Classes

class SmallestAITTS (*,
api_key: str | None = None,
model: str = 'lightning',
voice_id: str = 'emily',
speed: float = 1.0,
consistency: float = 0.5,
similarity: float = 0.0,
enhancement: bool = False)
Expand source code
class SmallestAITTS(TTS):
    """Text-to-Speech plugin backed by the SmallestAI Waves async client.

    Synthesized audio is cut into 20 ms frames and pushed to
    ``self.audio_track``. Both ``audio_track`` and ``loop`` start out as
    ``None`` and must be assigned before :meth:`synthesize` is called.
    """

    def __init__(
        self,
        *,
        api_key: str | None = None,
        model: str = DEFAULT_MODEL,
        voice_id: str = DEFAULT_VOICE_ID,
        speed: float = 1.0,
        consistency: float = 0.5,
        similarity: float = 0.0,
        enhancement: bool = False,
    ) -> None:
        """Initialize the SmallestAI TTS plugin.

        Args:
            api_key (Optional[str], optional): SmallestAI API key. Falls back to the
                ``SMALLEST_API_KEY`` environment variable. Defaults to None.
            model (str): The model to use for the TTS plugin. Defaults to "lightning".
            voice_id (str): The voice ID to use for the TTS plugin. Defaults to "emily".
            speed (float): The speed to use for the TTS plugin. Defaults to 1.0.
            consistency (float): The consistency to use for the TTS plugin. Defaults to 0.5.
            similarity (float): The similarity to use for the TTS plugin. Defaults to 0.0.
            enhancement (bool): Whether to enable enhancement for the TTS plugin. Defaults to False.

        Raises:
            ValueError: If no API key is passed and ``SMALLEST_API_KEY`` is unset.
            ImportError: If the ``smallestai`` package is not installed.
        """
        super().__init__(
            sample_rate=SMALLESTAI_SAMPLE_RATE, num_channels=SMALLESTAI_CHANNELS
        )

        self.model = model
        self.voice_id = voice_id
        self.speed = speed
        self.consistency = consistency
        self.similarity = similarity
        self.enhancement = enhancement

        # Assigned externally after construction; synthesize() refuses to run
        # while either is still unset.
        self.audio_track = None
        self.loop = None
        self._first_chunk_sent = False

        self.api_key = api_key or os.getenv("SMALLEST_API_KEY")
        if not self.api_key:
            raise ValueError(
                "SmallestAI API key required. Provide either:\n"
                "1. api_key parameter, OR\n"
                "2. SMALLEST_API_KEY environment variable"
            )

        # Imported lazily so a missing optional dependency produces an
        # actionable message instead of failing at module import time.
        try:
            from smallestai.waves import AsyncWavesClient
        except ImportError as exc:
            raise ImportError(
                "SmallestAI package not found. Install it with: pip install smallestai"
            ) from exc

        self._client = AsyncWavesClient(
            api_key=self.api_key,
            model=self.model,
            sample_rate=SMALLESTAI_SAMPLE_RATE,
            voice_id=self.voice_id,
            speed=self.speed,
            consistency=self.consistency,
            similarity=self.similarity,
            enhancement=self.enhancement,
        )

    def reset_first_audio_tracking(self) -> None:
        """Reset the first audio tracking state for next TTS task"""
        self._first_chunk_sent = False

    async def synthesize(
        self,
        text: AsyncIterator[str] | str,
        voice_id: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        """Convert text to speech and push the audio to ``self.audio_track``.

        Failures are reported through an ``"error"`` event rather than raised.

        Args:
            text: A string, or an async iterator of strings that is split
                into segments with ``segment_text`` and synthesized
                segment by segment.
            voice_id: Optional voice override; ``self.voice_id`` is used
                when omitted or empty.
            **kwargs: Per-call overrides forwarded to the synthesis request
                (``speed``, ``consistency``, ``similarity``, ``enhancement``,
                ``sample_rate``).
        """
        try:
            # Check the output path first: without a track and loop the audio
            # cannot be delivered, so do not spend an API call on it.
            if not self.audio_track or not self.loop:
                self.emit("error", "Audio track or event loop not set")
                return

            selected_voice = voice_id or self.voice_id
            if isinstance(text, AsyncIterator):
                async for segment in segment_text(text):
                    await self._synthesize_audio(segment, selected_voice, **kwargs)
            else:
                await self._synthesize_audio(text, selected_voice, **kwargs)

        except Exception as e:
            self.emit("error", f"SmallestAI TTS synthesis failed: {str(e)}")

    async def _synthesize_audio(self, text: str, voice_id: str, **kwargs: Any) -> None:
        """Synthesize one piece of text with the SmallestAI client and stream it.

        Emits an ``"error"`` event and re-raises on failure, so the caller
        can stop processing further segments.
        """
        try:
            synthesis_kwargs = {
                "voice_id": voice_id,
                "speed": kwargs.get("speed", self.speed),
                "consistency": kwargs.get("consistency", self.consistency),
                "similarity": kwargs.get("similarity", self.similarity),
                "enhancement": kwargs.get("enhancement", self.enhancement),
                "sample_rate": kwargs.get("sample_rate", SMALLESTAI_SAMPLE_RATE),
            }

            async with self._client as tts:
                audio_bytes = await tts.synthesize(text, **synthesis_kwargs)

            if not audio_bytes:
                self.emit("error", "No audio data received from SmallestAI")
                return

            # Drop a RIFF/WAV header if the response carries one, so header
            # bytes are not played back as audio samples.
            pcm_bytes = self._remove_wav_header(audio_bytes)

            # Await the streaming instead of detaching it: frames of
            # consecutive segments stay in order and streaming errors reach
            # the handler below instead of dying in an unreferenced task.
            await self._stream_audio_chunks(pcm_bytes)

        except Exception as e:
            self.emit("error", f"SmallestAI synthesis failed: {str(e)}")
            raise

    async def _stream_audio_chunks(self, audio_bytes: bytes) -> None:
        """Stream audio data in fixed-size chunks to ensure smooth playback.

        The final partial chunk is zero-padded to the full chunk size.
        """
        # Bytes in one 20 ms frame; the factor 2 is presumably the width of a
        # 16-bit sample -- TODO confirm against the audio track's format.
        chunk_size = int(SMALLESTAI_SAMPLE_RATE *
                         SMALLESTAI_CHANNELS * 2 * 20 / 1000)

        for start in range(0, len(audio_bytes), chunk_size):
            # Every slice is non-empty; only the last one can be short.
            chunk = audio_bytes[start:start + chunk_size].ljust(chunk_size, b'\x00')

            # Fire the first-audio callback once per task.
            # NOTE(review): `_first_audio_callback` is not defined in this
            # class; presumably provided by the TTS base -- confirm.
            if not self._first_chunk_sent and self._first_audio_callback:
                self._first_chunk_sent = True
                self.loop.create_task(self._first_audio_callback())

            asyncio.create_task(self.audio_track.add_new_bytes(chunk))
            # Yield to the event loop between frames.
            await asyncio.sleep(0.001)

    def _remove_wav_header(self, audio_bytes: bytes) -> bytes:
        """Remove WAV header if present to get raw PCM data.

        Returns the input unchanged when it does not start with ``RIFF`` or
        contains no ``data`` marker.
        """
        if audio_bytes.startswith(b'RIFF'):
            data_pos = audio_bytes.find(b'data')
            if data_pos != -1:
                # Skip the 4-byte "data" tag and the 4-byte size that follows.
                return audio_bytes[data_pos + 8:]

        return audio_bytes

    async def aclose(self) -> None:
        """Cleanup resources"""
        if hasattr(self, "_client"):
            try:
                if hasattr(self._client, 'aclose'):
                    await self._client.aclose()
            except Exception:
                # Best-effort: a failing client shutdown must not prevent the
                # base-class cleanup below.
                pass
        await super().aclose()

    async def interrupt(self) -> None:
        """Interrupt the TTS process"""
        if self.audio_track:
            self.audio_track.interrupt()

Text-to-Speech implementation backed by the SmallestAI Waves client (a subclass of the Text-to-Speech base class).

Initialize the SmallestAI TTS plugin.

Args

api_key : Optional[str], optional
SmallestAI API key. Defaults to None.
model : str
The model to use for the TTS plugin. Defaults to "lightning".
voice_id : str
The voice ID to use for the TTS plugin. Defaults to "emily".
speed : float
The speed to use for the TTS plugin. Defaults to 1.0.
consistency : float
The consistency to use for the TTS plugin. Defaults to 0.5.
similarity : float
The similarity to use for the TTS plugin. Defaults to 0.0.
enhancement : bool
Whether to enable enhancement for the TTS plugin. Defaults to False.

Ancestors

  • videosdk.agents.tts.tts.TTS
  • videosdk.agents.event_emitter.EventEmitter
  • typing.Generic

Methods

async def aclose(self) ‑> None
Expand source code
async def aclose(self) -> None:
    """Release the SmallestAI client if it exists, then run the parent cleanup."""
    if hasattr(self, "_client"):
        try:
            client = self._client
            # Only clients that expose an async close hook are closed here.
            if hasattr(client, 'aclose'):
                await client.aclose()
        except Exception:
            # Best-effort shutdown: client errors are deliberately ignored so
            # the parent cleanup below always runs.
            pass
    await super().aclose()

Cleanup resources

async def interrupt(self) ‑> None
Expand source code
async def interrupt(self) -> None:
    """Stop the current speech by interrupting the attached audio track.

    Does nothing when no audio track has been assigned.
    """
    track = self.audio_track
    if not track:
        return
    track.interrupt()

Interrupt the TTS process

def reset_first_audio_tracking(self) ‑> None
Expand source code
def reset_first_audio_tracking(self) -> None:
    """Reset the first audio tracking state for the next TTS task.

    Clears the ``_first_chunk_sent`` flag so the next task is treated as
    not having sent any audio yet.
    """
    self._first_chunk_sent = False

Reset the first audio tracking state for next TTS task

async def synthesize(self,
text: AsyncIterator[str] | str,
voice_id: Optional[str] = None,
**kwargs: Any) ‑> None
Expand source code
async def synthesize(
    self,
    text: AsyncIterator[str] | str,
    voice_id: Optional[str] = None,
    **kwargs: Any,
) -> None:
    try:
        if isinstance(text, AsyncIterator):
            async for segment in segment_text(text):
                await self._synthesize_audio(segment, voice_id or self.voice_id, **kwargs)
        else:
            await self._synthesize_audio(text, voice_id or self.voice_id, **kwargs)

        if not self.audio_track or not self.loop:
            self.emit("error", "Audio track or event loop not set")
            return

    except Exception as e:
        self.emit("error", f"SmallestAI TTS synthesis failed: {str(e)}")

Convert text to speech

Args

text
Text to convert to speech (either string or async iterator of strings)
voice_id
Optional voice identifier
**kwargs
Additional provider-specific arguments

Returns

None