Module `agents.stt.stt`

Classes

class STT

Expand source code

class STT(EventEmitter[Literal["error"]]):
    """Base class for Speech-to-Text implementations"""
    
    def __init__(
        self,
    ) -> None:
        super().__init__()
        self._label = f"{type(self).__module__}.{type(self).__name__}"
        self._transcript_callback: Optional[Callable[[STTResponse], Awaitable[None]]] = None
        
    @property
    def label(self) -> str:
        """Get the STT provider label"""
        return self._label

    def on_stt_transcript(self, callback: Callable[[STTResponse], Awaitable[None]]) -> None:
        """Set callback for receiving STT transcripts"""
        self._transcript_callback = callback

    @abstractmethod
    async def process_audio(
        self,
        audio_frames: bytes,
        language: Optional[str] = None,
        **kwargs: Any
    ) -> None:
        """
        Process audio frames and convert to text
        
        Args:
            audio_frames: Iterator of bytes to process
            language: Optional language code for recognition
            **kwargs: Additional provider-specific arguments
            
        Returns:
            AsyncIterator yielding STTResponse objects
        """
        raise NotImplementedError

    async def aclose(self) -> None:
        """Cleanup resources"""
        logger.info(f"Cleaning up STT: {self.label}")
        self._transcript_callback = None
        try:
            import gc
            gc.collect()
            logger.info(f"STT garbage collection completed: {self.label}")
        except Exception as e:
            logger.error(f"Error during STT garbage collection: {e}")
        
        logger.info(f"STT cleanup completed: {self.label}")
    
    async def __aenter__(self) -> STT:
        return self
        
    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        await self.aclose()

Base class for Speech-to-Text implementations

Ancestors

EventEmitter
typing.Generic

Instance variables

prop label : str

Expand source code

@property
def label(self) -> str:
    """Get the STT provider label"""
    return self._label

Get the STT provider label

Methods

async def aclose(self) ‑> None

Expand source code

async def aclose(self) -> None:
    """Cleanup resources"""
    logger.info(f"Cleaning up STT: {self.label}")
    self._transcript_callback = None
    try:
        import gc
        gc.collect()
        logger.info(f"STT garbage collection completed: {self.label}")
    except Exception as e:
        logger.error(f"Error during STT garbage collection: {e}")
    
    logger.info(f"STT cleanup completed: {self.label}")

Cleanup resources

def on_stt_transcript(self, callback: Callable[[STTResponse], Awaitable[None]]) ‑> None

Expand source code

def on_stt_transcript(self, callback: Callable[[STTResponse], Awaitable[None]]) -> None:
    """Set callback for receiving STT transcripts"""
    self._transcript_callback = callback

Set callback for receiving STT transcripts

async def process_audio(self, audio_frames: bytes, language: Optional[str] = None, **kwargs: Any) ‑> None

Expand source code

@abstractmethod
async def process_audio(
    self,
    audio_frames: bytes,
    language: Optional[str] = None,
    **kwargs: Any
) -> None:
    """
    Process audio frames and convert to text
    
    Args:
        audio_frames: Iterator of bytes to process
        language: Optional language code for recognition
        **kwargs: Additional provider-specific arguments
        
    Returns:
        AsyncIterator yielding STTResponse objects
    """
    raise NotImplementedError

Process audio frames and convert to text

Args

audio_frames: Iterator of bytes to process
language: Optional language code for recognition
**kwargs: Additional provider-specific arguments

Returns

AsyncIterator yielding STTResponse objects

class STTResponse (**data: Any)

Expand source code

class STTResponse(BaseModel):
    """Response from STT processing
    
    Attributes:
        event_type: The type of speech event.
        data: The data from the speech event.
        metadata: Additional metadata from the speech event.
    """
    event_type: SpeechEventType
    data: SpeechData
    metadata: Optional[dict[str, Any]] = None

Response from STT processing

Attributes

event_type: The type of speech event.
data: The data from the speech event.
metadata: Additional metadata from the speech event.

Create a new model by parsing and validating input data from keyword arguments.

Raises [ValidationError][pydantic_core.ValidationError] if the input data cannot be validated to form a valid model.

self is explicitly positional-only to allow self as a field name.

Ancestors

pydantic.main.BaseModel

Class variables

var data : SpeechData
var event_type : SpeechEventType
var metadata : dict[str, typing.Any] | None
var model_config

class SpeechData (text: str, confidence: float = 0.0, language: Optional[str] = None, start_time: float = 0.0, end_time: float = 0.0)

Expand source code

@dataclass
class SpeechData:
    """Data structure for speech recognition results
    
    Attributes:
        text: The recognized text.
        confidence: The confidence level of the recognition.
        language: The language of the recognized text.
        start_time: The start time of the speech.
        end_time: The end time of the speech.
    """
    text: str
    confidence: float = 0.0
    language: Optional[str] = None
    start_time: float = 0.0
    end_time: float = 0.0

Data structure for speech recognition results

Attributes

text: The recognized text.
confidence: The confidence level of the recognition.
language: The language of the recognized text.
start_time: The start time of the speech.
end_time: The end time of the speech.

Instance variables

var confidence : float
var end_time : float
var language : str | None
var start_time : float
var text : str

class SpeechEventType (*args, **kwds)

Expand source code

class SpeechEventType(str, Enum):
    """Type of speech event"""
    START = "start_of_speech"
    INTERIM = "interim_transcript"
    PREFLIGHT = "preflight_transcript"
    FINAL = "final_transcript"
    END = "end_of_speech"

Type of speech event

Ancestors

builtins.str
enum.Enum

Class variables

var END
var FINAL
var INTERIM
var PREFLIGHT
var START