Module agents.stt.stt
Classes
class STT-
Expand source code
class STT(EventEmitter[Literal["error"]]):
    """Common base for speech-to-text (STT) provider implementations.

    An instance stores a provider label and at most one coroutine callback
    for transcripts. ``process_audio`` is declared abstract and must be
    supplied by concrete providers. Instances work with ``async with``:
    leaving the block awaits ``aclose``.

    NOTE(review): ``abstractmethod`` only prevents instantiation when the
    class's metaclass derives from ``ABCMeta``; whether ``EventEmitter``
    provides that is not visible from this class -- confirm.
    """

    def __init__(
        self,
    ) -> None:
        super().__init__()
        # No transcript consumer until on_stt_transcript() registers one.
        self._transcript_callback: Optional[Callable[[STTResponse], Awaitable[None]]] = None
        # "<module>.<ClassName>" of the concrete class; used in log messages.
        self._label = f"{type(self).__module__}.{type(self).__name__}"

    @property
    def label(self) -> str:
        """Provider label built in ``__init__`` from the concrete class."""
        return self._label

    def on_stt_transcript(self, callback: Callable[[STTResponse], Awaitable[None]]) -> None:
        """Register the coroutine callback that receives STT transcripts.

        Only one callback is kept; a later call replaces the earlier one.

        Args:
            callback: Awaitable-returning callable taking an ``STTResponse``.
        """
        self._transcript_callback = callback

    @abstractmethod
    async def process_audio(
        self,
        audio_frames: bytes,
        language: Optional[str] = None,
        **kwargs: Any
    ) -> None:
        """Process audio and convert it to text.

        Args:
            audio_frames: Audio data to process, passed as ``bytes``.
            language: Optional language code for recognition.
            **kwargs: Additional provider-specific arguments.

        Returns:
            None. NOTE(review): presumably implementations hand results to
            the callback registered via ``on_stt_transcript`` -- confirm
            against a concrete provider.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError

    async def aclose(self) -> None:
        """Release resources held by this STT instance.

        Drops the transcript callback, then requests a garbage-collection
        pass. An exception raised during that pass is logged rather than
        propagated.
        """
        logger.info(f"Cleaning up STT: {self.label}")
        self._transcript_callback = None
        try:
            # Imported locally, exactly as the original cleanup path does.
            import gc
            gc.collect()
            logger.info(f"STT garbage collection completed: {self.label}")
        except Exception as e:
            logger.error(f"Error during STT garbage collection: {e}")
        logger.info(f"STT cleanup completed: {self.label}")

    async def __aenter__(self) -> STT:
        """Enter ``async with``; yields this instance unchanged."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        """Leave ``async with``; always awaits ``aclose``."""
        await self.aclose()
Ancestors
- EventEmitter
- typing.Generic
Instance variables
prop label : str-
Expand source code
@property
def label(self) -> str:
    """Return the STT provider label held in ``self._label``."""
    return self._label
Methods
async def aclose(self) ‑> None-
Expand source code
async def aclose(self) -> None:
    """Release resources held by this STT instance.

    Drops the transcript callback, then requests a garbage-collection
    pass. An exception raised during that pass is logged rather than
    propagated. Start and end of the cleanup are logged at info level.
    """
    logger.info(f"Cleaning up STT: {self.label}")
    self._transcript_callback = None
    try:
        # Imported locally, exactly as the original cleanup path does.
        import gc
        gc.collect()
        logger.info(f"STT garbage collection completed: {self.label}")
    except Exception as e:
        logger.error(f"Error during STT garbage collection: {e}")
    logger.info(f"STT cleanup completed: {self.label}")
def on_stt_transcript(self,
callback: Callable[[STTResponse], Awaitable[None]]) ‑> None-
Expand source code
def on_stt_transcript(self, callback: Callable[[STTResponse], Awaitable[None]]) -> None:
    """Register the coroutine callback that receives STT transcripts.

    Only one callback is kept; a later call replaces the earlier one.

    Args:
        callback: Awaitable-returning callable taking an ``STTResponse``.
    """
    self._transcript_callback = callback
async def process_audio(self, audio_frames: bytes, language: Optional[str] = None, **kwargs: Any) ‑> None-
Expand source code
@abstractmethod
async def process_audio(
    self,
    audio_frames: bytes,
    language: Optional[str] = None,
    **kwargs: Any
) -> None:
    """Process audio and convert it to text.

    Args:
        audio_frames: Audio data to process, passed as ``bytes``.
        language: Optional language code for recognition.
        **kwargs: Additional provider-specific arguments.

    Returns:
        None. NOTE(review): presumably implementations hand results to
        the callback registered via ``on_stt_transcript`` -- confirm
        against a concrete provider.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError
Args
audio_frames- Audio data to process, as bytes
language- Optional language code for recognition
**kwargs- Additional provider-specific arguments
Returns
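None. The signature is annotated `-> None`; transcripts are presumably delivered through the callback registered with `on_stt_transcript` (to be confirmed against a concrete provider).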
class STTResponse (**data: Any)-
Expand source code
class STTResponse(BaseModel):
    """Response produced by STT processing.

    Attributes:
        event_type: The type of speech event.
        data: The data from the speech event.
        metadata: Additional metadata from the speech event; optional,
            ``None`` when not supplied.
    """

    # Required: which kind of speech event this response reports.
    event_type: SpeechEventType
    # Required: recognition payload for the event.
    data: SpeechData
    # Optional free-form mapping; defaults to None.
    metadata: Optional[dict[str, Any]] = None
Attributes
event_type- The type of speech event.
data- The data from the speech event.
metadata- Additional metadata from the speech event.
Create a new model by parsing and validating input data from keyword arguments.
Raises [`ValidationError`][pydantic_core.ValidationError] if the input data cannot be validated to form a valid model.
`self` is explicitly positional-only to allow `self` as a field name.
Ancestors
- pydantic.main.BaseModel
Class variables
var data : SpeechData
var event_type : SpeechEventType
var metadata : dict[str, typing.Any] | None
var model_config
class SpeechData (text: str,
confidence: float = 0.0,
language: Optional[str] = None,
start_time: float = 0.0,
end_time: float = 0.0)-
Expand source code
@dataclass
class SpeechData:
    """Container for a speech recognition result.

    Only ``text`` is required; every other field has a default.

    Attributes:
        text: The recognized text.
        confidence: The confidence level of the recognition (default 0.0).
        language: The language of the recognized text (default None).
        start_time: The start time of the speech (default 0.0).
        end_time: The end time of the speech (default 0.0).
    """

    # Required field; must stay first because the others carry defaults.
    text: str
    confidence: float = 0.0
    language: Optional[str] = None
    start_time: float = 0.0
    end_time: float = 0.0
Attributes
text- The recognized text.
confidence- The confidence level of the recognition.
language- The language of the recognized text.
start_time- The start time of the speech.
end_time- The end time of the speech.
Instance variables
var confidence : float
var end_time : float
var language : str | None
var start_time : float
var text : str
class SpeechEventType (*args, **kwds)-
Expand source code
class SpeechEventType(str, Enum):
    """Kinds of speech event.

    The ``str`` mixin makes every member a string equal to its value.
    """

    START = "start_of_speech"
    INTERIM = "interim_transcript"
    FINAL = "final_transcript"
    END = "end_of_speech"
Ancestors
- builtins.str
- enum.Enum
Class variables
var END
var FINAL
var INTERIM
var START