Module agents.vad
Classes
class VAD (sample_rate: int = 16000,
threshold: float = 0.5,
min_speech_duration: float = 0.05,
min_silence_duration: float = 0.5)-
Expand source code
class VAD(EventEmitter[Literal["error", "info"]]):
    """Base class for Voice Activity Detection implementations.

    Holds the configuration passed to the constructor and at most one async
    callback registered through ``on_vad_event``. ``process_audio`` is
    declared abstract; ``flush``, ``prewarm`` and ``aclose`` ship with
    default implementations that subclasses may override.

    NOTE(review): ``@abstractmethod`` is only enforced at instantiation when
    the metaclass derives from ``ABCMeta``; EventEmitter's metaclass is not
    visible here — confirm.
    """

    def __init__(
        self,
        sample_rate: int = 16000,
        threshold: float = 0.5,
        min_speech_duration: float = 0.05,
        min_silence_duration: float = 0.5,
    ) -> None:
        """Record the detector configuration on the instance.

        Args:
            sample_rate: Audio sample rate (presumably in Hz — confirm).
            threshold: Detection threshold; its exact meaning is left to the
                concrete implementation.
            min_speech_duration: Minimum speech duration (presumably in
                seconds — confirm).
            min_silence_duration: Minimum silence duration (presumably in
                seconds — confirm).
        """
        super().__init__()

        # No callback is registered until on_vad_event() is called.
        self._vad_callback: Optional[Callable[[VADResponse], Awaitable[None]]] = None

        # Detection parameters are stored verbatim, without validation.
        self._min_silence_duration = min_silence_duration
        self._min_speech_duration = min_speech_duration
        self._threshold = threshold
        self._sample_rate = sample_rate

        # Label is "<module>.<class name>" of the concrete (possibly derived) class.
        concrete = type(self)
        self._label = f"{concrete.__module__}.{concrete.__name__}"

    @property
    def label(self) -> str:
        """Get the VAD provider label (``<module>.<class name>``)."""
        return self._label

    @property
    def sample_rate(self) -> int:
        """Get the audio sample rate given to the constructor."""
        return self._sample_rate

    @abstractmethod
    async def process_audio(
        self,
        audio_frames: bytes,
        **kwargs: Any
    ) -> None:
        """Process audio and detect voice activity.

        Args:
            audio_frames: Audio data to process, as ``bytes``.
            **kwargs: Additional provider-specific arguments.

        Returns:
            None. NOTE(review): results are not returned from this call;
            presumably implementations deliver ``VADResponse`` objects to the
            callback registered with ``on_vad_event`` — confirm against a
            concrete subclass.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError

    async def flush(self) -> None:
        """Signal that no more audio will arrive.

        The base implementation does nothing; subclasses may override.
        """
        return None

    async def prewarm(self) -> None:
        """Load models and warm inference kernels ahead of time.

        Lets the first ``process_audio()`` call avoid paying cold-start cost.
        The default is a no-op; plugins with local ONNX/ML models (SileroVAD)
        override this. Safe to call multiple times — overrides must be
        idempotent.
        """
        return None

    async def aclose(self) -> None:
        """Clean up resources.

        Drops the registered callback, then forces a garbage-collection pass.
        A failure during that pass is logged rather than raised.
        """
        logger.info(f"Cleaning up VAD: {self.label}")
        self._vad_callback = None

        try:
            import gc

            gc.collect()
            logger.info(f"VAD garbage collection completed: {self.label}")
        except Exception as err:
            # Best-effort: collection problems must not abort the cleanup.
            logger.error(f"Error during VAD garbage collection: {err}")

        logger.info(f"VAD cleanup completed: {self.label}")

    async def __aenter__(self) -> VAD:
        """Enter the async context; returns this instance unchanged."""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        """Leave the async context by awaiting ``aclose()``.

        Returns ``None``, so an in-flight exception is not suppressed.
        """
        await self.aclose()

    def on_vad_event(self, callback: Callable[[VADResponse], Awaitable[None]]) -> None:
        """Set the callback for receiving VAD events.

        The callback is stored on the instance, replacing any earlier one.
        """
        self._vad_callback = callback
Ancestors
- EventEmitter
- typing.Generic
Instance variables
prop label : str-
Expand source code
@property
def label(self) -> str:
    """Get the VAD provider label stored on the instance as ``_label``."""
    return self._label
prop sample_rate : int-
Expand source code
@property
def sample_rate(self) -> int:
    """Get the audio sample rate stored on the instance as ``_sample_rate``."""
    return self._sample_rate
Methods
async def aclose(self) ‑> None-
Expand source code
async def aclose(self) -> None:
    """Clean up resources.

    Drops the registered callback, then forces a garbage-collection pass.
    A failure during that pass is logged rather than raised.
    """
    logger.info(f"Cleaning up VAD: {self.label}")
    self._vad_callback = None

    try:
        import gc

        gc.collect()
        logger.info(f"VAD garbage collection completed: {self.label}")
    except Exception as err:
        # Best-effort: collection problems must not abort the cleanup.
        logger.error(f"Error during VAD garbage collection: {err}")

    logger.info(f"VAD cleanup completed: {self.label}")
async def flush(self) ‑> None-
Expand source code
async def flush(self) -> None:
    """Signal that no more audio will arrive.

    The base implementation does nothing; subclasses may override.
    """
    return None
def on_vad_event(self,
callback: Callable[[VADResponse], Awaitable[None]]) ‑> None-
Expand source code
def on_vad_event(self, callback: Callable[[VADResponse], Awaitable[None]]) -> None:
    """Set the callback for receiving VAD events.

    The callback is stored on the instance as ``_vad_callback``, replacing
    any earlier one.
    """
    self._vad_callback = callback
async def prewarm(self) ‑> None-
Expand source code
async def prewarm(self) -> None:
    """Load models and warm inference kernels ahead of time.

    Lets the first ``process_audio()`` call avoid paying cold-start cost.
    The default is a no-op; plugins with local ONNX/ML models (SileroVAD)
    override this. Safe to call multiple times — overrides must be
    idempotent.
    """
    return None
async def process_audio(self, audio_frames: bytes, **kwargs: Any) ‑> None-
Expand source code
@abstractmethod
async def process_audio(
    self,
    audio_frames: bytes,
    **kwargs: Any
) -> None:
    """Process audio and detect voice activity.

    Args:
        audio_frames: Audio data to process, as ``bytes``.
        **kwargs: Additional provider-specific arguments.

    Returns:
        None. NOTE(review): results are not returned from this call;
        presumably implementations deliver ``VADResponse`` objects to the
        callback registered with ``on_vad_event`` — confirm against a
        concrete subclass.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError
Args
audio_frames - Audio data to process (the signature annotates this parameter as bytes, not as an iterator)
**kwargs- Additional provider-specific arguments
Returns
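None (the signature is annotated "-> None"; no AsyncIterator is returned — VADResponse objects are presumably delivered through the callback set with on_vad_event)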
Inherited members
class VADData (is_speech: bool,
confidence: float = 0.0,
timestamp: float = 0.0,
speech_duration: float = 0.0,
silence_duration: float = 0.0,
audio_frames: bytes | None = None,
raw_probability: float = 0.0,
inference_duration_ms: float = 0.0,
energy: float = 0.0,
samples_index: int = 0)-
Expand source code
@dataclass class VADData: """Data structure for voice activity detection results""" is_speech: bool confidence: float = 0.0 timestamp: float = 0.0 speech_duration: float = 0.0 silence_duration: float = 0.0 audio_frames: bytes | None = None raw_probability: float = 0.0 inference_duration_ms: float = 0.0 energy: float = 0.0 samples_index: int = 0Data structure for voice activity detection results
Instance variables
var audio_frames : bytes | None
var confidence : float
var energy : float
var inference_duration_ms : float
var is_speech : bool
var raw_probability : float
var samples_index : int
var silence_duration : float
var speech_duration : float
var timestamp : float
class VADEventType (*args, **kwds)-
Expand source code
class VADEventType(str, Enum):
    """Event kinds carried in ``VADResponse.event_type``.

    Mixes in ``str``, so each member compares equal to its string value.
    """

    START_OF_SPEECH = "start_of_speech"
    END_OF_SPEECH = "end_of_speech"
    FRAME_PROCESSED = "frame_processed"
(Documentation inherited from builtins.str.) Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to sys.getdefaultencoding(). errors defaults to 'strict'.
Ancestors
- builtins.str
- enum.Enum
Class variables
var END_OF_SPEECH
var FRAME_PROCESSED
var START_OF_SPEECH
class VADResponse (**data: Any)-
Expand source code
class VADResponse(BaseModel):
    """Response from VAD processing.

    Pairs an event type with its ``VADData`` payload, plus an optional
    free-form metadata dictionary.
    """

    # Which kind of event this response reports.
    event_type: VADEventType

    # Detection payload for the event.
    data: VADData

    # Optional extra key/value information; omitted (None) by default.
    metadata: Optional[dict[str, Any]] = None
Create a new model by parsing and validating input data from keyword arguments.
Raises `pydantic_core.ValidationError` if the input data cannot be validated to form a valid model.
`self` is explicitly positional-only to allow `self` as a field name.
Ancestors
- pydantic.main.BaseModel
Class variables
var data : VADData
var event_type : VADEventType
var metadata : dict[str, typing.Any] | None
var model_config