Module agents.vad

Classes

class VAD (sample_rate: int = 16000,
threshold: float = 0.5,
min_speech_duration: float = 0.5,
min_silence_duration: float = 0.5)
Expand source code
class VAD(EventEmitter[Literal["error", "info"]]):
    """Base class for Voice Activity Detection implementations.

    The base class only stores the detector configuration and an optional
    async callback for VAD events; the detection itself is left to
    subclasses, which must implement ``process_audio``.

    The class can be used as an async context manager: leaving the
    ``async with`` block awaits ``aclose()``.
    """

    def __init__(
        self,
        sample_rate: int = 16000,
        threshold: float = 0.5,
        min_speech_duration: float = 0.5,
        min_silence_duration: float = 0.5
    ) -> None:
        """
        Store the detector configuration.

        The values are stored as given; no validation is performed here.

        Args:
            sample_rate: Audio sample rate (presumably in Hz -- confirm).
            threshold: Detection threshold. Its interpretation is left to
                the subclass; this class only stores it.
            min_speech_duration: Minimum speech duration (presumably in
                seconds -- confirm). Stored only.
            min_silence_duration: Minimum silence duration (presumably in
                seconds -- confirm). Stored only.
        """
        super().__init__()
        # Label is "<module>.<ClassName>" of the concrete subclass.
        self._label = f"{type(self).__module__}.{type(self).__name__}"
        self._sample_rate = sample_rate
        self._threshold = threshold
        self._min_speech_duration = min_speech_duration
        self._min_silence_duration = min_silence_duration
        # Set through on_vad_event(); cleared again by aclose().
        self._vad_callback: Optional[Callable[[VADResponse], Awaitable[None]]] = None

    @property
    def label(self) -> str:
        """Get the VAD provider label ("<module>.<ClassName>")"""
        return self._label

    @property
    def sample_rate(self) -> int:
        """Get the audio sample rate passed to the constructor"""
        return self._sample_rate

    # NOTE(review): @abstractmethod is only enforced at instantiation time
    # if EventEmitter's metaclass is ABCMeta -- confirm.
    @abstractmethod
    async def process_audio(
        self,
        audio_frames: bytes,
        **kwargs: Any
    ) -> None:
        """
        Process audio frames and detect voice activity

        Args:
            audio_frames: Audio frame data to process, as a ``bytes``
                object (the sample format is not specified here)
            **kwargs: Additional provider-specific arguments

        Returns:
            None. NOTE(review): detected events are presumably delivered
            through the callback registered with ``on_vad_event`` rather
            than returned -- confirm against the subclasses.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError

    async def aclose(self) -> None:
        """
        Cleanup resources

        Clears the registered VAD callback, runs a garbage collection pass
        and logs each step. A failure of the collection pass is logged and
        not re-raised.
        """
        logger.info(f"Cleaning up VAD: {self.label}")

        # Drop the callback so this instance no longer references it.
        self._vad_callback = None

        import gc

        # Keep the try body to the one call being guarded; the success
        # message is emitted only when that call did not raise.
        try:
            gc.collect()
        except Exception as e:
            logger.error(f"Error during VAD garbage collection: {e}")
        else:
            logger.info(f"VAD garbage collection completed: {self.label}")

        logger.info(f"VAD cleanup completed: {self.label}")

    async def __aenter__(self) -> VAD:
        """Enter the async context; returns this instance unchanged."""
        return self

    async def __aexit__(
        self,
        exc_type: Optional[type[BaseException]],
        exc_val: Optional[BaseException],
        exc_tb: Any,
    ) -> None:
        """Leave the async context: always awaits ``aclose()``.

        Returns None, so an exception raised inside the ``async with``
        block is not suppressed.
        """
        await self.aclose()

    def on_vad_event(self, callback: Callable[[VADResponse], Awaitable[None]]) -> None:
        """
        Set callback for receiving VAD events

        Only one callback is kept; a later call replaces the earlier one.

        Args:
            callback: Async callable taking a ``VADResponse``.
        """
        self._vad_callback = callback

Base class for Voice Activity Detection implementations

Ancestors

Instance variables

prop label : str
Expand source code
@property
def label(self) -> str:
    """
    Return the VAD provider label.

    This is the value stored in ``self._label``.
    """
    return self._label

Get the VAD provider label

prop sample_rate : int
Expand source code
@property
def sample_rate(self) -> int:
    """
    Return the audio sample rate.

    This is the value stored in ``self._sample_rate``.
    """
    return self._sample_rate

Get audio sample rate

Methods

async def aclose(self) ‑> None
Expand source code
async def aclose(self) -> None:
    """
    Cleanup resources

    Clears the registered VAD callback, runs a garbage collection pass
    and logs each step. A failure of the collection pass is logged and
    not re-raised.
    """
    logger.info(f"Cleaning up VAD: {self.label}")

    # Drop the callback so this instance no longer references it.
    self._vad_callback = None

    import gc

    # Keep the try body to the one call being guarded; the success
    # message is emitted only when that call did not raise.
    try:
        gc.collect()
    except Exception as e:
        logger.error(f"Error during VAD garbage collection: {e}")
    else:
        logger.info(f"VAD garbage collection completed: {self.label}")

    logger.info(f"VAD cleanup completed: {self.label}")

Cleanup resources

def on_vad_event(self,
callback: Callable[[VADResponse], Awaitable[None]]) ‑> None
Expand source code
def on_vad_event(self, callback: Callable[[VADResponse], Awaitable[None]]) -> None:
    """
    Register the callback that receives VAD events.

    The callback is stored in ``self._vad_callback``; a later call
    replaces whatever was stored before.

    Args:
        callback: Async callable taking a ``VADResponse``.
    """
    self._vad_callback = callback

Set callback for receiving VAD events

async def process_audio(self, audio_frames: bytes, **kwargs: Any) ‑> None
Expand source code
@abstractmethod
async def process_audio(
    self,
    audio_frames: bytes,
    **kwargs: Any
) -> None:
    """
    Process audio frames and detect voice activity

    Args:
        audio_frames: Audio frame data to process, as a ``bytes``
            object (the sample format is not specified here)
        **kwargs: Additional provider-specific arguments

    Returns:
        None. NOTE(review): detected events are presumably delivered
        through the callback registered with ``on_vad_event`` rather
        than returned -- confirm against the subclasses.

    Raises:
        NotImplementedError: Always, in this base implementation.
    """
    raise NotImplementedError

Process audio frames and detect voice activity

Args

audio_frames
Audio frame data to process, passed as `bytes` (per the method signature)
**kwargs
Additional provider-specific arguments

Returns

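None. The method is annotated `-> None`; detected VADResponse objects are presumably delivered through the callback registered with `on_vad_event` rather than returned.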

class VADData (is_speech: bool,
confidence: float = 0.0,
timestamp: float = 0.0,
speech_duration: float = 0.0,
silence_duration: float = 0.0)
Expand source code
@dataclass
class VADData:
    """
    Result payload of a voice activity detection step.

    Only ``is_speech`` is required; every other field defaults to 0.0.

    Attributes:
        is_speech: Whether speech was detected.
        confidence: Detection confidence (range not specified here).
        timestamp: Time of the result (units/epoch not specified here).
        speech_duration: Speech duration (presumably seconds -- confirm).
        silence_duration: Silence duration (presumably seconds -- confirm).
    """
    is_speech: bool
    confidence: float = 0.0
    timestamp: float = 0.0
    speech_duration: float = 0.0
    silence_duration: float = 0.0

Data structure for voice activity detection results

Instance variables

var confidence : float
var is_speech : bool
var silence_duration : float
var speech_duration : float
var timestamp : float
class VADEventType (*args, **kwds)
Expand source code
class VADEventType(str, Enum):
    """Kinds of voice activity events: start of speech and end of speech.

    Members are also ``str`` instances, so they compare equal to their
    string values.
    """
    START_OF_SPEECH = "start_of_speech"
    END_OF_SPEECH = "end_of_speech"

Enumeration of VAD event types: START_OF_SPEECH ("start_of_speech") and END_OF_SPEECH ("end_of_speech"). The text below is the docstring inherited from builtins.str.

str(object='') -> str
str(bytes_or_buffer[, encoding[, errors]]) -> str

Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to sys.getdefaultencoding(). errors defaults to 'strict'.

Ancestors

  • builtins.str
  • enum.Enum

Class variables

var END_OF_SPEECH
var START_OF_SPEECH
class VADResponse (**data: Any)
Expand source code
class VADResponse(BaseModel):
    """
    Model describing one result of VAD processing.

    Attributes:
        event_type: Which kind of VAD event this response reports.
        data: The detection result attached to the event.
        metadata: Optional free-form dictionary; defaults to None.
    """
    event_type: VADEventType
    data: VADData
    metadata: Optional[dict[str, Any]] = None

Response from VAD processing

Create a new model by parsing and validating input data from keyword arguments.

Raises [ValidationError][pydantic_core.ValidationError] if the input data cannot be validated to form a valid model.

self is explicitly positional-only to allow self as a field name.

Ancestors

  • pydantic.main.BaseModel

Class variables

var data : VADData
var event_type : VADEventType
var metadata : dict[str, typing.Any] | None
var model_config