Pipeline
The Pipeline component manages the AI model and its configuration, providing a standardized interface across different model providers.
Key Features:
- Flexible model selection
- Configurable model parameters
- Streaming audio support
- Multi-modal capabilities
Example Implementation:
from videosdk.agents import RealTimePipeline
from openai.types.beta.realtime.session import InputAudioTranscription, TurnDetection
from videosdk.plugins.openai import OpenAIRealtime, OpenAIRealtimeConfig

# For Gemini
# from videosdk.plugins.gemini import GeminiRealtime, GeminiLiveConfig
# from google.genai.types import AudioTranscriptionConfig

# Option 1: OpenAI's real-time models, fully configured.
# NOTE: build ONE configured instance and pass that same instance to the
# pipeline — creating a second, unconfigured model and passing it instead
# would silently discard this configuration.
model = OpenAIRealtime(
    model="gpt-4o-realtime-preview",
    api_key="your-openai-api-key",  # Or use the OPENAI_API_KEY environment variable
    config=OpenAIRealtimeConfig(
        # Enable both text and audio responses.
        modalities=["text", "audio"],
        # Transcribe incoming user audio with Whisper.
        input_audio_transcription=InputAudioTranscription(
            model="whisper-1",
        ),
        # Server-side voice-activity detection drives turn taking:
        # threshold = VAD sensitivity, padding/silence are in milliseconds.
        turn_detection=TurnDetection(
            type="server_vad",
            threshold=0.5,
            prefix_padding_ms=300,
            silence_duration_ms=200,
        ),
        tool_choice="auto",
    ),
)

# Option 2: Google's Gemini live models.
# model = GeminiRealtime(
#     model="gemini-2.0-flash-live-001",
#     api_key="your-google-api-key",
#     config=GeminiLiveConfig(
#         response_modalities=["AUDIO"],
#         output_audio_transcription=AudioTranscriptionConfig(),
#     ),
# )

# Create the pipeline with the configured model selected above.
pipeline = RealTimePipeline(model=model)
Got a question? Ask us on Discord.