import WebSocket from 'ws';
// Unique identifier for this realtime session (replace with a real session id).
const sessionId = 'your-session-id';

// API key is read from the environment so it is never committed to source.
const credentials = process.env.INWORLD_API_KEY;
if (!credentials) {
  // Fail fast with a clear message instead of sending "Basic undefined".
  throw new Error('Missing INWORLD_API_KEY environment variable');
}

// Build the endpoint with URL/searchParams so the session id is properly
// percent-encoded (raw template interpolation would break on special chars).
const url = new URL('wss://api.inworld.ai/api/v1/realtime/session');
url.searchParams.set('key', sessionId);
url.searchParams.set('protocol', 'realtime');

// `ws` accepts a URL instance as the address argument.
const ws = new WebSocket(url, {
  headers: {
    Authorization: `Basic ${credentials}`,
  },
});
// Log once the socket handshake has completed.
ws.on('open', () => console.log('WebSocket connected'));
// Central dispatcher for server -> client realtime events.
// Each incoming frame is a JSON-encoded event with a `type` discriminator.
ws.on('message', (buffer) => {
  const message = JSON.parse(buffer.toString());
  switch (message.type) {
    case 'session.created':
      console.log('Session created:', message.session.id);
      updateSession();
      break;
    case 'session.updated':
      console.log('Session updated');
      sendMessage('Hello!');
      break;
    case 'conversation.item.added':
      console.log('Conversation item added:', message.item.id);
      break;
    case 'conversation.item.done':
      console.log('Conversation item done');
      createResponse();
      break;
    case 'input_audio_buffer.speech_started':
      console.log('Speech started at', message.audio_start_ms, 'ms');
      break;
    case 'input_audio_buffer.speech_stopped':
      console.log('Speech stopped at', message.audio_end_ms, 'ms');
      break;
    case 'conversation.item.input_audio_transcription.delta':
      console.log('Transcription delta:', message.delta);
      break;
    case 'conversation.item.input_audio_transcription.completed':
      console.log('Transcription complete:', message.transcript);
      break;
    case 'response.created':
      console.log('Response created:', message.response.id);
      break;
    case 'response.output_item.added':
      console.log('Output item added:', message.item.id);
      break;
    case 'response.output_text.delta':
      console.log('Text delta:', message.delta);
      break;
    case 'response.output_audio.delta': {
      // Braces give the `const` its own block scope: without them the
      // declaration's scope spans the entire switch (no-case-declarations).
      const audioBuffer = Buffer.from(message.delta, 'base64');
      playAudio(audioBuffer);
      break;
    }
    case 'response.output_audio_transcript.delta':
      console.log('Audio transcript delta:', message.delta);
      break;
    case 'response.done':
      console.log('Response complete, status:', message.response.status);
      break;
    case 'error':
      console.error('Error:', message.error.message, message.error.code);
      break;
    default:
      // Surface unknown event types instead of dropping them silently.
      console.log('Unhandled message type:', message.type);
      break;
  }
});
/**
 * Configure the realtime session: text + audio output, assistant
 * instructions, semantic voice-activity detection, and the output voice.
 */
function updateSession() {
  const session = {
    type: 'realtime',
    output_modalities: ['text', 'audio'],
    instructions: 'You are a helpful AI assistant.',
    audio: {
      input: {
        turn_detection: {
          type: 'semantic_vad',
          eagerness: 'medium',
          create_response: true,
          interrupt_response: true,
        },
      },
      output: {
        voice: 'Clive',
      },
    },
  };
  ws.send(JSON.stringify({ type: 'session.update', session }));
}
/**
 * Append a user text message to the conversation.
 * @param {string} text - Plain-text content of the user turn.
 */
function sendMessage(text) {
  const item = {
    type: 'message',
    role: 'user',
    content: [{ type: 'input_text', text }],
  };
  ws.send(JSON.stringify({ type: 'conversation.item.create', item }));
}
/** Ask the server to generate a model response with text + audio output. */
function createResponse() {
  const event = {
    type: 'response.create',
    response: {
      output_modalities: ['text', 'audio'],
    },
  };
  ws.send(JSON.stringify(event));
}
/** Abort the response currently being generated, if any. */
function cancelResponse() {
  const event = { type: 'response.cancel' };
  ws.send(JSON.stringify(event));
}
/**
 * Stream one chunk of microphone audio to the server-side input buffer.
 * @param {string} audioChunk - Base64-encoded audio data.
 */
function sendAudioChunk(audioChunk) {
  const event = {
    type: 'input_audio_buffer.append',
    audio: audioChunk,
  };
  ws.send(JSON.stringify(event));
}
/** Discard any audio accumulated in the server-side input buffer. */
function clearAudioBuffer() {
  const event = { type: 'input_audio_buffer.clear' };
  ws.send(JSON.stringify(event));
}