Serverless WebSocket API
Real-time features — live dashboards, chat, notifications, collaborative editing — traditionally require persistent server processes. AWS API Gateway WebSocket API lets you build real-time features on serverless: Lambda functions handle connect, disconnect, and message events while API Gateway manages the persistent connections. We implement production-grade WebSocket APIs with connection management, room-based broadcasting, presence tracking, and JWT authentication.
Need this done for your project?
We implement, you ship. Async, documented, done in days.
WebSocket API Gateway Setup
We configure API Gateway WebSocket API with route selection based on the message's action field, Lambda integrations for each route, and a custom (Lambda) authorizer on the $connect route that authorizes each connection.
# WebSocket API; routes are selected by the "action" field of each message body.
resource "aws_apigatewayv2_api" "ws" {
  protocol_type              = "WEBSOCKET"
  route_selection_expression = "$request.body.action"
  name                       = "${var.project}-ws-${var.env}"
}
# $connect route — authentication happens here
resource "aws_apigatewayv2_route" "connect" {
  route_key = "$connect"
  api_id    = aws_apigatewayv2_api.ws.id
  target    = "integrations/${aws_apigatewayv2_integration.connect.id}"

  # The custom authorizer runs before the connect integration is invoked.
  authorization_type = "CUSTOM"
  authorizer_id      = aws_apigatewayv2_authorizer.ws_auth.id
}
# $disconnect route — cleanup
resource "aws_apigatewayv2_route" "disconnect" {
  route_key = "$disconnect"
  api_id    = aws_apigatewayv2_api.ws.id
  target    = "integrations/${aws_apigatewayv2_integration.disconnect.id}"
}
# Custom routes based on action field
# Messages with { "action": "sendMessage" } are dispatched to this route.
resource "aws_apigatewayv2_route" "send_message" {
  route_key = "sendMessage"
  api_id    = aws_apigatewayv2_api.ws.id
  target    = "integrations/${aws_apigatewayv2_integration.send_message.id}"
}
# Messages with { "action": "joinRoom" } are dispatched to this route.
resource "aws_apigatewayv2_route" "join_room" {
  route_key = "joinRoom"
  api_id    = aws_apigatewayv2_api.ws.id
  target    = "integrations/${aws_apigatewayv2_integration.join_room.id}"
}
# "prod" stage of the WebSocket API, with automatic deployment enabled.
resource "aws_apigatewayv2_stage" "prod" {
  name        = "prod"
  api_id      = aws_apigatewayv2_api.ws.id
  auto_deploy = true
}
Authentication happens on $connect via query string token (WebSocket does not support custom headers in the browser). The authorizer validates the JWT and stores the user context with the connection ID.
Connection Management with DynamoDB
We track active connections, room memberships, and user presence in DynamoDB for fast lookups during message broadcasting.
// $connect handler — store connection
/**
 * Persists a record for a newly opened WebSocket connection.
 *
 * The record is keyed by connection ID and additionally carries index keys so
 * it can be found by user (GSI1) and by tenant (GSI2, which the presence query
 * reads via `GSI2PK = TENANT#<tenantId>`).
 *
 * @param event - The $connect event; user and tenant IDs are read from the
 *   authorizer context on `requestContext`.
 * @returns `{ statusCode: 200 }` once the record is stored, or
 *   `{ statusCode: 401 }` when the authorizer context lacks a user or tenant ID.
 */
export const connectHandler = async (event: APIGatewayProxyWebsocketEventV2) => {
  const connectionId = event.requestContext.connectionId;
  // NOTE(review): confirm that this event type actually declares
  // `requestContext.authorizer`; if not, the parameter type needs widening.
  const userId = event.requestContext.authorizer?.userId;
  const tenantId = event.requestContext.authorizer?.tenantId;

  // Without both IDs the record would be written as USER#undefined / TENANT#undefined.
  if (!userId || !tenantId) {
    return { statusCode: 401 };
  }

  const nowMs = Date.now();
  await docClient.send(new PutCommand({
    TableName: TABLE,
    Item: {
      PK: `CONN#${connectionId}`,
      SK: 'META',
      GSI1PK: `USER#${userId}`,
      GSI1SK: `CONN#${connectionId}`,
      // Tenant index key: required for the GSI2 presence query to see this connection.
      // GSI2SK mirrors the GSI1 naming pattern — TODO confirm against the table definition.
      GSI2PK: `TENANT#${tenantId}`,
      GSI2SK: `CONN#${connectionId}`,
      userId,
      tenantId,
      connectedAt: new Date(nowMs).toISOString(),
      expiresAt: Math.floor(nowMs / 1000) + 86400, // TTL: 24h
    },
  }));

  return { statusCode: 200 };
};
// joinRoom handler — add connection to room
/**
 * Adds the calling connection to a room after checking that the room belongs
 * to the same tenant as the connection.
 *
 * @param event - The joinRoom message event; `event.body` is expected to be a
 *   JSON string containing a `roomId`.
 * @returns `{ statusCode: 200 }` on success; `400` for a missing or malformed
 *   body or room ID; `401` when the connection record cannot be found; `404`
 *   when the room cannot be found; `403` when the room belongs to another tenant.
 */
export const joinRoomHandler = async (event: any) => {
  const { connectionId } = event.requestContext;

  // The body comes straight from the client: it may be absent, not JSON, or lack roomId.
  let roomId: unknown;
  try {
    roomId = JSON.parse(event.body ?? '')?.roomId;
  } catch {
    return { statusCode: 400 };
  }
  if (typeof roomId !== 'string' || roomId.length === 0) {
    return { statusCode: 400 };
  }

  // NOTE(review): assumes getConnection/getRoom resolve to a falsy value when
  // the item does not exist — confirm against their implementations.
  const conn = await getConnection(connectionId);
  if (!conn) {
    return { statusCode: 401 };
  }

  // Verify tenant owns this room
  const room = await getRoom(roomId);
  if (!room) {
    return { statusCode: 404 };
  }
  if (room.tenantId !== conn.tenantId) {
    return { statusCode: 403 };
  }

  const nowMs = Date.now();
  await docClient.send(new PutCommand({
    TableName: TABLE,
    Item: {
      PK: `ROOM#${roomId}`,
      SK: `CONN#${connectionId}`,
      GSI1PK: `CONN#${connectionId}`,
      GSI1SK: `ROOM#${roomId}`,
      userId: conn.userId,
      joinedAt: new Date(nowMs).toISOString(),
      expiresAt: Math.floor(nowMs / 1000) + 86400,
    },
  }));

  return { statusCode: 200 };
};
The base table's ROOM# partition lists all connections in a room (for broadcasting), and the GSI1 index allows querying all rooms for a connection (for cleanup on disconnect). TTL ensures stale connections are cleaned up even if the disconnect handler fails.
Message Broadcasting
Broadcasting messages to all connections in a room uses the API Gateway Management API to post data to each connection.
import { ApiGatewayManagementApiClient, PostToConnectionCommand, GoneException } from '@aws-sdk/client-apigatewaymanagementapi';
// Management API client used to push data to open connections.
// Endpoint format: https://abc123.execute-api.region.amazonaws.com/prod
const managementClientConfig = { endpoint: process.env.WEBSOCKET_ENDPOINT };
const apigw = new ApiGatewayManagementApiClient(managementClientConfig);
/**
 * Sends a message to every connection registered in a room.
 *
 * Room members are read from the `ROOM#<roomId>` partition, following
 * `LastEvaluatedKey` so rooms larger than one query page are not truncated.
 * Delivery is best-effort per connection: a failure to reach one connection
 * never prevents delivery to the others.
 *
 * @param roomId - Room whose connections should receive the message.
 * @param message - Payload; serialized once with `JSON.stringify`.
 * @param excludeConnectionId - Optional connection to skip (e.g. the sender).
 * @returns Resolves once all sends and stale-connection cleanups have settled.
 * @throws Propagates errors from the room membership query.
 */
export async function broadcastToRoom(
  roomId: string,
  message: object,
  excludeConnectionId?: string
): Promise<void> {
  const connPrefix = 'CONN#';

  // Get all connections in the room, page by page.
  const connectionIds: string[] = [];
  let exclusiveStartKey: Record<string, unknown> | undefined;
  do {
    const page = await docClient.send(new QueryCommand({
      TableName: TABLE,
      KeyConditionExpression: 'PK = :pk AND begins_with(SK, :prefix)',
      ExpressionAttributeValues: {
        ':pk': `ROOM#${roomId}`,
        ':prefix': connPrefix,
      },
      ExclusiveStartKey: exclusiveStartKey,
    }));

    for (const item of page.Items ?? []) {
      // The key condition guarantees SK starts with the prefix.
      const connId = String(item.SK).slice(connPrefix.length);
      if (connId !== excludeConnectionId) {
        connectionIds.push(connId);
      }
    }
    exclusiveStartKey = page.LastEvaluatedKey;
  } while (exclusiveStartKey);

  const payload = Buffer.from(JSON.stringify(message));
  const staleConnections: string[] = [];

  await Promise.allSettled(
    connectionIds.map(async (connId) => {
      try {
        await apigw.send(new PostToConnectionCommand({
          ConnectionId: connId,
          Data: payload,
        }));
      } catch (err: unknown) {
        if (err instanceof GoneException) {
          staleConnections.push(connId);
        } else {
          // Any other delivery failure is surfaced in the logs instead of vanishing silently.
          console.error('broadcastToRoom: failed to post to connection', { roomId, connId, err });
        }
      }
    })
  );

  // Clean up stale connections. The messages are already delivered at this point,
  // so a failed cleanup is logged rather than turned into a failed broadcast.
  if (staleConnections.length > 0) {
    const cleanupResults = await Promise.allSettled(
      staleConnections.map(connId => removeConnection(connId))
    );
    cleanupResults.forEach((outcome, index) => {
      if (outcome.status === 'rejected') {
        console.error('broadcastToRoom: failed to remove stale connection', {
          roomId,
          connId: staleConnections[index],
          err: outcome.reason,
        });
      }
    });
  }
}
We handle GoneException gracefully — when a connection is stale (client disconnected without triggering $disconnect), we clean it up immediately. For rooms with hundreds of connections, we use SQS to fan out the broadcast across multiple Lambda invocations to avoid timeout.
Presence & Heartbeats
User presence (online/offline status) is tracked using connection state and periodic heartbeats.
// Client-side heartbeat (every 5 minutes)
// The token travels in the query string, so it is URL-encoded before being appended.
const ws = new WebSocket(WS_URL + '?token=' + encodeURIComponent(accessToken));

const heartbeatTimer = setInterval(() => {
  if (ws.readyState === WebSocket.OPEN) {
    ws.send(JSON.stringify({ action: 'heartbeat' }));
  }
}, 5 * 60 * 1000);

// Stop the timer once the socket is closed; otherwise it keeps firing for the life of the page.
ws.addEventListener('close', () => clearInterval(heartbeatTimer));
// Server-side heartbeat handler
/**
 * Records a heartbeat for the calling connection by refreshing `lastSeen` and
 * pushing `expiresAt` ten minutes into the future.
 *
 * The update is conditional on the connection record already existing. An
 * unconditional update would create a new, partial item (no user or tenant)
 * whenever a heartbeat arrives for a connection that has been removed.
 *
 * @param event - The heartbeat message event; only `requestContext.connectionId` is read.
 * @returns `{ statusCode: 200 }` when the record was refreshed, or
 *   `{ statusCode: 410 }` when no record exists for this connection.
 * @throws Re-throws any error other than a failed condition check.
 */
export const heartbeatHandler = async (event: any) => {
  const { connectionId } = event.requestContext;
  const nowMs = Date.now();

  try {
    // Update connection TTL
    await docClient.send(new UpdateCommand({
      TableName: TABLE,
      Key: { PK: `CONN#${connectionId}`, SK: 'META' },
      UpdateExpression: 'SET lastSeen = :now, expiresAt = :ttl',
      // Only touch records that exist; never resurrect a removed connection.
      ConditionExpression: 'attribute_exists(PK)',
      ExpressionAttributeValues: {
        ':now': new Date(nowMs).toISOString(),
        ':ttl': Math.floor(nowMs / 1000) + 600, // 10 min TTL
      },
    }));
  } catch (err: unknown) {
    // Matched by name so no additional SDK import is needed.
    if (err instanceof Error && err.name === 'ConditionalCheckFailedException') {
      return { statusCode: 410 };
    }
    throw err;
  }

  return { statusCode: 200 };
};
// Presence query — get online users in a tenant
/**
 * Returns the distinct user IDs that currently have at least one live
 * connection in the given tenant.
 *
 * Connections are read from the GSI2 index, following `LastEvaluatedKey` so
 * tenants with more than one query page of connections are fully counted.
 * Records whose `expiresAt` is already in the past are ignored, because TTL
 * deletion happens in the background and expired items can remain readable.
 *
 * @param tenantId - Tenant whose online users are requested.
 * @returns De-duplicated user IDs (a user may hold several connections).
 * @throws Propagates errors from the index query.
 */
export async function getOnlineUsers(tenantId: string): Promise<string[]> {
  const nowSeconds = Math.floor(Date.now() / 1000);
  const onlineUserIds = new Set<string>();
  let exclusiveStartKey: Record<string, unknown> | undefined;

  do {
    const page = await docClient.send(new QueryCommand({
      TableName: TABLE,
      IndexName: 'GSI2',
      KeyConditionExpression: 'GSI2PK = :tenant',
      ExpressionAttributeValues: {
        ':tenant': `TENANT#${tenantId}`,
      },
      ExclusiveStartKey: exclusiveStartKey,
    }));

    for (const item of page.Items ?? []) {
      // Filtered client-side and only when the attribute is present, so the result
      // does not depend on whether the index projects expiresAt.
      const expired = typeof item.expiresAt === 'number' && item.expiresAt <= nowSeconds;
      if (!expired && typeof item.userId === 'string') {
        onlineUserIds.add(item.userId);
      }
    }
    exclusiveStartKey = page.LastEvaluatedKey;
  } while (exclusiveStartKey);

  return [...onlineUserIds];
}
Heartbeats reset the connection TTL. If a client stops sending heartbeats (browser tab closed, network failure), the TTL expires. DynamoDB removes expired items in the background rather than at the exact moment of expiry, so presence queries must also ignore records whose expiresAt is already in the past. Once the item is deleted, DynamoDB Streams triggers a cleanup Lambda that removes the connection from all rooms and broadcasts a presence update. Together these keep online status accurate even when $disconnect is not triggered.
Why Anubiz Engineering
Ready to get started?
Skip the research. Tell us what you need, and we'll scope it, implement it, and hand it back — fully documented and production-ready.