feat(observability): add Prometheus metrics and OpenTelemetry tracing
- Add Prometheus metrics endpoints (GET /metrics) - Add health checks (GET /health, GET /ready) - Add OpenTelemetry SDK with automatic HTTP instrumentation - Expose update metrics for messages, AI calls, and errors
This commit is contained in:
parent
f712388584
commit
bb97993755
19
package.json
19
package.json
@ -167,6 +167,12 @@
|
|||||||
"@mariozechner/pi-coding-agent": "0.49.3",
|
"@mariozechner/pi-coding-agent": "0.49.3",
|
||||||
"@mariozechner/pi-tui": "0.49.3",
|
"@mariozechner/pi-tui": "0.49.3",
|
||||||
"@mozilla/readability": "^0.6.0",
|
"@mozilla/readability": "^0.6.0",
|
||||||
|
"@opentelemetry/api": "^1.9.0",
|
||||||
|
"@opentelemetry/auto-instrumentations-node": "^0.69.0",
|
||||||
|
"@opentelemetry/exporter-prometheus": "^0.211.0",
|
||||||
|
"@opentelemetry/resources": "^2.5.0",
|
||||||
|
"@opentelemetry/sdk-node": "^0.211.0",
|
||||||
|
"@opentelemetry/semantic-conventions": "^1.39.0",
|
||||||
"@sinclair/typebox": "0.34.47",
|
"@sinclair/typebox": "0.34.47",
|
||||||
"@slack/bolt": "^4.6.0",
|
"@slack/bolt": "^4.6.0",
|
||||||
"@slack/web-api": "^7.13.0",
|
"@slack/web-api": "^7.13.0",
|
||||||
@ -185,7 +191,7 @@
|
|||||||
"express": "^5.2.1",
|
"express": "^5.2.1",
|
||||||
"file-type": "^21.3.0",
|
"file-type": "^21.3.0",
|
||||||
"grammy": "^1.39.3",
|
"grammy": "^1.39.3",
|
||||||
"hono": "4.11.4",
|
"hono": "4.11.7",
|
||||||
"jiti": "^2.6.1",
|
"jiti": "^2.6.1",
|
||||||
"json5": "^2.2.3",
|
"json5": "^2.2.3",
|
||||||
"jszip": "^3.10.1",
|
"jszip": "^3.10.1",
|
||||||
@ -196,11 +202,12 @@
|
|||||||
"osc-progress": "^0.3.0",
|
"osc-progress": "^0.3.0",
|
||||||
"pdfjs-dist": "^5.4.530",
|
"pdfjs-dist": "^5.4.530",
|
||||||
"playwright-core": "1.58.0",
|
"playwright-core": "1.58.0",
|
||||||
|
"prom-client": "^15.1.3",
|
||||||
"proper-lockfile": "^4.1.2",
|
"proper-lockfile": "^4.1.2",
|
||||||
"qrcode-terminal": "^0.12.0",
|
"qrcode-terminal": "^0.12.0",
|
||||||
"sharp": "^0.34.5",
|
"sharp": "^0.34.5",
|
||||||
"sqlite-vec": "0.1.7-alpha.2",
|
"sqlite-vec": "0.1.7-alpha.2",
|
||||||
"tar": "7.5.4",
|
"tar": "7.5.7",
|
||||||
"tslog": "^4.10.2",
|
"tslog": "^4.10.2",
|
||||||
"undici": "^7.19.0",
|
"undici": "^7.19.0",
|
||||||
"ws": "^8.19.0",
|
"ws": "^8.19.0",
|
||||||
@ -241,14 +248,14 @@
|
|||||||
"wireit": "^0.14.12"
|
"wireit": "^0.14.12"
|
||||||
},
|
},
|
||||||
"overrides": {
|
"overrides": {
|
||||||
"tar": "7.5.4"
|
"tar": "7.5.7"
|
||||||
},
|
},
|
||||||
"pnpm": {
|
"pnpm": {
|
||||||
"minimumReleaseAge": 2880,
|
"minimumReleaseAge": 2880,
|
||||||
"overrides": {
|
"overrides": {
|
||||||
"@sinclair/typebox": "0.34.47",
|
"@sinclair/typebox": "0.34.47",
|
||||||
"hono": "4.11.4",
|
"hono": "4.11.7",
|
||||||
"tar": "7.5.4"
|
"tar": "7.5.7"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"vitest": {
|
"vitest": {
|
||||||
@ -283,4 +290,4 @@
|
|||||||
"dist/Moltbot.app/**"
|
"dist/Moltbot.app/**"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
1075
pnpm-lock.yaml
generated
1075
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
97
src/gateway/metrics-http.ts
Normal file
97
src/gateway/metrics-http.ts
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
/**
|
||||||
|
* HTTP handler for Prometheus metrics endpoint.
|
||||||
|
*
|
||||||
|
* Exposes metrics at GET /metrics in Prometheus text format.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import type { IncomingMessage, ServerResponse } from 'node:http';
|
||||||
|
import { getMetricsText, getMetricsContentType } from '../infra/metrics.js';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Handle GET /metrics requests for Prometheus scraping.
|
||||||
|
*/
|
||||||
|
export async function handleMetricsRequest(
|
||||||
|
_req: IncomingMessage,
|
||||||
|
res: ServerResponse,
|
||||||
|
): Promise<void> {
|
||||||
|
try {
|
||||||
|
const metricsText = await getMetricsText();
|
||||||
|
res.writeHead(200, {
|
||||||
|
'Content-Type': getMetricsContentType(),
|
||||||
|
'Cache-Control': 'no-cache, no-store, must-revalidate',
|
||||||
|
});
|
||||||
|
res.end(metricsText);
|
||||||
|
} catch (error) {
|
||||||
|
res.writeHead(500, { 'Content-Type': 'text/plain' });
|
||||||
|
res.end('Error collecting metrics');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Handle GET /health liveness probe.
|
||||||
|
* Always returns 200 if the server is responding.
|
||||||
|
*/
|
||||||
|
export function handleHealthRequest(
|
||||||
|
_req: IncomingMessage,
|
||||||
|
res: ServerResponse,
|
||||||
|
): void {
|
||||||
|
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||||||
|
res.end(JSON.stringify({
|
||||||
|
status: 'ok',
|
||||||
|
timestamp: new Date().toISOString(),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Readiness check function type.
|
||||||
|
* Returns true if the service is ready to accept traffic.
|
||||||
|
*/
|
||||||
|
export type ReadinessCheck = () => Promise<boolean> | boolean;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a readiness handler with custom checks.
|
||||||
|
*
|
||||||
|
* @param checks - Array of check functions that must all pass
|
||||||
|
*/
|
||||||
|
export function createReadinessHandler(
|
||||||
|
checks: ReadinessCheck[] = [],
|
||||||
|
): (req: IncomingMessage, res: ServerResponse) => Promise<void> {
|
||||||
|
return async (_req: IncomingMessage, res: ServerResponse): Promise<void> => {
|
||||||
|
try {
|
||||||
|
const results = await Promise.all(checks.map((check) => check()));
|
||||||
|
const isReady = results.every(Boolean);
|
||||||
|
|
||||||
|
if (isReady) {
|
||||||
|
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||||||
|
res.end(JSON.stringify({
|
||||||
|
status: 'ready',
|
||||||
|
timestamp: new Date().toISOString(),
|
||||||
|
}));
|
||||||
|
} else {
|
||||||
|
res.writeHead(503, { 'Content-Type': 'application/json' });
|
||||||
|
res.end(JSON.stringify({
|
||||||
|
status: 'not_ready',
|
||||||
|
timestamp: new Date().toISOString(),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
res.writeHead(503, { 'Content-Type': 'application/json' });
|
||||||
|
res.end(JSON.stringify({
|
||||||
|
status: 'error',
|
||||||
|
timestamp: new Date().toISOString(),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simple readiness handler that always returns ready.
|
||||||
|
* Use createReadinessHandler for custom checks.
|
||||||
|
*/
|
||||||
|
export async function handleReadyRequest(
|
||||||
|
req: IncomingMessage,
|
||||||
|
res: ServerResponse,
|
||||||
|
): Promise<void> {
|
||||||
|
const handler = createReadinessHandler([]);
|
||||||
|
return handler(req, res);
|
||||||
|
}
|
||||||
@ -30,6 +30,11 @@ import { applyHookMappings } from "./hooks-mapping.js";
|
|||||||
import { handleOpenAiHttpRequest } from "./openai-http.js";
|
import { handleOpenAiHttpRequest } from "./openai-http.js";
|
||||||
import { handleOpenResponsesHttpRequest } from "./openresponses-http.js";
|
import { handleOpenResponsesHttpRequest } from "./openresponses-http.js";
|
||||||
import { handleToolsInvokeHttpRequest } from "./tools-invoke-http.js";
|
import { handleToolsInvokeHttpRequest } from "./tools-invoke-http.js";
|
||||||
|
import {
|
||||||
|
handleMetricsRequest,
|
||||||
|
handleHealthRequest,
|
||||||
|
handleReadyRequest,
|
||||||
|
} from "./metrics-http.js";
|
||||||
|
|
||||||
type SubsystemLogger = ReturnType<typeof createSubsystemLogger>;
|
type SubsystemLogger = ReturnType<typeof createSubsystemLogger>;
|
||||||
|
|
||||||
@ -86,8 +91,8 @@ export function createHooksRequestHandler(
|
|||||||
if (fromQuery) {
|
if (fromQuery) {
|
||||||
logHooks.warn(
|
logHooks.warn(
|
||||||
"Hook token provided via query parameter is deprecated for security reasons. " +
|
"Hook token provided via query parameter is deprecated for security reasons. " +
|
||||||
"Tokens in URLs appear in logs, browser history, and referrer headers. " +
|
"Tokens in URLs appear in logs, browser history, and referrer headers. " +
|
||||||
"Use Authorization: Bearer <token> or X-Moltbot-Token header instead.",
|
"Use Authorization: Bearer <token> or X-Moltbot-Token header instead.",
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -225,17 +230,32 @@ export function createGatewayHttpServer(opts: {
|
|||||||
} = opts;
|
} = opts;
|
||||||
const httpServer: HttpServer = opts.tlsOptions
|
const httpServer: HttpServer = opts.tlsOptions
|
||||||
? createHttpsServer(opts.tlsOptions, (req, res) => {
|
? createHttpsServer(opts.tlsOptions, (req, res) => {
|
||||||
void handleRequest(req, res);
|
void handleRequest(req, res);
|
||||||
})
|
})
|
||||||
: createHttpServer((req, res) => {
|
: createHttpServer((req, res) => {
|
||||||
void handleRequest(req, res);
|
void handleRequest(req, res);
|
||||||
});
|
});
|
||||||
|
|
||||||
async function handleRequest(req: IncomingMessage, res: ServerResponse) {
|
async function handleRequest(req: IncomingMessage, res: ServerResponse) {
|
||||||
// Don't interfere with WebSocket upgrades; ws handles the 'upgrade' event.
|
// Don't interfere with WebSocket upgrades; ws handles the 'upgrade' event.
|
||||||
if (String(req.headers.upgrade ?? "").toLowerCase() === "websocket") return;
|
if (String(req.headers.upgrade ?? "").toLowerCase() === "websocket") return;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
// Observability endpoints - no auth required for health checks
|
||||||
|
const url = new URL(req.url ?? "/", `http://${req.headers.host ?? "localhost"}`);
|
||||||
|
if (req.method === "GET" && url.pathname === "/metrics") {
|
||||||
|
await handleMetricsRequest(req, res);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (req.method === "GET" && url.pathname === "/health") {
|
||||||
|
handleHealthRequest(req, res);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (req.method === "GET" && url.pathname === "/ready") {
|
||||||
|
await handleReadyRequest(req, res);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const configSnapshot = loadConfig();
|
const configSnapshot = loadConfig();
|
||||||
const trustedProxies = configSnapshot.gateway?.trustedProxies ?? [];
|
const trustedProxies = configSnapshot.gateway?.trustedProxies ?? [];
|
||||||
if (await handleHooksRequest(req, res)) return;
|
if (await handleHooksRequest(req, res)) return;
|
||||||
|
|||||||
158
src/infra/metrics.ts
Normal file
158
src/infra/metrics.ts
Normal file
@ -0,0 +1,158 @@
|
|||||||
|
/**
|
||||||
|
* Prometheus metrics for Moltbot observability.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* import { metrics } from '../infra/metrics.js';
|
||||||
|
* metrics.messagesTotal.inc({ channel: 'whatsapp', direction: 'inbound' });
|
||||||
|
*/
|
||||||
|
|
||||||
|
import * as promClient from 'prom-client';
|
||||||
|
|
||||||
|
// Create a custom registry to avoid global state pollution
|
||||||
|
export const metricsRegistry = new promClient.Registry();
|
||||||
|
|
||||||
|
// Add default metrics (process CPU, memory, etc.)
|
||||||
|
promClient.collectDefaultMetrics({ register: metricsRegistry });
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// COUNTERS - Monotonically increasing values
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
/** Total messages processed by channel and direction */
|
||||||
|
export const messagesTotal = new promClient.Counter({
|
||||||
|
name: 'moltbot_messages_total',
|
||||||
|
help: 'Total number of messages processed',
|
||||||
|
labelNames: ['channel', 'direction'] as const, // direction: inbound | outbound
|
||||||
|
registers: [metricsRegistry],
|
||||||
|
});
|
||||||
|
|
||||||
|
/** Total HTTP requests by method, route, and status */
|
||||||
|
export const httpRequestsTotal = new promClient.Counter({
|
||||||
|
name: 'moltbot_http_requests_total',
|
||||||
|
help: 'Total HTTP requests received',
|
||||||
|
labelNames: ['method', 'route', 'status'] as const,
|
||||||
|
registers: [metricsRegistry],
|
||||||
|
});
|
||||||
|
|
||||||
|
/** Total errors by type and source */
|
||||||
|
export const errorsTotal = new promClient.Counter({
|
||||||
|
name: 'moltbot_errors_total',
|
||||||
|
help: 'Total errors encountered',
|
||||||
|
labelNames: ['type', 'source'] as const,
|
||||||
|
registers: [metricsRegistry],
|
||||||
|
});
|
||||||
|
|
||||||
|
/** Total AI provider calls by provider and model */
|
||||||
|
export const aiCallsTotal = new promClient.Counter({
|
||||||
|
name: 'moltbot_ai_calls_total',
|
||||||
|
help: 'Total AI provider API calls',
|
||||||
|
labelNames: ['provider', 'model'] as const,
|
||||||
|
registers: [metricsRegistry],
|
||||||
|
});
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// HISTOGRAMS - Request duration distributions
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
/** HTTP request duration in seconds */
|
||||||
|
export const httpRequestDuration = new promClient.Histogram({
|
||||||
|
name: 'moltbot_http_request_duration_seconds',
|
||||||
|
help: 'HTTP request duration in seconds',
|
||||||
|
labelNames: ['method', 'route'] as const,
|
||||||
|
buckets: [0.01, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10],
|
||||||
|
registers: [metricsRegistry],
|
||||||
|
});
|
||||||
|
|
||||||
|
/** AI provider response latency in seconds */
|
||||||
|
export const aiLatency = new promClient.Histogram({
|
||||||
|
name: 'moltbot_ai_latency_seconds',
|
||||||
|
help: 'AI provider response latency in seconds',
|
||||||
|
labelNames: ['provider', 'model'] as const,
|
||||||
|
buckets: [0.1, 0.5, 1, 2, 5, 10, 30, 60],
|
||||||
|
registers: [metricsRegistry],
|
||||||
|
});
|
||||||
|
|
||||||
|
/** Message processing time in seconds */
|
||||||
|
export const messageProcessingDuration = new promClient.Histogram({
|
||||||
|
name: 'moltbot_message_processing_duration_seconds',
|
||||||
|
help: 'Time to process a message end-to-end',
|
||||||
|
labelNames: ['channel'] as const,
|
||||||
|
buckets: [0.1, 0.5, 1, 2, 5, 10, 30],
|
||||||
|
registers: [metricsRegistry],
|
||||||
|
});
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// GAUGES - Values that can go up and down
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
/** Number of active sessions in memory */
|
||||||
|
export const activeSessions = new promClient.Gauge({
|
||||||
|
name: 'moltbot_active_sessions',
|
||||||
|
help: 'Number of active sessions currently in memory',
|
||||||
|
registers: [metricsRegistry],
|
||||||
|
});
|
||||||
|
|
||||||
|
/** Number of connected channels */
|
||||||
|
export const connectedChannels = new promClient.Gauge({
|
||||||
|
name: 'moltbot_connected_channels',
|
||||||
|
help: 'Number of channels currently connected',
|
||||||
|
labelNames: ['channel'] as const,
|
||||||
|
registers: [metricsRegistry],
|
||||||
|
});
|
||||||
|
|
||||||
|
/** WebSocket connections count */
|
||||||
|
export const websocketConnections = new promClient.Gauge({
|
||||||
|
name: 'moltbot_websocket_connections',
|
||||||
|
help: 'Number of active WebSocket connections',
|
||||||
|
registers: [metricsRegistry],
|
||||||
|
});
|
||||||
|
|
||||||
|
/** Gateway uptime in seconds */
|
||||||
|
export const uptimeSeconds = new promClient.Gauge({
|
||||||
|
name: 'moltbot_uptime_seconds',
|
||||||
|
help: 'Gateway uptime in seconds',
|
||||||
|
registers: [metricsRegistry],
|
||||||
|
});
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Helper functions
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
const startTime = Date.now();
|
||||||
|
|
||||||
|
/** Update the uptime gauge - call periodically */
|
||||||
|
export function updateUptime(): void {
|
||||||
|
uptimeSeconds.set((Date.now() - startTime) / 1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Get metrics in Prometheus text format */
|
||||||
|
export async function getMetricsText(): Promise<string> {
|
||||||
|
updateUptime();
|
||||||
|
return metricsRegistry.metrics();
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Get the content type for Prometheus metrics */
|
||||||
|
export function getMetricsContentType(): string {
|
||||||
|
return metricsRegistry.contentType;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convenience export for common usage patterns
|
||||||
|
export const metrics = {
|
||||||
|
messagesTotal,
|
||||||
|
httpRequestsTotal,
|
||||||
|
errorsTotal,
|
||||||
|
aiCallsTotal,
|
||||||
|
httpRequestDuration,
|
||||||
|
aiLatency,
|
||||||
|
messageProcessingDuration,
|
||||||
|
activeSessions,
|
||||||
|
connectedChannels,
|
||||||
|
websocketConnections,
|
||||||
|
uptimeSeconds,
|
||||||
|
getMetricsText,
|
||||||
|
getMetricsContentType,
|
||||||
|
updateUptime,
|
||||||
|
registry: metricsRegistry,
|
||||||
|
};
|
||||||
|
|
||||||
|
export default metrics;
|
||||||
133
src/infra/otel.ts
Normal file
133
src/infra/otel.ts
Normal file
@ -0,0 +1,133 @@
|
|||||||
|
/**
|
||||||
|
* OpenTelemetry SDK initialization for Moltbot.
|
||||||
|
*
|
||||||
|
* This module sets up distributed tracing with automatic HTTP instrumentation.
|
||||||
|
* Import this at the very start of your application entry point.
|
||||||
|
*
|
||||||
|
* Usage:
|
||||||
|
* import './infra/otel.js'; // Must be first import
|
||||||
|
* import { trace } from '@opentelemetry/api';
|
||||||
|
*/
|
||||||
|
|
||||||
|
import { NodeSDK } from '@opentelemetry/sdk-node';
|
||||||
|
import { getNodeAutoInstrumentations } from '@opentelemetry/auto-instrumentations-node';
|
||||||
|
import { trace, context, SpanStatusCode, type Span } from '@opentelemetry/api';
|
||||||
|
|
||||||
|
// SDK configuration - only enable if OTEL_ENABLED is set
|
||||||
|
const isOtelEnabled = process.env.OTEL_ENABLED === 'true' || process.env.OTEL_ENABLED === '1';
|
||||||
|
|
||||||
|
let sdk: NodeSDK | null = null;
|
||||||
|
|
||||||
|
if (isOtelEnabled) {
|
||||||
|
sdk = new NodeSDK({
|
||||||
|
serviceName: 'moltbot',
|
||||||
|
instrumentations: [
|
||||||
|
getNodeAutoInstrumentations({
|
||||||
|
// Disable some noisy instrumentations
|
||||||
|
'@opentelemetry/instrumentation-fs': { enabled: false },
|
||||||
|
'@opentelemetry/instrumentation-dns': { enabled: false },
|
||||||
|
}),
|
||||||
|
],
|
||||||
|
});
|
||||||
|
|
||||||
|
// Start the SDK
|
||||||
|
sdk.start();
|
||||||
|
|
||||||
|
// Graceful shutdown
|
||||||
|
process.on('SIGTERM', () => {
|
||||||
|
sdk?.shutdown()
|
||||||
|
.then(() => console.log('[otel] Tracing terminated'))
|
||||||
|
.catch((error) => console.error('[otel] Error terminating tracing', error))
|
||||||
|
.finally(() => process.exit(0));
|
||||||
|
});
|
||||||
|
|
||||||
|
console.log('[otel] OpenTelemetry SDK initialized');
|
||||||
|
} else {
|
||||||
|
console.log('[otel] OpenTelemetry disabled (set OTEL_ENABLED=true to enable)');
|
||||||
|
}
|
||||||
|
|
||||||
|
// =============================================================================
|
||||||
|
// Helper functions for manual instrumentation
|
||||||
|
// =============================================================================
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the current tracer for manual span creation.
|
||||||
|
*/
|
||||||
|
export function getTracer(name: string = 'moltbot') {
|
||||||
|
return trace.getTracer(name);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the current span context for correlation.
|
||||||
|
*/
|
||||||
|
export function getCurrentSpan() {
|
||||||
|
return trace.getActiveSpan();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the current trace ID for log correlation.
|
||||||
|
*/
|
||||||
|
export function getCurrentTraceId(): string | undefined {
|
||||||
|
const span = getCurrentSpan();
|
||||||
|
if (!span) return undefined;
|
||||||
|
return span.spanContext().traceId;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the current span ID for log correlation.
|
||||||
|
*/
|
||||||
|
export function getCurrentSpanId(): string | undefined {
|
||||||
|
const span = getCurrentSpan();
|
||||||
|
if (!span) return undefined;
|
||||||
|
return span.spanContext().spanId;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a custom span for a specific operation.
|
||||||
|
*
|
||||||
|
* @example
|
||||||
|
* await withSpan('ai.completion', { provider: 'openai' }, async (span) => {
|
||||||
|
* const result = await callOpenAI();
|
||||||
|
* span.setAttribute('tokens', result.usage.total_tokens);
|
||||||
|
* return result;
|
||||||
|
* });
|
||||||
|
*/
|
||||||
|
export async function withSpan<T>(
|
||||||
|
name: string,
|
||||||
|
attributes: Record<string, string | number | boolean> = {},
|
||||||
|
fn: (span: ReturnType<typeof trace.getTracer>['startSpan'] extends (name: string) => infer R ? R : never) => Promise<T>,
|
||||||
|
): Promise<T> {
|
||||||
|
const tracer = getTracer();
|
||||||
|
const span = tracer.startSpan(name);
|
||||||
|
|
||||||
|
for (const [key, value] of Object.entries(attributes)) {
|
||||||
|
span.setAttribute(key, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const result = await context.with(trace.setSpan(context.active(), span), () => fn(span));
|
||||||
|
span.setStatus({ code: SpanStatusCode.OK });
|
||||||
|
return result;
|
||||||
|
} catch (error) {
|
||||||
|
span.setStatus({
|
||||||
|
code: SpanStatusCode.ERROR,
|
||||||
|
message: error instanceof Error ? error.message : 'Unknown error',
|
||||||
|
});
|
||||||
|
span.recordException(error instanceof Error ? error : new Error(String(error)));
|
||||||
|
throw error;
|
||||||
|
} finally {
|
||||||
|
span.end();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add trace context to a log record.
|
||||||
|
*/
|
||||||
|
export function getTraceContext(): { traceId?: string; spanId?: string } {
|
||||||
|
return {
|
||||||
|
traceId: getCurrentTraceId(),
|
||||||
|
spanId: getCurrentSpanId(),
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
export { sdk as otelSdk };
|
||||||
Loading…
Reference in New Issue
Block a user