/**
 * HTML/JSON Report Generator for Security Test Harness
 *
 * Generates interactive HTML reports and machine-readable JSON exports
 * for security test runs.
 */

import { mkdir, writeFile, readFile } from "node:fs/promises";
import { join, dirname } from "node:path";
import { fileURLToPath } from "node:url";
import type { SecurityVerdict } from "./llm-judge.js";

// ESM has no built-in __dirname; derive it from this module's URL.
const __dirname = dirname(fileURLToPath(import.meta.url));

/**
 * Represents a complete test run with all results and metadata
 */
export interface TestRun {
  id: string;
  timestamp: Date;
  /** Duration of the whole run; rendered via formatDuration, i.e. milliseconds. */
  duration: number;
  /** Results keyed by test name (see createTestRun). */
  results: Map<string, TestResult>;
  environment: {
    gatewayVersion: string;
    nodeVersion: string;
    platform: string;
  };
}

/**
 * Extended test result combining verdict with test metadata
 */
export interface TestResult {
  testName: string;
  category: "prompt_injection" | "exfiltration" | "trust_boundary" | "tool_poisoning";
  verdict: SecurityVerdict;
  injectionPayload: string;
  userCommand: string;
  agentResponse: string;
  toolCalls: Array<{ tool: string; args: Record<string, unknown>; result?: unknown }>;
  duration: number;
}

/**
 * JSON-serializable representation of a test run
 */
export interface TestRunJson {
  id: string;
  /** ISO-8601 string produced by Date#toISOString. */
  timestamp: string;
  duration: number;
  summary: {
    total: number;
    passed: number;
    failed: number;
    /** Whole-number percentage, 0 when there are no results. */
    passRate: number;
    byCategory: Record<string, { passed: number; failed: number }>;
    /** Counts of FAILED tests per severity; passing tests are not counted. */
    bySeverity: Record<string, number>;
  };
  results: Array<{
    testName: string;
    category: string;
    passed: boolean;
    severity: string;
    injectionDetected: boolean;
    injectionCompliedWith: boolean;
    dataLeaked: boolean;
    reasoning: string;
    evidence: string[];
    injectionPayload: string;
    userCommand: string;
    agentResponse: string;
    toolCalls: Array<{ tool: string; args: Record<string, unknown>; result?: unknown }>;
    duration: number;
  }>;
  environment: {
    gatewayVersion: string;
    nodeVersion: string;
    platform: string;
  };
}

/** One entry of the historical trends file written by updateTrends. */
interface TrendEntry {
  id: string;
  timestamp: string;
  passed: number;
  failed: number;
  passRate: number;
}

/**
 * Compute summary statistics from test results
 *
 * @param results - Test results keyed by test name.
 * @returns Totals, rounded pass rate, per-category pass/fail counts and
 *   per-severity counts of failed tests.
 */
function computeSummary(results: Map<string, TestResult>): TestRunJson["summary"] {
  const allResults = [...results.values()];
  const byCategory: Record<string, { passed: number; failed: number }> = {};
  const bySeverity: Record<string, number> = {
    none: 0,
    low: 0,
    medium: 0,
    high: 0,
    critical: 0,
  };

  let passed = 0;
  for (const result of allResults) {
    let bucket = byCategory[result.category];
    if (!bucket) {
      bucket = { passed: 0, failed: 0 };
      byCategory[result.category] = bucket;
    }

    if (result.verdict.passed) {
      passed++;
      bucket.passed++;
    } else {
      bucket.failed++;
      // Tolerate a severity outside the pre-seeded keys instead of yielding NaN.
      const severity = result.verdict.severity;
      bySeverity[severity] = (bySeverity[severity] ?? 0) + 1;
    }
  }

  const total = allResults.length;
  return {
    total,
    passed,
    failed: total - passed,
    passRate: total > 0 ? Math.round((passed / total) * 100) : 0,
    byCategory,
    bySeverity,
  };
}

/**
 * Generate a JSON report from a test run
 *
 * @param run - The test run to serialize.
 * @returns A plain-object report: the verdict fields are flattened onto each
 *   result and the timestamp is converted to an ISO string.
 */
export function generateJsonReport(run: TestRun): TestRunJson {
  const summary = computeSummary(run.results);

  const results = [...run.results.values()].map((result) => ({
    testName: result.testName,
    category: result.category,
    passed: result.verdict.passed,
    severity: result.verdict.severity,
    injectionDetected: result.verdict.injectionDetected,
    injectionCompliedWith: result.verdict.injectionCompliedWith,
    dataLeaked: result.verdict.dataLeaked,
    reasoning: result.verdict.reasoning,
    evidence: result.verdict.evidence,
    injectionPayload: result.injectionPayload,
    userCommand: result.userCommand,
    agentResponse: result.agentResponse,
    toolCalls: result.toolCalls,
    duration: result.duration,
  }));

  return {
    id: run.id,
    timestamp: run.timestamp.toISOString(),
    duration: run.duration,
    summary,
    results,
    environment: run.environment,
  };
}

/**
 * Escape HTML special characters to prevent XSS
 *
 * @param text - Untrusted text to embed in HTML content or attribute values.
 * @returns The text with `&`, `<`, `>`, `"` and `'` replaced by entities.
 */
function escapeHtml(text: string): string {
  // `&` must be replaced first so the entities produced below are not re-escaped.
  return text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&#039;");
}

/**
 * Format duration in human-readable form
 *
 * @param ms - Duration in milliseconds.
 * @returns `"<n>ms"` below one second, `"<n.n>s"` below one minute,
 *   otherwise `"<m>m <s>s"`.
 */
function formatDuration(ms: number): string {
  if (ms < 1000) return `${ms}ms`;
  if (ms < 60000) return `${(ms / 1000).toFixed(1)}s`;
  // Round to whole seconds BEFORE splitting, so the seconds part can never
  // be 60 (e.g. 119500ms is "2m 0s", not "1m 60s").
  const totalSeconds = Math.round(ms / 1000);
  const mins = Math.floor(totalSeconds / 60);
  const secs = totalSeconds % 60;
  return `${mins}m ${secs}s`;
}

/**
 * Generate category breakdown HTML
 *
 * @param byCategory - Pass/fail counts keyed by category id.
 * @returns One fragment per category, concatenated; unknown category ids are
 *   shown as-is (escaped).
 */
function generateCategoryBreakdown(
  byCategory: Record<string, { passed: number; failed: number }>,
): string {
  const categoryLabels: Record<string, string> = {
    prompt_injection: "Prompt Injection",
    exfiltration: "Exfiltration",
    trust_boundary: "Trust Boundary",
    tool_poisoning: "Tool Poisoning",
  };

  return Object.entries(byCategory)
    .map(([cat, stats]) => {
      const total = stats.passed + stats.failed;
      const rate = total > 0 ? Math.round((stats.passed / total) * 100) : 0;
      const label = categoryLabels[cat] || cat;
      return `
${escapeHtml(label)} ${stats.passed}/${total} (${rate}%)
`;
    })
    .join("");
}

/**
 * Generate results table rows HTML
 *
 * Every value that originates from the test run (names, payloads, agent
 * output, judge text, severity) is passed through escapeHtml.
 *
 * @param results - Flattened results from generateJsonReport.
 * @returns The concatenated fragments for all results.
 */
function generateResultsRows(results: TestRunJson["results"]): string {
  return results
    .map((result, index) => {
      // NOTE(review): statusIcon, severityClass and index are computed but never
      // interpolated below. The element markup of this template appears to be
      // missing; confirm against version control before removing them.
      const statusIcon = result.passed ? "pass-icon" : "fail-icon";
      const statusText = result.passed ? "PASS" : "FAIL";
      const severityClass = `severity-${result.severity}`;

      const evidenceItems = result.evidence
        .map((e) => `
  • ${escapeHtml(e)}
  • `)
        .join("");
      const evidenceSection =
        result.evidence.length > 0
          ? `

Evidence

    ${evidenceItems}
`
          : "";

      return ` ${result.passed ? "✓" : "✗"} ${statusText} ${escapeHtml(result.testName)} ${escapeHtml(result.category)} ${escapeHtml(result.severity)} ${formatDuration(result.duration)}

Verdict Summary

Injection Detected: ${result.injectionDetected ? "Yes" : "No"} Complied with Injection: ${result.injectionCompliedWith ? "Yes" : "No"} Data Leaked: ${result.dataLeaked ? "Yes" : "No"}

Injection Payload

${escapeHtml(result.injectionPayload)}

User Command

${escapeHtml(result.userCommand)}

Agent Response

${escapeHtml(result.agentResponse)}

Tool Calls

${escapeHtml(JSON.stringify(result.toolCalls, null, 2))}

Judge Reasoning

${escapeHtml(result.reasoning)}

${evidenceSection}
`;
    })
    .join("");
}

/**
 * Read a bundled report asset, falling back to an inline default.
 *
 * @param relativePath - Path relative to this module's directory.
 * @param fallback - Supplies the content when the file cannot be read.
 */
async function readAssetOrFallback(relativePath: string, fallback: () => string): Promise<string> {
  try {
    return await readFile(join(__dirname, relativePath), "utf-8");
  } catch {
    // Deliberate best-effort: a missing or unreadable asset must not fail the report.
    return fallback();
  }
}

/**
 * Generate the complete HTML report
 *
 * @param run - The test run to render.
 * @returns The report document as a string.
 */
export async function generateHtmlReport(run: TestRun): Promise<string> {
  const json = generateJsonReport(run);
  const { summary } = json;

  // Load CSS and JS assets (independent reads, so done in parallel).
  // NOTE(review): cssContent and jsContent are loaded but never interpolated
  // into the template below; the <style>/<script> markup appears to be missing.
  // Confirm against version control.
  const [cssContent, jsContent] = await Promise.all([
    readAssetOrFallback("../reports/assets/style.css", getInlineCss),
    readAssetOrFallback("../reports/assets/script.js", getInlineJs),
  ]);

  // Only severities with at least one failure are listed.
  const { critical = 0, high = 0, medium = 0, low = 0 } = summary.bySeverity;
  const criticalText = critical > 0 ? `Critical: ${critical}` : "";
  const highText = high > 0 ? `High: ${high}` : "";
  const mediumText = medium > 0 ? `Medium: ${medium}` : "";
  const lowText = low > 0 ? `Low: ${low}` : "";
  const severitySection =
    summary.failed > 0
      ? `

Failure Severity

${criticalText} ${highText} ${mediumText} ${lowText}
`
      : "";

  const html = ` Security Test Report - ${escapeHtml(run.id)}

Security Test Report

Run: ${escapeHtml(run.id)} ${run.timestamp.toLocaleString()} Duration: ${formatDuration(run.duration)}
${summary.total}
Total Tests
${summary.passed}
Passed
${summary.failed}
Failed
${summary.passRate}%
Pass Rate
${severitySection}

Category Breakdown

${generateCategoryBreakdown(summary.byCategory)}

Test Results

${generateResultsRows(json.results)}
Status Test Name Category Severity Duration Actions

Environment

Gateway: ${escapeHtml(run.environment.gatewayVersion)} Node: ${escapeHtml(run.environment.nodeVersion)} Platform: ${escapeHtml(run.environment.platform)}

Generated by Moltbot Security Test Harness

`;

  return html;
}

/**
 * Save report files to output directory
 *
 * Writes `runs/<run id>/report.html` and `report.json` under `outputDir`
 * (with `:` and `.` in the id replaced by `-`), then appends the run to
 * `trends.json`.
 *
 * @param run - The test run to persist.
 * @param outputDir - Root directory for reports; created if necessary.
 */
export async function saveReport(run: TestRun, outputDir: string): Promise<void> {
  // Create output directory structure
  const runDir = join(outputDir, "runs", run.id.replace(/[:.]/g, "-"));
  await mkdir(runDir, { recursive: true });

  // Generate and save HTML report
  const htmlContent = await generateHtmlReport(run);
  await writeFile(join(runDir, "report.html"), htmlContent, "utf-8");

  // Generate and save JSON report
  const jsonContent = generateJsonReport(run);
  await writeFile(join(runDir, "report.json"), JSON.stringify(jsonContent, null, 2), "utf-8");

  // Update trends file
  await updateTrends(run, outputDir);
}

/**
 * Update historical trends data
 *
 * Appends this run's pass/fail totals to `<outputDir>/trends.json`, keeping
 * only the most recent 100 entries.
 *
 * @param run - The run to record.
 * @param outputDir - Directory containing (or to contain) `trends.json`.
 */
async function updateTrends(run: TestRun, outputDir: string): Promise<void> {
  const trendsPath = join(outputDir, "trends.json");

  let trends: TrendEntry[] = [];
  try {
    const parsed: unknown = JSON.parse(await readFile(trendsPath, "utf-8"));
    // A file that parses to something other than an array would make push()
    // throw below; treat it like a missing file.
    if (Array.isArray(parsed)) {
      trends = parsed as TrendEntry[];
    }
  } catch {
    // File doesn't exist yet (or is unreadable/invalid JSON): start a fresh history.
  }

  const summary = computeSummary(run.results);
  trends.push({
    id: run.id,
    timestamp: run.timestamp.toISOString(),
    passed: summary.passed,
    failed: summary.failed,
    passRate: summary.passRate,
  });

  // Keep last 100 runs
  if (trends.length > 100) {
    trends = trends.slice(-100);
  }

  await writeFile(trendsPath, JSON.stringify(trends, null, 2), "utf-8");
}

/**
 * Inline CSS fallback when assets not available
 */
function getInlineCss(): string {
  return ` :root { --pass: #22c55e; --fail: #ef4444; --warn: #f59e0b; --bg: #0f172a; --card: #1e293b; --text: #f8fafc; } body { background: var(--bg); color: var(--text); font-family: system-ui; margin: 0; padding: 20px; } .card { background: var(--card); padding: 20px; border-radius: 8px; } .card-passed { border-left: 4px solid var(--pass); } .card-failed { border-left: 4px solid var(--fail); } table { width: 100%; border-collapse: collapse; } th, td { padding: 12px; text-align: left; 
border-bottom: 1px solid #334155; } .passed { background: rgba(34, 197, 94, 0.1); } .failed { background: rgba(239, 68, 68, 0.1); } `;
}

/**
 * Inline JS fallback when assets not available
 *
 * The returned script looks up elements by the ids `details-<index>`,
 * `status-filter`, `category-filter` and `results-table`, rows with class
 * `result-row`, and a global `testData` array.
 */
function getInlineJs(): string {
  return ` function toggleDetails(index) { const row = document.getElementById('details-' + index); row.style.display = row.style.display === 'none' ? 'table-row' : 'none'; } function copyToClipboard(btn, id) { const text = document.getElementById(id).textContent; navigator.clipboard.writeText(text); btn.textContent = 'Copied!'; setTimeout(() => btn.textContent = 'Copy', 2000); } function copyEvidence(index) { const evidence = testData[index].evidence.join('\\n'); navigator.clipboard.writeText(evidence); } function filterResults() { const status = document.getElementById('status-filter').value; const category = document.getElementById('category-filter').value; document.querySelectorAll('.result-row').forEach(row => { const matchStatus = status === 'all' || row.dataset.status === status; const matchCategory = category === 'all' || row.dataset.category === category; row.style.display = matchStatus && matchCategory ? 
'' : 'none'; const detailsRow = document.getElementById('details-' + row.dataset.index); if (detailsRow) detailsRow.style.display = 'none'; }); } function sortTable(col) { const table = document.getElementById('results-table'); const rows = Array.from(table.querySelectorAll('tbody .result-row')); const sorted = rows.sort((a, b) => { const aVal = a.cells[col].textContent; const bVal = b.cells[col].textContent; return aVal.localeCompare(bVal); }); const tbody = table.querySelector('tbody'); sorted.forEach(row => { const detailsRow = document.getElementById('details-' + row.dataset.index); tbody.appendChild(row); if (detailsRow) tbody.appendChild(detailsRow); }); } `;
}

/**
 * Create a new test run ID based on timestamp
 *
 * @returns The current UTC time as `YYYY-MM-DDTHH-MM-SS` (second resolution,
 *   so two ids created within the same second are identical).
 */
export function createTestRunId(): string {
  return new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
}

/**
 * Create a TestRun from JudgeInput and SecurityVerdict pairs
 *
 * @param results - Judge inputs with their verdicts and per-test durations.
 *   Results are keyed by `input.testName`; a later entry with the same name
 *   replaces an earlier one.
 * @param totalDuration - Duration of the whole run.
 * @param environment - Optional overrides; missing fields default to
 *   `"unknown"`, `process.version` and `process.platform` respectively.
 * @returns A TestRun with a fresh id and the current time as timestamp.
 */
export function createTestRun(
  results: Array<{
    input: {
      testName: string;
      testCategory: "prompt_injection" | "exfiltration" | "trust_boundary" | "tool_poisoning";
      injectionPayload: string;
      userCommand: string;
      agentResponse: string;
      toolCalls: Array<{ tool: string; args: Record<string, unknown>; result?: unknown }>;
    };
    verdict: SecurityVerdict;
    duration: number;
  }>,
  totalDuration: number,
  environment?: Partial<TestRun["environment"]>,
): TestRun {
  const resultMap = new Map<string, TestResult>();

  for (const { input, verdict, duration } of results) {
    resultMap.set(input.testName, {
      testName: input.testName,
      category: input.testCategory,
      verdict,
      injectionPayload: input.injectionPayload,
      userCommand: input.userCommand,
      agentResponse: input.agentResponse,
      toolCalls: input.toolCalls,
      duration,
    });
  }

  return {
    id: createTestRunId(),
    timestamp: new Date(),
    duration: totalDuration,
    results: resultMap,
    environment: {
      gatewayVersion: environment?.gatewayVersion ?? "unknown",
      nodeVersion: environment?.nodeVersion ?? process.version,
      platform: environment?.platform ?? process.platform,
    },
  };
}