- Robot-client: TelemetryReporter collects system metrics (CPU, RAM, disk, WiFi) and sends them to the backend every 60s via WebSocket
- Robot-client: LogForwarder buffers Pino logs and flushes them in batches every 5s to the backend via WebSocket
- Backend: HealthReport entity + HealthTelemetryService with alert thresholds (CPU >80°C, RAM >90%, disk >90%, load >3.0, heap >85%)
- Backend: DeviceLog entity + LogIngestionService with EventEmitter2 for SSE
- Backend: REST endpoints GET /devices/:id/health/reports and /alerts
- Backend: REST endpoint GET /devices/:id/logs with filtering (level, logger, search)
- Backend: SSE endpoint GET /admin/logs/stream for real-time log streaming
- Migrations for health_reports and device_logs tables with proper indexes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
169 lines
4.6 KiB
TypeScript
169 lines
4.6 KiB
TypeScript
import { readFileSync, statfsSync } from 'node:fs';
|
|
import { freemem, totalmem, loadavg } from 'node:os';
|
|
import { execSync } from 'node:child_process';
|
|
import { type CloudSocket } from '../transport/cloud-socket.js';
|
|
import { createLogger, type Logger } from '../utils/index.js';
|
|
|
|
/**
 * Wire format of a single health report emitted by the telemetry reporter.
 *
 * Must match backend HealthReportPayload.
 */
interface HealthReportPayload {
  /** CPU temperature in degrees Celsius; `-1` when it cannot be read. */
  cpuTempCelsius: number;
  /** System memory in use (total minus free), in MiB. */
  memoryUsedMb: number;
  /** Total system memory, in MiB. */
  memoryTotalMb: number;
  /** Used share of the root filesystem in percent; `-1` when it cannot be read. */
  diskUsedPercent: number;
  /** One-minute system load average. */
  loadAvg1m: number;
  /** Heap currently used by this Node.js process, in MiB. */
  heapUsedMb: number;
  /** Heap currently allocated by this Node.js process, in MiB. */
  heapTotalMb: number;
  /** Uptime of this Node.js process (not of the host), in whole seconds. */
  uptimeSeconds: number;
  /** SSID of the active WiFi network, or `null` when unknown. */
  wifiSsid: string | null;
  /** Approximate WiFi signal strength in dBm, or `null` when unknown. */
  wifiSignalDbm: number | null;
  /** Version string of the robot client. */
  clientVersion: string;
  /** Node.js runtime version (`process.version`). */
  nodeVersion: string;
  /** Time the report was assembled, as an ISO 8601 string. */
  reportedAt: string;
}
|
|
|
|
/**
 * Periodically collects system metrics and sends them to the backend
 * via the existing Socket.IO connection.
 *
 * Metric collection is best-effort: a probe that fails (for example on a
 * host without the Linux thermal zone file or without `nmcli`) yields a
 * sentinel (`-1` for numeric readings, `null` for WiFi fields) instead of
 * aborting the whole report.
 */
export class TelemetryReporter {
  private readonly logger: Logger;
  private interval: ReturnType<typeof setInterval> | null = null;
  private readonly clientVersion: string;

  /**
   * @param cloudSocket - Connection used to emit `health_report` events.
   * @param clientVersion - Version string included in every report.
   */
  constructor(
    private readonly cloudSocket: CloudSocket,
    clientVersion = '0.0.1',
  ) {
    this.logger = createLogger('telemetry', 'info');
    this.clientVersion = clientVersion;
  }

  /**
   * Start reporting at the given interval.
   * Default: every 60 seconds.
   *
   * Calling `start()` while already running restarts the schedule rather
   * than stacking a second timer.
   *
   * @param intervalMs - Delay between reports in milliseconds.
   */
  start(intervalMs = 60_000): void {
    // Clear any previous timer first; otherwise its handle would be
    // overwritten, could never be cleared, and reports would be duplicated.
    this.stop();

    this.logger.info({ intervalMs }, 'Telemetry reporter started');

    // Send initial report immediately
    this.report();

    this.interval = setInterval(() => {
      this.report();
    }, intervalMs);
  }

  /** Stop periodic reporting. Safe to call when not started. */
  stop(): void {
    if (this.interval !== null) {
      clearInterval(this.interval);
      this.interval = null;
    }
  }

  /**
   * Collect one snapshot and emit it, skipping silently while disconnected.
   * Never throws: failures are logged as warnings so the timer keeps running.
   */
  private report(): void {
    if (!this.cloudSocket.isConnected) {
      this.logger.debug('Skipping telemetry report: not connected');
      return;
    }

    try {
      const payload = this.collectMetrics();
      // Emit via the existing socket — the backend RobotGateway
      // handles 'health_report' events
      this.cloudSocket.emitRaw('health_report', payload);
      this.logger.debug({ payload }, 'Health report sent');
    } catch (err) {
      this.logger.warn({ err }, 'Failed to collect/send telemetry');
    }
  }

  /** Assemble a full health report from the individual probes. */
  private collectMetrics(): HealthReportPayload {
    const bytesPerMb = 1024 * 1024;
    const heap = process.memoryUsage();
    const totalMb = totalmem() / bytesPerMb;
    const freeMb = freemem() / bytesPerMb;
    const [load1m = 0] = loadavg();
    // One nmcli invocation serves both WiFi fields; each call blocks the
    // event loop, so it is not repeated per field.
    const wifi = this.getWifiStatus();

    return {
      cpuTempCelsius: this.getCpuTemp(),
      memoryUsedMb: round(totalMb - freeMb),
      memoryTotalMb: round(totalMb),
      diskUsedPercent: this.getDiskUsage(),
      loadAvg1m: round(load1m),
      heapUsedMb: round(heap.heapUsed / bytesPerMb),
      heapTotalMb: round(heap.heapTotal / bytesPerMb),
      uptimeSeconds: Math.floor(process.uptime()),
      wifiSsid: wifi.ssid,
      wifiSignalDbm: wifi.signalDbm,
      clientVersion: this.clientVersion,
      nodeVersion: process.version,
      reportedAt: new Date().toISOString(),
    };
  }

  /**
   * Read CPU temperature from thermal zone (Linux only).
   *
   * @returns Temperature in degrees Celsius (the file value divided by
   *   1000), or `-1` when the file is missing or not numeric.
   */
  private getCpuTemp(): number {
    try {
      const raw = readFileSync('/sys/class/thermal/thermal_zone0/temp', 'utf-8');
      const milliCelsius = Number.parseInt(raw, 10);
      // Unparsable content must map to the sentinel, not leak NaN into the payload.
      return Number.isFinite(milliCelsius) ? round(milliCelsius / 1000) : -1;
    } catch {
      return -1;
    }
  }

  /**
   * Get disk usage for the root partition.
   *
   * @returns Used blocks as a percentage of total blocks, or `-1` when the
   *   filesystem cannot be queried or reports no blocks.
   */
  private getDiskUsage(): number {
    try {
      const { blocks, bfree } = statfsSync('/');
      // Guard the division: a zero block count would produce NaN.
      if (blocks <= 0) return -1;
      return round(((blocks - bfree) / blocks) * 100);
    } catch {
      return -1;
    }
  }

  /**
   * Get the SSID and approximate signal strength of the active WiFi
   * network via a single nmcli call.
   *
   * @returns Both fields `null` when nmcli fails, times out (3 s), or lists
   *   no active network.
   */
  private getWifiStatus(): ActiveWifi {
    try {
      const output = execSync('nmcli -t -f active,ssid,signal dev wifi', {
        encoding: 'utf-8',
        timeout: 3000,
      });
      return parseActiveWifi(output);
    } catch (err) {
      this.logger.debug({ err }, 'WiFi status unavailable');
      return { ssid: null, signalDbm: null };
    }
  }
}

/** SSID and approximate signal strength of the active WiFi network. */
interface ActiveWifi {
  ssid: string | null;
  signalDbm: number | null;
}

/**
 * Split one line of nmcli terse output into its fields.
 *
 * Terse mode separates fields with `:` and escapes literal `:` and `\`
 * inside values with a backslash, so a plain `split(':')` would cut an SSID
 * such as `Cafe:5G` in half.
 */
function splitNmcliTerseLine(line: string): string[] {
  const fields: string[] = [];
  let current = '';
  let escaped = false;

  for (const ch of line) {
    if (escaped) {
      current += ch;
      escaped = false;
    } else if (ch === '\\') {
      escaped = true;
    } else if (ch === ':') {
      fields.push(current);
      current = '';
    } else {
      current += ch;
    }
  }
  fields.push(current);
  return fields;
}

/**
 * Extract the active network from `nmcli -t -f active,ssid,signal dev wifi`
 * output.
 *
 * @param nmcliOutput - Raw stdout of the nmcli command.
 * @returns The first line whose `active` field is `yes`; each field is
 *   `null` when empty/non-numeric, and both are `null` without such a line.
 */
function parseActiveWifi(nmcliOutput: string): ActiveWifi {
  const activeLine = nmcliOutput.split('\n').find((l) => l.startsWith('yes:'));
  if (activeLine === undefined) {
    return { ssid: null, signalDbm: null };
  }

  const [, ssid = '', signal = ''] = splitNmcliTerseLine(activeLine);
  const percent = Number.parseInt(signal, 10);

  return {
    ssid: ssid === '' ? null : ssid,
    // nmcli reports signal as 0-100 percentage; approximate dBm
    // -30 dBm = 100%, -90 dBm = 0%
    signalDbm: Number.isFinite(percent)
      ? Math.round(-90 + (Math.min(100, Math.max(0, percent)) / 100) * 60)
      : null,
  };
}
|
|
|
|
/**
 * Round a number to a fixed number of decimal places.
 *
 * @param n - Value to round.
 * @param decimals - Number of decimal places to keep (default 1).
 * @returns `n` scaled by `10^decimals`, rounded with `Math.round`, and
 *   scaled back; non-finite input is returned as-is.
 */
function round(n: number, decimals = 1): number {
  const scale = 10 ** decimals;
  return Math.round(n * scale) / scale;
}
|