ti-pote/apps/robot-client/src/services/telemetry-reporter.ts
ordinarthur 096f772da8 feat: add health telemetry and centralized log system (Phase 2 & 3)
- Robot-client: TelemetryReporter collects system metrics (CPU, RAM, disk, WiFi)
  and sends them to backend every 60s via WebSocket
- Robot-client: LogForwarder buffers Pino logs and flushes them in batches
  every 5s to the backend via WebSocket
- Backend: HealthReport entity + HealthTelemetryService with alert thresholds
  (CPU >80°C, RAM >90%, disk >90%, load >3.0, heap >85%)
- Backend: DeviceLog entity + LogIngestionService with EventEmitter2 for SSE
- Backend: REST endpoints GET /devices/:id/health/reports and /alerts
- Backend: REST endpoint GET /devices/:id/logs with filtering (level, logger, search)
- Backend: SSE endpoint GET /admin/logs/stream for real-time log streaming
- Migrations for health_reports and device_logs tables with proper indexes

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-04-13 21:11:53 +02:00

169 lines
4.6 KiB
TypeScript

import { readFileSync, statfsSync } from 'node:fs';
import { freemem, totalmem, loadavg } from 'node:os';
import { execSync } from 'node:child_process';
import { type CloudSocket } from '../transport/cloud-socket.js';
import { createLogger, type Logger } from '../utils/index.js';
// Must match backend HealthReportPayload
/**
 * Shape of the object emitted with the 'health_report' socket event.
 *
 * The field notes below describe how TelemetryReporter in this file fills
 * each value; the backend-side interpretation is not visible from here.
 */
interface HealthReportPayload {
/** CPU temperature in °C, read from thermal_zone0 (millidegrees / 1000); -1 when the read fails. */
cpuTempCelsius: number;
/** System memory in use (total minus free), in MiB. */
memoryUsedMb: number;
/** Total system memory, in MiB. */
memoryTotalMb: number;
/** Used share of the root filesystem ('/') in percent; -1 when it cannot be read. */
diskUsedPercent: number;
/** First entry of os.loadavg(), i.e. the 1-minute load average. */
loadAvg1m: number;
/** heapUsed from process.memoryUsage(), in MiB. */
heapUsedMb: number;
/** heapTotal from process.memoryUsage(), in MiB. */
heapTotalMb: number;
/** Uptime of this Node.js process (process.uptime()), floored to whole seconds. */
uptimeSeconds: number;
/** SSID of the active WiFi network as reported by nmcli; null when none is found or nmcli fails. */
wifiSsid: string | null;
/** Approximate signal strength in dBm, derived from nmcli's 0-100 signal value; null when unavailable. */
wifiSignalDbm: number | null;
/** Client version string handed to the TelemetryReporter constructor. */
clientVersion: string;
/** Value of process.version. */
nodeVersion: string;
/** Collection time as produced by Date.prototype.toISOString(). */
reportedAt: string;
}
/**
 * Periodically collects system metrics and sends them to the backend
 * via the existing Socket.IO connection.
 *
 * Collection is best-effort: a metric that cannot be read is reported as a
 * sentinel (`-1` for numeric gauges, `null` for WiFi fields) instead of
 * aborting the whole report.
 */
export class TelemetryReporter {
  private readonly logger: Logger;
  private interval: ReturnType<typeof setInterval> | null = null;
  private readonly clientVersion: string;

  /**
   * @param cloudSocket - Connection used to emit `health_report` events.
   * @param clientVersion - Version string included in every report.
   */
  constructor(
    private readonly cloudSocket: CloudSocket,
    clientVersion = '0.0.1',
  ) {
    this.logger = createLogger('telemetry', 'info');
    this.clientVersion = clientVersion;
  }

  /**
   * Start reporting at the given interval.
   * Default: every 60 seconds.
   *
   * Calling `start()` while already running restarts the schedule rather
   * than stacking a second timer.
   *
   * @param intervalMs - Delay between reports, in milliseconds.
   */
  start(intervalMs = 60_000): void {
    // Drop any previous timer so repeated start() calls cannot leak intervals.
    this.stop();

    this.logger.info({ intervalMs }, 'Telemetry reporter started');

    // Send initial report immediately
    this.report();

    this.interval = setInterval(() => {
      this.report();
    }, intervalMs);
  }

  /** Stop periodic reporting. Safe to call when not running. */
  stop(): void {
    if (this.interval !== null) {
      clearInterval(this.interval);
      this.interval = null;
    }
  }

  /**
   * Collect one snapshot and emit it, unless the socket is disconnected.
   * Never throws: failures are logged at warn level.
   */
  private report(): void {
    if (!this.cloudSocket.isConnected) {
      this.logger.debug('Skipping telemetry report: not connected');
      return;
    }

    try {
      const payload = this.collectMetrics();
      // Emit via the existing socket — the backend RobotGateway
      // handles 'health_report' events
      this.cloudSocket.emitRaw('health_report', payload);
      this.logger.debug({ payload }, 'Health report sent');
    } catch (err) {
      this.logger.warn({ err }, 'Failed to collect/send telemetry');
    }
  }

  /** Gather every metric into a single payload. */
  private collectMetrics(): HealthReportPayload {
    const bytesPerMb = 1024 * 1024;
    const mem = process.memoryUsage();
    const totalMb = totalmem() / bytesPerMb;
    const freeMb = freemem() / bytesPerMb;
    // Defaulting keeps this sound under `noUncheckedIndexedAccess`.
    const [load1m = 0] = loadavg();
    // One nmcli invocation yields both WiFi fields from the same snapshot.
    const wifi = this.getWifiInfo();

    return {
      cpuTempCelsius: this.getCpuTemp(),
      memoryUsedMb: round(totalMb - freeMb),
      memoryTotalMb: round(totalMb),
      diskUsedPercent: this.getDiskUsage(),
      loadAvg1m: round(load1m),
      heapUsedMb: round(mem.heapUsed / bytesPerMb),
      heapTotalMb: round(mem.heapTotal / bytesPerMb),
      uptimeSeconds: Math.floor(process.uptime()),
      wifiSsid: wifi.ssid,
      wifiSignalDbm: wifi.signalDbm,
      clientVersion: this.clientVersion,
      nodeVersion: process.version,
      reportedAt: new Date().toISOString(),
    };
  }

  /**
   * Read CPU temperature from thermal zone (Linux only).
   *
   * @returns Temperature in °C, or `-1` when the file is missing or does
   *   not contain a number.
   */
  private getCpuTemp(): number {
    try {
      const raw = readFileSync('/sys/class/thermal/thermal_zone0/temp', 'utf-8');
      const milliDegrees = Number.parseInt(raw, 10);
      // NaN would be serialised as null in JSON; use the sentinel instead.
      return Number.isNaN(milliDegrees) ? -1 : round(milliDegrees / 1000);
    } catch {
      return -1;
    }
  }

  /**
   * Get disk usage for the root partition.
   *
   * @returns Used percentage, or `-1` when it cannot be determined.
   */
  private getDiskUsage(): number {
    try {
      const stats = statfsSync('/');
      const totalBlocks = stats.blocks;
      // Guard the division: a zero-sized filesystem would yield NaN.
      if (totalBlocks <= 0) return -1;
      const freeBlocks = stats.bfree;
      return round(((totalBlocks - freeBlocks) / totalBlocks) * 100);
    } catch {
      return -1;
    }
  }

  /**
   * Query the active WiFi network via a single nmcli call.
   *
   * `signal` is requested before `ssid` so that the free-form SSID is the
   * last field on the line and cannot shift the numeric one.
   *
   * @returns SSID and approximate dBm, each `null` when unavailable.
   */
  private getWifiInfo(): { ssid: string | null; signalDbm: number | null } {
    try {
      const output = execSync('nmcli -t -f active,signal,ssid dev wifi', {
        encoding: 'utf-8',
        timeout: 3000,
        // Without this, nmcli/shell errors are echoed to our stderr on every report.
        stdio: ['ignore', 'pipe', 'ignore'],
      });
      return TelemetryReporter.parseActiveWifi(output);
    } catch {
      return { ssid: null, signalDbm: null };
    }
  }

  /**
   * Parse terse nmcli output (`active:signal:ssid` per line) and extract
   * the entry flagged as active.
   */
  private static parseActiveWifi(output: string): {
    ssid: string | null;
    signalDbm: number | null;
  } {
    const activePrefix = 'yes:';
    const activeLine = output
      .split('\n')
      .find((line) => line.startsWith(activePrefix));
    if (activeLine === undefined) {
      return { ssid: null, signalDbm: null };
    }

    const fields = activeLine.slice(activePrefix.length);
    const separator = fields.indexOf(':');
    const rawSignal = separator === -1 ? fields : fields.slice(0, separator);
    const rawSsid = separator === -1 ? '' : fields.slice(separator + 1);

    // Terse mode escapes ':' and '\' inside values with a backslash.
    const ssid = rawSsid.replace(/\\(.)/g, '$1');

    const percent = Number.parseInt(rawSignal, 10);
    let signalDbm: number | null = null;
    if (!Number.isNaN(percent)) {
      // nmcli reports signal as 0-100 percentage; approximate dBm
      // -30 dBm = 100%, -90 dBm = 0%
      const clamped = Math.min(100, Math.max(0, percent));
      signalDbm = Math.round(-90 + (clamped / 100) * 60);
    }

    return { ssid: ssid === '' ? null : ssid, signalDbm };
  }
}
/**
 * Round a number to a fixed count of decimal places.
 *
 * @param n - Value to round.
 * @param decimals - Digits kept after the decimal point (default 1).
 * @returns `n` scaled up, passed through `Math.round`, and scaled back down.
 */
function round(n: number, decimals = 1): number {
  const scale = 10 ** decimals;
  const scaled = Math.round(n * scale);
  return scaled / scale;
}