From fb6fa9b16caa33eabeb44414d4681cea52d11784 Mon Sep 17 00:00:00 2001 From: Christina Ying Wang Date: Mon, 18 Nov 2024 13:41:41 -0800 Subject: [PATCH] Add ability to stream logs from host services to cloud Add `os-power-mode.service`, `nvpmodel.service`, and `os-fan-profile.service` which report status from applying power mode and fan profile configs as read from config.json. The Supervisor sets these configs in config.json for these host services to pick up and apply. Also add host log streaming from `jetson-qspi-manager.service` as it will very soon be needed for Jetson Orins. Relates-to: #2379 See: balena-io/open-balena-api#1792 See: balena-os/balena-jetson-orin#513 Change-type: minor Signed-off-by: Christina Ying Wang --- src/lib/journald.ts | 14 ++++++-- src/logging/balena-backend.ts | 2 +- src/logging/index.ts | 5 ++- src/logging/local-backend.ts | 2 +- src/logging/log-backend.ts | 16 +-------- src/logging/monitor.ts | 66 ++++++++++++++++++++++++++++------ src/logging/types.ts | 13 +++++++ test/unit/lib/journald.spec.ts | 3 ++ 8 files changed, 90 insertions(+), 31 deletions(-) create mode 100644 src/logging/types.ts diff --git a/src/lib/journald.ts b/src/lib/journald.ts index 5079819b1..4d83ece4d 100644 --- a/src/lib/journald.ts +++ b/src/lib/journald.ts @@ -21,7 +21,7 @@ export interface SpawnJournalctlOpts { unit?: string; containerId?: string; format: string; - filterString?: string; + filter?: string | string[]; since?: string; until?: string; } @@ -57,8 +57,16 @@ export function spawnJournalctl(opts: SpawnJournalctlOpts): ChildProcess { args.push('-o'); args.push(opts.format); - if (opts.filterString) { - args.push(opts.filterString); + if (opts.filter != null) { + // A single filter argument without spaces can be passed as a string + if (typeof opts.filter === 'string') { + args.push(opts.filter); + } else { + // Multiple filter arguments need to be passed as an array of strings + // instead of a single string with spaces, as `spawn` will interpret + // the single string as a single argument to journalctl, which is invalid. + args.push(...opts.filter); + } } log.debug('Spawning journalctl', args.join(' ')); diff --git a/src/logging/balena-backend.ts b/src/logging/balena-backend.ts index 39f9a1d2c..44df25e2b 100644 --- a/src/logging/balena-backend.ts +++ b/src/logging/balena-backend.ts @@ -6,7 +6,7 @@ import url from 'url'; import zlib from 'zlib'; import { setTimeout } from 'timers/promises'; -import type { LogMessage } from './log-backend'; +import type { LogMessage } from './types'; import { LogBackend } from './log-backend'; import log from '../lib/supervisor-console'; diff --git a/src/logging/index.ts b/src/logging/index.ts index fdb84c455..8973003d4 100644 --- a/src/logging/index.ts +++ b/src/logging/index.ts @@ -7,7 +7,8 @@ import type { LogType } from '../lib/log-types'; import { takeGlobalLockRW } from '../lib/process-lock'; import { BalenaLogBackend } from './balena-backend'; import { LocalLogBackend } from './local-backend'; -import type { LogBackend, LogMessage } from './log-backend'; +import type { LogBackend } from './log-backend'; +import type { LogMessage } from './types'; import logMonitor from './monitor'; import * as globalEventBus from '../event-bus'; @@ -49,6 +50,8 @@ export const initialized = _.once(async () => { backend.unmanaged = unmanaged; backend.publishEnabled = loggingEnabled; + logMonitor.attachSystemLogger(log); + if (!balenaBackend.isInitialised()) { globalEventBus.getInstance().once('deviceProvisioned', async () => { const conf = await config.getMany([ diff --git a/src/logging/local-backend.ts b/src/logging/local-backend.ts index 51a6ee787..dad0c0cb6 100644 --- a/src/logging/local-backend.ts +++ b/src/logging/local-backend.ts @@ -2,7 +2,7 @@ import _ from 'lodash'; import { Readable } from 'stream'; import { checkInt } from '../lib/validation'; -import type { LogMessage } from './log-backend'; +import type { LogMessage } from './types'; import { LogBackend } from './log-backend'; import log from '../lib/supervisor-console'; diff --git a/src/logging/log-backend.ts b/src/logging/log-backend.ts index 8c7239ac2..27a4ce0b9 100644 --- a/src/logging/log-backend.ts +++ b/src/logging/log-backend.ts @@ -1,18 +1,4 @@ -type BaseLogMessage = { - message: string; - isStdErr?: boolean; - timestamp: number; -}; -export type LogMessage = BaseLogMessage & - ( - | { - serviceId: number; - isSystem?: false; - } - | { - isSystem: true; - } - ); +import type { LogMessage } from './types'; export abstract class LogBackend { public unmanaged: boolean; diff --git a/src/logging/monitor.ts b/src/logging/monitor.ts index a2219f722..d8424b7de 100644 --- a/src/logging/monitor.ts +++ b/src/logging/monitor.ts @@ -1,14 +1,12 @@ import { pipeline } from 'stream/promises'; +import { setTimeout } from 'timers/promises'; import { spawnJournalctl, toJournalDate } from '../lib/journald'; import log from '../lib/supervisor-console'; -import { setTimeout } from 'timers/promises'; +import type { SystemLogMessage, BaseLogMessage } from './types'; -export type MonitorHook = (message: { - message: string; - isStdErr: boolean; - timestamp: number; -}) => Resolvable; +type MonitorHook = (message: BaseLogMessage) => Promise; +type SystemMonitorHook = (message: SystemLogMessage) => Promise; // This is nowhere near the amount of fields provided by journald, but simply the ones // that we are interested in @@ -18,12 +16,25 @@ interface JournalRow { MESSAGE: string | number[]; PRIORITY: string; __REALTIME_TIMESTAMP: string; + _SYSTEMD_UNIT: string; } // Wait 5s when journalctl failed before trying to read the logs again const JOURNALCTL_ERROR_RETRY_DELAY = 5000; const JOURNALCTL_ERROR_RETRY_DELAY_MAX = 15 * 60 * 1000; +// Additional host services we want to stream the logs for +const HOST_SERVICES = [ + // Balena service which applies power mode to config file on boot + 'os-power-mode.service', + // Balena service which applies fan profile to device at runtime + 'os-fan-profile.service', + // Nvidia power daemon which logs result from applying power mode from config file to device + 'nvpmodel.service', + // Runs at boot time and checks if Orin QSPI is accessible after provisioning + 'jetson-qspi-manager.service', +]; + function messageFieldToString(entry: JournalRow['MESSAGE']): string | null { if (Array.isArray(entry)) { return String.fromCharCode(...entry); @@ -60,6 +71,9 @@ class LogMonitor { hook: MonitorHook; }; } = {}; + private systemHook: SystemMonitorHook = async () => { + /* Default empty hook */ + }; private setupAttempts = 0; // Only stream logs since the start of the supervisor @@ -72,11 +86,16 @@ class LogMonitor { all: true, follow: true, format: 'json', - filterString: '_SYSTEMD_UNIT=balena.service', + filter: [ + // Monitor logs from balenad by default for container log-streaming + 'balena.service', + // Add any host services we want to stream + ...HOST_SERVICES, + ].map((s) => `_SYSTEMD_UNIT=${s}`), since: toJournalDate(this.lastSentTimestamp), }); if (!stdout) { - // this will be catched below + // This error will be caught below throw new Error('failed to open process stream'); } @@ -96,6 +115,8 @@ class LogMonitor { self.containers[row.CONTAINER_ID_FULL] ) { await self.handleRow(row); + } else if (HOST_SERVICES.includes(row._SYSTEMD_UNIT)) { + await self.handleHostServiceRow(row); } } catch { // ignore parsing errors @@ -135,6 +156,10 @@ class LogMonitor { delete this.containers[containerId]; } + public attachSystemLogger(hook: SystemMonitorHook) { + this.systemHook = hook; + } + private async handleRow(row: JournalRow) { if ( row.CONTAINER_ID_FULL == null || @@ -151,12 +176,33 @@ class LogMonitor { if (message == null) { return; } - const isStdErr = row.PRIORITY === '3'; + const isStdErr = parseInt(row.PRIORITY, 10) <= 3; const timestamp = Math.floor(Number(row.__REALTIME_TIMESTAMP) / 1000); // microseconds to milliseconds - await this.containers[containerId].hook({ message, isStdErr, timestamp }); + await this.containers[containerId].hook({ + message, + isStdErr, + timestamp, + }); this.lastSentTimestamp = timestamp; } + + private async handleHostServiceRow( + row: JournalRow & { _SYSTEMD_UNIT: string }, + ) { + const message = messageFieldToString(row.MESSAGE); + if (message == null) { + return; + } + const isStdErr = parseInt(row.PRIORITY, 10) <= 3; + const timestamp = Math.floor(Number(row.__REALTIME_TIMESTAMP) / 1000); // microseconds to milliseconds + void this.systemHook({ + message, + isStdErr, + timestamp, + isSystem: true, + }); + } } const logMonitor = new LogMonitor(); diff --git a/src/logging/types.ts b/src/logging/types.ts new file mode 100644 index 000000000..ec3db983f --- /dev/null +++ b/src/logging/types.ts @@ -0,0 +1,13 @@ +export type BaseLogMessage = { + message: string; + isStdErr?: boolean; + timestamp: number; +}; +export type SystemLogMessage = BaseLogMessage & { + isSystem: true; +}; +type ContainerLogMessage = BaseLogMessage & { + serviceId: number; + isSystem?: false; +}; +export type LogMessage = SystemLogMessage | ContainerLogMessage; diff --git a/test/unit/lib/journald.spec.ts b/test/unit/lib/journald.spec.ts index 34c97ea55..b0eb3052f 100644 --- a/test/unit/lib/journald.spec.ts +++ b/test/unit/lib/journald.spec.ts @@ -28,6 +28,7 @@ describe('lib/journald', () => { unit: 'nginx.service', containerId: 'abc123', format: 'json-pretty', + filter: ['_SYSTEMD_UNIT=test.service', '_SYSTEMD_UNIT=test2.service'], since: '2014-03-25 03:59:56.654563', until: '2014-03-25 03:59:59.654563', }); @@ -48,6 +49,8 @@ describe('lib/journald', () => { '2014-03-25 03:59:56.654563', '-U', '2014-03-25 03:59:59.654563', + '_SYSTEMD_UNIT=test.service', + '_SYSTEMD_UNIT=test2.service', ]; const actualCommand = spawn.firstCall.args[0];