Skip to content

Commit

Permalink
doc: annotate various types and functions
Browse files Browse the repository at this point in the history
matching Python doc comments written by Théo
  • Loading branch information
nbsp committed May 22, 2024
1 parent 8fa6bdc commit 4b50a80
Show file tree
Hide file tree
Showing 8 changed files with 81 additions and 3 deletions.
4 changes: 3 additions & 1 deletion agents/src/generator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ export interface Agent {

/**
* Helper to define an agent according to the required interface.
* @example `export default defineAgent(myAgent);`
* @example export default defineAgent(async (job: JobContext) => {
* // ...
* });
*/
export function defineAgent(entry: entryFunction): Agent {
return { entry };
Expand Down
1 change: 1 addition & 0 deletions agents/src/ipc/job_main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ if (process.send) {
});

// don't do anything on C-c
// this is handled in cli, triggering a termination of all child processes at once.
process.on('SIGINT', () => {});

const conn = room.connect(args.url || process.argv[4], args.token);
Expand Down
4 changes: 4 additions & 0 deletions agents/src/job_context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ import type { Job } from '@livekit/protocol';
import type { LocalParticipant, RemoteParticipant, Room } from '@livekit/rtc-node';
import type { EventEmitter } from 'events';

/**
* JobContext contains information about the job, the room, and the participant.
 * It is created internally by {@link Worker} and should not be instantiated manually.
*/
export class JobContext {
#job: Job;
#room: Room;
Expand Down
27 changes: 27 additions & 0 deletions agents/src/stt/stt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,25 @@ import type { AudioFrame } from '@livekit/rtc-node';
import type { AudioBuffer } from '../utils.js';

export enum SpeechEventType {
  /**
   * Indicates the start of speech.
   * If the STT doesn't support this event, this will be emitted at the same time
   * as the first INTERIM_TRANSCRIPT.
   */
  START_OF_SPEECH = 0,
  /**
   * Interim transcript, useful for real-time transcription.
   */
  INTERIM_TRANSCRIPT = 1,
  /**
   * Final transcript, emitted when the STT is confident enough that a certain
   * portion of the speech will not change.
   */
  FINAL_TRANSCRIPT = 2,
  /**
   * Indicates the end of speech, emitted when the user stops speaking.
   * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.
   */
  END_OF_SPEECH = 3,
}

Expand All @@ -30,8 +46,19 @@ export class SpeechEvent {
}

export abstract class SpeechStream implements IterableIterator<SpeechEvent> {
/**
* Push a frame to be recognised.
* It is recommended to push frames as soon as they are available.
*/
abstract pushFrame(token: AudioFrame): void;

/**
* Close the stream.
*
* @param wait
* Whether to wait for the STT to finish processing the remaining
* frames before closing
*/
abstract close(wait: boolean): Promise<void>;

abstract next(): IteratorResult<SpeechEvent>;
Expand Down
10 changes: 10 additions & 0 deletions agents/src/tts/tts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,18 @@ export interface SynthesizedAudio {
}

export enum SynthesisEventType {
  /**
   * Indicates the start of synthesis.
   * Retriggered after a FINISHED event, when a new synthesis begins.
   */
  STARTED = 0,
  /**
   * Indicates that synthesized audio data is available.
   */
  AUDIO = 1,
  /**
   * Indicates the end of synthesis. Does not necessarily mean the stream is done.
   */
  FINISHED = 2,
}

Expand Down
5 changes: 5 additions & 0 deletions agents/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ import { AudioFrame } from '@livekit/rtc-node';

export type AudioBuffer = AudioFrame[] | AudioFrame;

/**
* Merge one or more {@link AudioFrame}s into a single one.
*
* @param buffer Either an {@link AudioFrame} or a list thereof
*/
export const mergeFrames = (buffer: AudioBuffer): AudioFrame => {
if (Array.isArray(buffer)) {
buffer = buffer as AudioFrame[];
Expand Down
29 changes: 29 additions & 0 deletions agents/src/vad.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,52 @@ export enum VADEventType {

export interface VADEvent {
  /**
   * Kind of VAD event that occurred (see {@link VADEventType}).
   */
  type: VADEventType;
  /**
   * Index of the samples of the event (when the event was fired)
   */
  samplesIndex: number;
  /**
   * Duration of speech, in seconds
   */
  duration: number;
  /**
   * Audio frames containing the speech associated with this event.
   */
  speech: AudioFrame[];
}

/**
 * Base class for voice activity detection (VAD) implementations.
 * Subclasses provide a concrete {@link VADStream} via {@link VAD.stream}.
 */
export abstract class VAD {
  /**
   * Returns a {@link VADStream} that can be used to push audio frames and receive VAD events.
   *
   * @param options - Options tuning the detector's sensitivity and buffering behavior
   */
  abstract stream({
    minSpeakingDuration,
    minSilenceDuration,
    paddingDuration,
    sampleRate,
    maxBufferedSpeech,
  }: {
    /**
     * Minimum duration of speech required to trigger a {@link VADEventType.START_OF_SPEECH} event
     */
    minSpeakingDuration: number;
    /**
     * Milliseconds to wait before separating speech chunk.
     * Not always precise, generally rounded to the nearest 40ms depending on VAD implementation
     */
    minSilenceDuration: number;
    /**
     * Number of frames to pad the start and end of speech with
     */
    paddingDuration: number;
    /**
     * Sample rate of inference/processing
     */
    sampleRate: number;
    /**
     * Number of seconds the buffer may keep until {@link VADEventType.END_OF_SPEECH} is triggered.
     * It is recommended to set this to a positive value, as zero may OOM if the user doesn't stop
     * speaking.
     */
    maxBufferedSpeech: number;
  }): VADStream;
}
Expand Down
4 changes: 2 additions & 2 deletions agents/src/worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,10 @@ type AssignmentPair = {

class PendingAssignment {
promise = new Promise<AssignmentPair>((resolve) => {
this.resolve = resolve; // oh, JavaScript.
this.resolve = resolve; // this is how JavaScript lets you resolve promises externally
});
resolve(arg: AssignmentPair) {
arg;
arg; // no-op reference to suppress TypeScript TS6133 (unused parameter)
}
}

Expand Down

0 comments on commit 4b50a80

Please sign in to comment.