Skip to content

Commit

Permalink
doc: annotate various types and functions
Browse files Browse the repository at this point in the history
matching Python doc comments written by Théo
  • Loading branch information
nbsp committed May 22, 2024
1 parent 8fa6bdc commit 4b50a80
Show file tree
Hide file tree
Showing 8 changed files with 81 additions and 3 deletions.
4 changes: 3 additions & 1 deletion agents/src/generator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ export interface Agent {

/**
* Helper to define an agent according to the required interface.
* @example `export default defineAgent(myAgent);`
* @example export default defineAgent(async (job: JobContext) => {
* // ...
* });
*/
export function defineAgent(entry: entryFunction): Agent {
return { entry };
Expand Down
1 change: 1 addition & 0 deletions agents/src/ipc/job_main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ if (process.send) {
});

// don't do anything on C-c
// this is handled in cli, triggering a termination of all child processes at once.
process.on('SIGINT', () => {});

const conn = room.connect(args.url || process.argv[4], args.token);
Expand Down
4 changes: 4 additions & 0 deletions agents/src/job_context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ import type { Job } from '@livekit/protocol';
import type { LocalParticipant, RemoteParticipant, Room } from '@livekit/rtc-node';
import type { EventEmitter } from 'events';

/**
* JobContext contains information about the job, the room, and the participant.
 * It is created internally by {@link Worker} and should not be instantiated manually.
*/
export class JobContext {
#job: Job;
#room: Room;
Expand Down
27 changes: 27 additions & 0 deletions agents/src/stt/stt.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,25 @@ import type { AudioFrame } from '@livekit/rtc-node';
import type { AudioBuffer } from '../utils.js';

export enum SpeechEventType {
  /**
   * Indicates the start of speech.
   * If the STT doesn't support this event, this will be emitted at the same time
   * as the first INTERIM_TRANSCRIPT.
   */
  START_OF_SPEECH = 0,
  /**
   * Interim transcript, useful for real-time transcription.
   */
  INTERIM_TRANSCRIPT = 1,
  /**
   * Final transcript, emitted when the STT is confident enough that a certain
   * portion of the speech will not change.
   */
  FINAL_TRANSCRIPT = 2,
  /**
   * Indicates the end of speech, emitted when the user stops speaking.
   * The first alternative is a combination of all the previous FINAL_TRANSCRIPT events.
   */
  END_OF_SPEECH = 3,
}

Expand All @@ -30,8 +46,19 @@ export class SpeechEvent {
}

export abstract class SpeechStream implements IterableIterator<SpeechEvent> {
/**
* Push a frame to be recognised.
* It is recommended to push frames as soon as they are available.
*/
abstract pushFrame(token: AudioFrame): void;

/**
* Close the stream.
*
* @param wait
* Whether to wait for the STT to finish processing the remaining
* frames before closing
*/
abstract close(wait: boolean): Promise<void>;

abstract next(): IteratorResult<SpeechEvent>;
Expand Down
10 changes: 10 additions & 0 deletions agents/src/tts/tts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,18 @@ export interface SynthesizedAudio {
}

export enum SynthesisEventType {
  /**
   * Indicates the start of synthesis.
   * Retriggered after a FINISHED event, when a new synthesis begins.
   */
  STARTED = 0,
  /**
   * Indicates that synthesized audio data is available.
   */
  AUDIO = 1,
  /**
   * Indicates the end of synthesis. Does not necessarily mean the stream is done.
   */
  FINISHED = 2,
}

Expand Down
5 changes: 5 additions & 0 deletions agents/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ import { AudioFrame } from '@livekit/rtc-node';

export type AudioBuffer = AudioFrame[] | AudioFrame;

/**
* Merge one or more {@link AudioFrame}s into a single one.
*
* @param buffer Either an {@link AudioFrame} or a list thereof
*/
export const mergeFrames = (buffer: AudioBuffer): AudioFrame => {
if (Array.isArray(buffer)) {
buffer = buffer as AudioFrame[];
Expand Down
29 changes: 29 additions & 0 deletions agents/src/vad.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,52 @@ export enum VADEventType {

export interface VADEvent {
  /**
   * Kind of VAD event that occurred (see {@link VADEventType}).
   */
  type: VADEventType;
  /**
   * Index of the samples of the event (when the event was fired)
   */
  samplesIndex: number;
  /**
   * Duration of speech, in seconds
   */
  duration: number;
  /**
   * Audio frames containing the speech associated with this event.
   */
  speech: AudioFrame[];
}

/**
 * Base class for voice activity detection (VAD) implementations.
 * Subclasses provide a concrete {@link VADStream} via {@link VAD.stream}.
 */
export abstract class VAD {
  /**
   * Returns a {@link VADStream} that can be used to push audio frames and receive VAD events.
   *
   * @param options - Options tuning the detector's sensitivity and buffering behavior
   */
  abstract stream({
    minSpeakingDuration,
    minSilenceDuration,
    paddingDuration,
    sampleRate,
    maxBufferedSpeech,
  }: {
    /**
     * Minimum duration of speech required to trigger a {@link VADEventType.START_OF_SPEECH} event
     */
    minSpeakingDuration: number;
    /**
     * Milliseconds to wait before separating speech chunk.
     * Not always precise, generally rounded to the nearest 40ms depending on VAD implementation
     */
    minSilenceDuration: number;
    /**
     * Number of frames to pad the start and end of speech with
     */
    paddingDuration: number;
    /**
     * Sample rate of inference/processing
     */
    sampleRate: number;
    /**
     * Number of seconds the buffer may keep until {@link VADEventType.END_OF_SPEECH} is triggered.
     * It is recommended to set this to a positive value, as zero may OOM if the user doesn't stop
     * speaking.
     */
    maxBufferedSpeech: number;
  }): VADStream;
}
Expand Down
4 changes: 2 additions & 2 deletions agents/src/worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -128,10 +128,10 @@ type AssignmentPair = {

class PendingAssignment {
promise = new Promise<AssignmentPair>((resolve) => {
this.resolve = resolve; // oh, JavaScript.
this.resolve = resolve; // this is how JavaScript lets you resolve promises externally
});
resolve(arg: AssignmentPair) {
arg;
arg; // no-op reference to suppress TypeScript TS6133 (unused parameter)
}
}

Expand Down

0 comments on commit 4b50a80

Please sign in to comment.