Skip to content

Commit

Permalink
feat(translator): Support entities translation mapping table
Browse files Browse the repository at this point in the history
  • Loading branch information
rxliuli committed Dec 6, 2024
1 parent dc8ee13 commit 3a5c120
Show file tree
Hide file tree
Showing 5 changed files with 238 additions and 31 deletions.
3 changes: 2 additions & 1 deletion packages/plugin-translator/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
"setup": "pnpm build",
"build": "novachat",
"dev": "pnpm build --watch",
"prepublishOnly": "pnpm build"
"prepublishOnly": "pnpm build",
"test": "vitest run"
},
"sideEffects": false,
"devDependencies": {
Expand Down
123 changes: 123 additions & 0 deletions packages/plugin-translator/src/__tests__/utils.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import { beforeEach, describe, expect, it, Mock, vi } from 'vitest'
import { getMessages, parseEntities, translate } from '../utils'

// parseEntities turns "key: value" lines into [key, value] tuples.
it('parseEntities', async () => {
  // One pair per line, with surrounding whitespace trimmed.
  const multiLine = parseEntities(
    'original 1: translated 1\noriginal 2: translated 2',
  )
  expect(multiLine).toEqual([
    ['original 1', 'translated 1'],
    ['original 2', 'translated 2'],
  ])

  // A delimiter with no surrounding whitespace is still recognised.
  const compact = parseEntities('Example:示例')
  expect(compact).toEqual([['Example', '示例']])
})

// getMessages builds the [system, user] message pair sent to the model.
describe('getMessages', () => {
  // Prompt template shared by every case; {{to}} and {{entities}} are the
  // placeholders getMessages is expected to fill in.
  const SYSTEM_PROMPT =
    'You are a professional, authentic machine translation engine. Translate the following source text to {{to}}, Output translation directly without any additional text.\nTranslate the following entities: \n{{entities}}'

  it('getMessages', () => {
    const messages = getMessages({
      systemPrompt: SYSTEM_PROMPT,
      entities: [['Example', '示例']],
      content: 'Example',
      toLanguage: 'zh-CN',
    })
    expect(messages).toEqual([
      {
        role: 'system',
        content:
          'You are a professional, authentic machine translation engine. Translate the following source text to zh-CN, Output translation directly without any additional text.\nTranslate the following entities: \nExample: 示例',
      },
      {
        role: 'user',
        content: 'Example',
      },
    ])
  })

  // Entities that do not occur in the content must be left out of the prompt.
  it('getMessages with no match entities', () => {
    const messages = getMessages({
      systemPrompt: SYSTEM_PROMPT,
      entities: [['Example', '示例']],
      content: 'Test',
      toLanguage: 'zh-CN',
    })
    expect(messages).toEqual([
      {
        role: 'system',
        content:
          'You are a professional, authentic machine translation engine. Translate the following source text to zh-CN, Output translation directly without any additional text.\nTranslate the following entities: \n',
      },
      {
        role: 'user',
        content: 'Test',
      },
    ])
  })

  it('getMessages with lower case entities', () => {
    const messages = getMessages({
      systemPrompt: SYSTEM_PROMPT,
      entities: [['example', '示例']],
      content: 'example',
      toLanguage: 'zh-CN',
    })
    expect(messages).toEqual([
      {
        role: 'system',
        content:
          'You are a professional, authentic machine translation engine. Translate the following source text to zh-CN, Output translation directly without any additional text.\nTranslate the following entities: \nexample: 示例',
      },
      {
        role: 'user',
        content: 'example',
      },
    ])
  })
})

// translate streams model output chunk by chunk, splitting the input on the
// "\n\n---\n\n" separator.
describe('translate', () => {
  const SYSTEM_PROMPT =
    'You are a professional, authentic machine translation engine. Translate the following source text to {{to}}, Output translation directly without any additional text.'

  // Fake model stream: echoes the user message back as a single chunk.
  const echoStream: Mock = vi.fn().mockImplementation(async function* (request) {
    yield {
      content: request.messages[1].content,
    }
  })

  // Reset recorded calls so per-test call counts start from zero.
  beforeEach(() => {
    vi.clearAllMocks()
  })

  it('translate stream', async () => {
    const stream = translate({
      systemPrompt: SYSTEM_PROMPT,
      entities: [],
      content: 'Example',
      toLanguage: 'zh-CN',
      model: 'gpt-4o',
      t: echoStream,
    })
    for await (const chunk of stream) {
      expect(chunk.content).eq('Example')
    }
    expect(echoStream).toHaveBeenCalledTimes(1)
  })

  it('split chunks', async () => {
    const stream = translate({
      systemPrompt: SYSTEM_PROMPT,
      entities: [],
      content: 'Example 1\n\n---\n\nExample 2',
      toLanguage: 'zh-CN',
      model: 'gpt-4o',
      t: echoStream,
    })
    const received: string[] = []
    for await (const chunk of stream) {
      received.push(chunk.content)
    }
    // One model call per section, each receiving only its own section.
    expect(echoStream).toHaveBeenCalledTimes(2)
    expect(echoStream.mock.calls[0][0].messages[1].content).eq('Example 1')
    expect(echoStream.mock.calls[1][0].messages[1].content).eq('Example 2')
    // The separator is re-emitted between the translated sections.
    expect(received).toEqual(['Example 1', '\n\n---\n\n', 'Example 2'])
  })
})
64 changes: 34 additions & 30 deletions packages/plugin-translator/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,24 @@ import * as novachat from '@novachat/plugin'
import { last } from 'lodash-es'
import { franc } from 'franc-min'
import { configuration } from './plugin.json'
import { parseEntities, translate } from './utils'

/**
 * Picks the language the given text should be translated into and returns the
 * value to substitute for `{{to}}` in the system prompt.
 *
 * The text's language is detected with `franc`. If it matches the configured
 * `translator.localLanguage` (default `'eng'`), the target is `'eng'`;
 * otherwise the target is the local language.
 *
 * @param content Text whose language is detected.
 * @returns The human-readable description of the target language from the
 *   plugin configuration, or the raw language code when the configuration has
 *   no description for it.
 */
async function getTargetLanguage(content: string) {
  const localLanguage =
    (await novachat.setting.get('translator.localLanguage')) ?? 'eng'
  const detectedLanguage = franc(content)
  const toLanguage = detectedLanguage === localLanguage ? 'eng' : localLanguage
  const translateConfig = configuration.properties['translator.localLanguage']
  // NOTE(review): assumes `enum` and `enumDescriptions` in plugin.json are
  // parallel arrays (code at index i is described at index i) — confirm.
  // Indexing `enum` with its own `indexOf` result would only hand back the
  // code (or `undefined` when it is missing), so look up the description.
  const index = translateConfig.enum.indexOf(toLanguage)
  return translateConfig.enumDescriptions[index] ?? toLanguage
}

/**
 * Resolves the model id to use for translation.
 *
 * Reads the `translator.model` setting and the host's default model
 * concurrently; the setting wins when it is set.
 *
 * @returns The configured model, otherwise the default model's `id`
 *   (`undefined` when neither is available).
 */
async function getModel() {
  const pending = [
    novachat.setting.get('translator.model'),
    novachat.model.getDefault(),
  ] as const
  const [configured, fallback] = await Promise.all(pending)
  if (configured != null) {
    return configured
  }
  return fallback?.id
}

export async function activate() {
await novachat.model.registerBot({
Expand All @@ -10,40 +28,26 @@ export async function activate() {
async *stream(
query: novachat.QueryRequest,
): AsyncGenerator<novachat.QueryChunkResponse> {
const systemPrompt = await novachat.setting.get('translator.systemPrompt')
const lastMessage = last(query.messages)
if (!lastMessage) {
const [systemPrompt, entitiesString] = await Promise.all([
novachat.setting.get('translator.systemPrompt'),
novachat.setting.get('translator.entities'),
])
const content = last(query.messages)?.content
if (!content) {
throw new Error('No last message')
}
const defaultModel =
((await novachat.setting.get('translator.model')) as string) ??
(await novachat.model.getDefault())?.id
if (!defaultModel) {
const model = await getModel()
if (!model) {
throw new Error('No default model')
}
const localLanguage =
(await novachat.setting.get('translator.localLanguage')) ?? 'eng'
const language = franc(lastMessage.content)
const toLanguage = language === localLanguage ? 'eng' : localLanguage
const translateConfig =
configuration.properties['translator.localLanguage']
const stream = novachat.model.stream({
messages: [
{
role: 'system',
content: systemPrompt.replace(
'{{to}}',
translateConfig.enumDescriptions[
translateConfig.enum.indexOf(toLanguage)
],
),
},
{
role: 'user',
content: lastMessage.content,
},
],
model: defaultModel,
const toLanguage = await getTargetLanguage(content)
const stream = translate({
systemPrompt,
entities: parseEntities(entitiesString),
content,
toLanguage,
model,
t: novachat.model.stream,
})
for await (const it of stream) {
yield it
Expand Down
6 changes: 6 additions & 0 deletions packages/plugin-translator/src/plugin.json
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,12 @@
"type": "string",
"description": "Model",
"format": "model"
},
"translator.entities": {
"type": "string",
"description": "Entities",
"default": "original 1: translated 1\noriginal 2: translated 2",
"format": "markdown"
}
}
}
Expand Down
73 changes: 73 additions & 0 deletions packages/plugin-translator/src/utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import type * as novachat from '@novachat/plugin'

/**
 * Parses the entity mapping text into `[original, translated]` pairs.
 *
 * Each line is expected to look like `original: translated`. The delimiter
 * may be the ASCII colon (`:`) or the full-width colon (U+FF1A) produced by
 * CJK input methods. A line is split at whichever delimiter occurs first, so
 * any later colons stay part of the translated value. Lines with no
 * delimiter, or with an empty key or value after trimming, are skipped.
 *
 * @param entities Raw mapping text, one entry per line.
 * @returns The valid pairs in their original order.
 */
export function parseEntities(entities: string): [string, string][] {
  // The full-width colon is written as an escape so it survives tools that
  // normalise punctuation to ASCII.
  const delimiter = /[:\uFF1A]/
  const pairs: [string, string][] = []
  for (const line of entities.split('\n')) {
    const i = line.search(delimiter)
    if (i === -1) {
      continue
    }
    // Both delimiters are a single UTF-16 code unit, so `i + 1` skips exactly one.
    const original = line.slice(0, i).trim()
    const translated = line.slice(i + 1).trim()
    if (original && translated) {
      pairs.push([original, translated])
    }
  }
  return pairs
}

/**
 * Builds the `[system, user]` message pair for one translation request.
 *
 * The first `{{to}}` in `systemPrompt` is replaced with `toLanguage`, and the
 * first `{{entities}}` with the entity mappings whose original term occurs in
 * `content` (case-insensitive substring match). Mappings are rendered one per
 * line as `original: translated`. When nothing matches, `{{entities}}`
 * becomes an empty string.
 *
 * @param options.systemPrompt Prompt template containing the placeholders.
 * @param options.entities `[original, translated]` mapping pairs.
 * @param options.content Text to translate; sent unchanged as the user message.
 * @param options.toLanguage Value substituted for `{{to}}`.
 * @returns The system message followed by the user message.
 */
export function getMessages(options: {
  systemPrompt: string
  entities: [string, string][]
  content: string
  toLanguage: string
}): novachat.QueryRequest['messages'] {
  const { systemPrompt, entities, content, toLanguage } = options
  // Lower-case the content once instead of once per entity.
  const haystack = content.toLowerCase()
  const matchedEntities = entities
    .filter(([k]) => haystack.includes(k.toLowerCase()))
    .map(([k, v]) => `${k}: ${v}`)
    .join('\n')
  return [
    {
      role: 'system',
      // Function replacers insert the text verbatim. A string replacement
      // would interpret `$&`, `$$`, etc. inside user-supplied entity text.
      content: systemPrompt
        .replace('{{to}}', () => toLanguage)
        .replace('{{entities}}', () => matchedEntities),
    },
    {
      role: 'user',
      content: content,
    },
  ]
}

/**
 * Translates `content` section by section and streams the result.
 *
 * The content is split on the `"\n\n---\n\n"` separator. For every section,
 * `t` is called with the messages built by `getMessages` and each chunk it
 * produces is forwarded unchanged. Between two consecutive sections the
 * separator itself is emitted as an `assistant` chunk, so the streamed output
 * keeps the section layout of the input.
 *
 * @param options.systemPrompt Prompt template passed to `getMessages`.
 * @param options.entities Entity mapping pairs passed to `getMessages`.
 * @param options.content Full text to translate.
 * @param options.toLanguage Target language passed to `getMessages`.
 * @param options.model Model identifier forwarded to `t`.
 * @param options.t Streaming function invoked once per section.
 */
export async function* translate(options: {
  systemPrompt: string
  entities: [string, string][]
  content: string
  toLanguage: string
  model: string
  t: typeof novachat.model.stream
}) {
  const SPLITTER = '\n\n---\n\n'
  const sections = options.content.split(SPLITTER)
  for (const [index, section] of sections.entries()) {
    // Emit the separator ahead of every section except the first; this is
    // the same sequence as emitting it after every section except the last.
    if (index > 0) {
      yield {
        role: 'assistant',
        content: SPLITTER,
      }
    }
    const messages = getMessages({
      systemPrompt: options.systemPrompt,
      entities: options.entities,
      content: section,
      toLanguage: options.toLanguage,
    })
    const sectionStream = options.t({ messages, model: options.model })
    for await (const part of sectionStream) {
      yield part
    }
  }
}

0 comments on commit 3a5c120

Please sign in to comment.