
Commit

v1.3.2
djkcyl committed Apr 3, 2023
1 parent b64281c commit e80bab8
Showing 10 changed files with 200 additions and 197 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -12,5 +12,5 @@ venv
.venv
nuitka/
dist/
- .pdm.toml
+ .pdm-python
aunly_bbot/static/test.js
2 changes: 1 addition & 1 deletion aunly_bbot/cli/api.py
@@ -9,7 +9,7 @@
from playwright.async_api._generated import BrowserContext
from graiax.playwright.installer import install_playwright

- from ..utils.dynamic_shot import screenshot
+ from ..utils.browser_shot import screenshot
from ..utils.fonts_provider import get_font
from ..utils.detect_package import is_package

6 changes: 3 additions & 3 deletions aunly_bbot/function/command/content_resolve.py
@@ -123,7 +123,7 @@ async def openai_summarization():
logger.warning(f"视频 {aid} 总结失败:{ai_summary.raw}")
return

- if summarise.lower().startswith("none"):
+ if "no meaning" in summarise.lower():
nonlocal chatgpt_thinks
chatgpt_thinks = False
raise AbortError("ChatGPT 认为这些字幕没有意义")
@@ -140,7 +140,7 @@ async def openai_summarization():
quote=info_message.source,
)
except AbortError as e:
- logger.info(f"视频 {aid} 总结被终止:{e}")
+ logger.warning(f"视频 {aid} 总结被终止:{e}")
except Exception:
capture_exception()
logger.exception(f"视频 {aid} 总结出错")
@@ -228,7 +228,7 @@ async def openai_summarization():
group, MessageChain(Image(data_bytes=image)), quote=source
)
except AbortError as e:
- logger.info(f"专栏 {column_id} 总结被终止:{e}")
+ logger.warning(f"专栏 {column_id} 总结被终止:{e}")
except Exception:
capture_exception()
logger.exception(f"专栏 {column_id} 总结出错")
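
Note on the check above: the summariser's refusal sentinel changes from a reply starting with "none" to the phrase "no meaning" anywhere in the reply, matching the new system prompt added in aunly_bbot/utils/openai.py below. A minimal sketch of how that sentinel gates the push flow; the helper name and the demo call are illustrative, not part of the commit:

import asyncio

from loguru import logger


class AbortError(Exception):
    """Deliberate early exit: not a crash, but worth surfacing in the log."""


def check_summary(summarise: str) -> str:
    # The model is now instructed to answer with the literal words "no meaning"
    # when a transcript is too thin to summarise, so a substring check is more
    # robust than the old startswith("none") test against casing or punctuation.
    if "no meaning" in summarise.lower():
        raise AbortError("ChatGPT 认为这些字幕没有意义")
    return summarise


async def demo() -> None:
    try:
        check_summary("No meaning.")
    except AbortError as e:
        # Mirrors the logging change above: aborted summaries are warnings now.
        logger.warning(f"总结被终止:{e}")


asyncio.run(demo())
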
2 changes: 1 addition & 1 deletion aunly_bbot/function/pusher/dynamic.py
@@ -264,7 +264,7 @@ async def push(app: Ariadne, dyn: DynamicItem):
MessageChain(msg),
)
insert_dyn_push_to_group(dynid, data.group)
- await asyncio.sleep(2)
+ await asyncio.sleep(5)
except UnknownTarget:
logger.warning(
f"[BiliBili推送] {dynid} | {up_name}({up_id}) 推送失败,找不到该群 {data.group},正在取消订阅"
4 changes: 2 additions & 2 deletions aunly_bbot/function/pusher/live.py
@@ -137,7 +137,6 @@ async def main(app: Ariadne):
int(data.group),
MessageChain(msg),
)
- await asyncio.sleep(1)
except UnknownTarget:
delete = await delete_group(data.group)
logger.info(
@@ -146,6 +145,7 @@ async def main(app: Ariadne):
except AccountMuted:
group = f"{group.name}({group.id})" if group else data.group
logger.warning(f"[BiliBili推送] 推送失败,账号在 {group} 被禁言")
+ await asyncio.sleep(5)

insert_live_push(
up_id, True, len(get_sub_by_uid(up_id)), title, area_parent, area
@@ -204,7 +204,7 @@ async def main(app: Ariadne):
except Exception: # noqa
capture_exception()
logger.exception("[BiliBili推送] 推送失败,未知错误")
- await asyncio.sleep(1)
+ await asyncio.sleep(5)
insert_live_push(up_id, False, len(get_sub_by_uid(up_id)))
else:
logger.warning(f"[BiliBili推送] 未找到订阅 UP {up_name}({up_id})的群,正在退订!")
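
Note on the sleep changes above: the per-group delay in both pushers is raised to 5 seconds, and in live.py it now sits after the UnknownTarget / AccountMuted handlers rather than inside the try block, so every group is paced even when a send fails. A rough sketch of that pattern with a stand-in send function (not the project's actual Ariadne calls):

import asyncio

from loguru import logger


async def push_to_groups(groups: list[int], send) -> None:
    for group in groups:
        try:
            await send(group)
        except Exception as e:  # stand-in for the UnknownTarget / AccountMuted branches
            logger.warning(f"[BiliBili推送] 推送失败,群 {group}{e}")
        # The sleep lives outside the try/except, so successes and failures are
        # throttled identically and a burst of errors cannot hammer the API.
        await asyncio.sleep(5)


async def fake_send(group: int) -> None:
    if group == 2:
        raise RuntimeError("unknown target")


asyncio.run(push_to_groups([1, 2, 3], fake_send))
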
6 changes: 3 additions & 3 deletions aunly_bbot/utils/content_summarise.py
@@ -2,15 +2,15 @@

from loguru import logger

- from .openai import openai_req, get_small_size_transcripts, get_user_prompt, get_simple_prompt
+ from .openai import openai_req, get_small_size_transcripts, get_user_prompt, get_full_prompt


async def subtitle_summarise(sub: list[str], title: str):
"""请求字幕总结"""
small_size_transcripts = get_small_size_transcripts(sub)
prompt = get_user_prompt(title, small_size_transcripts)
logger.debug(prompt)
- return await openai_req(get_simple_prompt(prompt))
+ return await openai_req(get_full_prompt(prompt, system=True))


async def column_summarise(cv_title: str, cv_text: str):
@@ -19,4 +19,4 @@ async def column_summarise(cv_title: str, cv_text: str):
small_size_transcripts = get_small_size_transcripts(sentences)
prompt = get_user_prompt(cv_title, small_size_transcripts)
logger.debug(prompt)
- return await openai_req(get_simple_prompt(prompt))
+ return await openai_req(get_full_prompt(prompt, system=True))
49 changes: 29 additions & 20 deletions aunly_bbot/utils/openai.py
@@ -25,26 +25,12 @@
def get_user_prompt(title: str, transcript: str) -> str:
title = title.replace("\n", " ").strip() if title else ""
transcript = transcript.replace("\n", " ").strip() if transcript else ""
- language = "Chinese"
- prompt = (
- "Your output should use the following template:\n## Summary\n## Highlights\n"
- "- [Emoji] Bulletpoint\n\n"
- "Your task is to summarise the video I have given you in up to 2 to 6 concise bullet points, "
- "starting with a short highlight, each bullet point is at least 15 words. "
- "Choose an appropriate emoji for each bullet point. "
- f"Use the video above: {{Title}} {{Transcript}}."
- "If you think the content in the transcript is meaningless or nonsensical, "
- "you can choose to skip summarization and simply output 'none'."
- f"\n\nReply in {language} Language."
- )
- return f'Title: "{title}"\nTranscript: "{transcript}"\n\nInstructions: {prompt}'
+ return f'Title: "{title}"\nTranscript: "{transcript}"'


- def count_tokens(contents: list[str]):
+ def count_tokens(prompts: list[dict[str, str]]):
"""根据内容计算 token 数"""

- content = get_simple_prompt(get_user_prompt("", " ".join(contents)))

if BotConfig.Bilibili.openai_model == "gpt-3.5-turbo-0301":
tokens_per_message = 4
tokens_per_name = -1
@@ -55,7 +41,7 @@ def count_tokens(contents: list[str]):
raise ValueError(f"Unknown model name {BotConfig.Bilibili.openai_model}")

num_tokens = 0
- for message in content:
+ for message in prompts:
num_tokens += tokens_per_message
for key, value in message.items():
num_tokens += len(tiktoken_enc.encode(value))
@@ -67,13 +53,35 @@

def get_small_size_transcripts(text_data: list[str], token_limit: int = LIMIT_COUNT):
unique_texts = list(OrderedDict.fromkeys(text_data))
- while count_tokens(unique_texts) > token_limit:
+ while (
+ count_tokens(
+ get_full_prompt(get_user_prompt("", " ".join(unique_texts)), system=True)
+ )
+ > token_limit
+ ):
unique_texts.pop(random.randint(0, len(unique_texts) - 1))
return " ".join(unique_texts)


- def get_simple_prompt(prompt: str):
- return [{"role": "user", "content": prompt}]
+ def get_full_prompt(prompt: str, system: bool = False):
+ plist: list[dict[str, str]] = []
+ if system:
+ language = "Chinese"
+ sys_prompt = (
+ "Your output should use the following template:\n## Summary\n## Highlights\n"
+ "- [Emoji] Bulletpoint\n\n"
+ "Your task is to summarise the video I have given you in up to 2 to 6 concise bullet points, "
+ "starting with a short highlight, each bullet point is at least 15 words. "
+ "Choose an appropriate emoji for each bullet point. "
+ f"Use the video above: {{Title}} {{Transcript}}."
+ "If you think that the content in the transcript is meaningless, "
+ "Or if there is very little content that cannot be well summarized, "
+ "then you can simply output the three words 'no meaning' Remember not to output anything else."
+ f"\n\nReply in {language} Language."
+ )
+ plist.append({"role": "system", "content": sys_prompt})
+ plist.append({"role": "user", "content": prompt})
+ return plist


async def openai_req(
@@ -101,5 +109,6 @@ async def openai_req(
)
if req.status_code != 200:
return AISummary(error=True, message=req.text, raw=req.json())
logger.info(f"[OpenAI] Response: {req.json()['choices'][0]['message']['content']}")
+ logger.info(f"[OpenAI] Response token 实际: {req.json()['usage']}")
return AISummary(summary=req.json()["choices"][0]["message"]["content"], raw=req.json())
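
count_tokens now receives the fully assembled message list, so the budget enforced by get_small_size_transcripts covers the system prompt as well as the user content. A rough sketch of that per-message accounting with tiktoken, following the OpenAI cookbook recipe the constants above (tokens_per_message = 4, tokens_per_name = -1 for gpt-3.5-turbo-0301) come from; the reply-priming +3 and the example messages are assumptions, not copied from this file:

import tiktoken

tiktoken_enc = tiktoken.get_encoding("cl100k_base")  # encoding used by gpt-3.5-turbo


def count_tokens(prompts: list[dict[str, str]]) -> int:
    tokens_per_message, tokens_per_name = 4, -1  # gpt-3.5-turbo-0301 values
    num_tokens = 0
    for message in prompts:
        num_tokens += tokens_per_message  # framing overhead per message
        for key, value in message.items():
            num_tokens += len(tiktoken_enc.encode(value))
            if key == "name":
                num_tokens += tokens_per_name
    return num_tokens + 3  # reply priming, per the cookbook recipe


messages = [
    {"role": "system", "content": "You summarise videos into bullet points."},
    {"role": "user", "content": 'Title: "demo"\nTranscript: "some subtitle text"'},
]
print(count_tokens(messages))  # the number compared against LIMIT_COUNT

get_small_size_transcripts then keeps dropping random deduplicated subtitle lines until this count falls under the limit, as the new while loop above shows.
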