Skip to content

Commit

Permalink
v1.3.1
Browse files (browse the repository at this point in the history)
  • Loading branch information
djkcyl committed Mar 31, 2023
1 parent f9b6938 commit b64281c
Show file tree
Hide file tree
Showing 17 changed files with 523 additions and 131 deletions.
7 changes: 7 additions & 0 deletions aunly_bbot/bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,3 +96,10 @@
else:
saya.require("aunly_bbot.function")
from . import function # noqa

# Load user-supplied Saya modules from data/custom (the directory is created
# on first run so users have an obvious place to drop their modules).
custom_path = Path("data", "custom")
custom_path.mkdir(parents=True, exist_ok=True)
# Sorted so the modules are required in the same order on every platform;
# the order produced by glob() alone depends on the file system.
for module in sorted(custom_path.glob("*")):
    # Skip the bytecode cache and hidden entries such as .DS_Store/.gitkeep.
    if module.name == "__pycache__" or module.name.startswith("."):
        continue
    # Only packages (directories) and .py files can be imported; any other
    # stray file would otherwise be handed to saya.require as a module name.
    if not module.is_dir() and module.suffix != ".py":
        continue
    logger.info(f"正在加载自定义模块:{module.stem}")
    saya.require(f"data.custom.{module.stem}")
16 changes: 16 additions & 0 deletions aunly_bbot/cli/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def __init__(self) -> None:
self.bilibili_username()
self.use_bilibili_login()
self.wordcloud()
self.bcut_asr()
self.event()
self.webui()
self.log_level()
Expand Down Expand Up @@ -322,6 +323,21 @@ def wordcloud(self):
else:
self.config["Bilibili"]["use_wordcloud"] = False

def bcut_asr(self):
    """Ask whether Bilibili ASR should be used for speech recognition.

    The question is only shown when AI summarisation or the word cloud is
    enabled in the Bilibili section; otherwise ``use_bcut_asr`` is stored
    as ``False`` without prompting.
    """
    bilibili = self.config["Bilibili"]

    # Neither consumer of the recognised text is enabled: nothing to ask.
    if not (bilibili["openai_summarization"] or bilibili["use_wordcloud"]):
        bilibili["use_bcut_asr"] = False
        return

    answer = ListPrompt(
        "是否使用 Bilibili ASR 进行视频内容语音识别?(用于 AI 总结和词云制作)",
        [Choice("是(开启)"), Choice("否(关闭)")],
        allow_filter=False,
        annotation="使用键盘的 ↑ 和 ↓ 来选择, 按回车确认",
    ).prompt()
    # The selected choice is compared by its display name.
    bilibili["use_bcut_asr"] = answer.name == "是(开启)"

def bilibili_concurrent(self):
while True:
concurrent = InputPrompt("请输入并发数(理论该值越大推送效率越高): ", default_text="10").prompt()
Expand Down
54 changes: 39 additions & 15 deletions aunly_bbot/function/command/content_resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
from graia.saya import Channel
from grpc.aio import AioRpcError
from graia.ariadne.app import Ariadne
from graia.ariadne.model import Group
from sentry_sdk import capture_exception
from bilireq.exceptions import GrpcError
from graia.ariadne.model import Group, Member
from httpx._exceptions import TimeoutException
from graia.broadcast.exceptions import ExecutionStop
from graia.ariadne.event.message import GroupMessage
from graia.ariadne.message.chain import MessageChain
from graia.ariadne.message.element import Image, Source
Expand All @@ -18,19 +19,21 @@
from ...model.exception import AbortError
from ...utils.column_resolve import get_cv
from ...core.data import ContentResolveData
from ...utils.text2image import rich_text2image
from ...core.control import Interval, Permission
from ...utils.video_subtitle import get_subtitle
from ...utils.message_resolve import message_resolve
from ...utils.draw_bili_image import binfo_image_create
from ...utils.text2image import rich_text2image, browser_text2image
from ...utils.bilibili_request import get_b23_url, grpc_get_view_info
from ...utils.content_summarise import column_summarise, get_browser_image, subtitle_summarise
from ...utils.content_summarise import column_summarise, subtitle_summarise

channel = Channel.current()


@channel.use(ListenerSchema(listening_events=[GroupMessage], decorators=[Permission.require()]))
async def main(app: Ariadne, group: Group, message: MessageChain, source: Source):
async def main(
app: Ariadne, group: Group, member: Member, message: MessageChain, source: Source
):
bili_number = await message_resolve(message)
if not bili_number:
return
Expand Down Expand Up @@ -68,12 +71,12 @@ async def main(app: Ariadne, group: Group, message: MessageChain, source: Source
title = video_info.activity_season.arc.title or video_info.arc.title
archive_data = ContentResolveData(aid=aid)
archive_data.title = title
await Interval.manual(aid + group.id)
await Interval.manual(aid + group.id, 30)
try:
logger.info(f"开始生成视频信息图片:{aid}")
b23_url = await get_b23_url(f"https://www.bilibili.com/video/{bvid}")
image = await binfo_image_create(video_info, b23_url)
await app.send_group_message(
info_message = await app.send_group_message(
group,
MessageChain(
Image(data_bytes=image),
Expand All @@ -90,6 +93,14 @@ async def main(app: Ariadne, group: Group, message: MessageChain, source: Source
subtitle = await get_subtitle(aid, cid)
archive_data.content = json.dumps(subtitle, ensure_ascii=False)

if (
len(subtitle) < 10
or video_info.arc.duration < BotConfig.Bilibili.asr_length_threshold
):
raise AbortError("字幕内容过少且视频时长过短,跳过总结请求")

chatgpt_thinks = True

async def openai_summarization():
logger.info(f"开始进行 AI 总结:{aid}")
try:
Expand All @@ -98,6 +109,12 @@ async def openai_summarization():
summarise = archive_data.openai
else:
logger.info(f"{aid} 总结不存在,正在尝试请求......")
try:
await Interval.manual(member, 600)
except ExecutionStop:
msg = f"{member.id} 在 10 分钟内已经请求过总结,跳过本次请求"
logger.info(msg)
raise AbortError(msg)
ai_summary = await subtitle_summarise(subtitle, title)
if ai_summary.summary:
summarise = ai_summary.summary
Expand All @@ -106,14 +123,21 @@ async def openai_summarization():
logger.warning(f"视频 {aid} 总结失败:{ai_summary.raw}")
return

if summarise.lower().startswith("none"):
nonlocal chatgpt_thinks
chatgpt_thinks = False
raise AbortError("ChatGPT 认为这些字幕没有意义")

logger.debug(summarise)
if BotConfig.Bilibili.use_browser:
image = await get_browser_image(summarise)
image = await browser_text2image(summarise)
else:
image = await rich_text2image(summarise)
if image:
await app.send_group_message(
group, MessageChain(Image(data_bytes=image))
group,
MessageChain(Image(data_bytes=image)),
quote=info_message.source,
)
except AbortError as e:
logger.info(f"视频 {aid} 总结被终止:{e}")
Expand Down Expand Up @@ -141,18 +165,18 @@ async def wordcloud():
wordcloud = await get_worldcloud_image(word_frequencies)
if wordcloud:
await app.send_group_message(
group, MessageChain(Image(data_bytes=wordcloud))
group,
MessageChain(Image(data_bytes=wordcloud)),
quote=info_message.source,
)
except Exception:
capture_exception()
logger.exception(f"视频 {aid} 词云出错")

gather = []
if BotConfig.Bilibili.openai_summarization:
gather.append(openai_summarization())
if BotConfig.Bilibili.use_wordcloud:
gather.append(wordcloud())
await asyncio.gather(*gather, return_exceptions=True)
await openai_summarization()
if BotConfig.Bilibili.use_wordcloud and chatgpt_thinks:
await wordcloud()

except AbortError as e:
logger.warning(f"视频 {aid} 总结失败:{e.message}")
Expand Down Expand Up @@ -196,7 +220,7 @@ async def openai_summarization():
return

if BotConfig.Bilibili.use_browser:
image = await get_browser_image(summarise)
image = await browser_text2image(summarise)
else:
image = await rich_text2image(summarise)
if image:
Expand Down
1 change: 1 addition & 0 deletions aunly_bbot/function/event/invited_join_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,5 +46,6 @@ async def main(app: Ariadne, event: BotInvitedJoinGroupRequestEvent):
"该群不在白名单中,已拒绝加入",
),
)
await app.send_friend_message(event.supplicant, MessageChain("该群不在白名单中,已拒绝加入"))
except UnknownTarget:
logger.warning(f"由于未添加 {admin} 为好友,无法发送通知")
115 changes: 115 additions & 0 deletions aunly_bbot/model/bcut_asr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
from enum import Enum
from pydantic import BaseModel


class ASRDataSeg(BaseModel):
    """Speech recognition: one sentence-level segment."""

    class ASRDataWords(BaseModel):
        """Speech recognition: one word-level entry inside a segment."""

        label: str
        start_time: int
        end_time: int
        confidence: int

    start_time: int
    end_time: int
    transcript: str
    words: list[ASRDataWords]
    confidence: int

    def to_srt_ts(self) -> str:
        """Return the segment's time range as an SRT timestamp line.

        ``start_time`` and ``end_time`` are treated as milliseconds and
        rendered as ``HH:MM:SS,mmm --> HH:MM:SS,mmm``.
        """

        def _split(ms: int) -> tuple[int, int, int, int]:
            # Peel off milliseconds, then seconds, then minutes.
            total_seconds, millis = divmod(ms, 1000)
            total_minutes, seconds = divmod(total_seconds, 60)
            hours, minutes = divmod(total_minutes, 60)
            return hours, minutes, seconds, millis

        start = "{:02d}:{:02d}:{:02d},{:03d}".format(*_split(self.start_time))
        end = "{:02d}:{:02d}:{:02d},{:03d}".format(*_split(self.end_time))
        return f"{start} --> {end}"

    def to_lrc_ts(self) -> str:
        """Return the segment's start time as an LRC tag ``[MM:SS.xx]``.

        Only ``start_time`` is used; ``xx`` is hundredths of a second.
        """
        total_seconds, millis = divmod(self.start_time, 1000)
        minutes, seconds = divmod(total_seconds, 60)
        hundredths = millis // 10
        return f"[{minutes:02d}:{seconds:02d}.{hundredths:02d}]"


class ASRData(BaseModel):
    """Speech recognition result: a list of segments plus a version string."""

    utterances: list[ASRDataSeg]
    version: str

    def __iter__(self):
        """Iterate over the segments directly (delegates to ``utterances``)."""
        return iter(self.utterances)

    def has_data(self) -> bool:
        """Return ``True`` when at least one segment was recognised."""
        return bool(self.utterances)

    def to_txt(self) -> str:
        """Return the transcripts as plain text, one segment per line."""
        return "\n".join(seg.transcript for seg in self.utterances)

    def to_srt(self) -> str:
        """Return the result as SRT subtitles.

        Entries are numbered from 1 and separated by a blank line.
        """
        return "\n".join(
            f"{n}\n{seg.to_srt_ts()}\n{seg.transcript}\n"
            for n, seg in enumerate(self.utterances, 1)
        )

    def to_lrc(self) -> str:
        """Return the result as LRC lyrics, one tagged line per segment."""
        return "\n".join(f"{seg.to_lrc_ts()}{seg.transcript}" for seg in self.utterances)

    def to_ass(self) -> str:
        """Return the result as ASS subtitles (not implemented yet).

        Raises:
            NotImplementedError: always. The method used to fall through
                and return ``None`` despite its ``str`` annotation, which
                pushed the failure to wherever the caller used the value.
        """
        raise NotImplementedError("ASS subtitle export is not implemented yet")


class ResourceCreateRspSchema(BaseModel):
    """Response to an upload request (resource creation)."""

    resource_id: str
    title: str
    type: int
    in_boss_key: str
    size: int
    # NOTE(review): presumably one URL per upload part - confirm with the caller.
    upload_urls: list[str]
    upload_id: str
    # NOTE(review): looks like the size of each upload part - confirm.
    per_size: int


class ResourceCompleteRspSchema(BaseModel):
    """Response to an upload commit (upload completed)."""

    resource_id: str
    download_url: str


class TaskCreateRspSchema(BaseModel):
    """Response to a task creation request."""

    resource: str
    result: str
    task_id: str  # task id


class ResultStateEnum(Enum):
    """Task state values."""

    STOP = 0  # not started
    # The spelling "RUNING" is part of the public name; renaming it would
    # break existing references.
    RUNING = 1  # running
    ERROR = 3  # error
    COMPLETE = 4  # complete


class ResultRspSchema(BaseModel):
    """Response to a task result query."""

    task_id: str  # task id
    result: str  # result data, as a JSON string
    remark: str  # task status details
    state: ResultStateEnum  # task state

    def parse(self) -> ASRData:
        """Parse the JSON held in ``result`` into an ``ASRData`` model."""
        raw_json = self.result
        return ASRData.parse_raw(raw_json)
2 changes: 2 additions & 0 deletions aunly_bbot/model/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ class _Bilibili(BaseModel, extra=Extra.ignore):
openai_model: str = "gpt-3.5-turbo"
openai_proxy: Optional[AnyHttpUrl] = None
use_wordcloud: bool = False
use_bcut_asr: bool = False
asr_length_threshold: int = 60

# 验证是否可以登录
@validator("use_login", always=True)
Expand Down
2 changes: 2 additions & 0 deletions aunly_bbot/static/bot_config.exp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ Bilibili:
openai_model: "gpt-3.5-turbo-0301" # OpenAI 模型
openai_proxy: "http://localhost:7890" # 请求 OpenAI 所用的代理
use_wordcloud: true # 是否使用词云
use_bcut_asr: true # 是否使用 BCut 进行 AI 语音识别
asr_length_threshold: 60 # 调用语音识别的最小长度阈值(秒)
Event:
mute: true # 是否向管理员发送被禁言的事件提醒。
permchange: true # 是否向管理员发送权限变更的事件提醒。
Expand Down
30 changes: 2 additions & 28 deletions aunly_bbot/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
from graia.ariadne.message.parser.twilight import Twilight, FullMatch

from .core.control import Permission

# from .utils.wordcloud import get_frequencies, get_wordcloud
from .utils.bilibili_request import grpc_get_playview

channel = Channel.current()

Expand All @@ -25,29 +24,4 @@
)
)
async def main(app: Ariadne, group: Group):

sub = httpx.get(
"https://i0.hdslb.com/bfs/ai_subtitle/prod/31087008110492590207e0082656e40ce338d18289396914ffe"
).json()
subs = [x["content"] for x in sub["body"]]
title = ""
description = ""

word_counts = get_frequencies(subs)
word_cloud = await get_wordcloud(word_counts)

# browser_context = app.launch_manager.get_interface(PlaywrightContext).context
# page = await browser_context.new_page()
# await page.set_viewport_size({"width": 460, "height": 100})
# if req.error:
# print(req.message)
# return
# md = convert_md(req.summary)
# css = "\n".join(BuiltinCSS.github.value)
# await page.set_content(
# '<html><head><meta name="viewport" content="width=device-width,initial-scale=1.0">'
# f"<style>{css}</style></head><body>{md}<body></html>"
# )
# result = await page.screenshot(full_page=True, type="jpeg", quality=95)

await app.send_group_message(group, MessageChain(Image(data_bytes=word_cloud)))
print(await grpc_get_playview(439357034, 1076145927))
Loading

0 comments on commit b64281c

Please sign in to comment.