Skip to content

Commit

Permalink
Merge pull request #62 from djkcyl/dev
Browse files Browse the repository at this point in the history
v1.5.0-beta1
  • Loading branch information
djkcyl authored Jun 14, 2023
2 parents e9b7265 + 6232721 commit 2c8ef4f
Show file tree
Hide file tree
Showing 13 changed files with 508 additions and 352 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ jobs:
- name: Build Nuitka Binary
run: |
pdm run pip install -U --force-reinstall "https://github.com/Nuitka/Nuitka/archive/factory.zip"
pdm add "https://github.com/Nuitka/Nuitka/archive/factory.zip"
pdm run python -m nuitka --onefile --standalone --show-progress --assume-yes-for-downloads --output-dir=nuitka --windows-icon-from-ico=tv.ico --user-package-configuration-file=nuitka-${{ matrix.group }}.yml main.py
- name: Get commit hash
Expand Down
10 changes: 7 additions & 3 deletions aunly_bbot/cli/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from graiax.playwright.installer import install_playwright

from ..utils.browser_shot import screenshot
from ..utils.fonts_provider import get_font
from ..utils.fonts_provider import font_init
from ..utils.detect_package import is_package


Expand All @@ -27,7 +27,7 @@
async def init_playwright():
global PLAYWRIGIT
logger.info("正在下载字体...")
await get_font()
font_init()
logger.success("字体下载完成!")

await install_playwright(browser_type="firefox")
Expand All @@ -39,10 +39,14 @@ async def init_playwright():
"Mozilla/5.0 (Linux; Android 10; RMX1911) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/100.0.4896.127 Mobile Safari/537.36"
),
# headless=False,
)
PLAYWRIGIT = ff
logger.info("[Playwright] 正在获取浏览器版本")
page = await PLAYWRIGIT.new_page()
if len(PLAYWRIGIT.pages) > 0:
page = PLAYWRIGIT.pages[0]
else:
page = await PLAYWRIGIT.new_page()
version = await page.evaluate("navigator.appVersion")
logger.info(f"[BiliBili推送] 浏览器启动完成,当前版本 {version}")
logger.debug(await PLAYWRIGIT.cookies())
Expand Down
12 changes: 6 additions & 6 deletions aunly_bbot/cli/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ def openai_api_token(self):
def openai_model(self):
openai_model = ListPrompt(
"请选择 OpenAI 模型",
[Choice("gpt-3.5-turbo-0301"), Choice("gpt-4-0314"), Choice("gpt-4-32k-0314")],
[Choice("gpt-3.5-turbo-0613"), Choice("gpt-3.5-turbo-16k-0613"), Choice("gpt-4-0613")],
allow_filter=False,
annotation="使用键盘的 ↑ 和 ↓ 来选择, 按回车确认",
).prompt()
Expand All @@ -306,13 +306,14 @@ def openai_proxy(self):

def bilibili_username(self):
    """Prompt for the Bilibili username and store it in ``self.config``.

    Behaviour by input:

    * empty input   -- the empty value is stored, a notice is printed and
      the password prompt is skipped; the result of ``click.secho`` is
      returned.
    * non-digit input -- an error is printed and the prompt is shown again.
    * digit-only input -- the value is stored and
      ``self.bilibili_password()`` is called.
    """
    # Retry in a loop instead of recursing: the recursive form fell through
    # after the nested call returned, overwriting the accepted username with
    # the rejected one and asking for the password a second time.
    while True:
        username = InputPrompt("请输入 Bilibili 用户名: (可用于 AI 总结时获取 Bilibili 的 AI 字幕)").prompt()
        if not username:
            self.config["Bilibili"]["username"] = username
            return click.secho("用户名为空,已关闭对应功能!", fg="bright_red", bold=True)
        if username.isdigit():
            break
        click.secho("用户名不合法!", fg="bright_red", bold=True)

    self.config["Bilibili"]["username"] = username
    self.bilibili_password()

def bilibili_password(self):
password = InputPrompt("请输入 Bilibili 密码: ", is_password=True).prompt()
Expand Down Expand Up @@ -450,12 +451,11 @@ def log_level(self):
ListPrompt(
"请选择日志等级",
[
Choice("DEBUG"),
Choice("INFO"),
Choice("DEBUG"),
Choice("WARNING"),
],
allow_filter=False,
default_select=1,
annotation="使用键盘的 ↑ 和 ↓ 来选择, 按回车确认",
)
.prompt()
Expand Down
1 change: 1 addition & 0 deletions aunly_bbot/core/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ def in_screen():
)

logger.success(f"成功重载 logger,当前日志等级为 {log_level}")
logger.info(f"日志文件将会保存在 {LOGPATH} 中")

# logger.trace("TRACE 等级将会输出至控制台")
# logger.debug("DEBUG 等级将会输出至控制台")
Expand Down
16 changes: 16 additions & 0 deletions aunly_bbot/model/captcha.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from typing import Optional
from pydantic import BaseModel


class CaptchaData(BaseModel):
    """Result payload returned by the captcha-recognition service.

    Carried in the ``data`` member of ``CaptchaResponse``.
    """

    # Identifier of this recognition result.
    captcha_id: str
    # Points to click, one inner list per point.
    # NOTE(review): presumably [x, y] in source-image pixels -- confirm.
    points: list[list[int]]
    # NOTE(review): presumably bounding boxes of detected targets -- confirm
    # the coordinate layout with the captcha service.
    rectangles: list[list[int]]
    # NOTE(review): presumably raw detector output -- confirm the layout.
    yolo_data: list[list[int]]
    # NOTE(review): unit and meaning (elapsed time vs. timestamp) are not
    # visible here -- confirm.
    time: int


class CaptchaResponse(BaseModel):
    """Top-level envelope of a captcha-recognition service reply.

    Built directly from the decoded JSON body of the service response.
    """

    # Status code reported by the service.
    code: int
    # Human-readable status text reported by the service.
    message: str
    # Recognition payload. The explicit ``None`` default keeps the field
    # optional regardless of how the installed Pydantic version treats a
    # bare ``Optional[...]`` annotation, so a reply without ``data`` still
    # validates and callers can check for ``None``.
    data: Optional[CaptchaData] = None
5 changes: 3 additions & 2 deletions aunly_bbot/model/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def can_use_login(cls, enable, values):


class _Bilibili(BaseModel, extra=Extra.ignore):
username: Optional[int]
username: Optional[str]
password: Optional[str]
use_login: bool = False
use_browser: bool = True
Expand All @@ -57,14 +57,15 @@ class _Bilibili(BaseModel, extra=Extra.ignore):
dynamic_font_source: Literal["local", "remote"] = "local"
openai_summarization: bool = False
openai_api_token: Optional[str] = None
openai_model: str = "gpt-3.5-turbo"
openai_model: str = "gpt-3.5-turbo-0301"
openai_proxy: Optional[AnyHttpUrl] = None
openai_cooldown: int = 60
openai_whitelist_users: Optional[list[int]] = None
openai_promot_version: int = 2
use_wordcloud: bool = False
use_bcut_asr: bool = False
asr_length_threshold: int = 60
captcha_address: Optional[AnyHttpUrl] = None

# 验证是否可以登录
@validator("use_login", always=True)
Expand Down
1 change: 1 addition & 0 deletions aunly_bbot/static/bot_config.exp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Bilibili:
use_wordcloud: true # 是否使用词云
use_bcut_asr: true # 是否使用 BCut 接口进行 AI 语音识别
asr_length_threshold: 60 # 调用语音识别的最小长度阈值(秒)
captcha_address: null # 验证码识别服务地址
Event:
mute: true # 是否向管理员发送被禁言的事件提醒。
permchange: true # 是否向管理员发送权限变更的事件提醒。
Expand Down
110 changes: 109 additions & 1 deletion aunly_bbot/utils/browser_shot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re
import time
import httpx
import asyncio
import contextlib

Expand All @@ -10,9 +11,17 @@
from sentry_sdk import capture_exception
from playwright._impl._api_types import TimeoutError
from graiax.playwright.interface import PlaywrightContext
from playwright.async_api._generated import Request, Page, BrowserContext, Route
from playwright._impl._api_structures import Position
from playwright.async_api._generated import (
Page,
Route,
Request,
Response,
BrowserContext,
)

from ..core.bot_config import BotConfig
from ..model.captcha import CaptchaResponse

from .fonts_provider import get_font

Expand Down Expand Up @@ -46,6 +55,10 @@ async def fill_font(route: Route, request: Request):
await route.fallback()


async def resolve_select_captcha(page: Page):
    """Placeholder coroutine: ignores ``page`` and resolves to ``None``.

    NOTE(review): the name suggests a future home for select-type captcha
    handling, but nothing is implemented here yet.
    """
    return None


async def browser_dynamic(dynid: str):
app = Ariadne.current()
browser_context = app.launch_manager.get_interface(PlaywrightContext).context
Expand Down Expand Up @@ -125,16 +138,111 @@ def network_requestfailed(request: Request):

async def get_mobile_screenshot(page: Page, dynid: str):
url = f"https://m.bilibili.com/dynamic/{dynid}"
captcha_image_body = ""
last_captcha_id = ""
captcha_result = None

async def captcha_image_url_callback(response: Response):
nonlocal captcha_image_body
logger.debug(f"[Captcha] Get captcha image url: {response.url}")
captcha_image_body = await response.body()

async def captcha_result_callback(response: Response):
nonlocal captcha_result, last_captcha_id
logger.debug(f"[Captcha] Get captcha result: {response.url}")
captcha_resp = await response.text()
logger.debug(f"[Captcha] Result: {captcha_resp}")
if '"result": "success"' in captcha_resp:
logger.success("[Captcha] 验证码 Callback 验证成功")
captcha_result = True
elif '"result": "click"' in captcha_resp:
pass
else:
if last_captcha_id:
logger.warning(f"[Captcha] 验证码 Callback 验证失败,正在上报:{last_captcha_id}")
async with httpx.AsyncClient() as client:
await client.post(
f"{captcha_baseurl}/report", json={"captcha_id": last_captcha_id}
)
last_captcha_id = ""
captcha_result = False

await page.set_viewport_size({"width": 460, "height": 720})

captcha_address = BotConfig.Bilibili.captcha_address
if captcha_address:
page.on(
"response",
lambda response: captcha_image_url_callback(response)
if response.url.startswith("https://static.geetest.com/captcha_v3/")
else None,
)
page.on(
"response",
lambda response: captcha_result_callback(response)
if response.url.startswith("https://api.geetest.com/ajax.php")
else None,
)

with contextlib.suppress(TimeoutError):
await page.goto(url, wait_until="networkidle", timeout=20000)

if captcha_address:
captcha_baseurl = f"{captcha_address.scheme}://{captcha_address.host}:{captcha_address.port}/captcha/select"
while captcha_image_body or captcha_result is False:
logger.warning("[Captcha] 需要人机验证,正在尝试自动解决验证码")
captcha_image = await page.query_selector(".geetest_item_img")
assert captcha_image
captcha_size = await captcha_image.bounding_box()
assert captcha_size
origin_image_size = 344, 384

async with httpx.AsyncClient() as client:
captcha_req = await client.post(
f"{captcha_baseurl}/bytes",
timeout=10,
files={"img_file": captcha_image_body},
)
captcha_req = CaptchaResponse(**captcha_req.json())
logger.debug(f"[Captcha] Get Resolve Result: {captcha_req}")
assert captcha_req.data
last_captcha_id = captcha_req.data.captcha_id
if captcha_req.data:
click_points: list[list[int]] = captcha_req.data.points
logger.warning(f"[Captcha] 识别到 {len(click_points)} 个坐标,正在点击")
# 根据原图大小和截图大小计算缩放比例,然后计算出正确的需要点击的位置
for point in click_points:
real_click_points = {
"x": point[0] * captcha_size["width"] / origin_image_size[0],
"y": point[1] * captcha_size["height"] / origin_image_size[1],
}
await captcha_image.click(position=Position(**real_click_points))
await page.wait_for_timeout(800)
captcha_image_body = ""
await page.click("text=确认")
geetest_up = await page.wait_for_selector(".geetest_up", state="visible")
Path("captcha.jpg").write_bytes(await page.screenshot())
if not geetest_up:
logger.warning("[Captcha] 未检测到验证码验证结果,正在重试")
continue
geetest_result = await geetest_up.text_content()
assert geetest_result
logger.debug(f"[Captcha] Geetest result: {geetest_result}")
if "验证成功" in geetest_result:
logger.success("[Captcha] 极验网页 Tip 验证成功")
else:
logger.warning("[Captcha] 极验验证失败,正在重试")

with contextlib.suppress(TimeoutError):
await page.wait_for_load_state(state="domcontentloaded", timeout=20000)

if "bilibili.com/404" in page.url:
logger.warning(f"[Bilibili推送] {dynid} 动态不存在")
raise Notfound

await page.wait_for_load_state(state="domcontentloaded", timeout=20000)
await page.wait_for_selector(".opus-module-author", state="visible")

await page.add_script_tag(path=mobile_style_js)
await page.wait_for_function("getMobileStyle()")

Expand Down
14 changes: 8 additions & 6 deletions aunly_bbot/utils/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,18 @@
import tiktoken_async

from loguru import logger
from httpx import Response
from typing import Optional
from collections import OrderedDict
from httpx import HTTPError, Response

from ..core.bot_config import BotConfig
from ..model.openai import OpenAI, TokenUsage

LIMIT_COUNT = {"gpt-3.5-turbo-0301": 3500, "gpt-4-0314": 7600, "gpt-4-32k-0314": 32200}.get(
BotConfig.Bilibili.openai_model or "gpt-3.5-turbo-0301", 3500
)
# Per-model limit looked up by the configured OpenAI model name. When the
# setting is empty the "gpt-3.5-turbo-0613" entry is used, and any model that
# is not listed here falls back to 3500.
# NOTE(review): presumably a prompt-token budget -- confirm against its users.
LIMIT_COUNT = {
"gpt-3.5-turbo-0613": 3500,
"gpt-3.5-turbo-16k-0613": 15000,
"gpt-4-0613": 7600,
}.get(BotConfig.Bilibili.openai_model or "gpt-3.5-turbo-0613", 3500)

if BotConfig.Bilibili.openai_summarization:
logger.info("正在加载 OpenAI Token 计算模型")
Expand Down Expand Up @@ -59,10 +61,10 @@ def get_summarise_prompt(title: str, transcript: str) -> list[dict[str, str]]:
def count_tokens(prompts: list[dict[str, str]]):
"""根据内容计算 token 数"""

if BotConfig.Bilibili.openai_model == "gpt-3.5-turbo-0301":
if BotConfig.Bilibili.openai_model.startswith("gpt-3.5-turbo"):
tokens_per_message = 4
tokens_per_name = -1
elif BotConfig.Bilibili.openai_model == "gpt-4":
elif BotConfig.Bilibili.openai_model.startswith("gpt-4"):
tokens_per_message = 3
tokens_per_name = 1
else:
Expand Down
Loading

0 comments on commit 2c8ef4f

Please sign in to comment.