-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathspider.py
163 lines (145 loc) · 5.24 KB
/
spider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import time
from io import BytesIO
from PIL import Image
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from chaojiying import Chaojiying
import os
# Credentials typed into the login form of the demo site.
USERNAME = 'admin'
PASSWORD = 'admin'

# Chaojiying account settings. The username and password are read from the
# environment so they do not have to be stored in the source file.
CHAOJIYING_USERNAME = os.getenv('CHAOJIYING_USERNAME')
CHAOJIYING_PASSWORD = os.getenv('CHAOJIYING_PASSWORD')
CHAOJIYING_SOFT_ID = 893590  # third argument of the Chaojiying client constructor
CHAOJIYING_KIND = 9102  # captcha kind passed to Chaojiying.post_pic

if not CHAOJIYING_USERNAME or not CHAOJIYING_PASSWORD:
    print('请设置用户名和密码')
    # Missing configuration is a failure: exit with a non-zero status, and use
    # SystemExit directly instead of the interactive-only ``exit`` helper.
    raise SystemExit(1)
class CrackCaptcha:
    """Log in to https://captcha3.scrape.center/ by solving its click captcha.

    The flow, driven by :meth:`crack`, is: open the page and fill in the
    credentials, open the captcha panel, screenshot the panel, send the image
    to Chaojiying for recognition, click the returned coordinates and submit.
    """

    def __init__(self):
        """Start Chrome and create the wait helper and the Chaojiying client."""
        self.url = 'https://captcha3.scrape.center/'
        self.browser = webdriver.Chrome()
        self.wait = WebDriverWait(self.browser, 20)
        self.username = USERNAME
        self.password = PASSWORD
        self.chaojiying = Chaojiying(CHAOJIYING_USERNAME, CHAOJIYING_PASSWORD, CHAOJIYING_SOFT_ID)

    def close(self):
        """
        Shut down the browser session.

        Safe to call more than once, and safe on an instance whose
        ``__init__`` failed before ``self.browser`` was assigned.
        :return: None
        """
        browser = getattr(self, 'browser', None)
        if browser is None:
            return
        # Drop the reference first so a second call (e.g. from __del__) is a no-op.
        self.browser = None
        # quit() ends the whole driver session; close() would only close the
        # current window.
        browser.quit()

    def __del__(self):
        """Best-effort cleanup when the instance is garbage collected."""
        self.close()

    def open(self):
        """
        Open the page and type in the username and password.
        :return: None
        """
        self.browser.get(self.url)
        # Fill in the username and password.
        username = self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'input[type="text"]')))
        password = self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'input[type="password"]')))
        username.send_keys(self.username)
        password.send_keys(self.password)

    def get_captcha_button(self):
        """
        Get the button that opens the captcha.
        :return: the button element
        """
        button = self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'button[type="button"]')))
        return button

    def get_captcha_element(self):
        """
        Get the captcha panel element.
        :return: the panel element
        """
        # Wait until the captcha image itself is present ...
        self.wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'img.geetest_item_img')))
        # ... then fetch the node that wraps the whole captcha panel.
        element = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_panel_box')))
        print('成功获取验证码节点')
        return element

    def get_captcha_position(self):
        """
        Get the position of the captcha panel on the page.
        :return: tuple ``(top, bottom, left, right)``
        """
        element = self.get_captcha_element()
        # NOTE(review): presumably gives the panel time to finish rendering
        # before it is measured - confirm.
        time.sleep(2)
        location = element.location
        size = element.size
        top = location['y']
        bottom = location['y'] + size['height']
        left = location['x']
        right = location['x'] + size['width']
        return (top, bottom, left, right)

    def get_screenshot(self):
        """
        Take a screenshot of the page; it is also saved as ``screenshot.png``.
        :return: the screenshot as a PIL image
        """
        screenshot = self.browser.get_screenshot_as_png()
        screenshot = Image.open(BytesIO(screenshot))
        screenshot.save('screenshot.png')
        return screenshot

    def get_captcha_image(self, name='captcha.png'):
        """
        Crop the captcha panel out of a page screenshot and save it.
        :param name: file name the cropped image is saved under
        :return: the cropped captcha as a PIL image
        """
        top, bottom, left, right = self.get_captcha_position()
        print('验证码位置', top, bottom, left, right)
        screenshot = self.get_screenshot()
        # NOTE(review): element coordinates are used directly as screenshot
        # pixels; this assumes both use the same scale - confirm on HiDPI
        # displays.
        captcha = screenshot.crop((left, top, right, bottom))
        captcha.save(name)
        return captcha

    def get_points(self, captcha_result):
        """
        Parse the recognition result into click coordinates.
        :param captcha_result: recognition result; its ``pic_str`` entry is
            expected to look like ``'x1,y1|x2,y2'``
        :return: list of ``[x, y]`` integer pairs
        :raises ValueError: if ``pic_str`` is missing, empty or not numeric
        """
        pic_str = captcha_result.get('pic_str')
        if not pic_str:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError('captcha recognition returned no coordinates: %r' % (captcha_result,))
        groups = pic_str.split('|')
        locations = [[int(number) for number in group.split(',')] for group in groups]
        return locations

    def touch_click_words(self, locations):
        """
        Click the captcha at each of the given positions.
        :param locations: list of ``[x, y]`` offsets to click
        :return: None
        """
        # NOTE(review): the origin of move_to_element_with_offset depends on
        # the installed Selenium version (top-left corner vs. element
        # centre) - confirm the offsets line up with the recognised points.
        for location in locations:
            ActionChains(self.browser).move_to_element_with_offset(
                self.get_captcha_element(), location[0], location[1]).click().perform()
            time.sleep(1)

    def touch_click_verify(self):
        """
        Click the captcha's confirm button.
        :return: None
        """
        button = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'geetest_commit')))
        button.click()

    def crack(self):
        """
        Entry point: run the whole login and captcha flow.
        :return: None
        """
        self.open()
        time.sleep(5)
        # Click the button that opens the captcha.
        button = self.get_captcha_button()
        button.click()
        # Grab the captcha image and serialise it to PNG bytes.
        image = self.get_captcha_image()
        bytes_array = BytesIO()
        image.save(bytes_array, format='PNG')
        # Send the image for recognition.
        result = self.chaojiying.post_pic(bytes_array.getvalue(), CHAOJIYING_KIND)
        print(result)
        locations = self.get_points(result)
        self.touch_click_words(locations)
        self.touch_click_verify()
        # Check whether the login succeeded.
        success = self.wait.until(
            EC.text_to_be_present_in_element((By.TAG_NAME, 'h2'), '登录成功'))
        time.sleep(5)
        if success:
            print('登录成功!')
if __name__ == '__main__':
    # Script entry point: build the cracker and run the full login flow once.
    cracker = CrackCaptcha()
    cracker.crack()