编程 Python

使用Selenium破解新浪微博的四宫格验证码

Posted in Python on October 19, 2018

在我们爬虫的时候经常会遇到验证码，新浪微博的验证码是四宫格形式。

可以采用模板匹配的方式破解：先把验证码的所有情况全部列出来，再拿登录时截取的验证码图片与这些模板逐一对比，找到对应的验证码，最后通过 Selenium 自动拖拽完成验证。

我们将验证码四个点标注为1234，那么所有的情况就是以下24种情况。

数字代表箭头指向：

1234	2134	3124	4321
1243	2143	3142	4312
1342	2314	3214	4123
1324	2341	3241	4132
1423	2413	3412	4213
1432	2431	3421	4231

所有的情况就是以上24种。我们将这24种验证码的图片放在一个文件夹（templates）内，并以对应的拖动顺序作为文件名（例如 1234.png），因为程序是从文件名中解析拖动顺序的。登录时用获取的验证码截图去和所有模板一一对比，找到匹配的模板后，按其文件名给出的顺序拖动即可。代码如下：

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.action_chains import ActionChains
import time
from PIL import Image
from io import BytesIO
from os import listdir
# Credentials come from the environment so they never have to be written into
# the script; both fall back to '' when the variable is not set.
from os import environ
USERNAME = environ.get('WEIBO_USERNAME', '')
PASSWORD = environ.get('WEIBO_PASSWORD', '')
class CrackWeiboSlide():
  def __init__(self):
    """Start a Chrome session and store the login URL, wait helper and credentials."""
    self.url = 'https://passport.weibo.cn/signin/login'
    driver = webdriver.Chrome()
    self.browser = driver
    # Explicit waits made through this helper give up after 20 seconds.
    self.wait = WebDriverWait(driver, 20)
    self.username, self.password = USERNAME, PASSWORD
  def __del__(self):
    self.browser.close()
  def open(self):
    """
    Load the login page, type the account name and password, and submit.
    :return: None
    """
    self.browser.get(self.url)
    locate = EC.presence_of_element_located
    # All three controls are resolved before anything is typed.
    name_box = self.wait.until(locate((By.ID, 'loginName')))
    pwd_box = self.wait.until(locate((By.ID, 'loginPassword')))
    login_btn = self.wait.until(EC.element_to_be_clickable((By.ID, 'loginAction')))
    for box, text in ((name_box, self.username), (pwd_box, self.password)):
      box.send_keys(text)
    login_btn.click()
  def get_position(self, retries=3):
    """
    Locate the captcha element and return its bounding box.

    If the element does not show up before the wait times out, the login form
    is submitted again through open() and the lookup is repeated.

    :param retries: how many times the login form is re-submitted before giving up
    :return: (top, bottom, left, right) built from the element's location and size
    :raises TimeoutException: if the captcha is still missing after all retries
    """
    attempts_left = retries
    while True:
      try:
        img = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME,'patt-shadow')))
        break
      except TimeoutException:
        print('未出现验证码')
        if attempts_left <= 0:
          raise
        attempts_left -= 1
        # Log in again, then loop so that img is actually bound before use.
        self.open()
    # Fixed pause before measuring, kept from the original flow
    # (presumably lets the captcha finish rendering).
    time.sleep(2)
    location = img.location
    size = img.size
    top = location['y']
    bottom = location['y'] + size['height']
    left = location['x']
    right = location['x'] + size['width']
    return (top, bottom, left, right)
  def get_screenshot(self):
    """
    Capture the current browser window.
    :return: the screenshot, opened as an image from the PNG bytes
    """
    png_bytes = self.browser.get_screenshot_as_png()
    # Wrap the raw PNG bytes in an in-memory file object so Image.open can read them.
    return Image.open(BytesIO(png_bytes))
  def get_image(self,name):
    """获取验证码图片"""
    top,bottom,left,right = self.get_position()
    print('验证码位置',top,bottom,left,right)
    screenshot = self.get_screenshot()
    # crop()将图片裁剪出来,后面需要一个参数
    captcha = screenshot.crop((left,top,right,bottom))
    captcha.save(name)
    return captcha
  def detect_image(self,image):
    """
    匹配图片
    :param self:
    :param image: 图片
    :return: 拖动顺序
    """
    # 图片所在的文件夹
    for template_name in listdir('templates/'):
      print('正在匹配',template_name)
      template = Image.open('templates/'+template_name)
      # 匹配图片
      if self.same_img(image,template):
        # 将匹配到的文件名转换为列表
        numbers = [int(number)for number in list(template_name.split('.')[0])]
        print('拖动顺序',numbers)
        return numbers
  def is_pixel_equal(self,image1,image2,x,y):
    """
    判断两个像素的相似度
    :param image1: 图片1
    :param image2: 图片2
    :param x: 位置x
    :param y: 位置y
    :return: 像素是否相同
    """
     # 取像素点
    pixel1 = image1.load()[x,y]
    pixel2 = image2.load()[x,y]
    # 偏差量等于60
    threshold = 60
    if abs(pixel1[0]-pixel2[0]) < threshold and abs(pixel1[1]-pixel2[1])<threshold and abs(pixel1[2]-pixel2[2])<threshold:
      return True
    else:
      return False
  def same_img(self,image,template):
    """
    识别相似的验证码
    :param image: 准备识别的验证码
    :param template: 模板
    :return:
    """
    # 相似度阈值
    threshold = 0.99
    count = 0
    # 匹配所有像素点
    for x in range(image.width):
      for y in range(image.height):
        # 判断像素
        if self.is_pixel_equal(image,template,x,y):
          count+=1
    result = float(count)/(image.width*image.height)
    if result>threshold:
      print('成功匹配')
      return True
    return False
  def move(self,numbers):
    """
    Drag the pointer across the four captcha circles in the given order.

    The button is pressed on the first circle, moved to each following circle
    in 30 small steps, and released on the fourth.

    :param numbers: drag order as 1-based indexes into the circles on the page
    :return: None
    """
    # find_elements(By..., ...) exists in both Selenium 3 and 4, whereas the
    # find_elements_by_* helpers were removed in Selenium 4.3.
    circles = self.browser.find_elements(By.CSS_SELECTOR, '.patt-wrap .patt-circ')
    print('-----------------',circles)
    dx = dy = 0
    for index in range(4):
      circle = circles[numbers[index]-1]
      if index == 0:
        # Press on the centre of the first circle. move_to_element() targets the
        # element's centre, avoiding move_to_element_with_offset(), whose
        # offset origin differs between Selenium versions.
        ActionChains(self.browser).move_to_element(circle).click_and_hold().perform()
      else:
        # Move slowly, in small steps. Pointer offsets are whole pixels, so each
        # step is the difference between rounded cumulative targets; the steps
        # then add up to exactly (dx, dy) instead of falling short.
        times = 30
        moved_x = moved_y = 0
        for i in range(1, times + 1):
          target_x = round(dx * i / times)
          target_y = round(dy * i / times)
          ActionChains(self.browser).move_by_offset(target_x - moved_x, target_y - moved_y).perform()
          moved_x, moved_y = target_x, target_y
          time.sleep(1/times)
      if index == 3:
        # Last circle reached: release the mouse button.
        ActionChains(self.browser).release().perform()
      else:
        # Offset from the current circle to the next one in the order.
        next_circle = circles[numbers[index+1]-1]
        dx = next_circle.location['x'] - circle.location['x']
        dy = next_circle.location['y'] - circle.location['y']
  def crack(self):
    """
    破解入口
    :return:
    """
    self.open()
    # 获取验证码图片
    image = self.get_image('captcha.png')
    numbers = self.detect_image(image)
    self.move(numbers)
    time.sleep(10)
    print('识别结束')
if __name__ == '__main__':
  # Script entry point: build the cracker and run the cracking flow once.
  cracker = CrackWeiboSlide()
  cracker.crack()

设置自己的账号密码即可实现。

有时候会匹配不上，图片相似度达不到0.99的阈值，这个时候可能是我们收集的验证码图片过时了，重新运行图片收集程序收集一遍即可。收集到的图片是按序号命名的（0.png、1.png……），需要人工去重，并按图中箭头的顺序重命名（例如 1234.png）后放入 templates 文件夹。

收集图片程序代码如下：

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
import time
from PIL import Image
from io import BytesIO
from os import listdir
# Credentials come from the environment rather than being hard-coded, so a
# real account never ends up in a published listing; both fall back to ''
# when the variable is not set.
from os import environ
USERNAME = environ.get('WEIBO_USERNAME', '')
PASSWORD = environ.get('WEIBO_PASSWORD', '')
class CrackWeiboSlide():
  """Collects captcha screenshots by logging in repeatedly and saving each captcha."""

  def __init__(self):
    """Start a Chrome session and store the login URL, wait helper and credentials."""
    self.url = 'https://passport.weibo.cn/signin/login'
    self.browser = webdriver.Chrome()
    # Explicit waits made through this helper give up after 20 seconds.
    self.wait = WebDriverWait(self.browser,20)
    self.username = USERNAME
    self.password = PASSWORD

  def __del__(self):
    """Shut the browser session down when this object is garbage-collected."""
    # __init__ may have failed before self.browser was assigned.
    browser = getattr(self, 'browser', None)
    if browser is not None:
      # quit() ends the whole WebDriver session; close() would only close
      # the current window.
      browser.quit()

  def open(self):
    """
    Load the login page, type the account name and password, and submit.
    :return: None
    """
    self.browser.get(self.url)
    username = self.wait.until(EC.presence_of_element_located((By.ID,'loginName')))
    password = self.wait.until(EC.presence_of_element_located((By.ID,'loginPassword')))
    submit = self.wait.until(EC.element_to_be_clickable((By.ID, 'loginAction')))
    username.send_keys(self.username)
    password.send_keys(self.password)
    submit.click()

  def get_position(self, retries=3):
    """
    Locate the captcha element and return its bounding box.

    If the element does not show up before the wait times out, the login form
    is submitted again through open() and the lookup is repeated.

    :param retries: how many times the login form is re-submitted before giving up
    :return: (top, bottom, left, right) built from the element's location and size
    :raises TimeoutException: if the captcha is still missing after all retries
    """
    attempts_left = retries
    while True:
      try:
        img = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME,'patt-shadow')))
        break
      except TimeoutException:
        print('未出现验证码')
        if attempts_left <= 0:
          raise
        attempts_left -= 1
        # Log in again, then loop so that img is actually bound before use.
        self.open()
    # Fixed pause before measuring, kept from the original flow
    # (presumably lets the captcha finish rendering).
    time.sleep(2)
    location = img.location
    size = img.size
    top = location['y']
    bottom = location['y'] + size['height']
    left = location['x']
    right = location['x'] + size['width']
    return (top, bottom, left, right)

  def get_screenshot(self):
    """
    Capture the current browser window.
    :return: the screenshot, opened as an image from the PNG bytes
    """
    screenshot = self.browser.get_screenshot_as_png()
    # Wrap the raw PNG bytes in an in-memory file object so Image.open can read them.
    screenshot = Image.open(BytesIO(screenshot))
    return screenshot

  def get_image(self,name):
    """
    Cut the captcha out of a full-window screenshot and save it to disk.
    :param name: file name the cropped captcha is saved under
    :return: the cropped captcha image
    """
    top,bottom,left,right = self.get_position()
    print('验证码位置',top,bottom,left,right)
    screenshot = self.get_screenshot()
    # crop() takes its box as (left, upper, right, lower).
    captcha = screenshot.crop((left,top,right,bottom))
    captcha.save(name)
    return captcha

  def main(self, limit=None):
    """
    Collect captcha images, saving them as 0.png, 1.png, ...

    Each round logs in again and saves the captcha that appears.

    :param limit: number of captchas to collect; None (the default) keeps
                  collecting until the process is stopped
    :return: None
    """
    count = 0
    while limit is None or count < limit:
      name = str(count)+'.png'
      self.open()
      self.get_image(name)
      count+=1
if __name__ == '__main__':
  # Script entry point: build the collector and start saving captcha images.
  collector = CrackWeiboSlide()
  collector.main()

总结

以上就是这篇文章的全部内容了，希望本文的内容对大家的学习或者工作具有一定的参考学习价值，谢谢大家对三水点靠木的支持。如果你想了解更多相关内容请查看下面相关链接

使用Selenium破解新浪微博的四宫格验证码

- Author -

回忆不说话

声明：登载此文出于传递更多信息之目的，并不意味着赞同其观点或证实其描述。

Python 相关文章推荐

深入理解Django中内置的用户认证

Oct 06 Python

详解python OpenCV学习笔记之直方图均衡化

Feb 08 Python

解决pycharm无法调用pip安装的包问题

May 18 Python

Python json模块dumps、loads操作示例

Sep 06 Python

对python中类的继承与方法重写介绍

Jan 20 Python

Python控制键盘鼠标pynput的详细用法

Jan 28 Python

Python_查看sqlite3表结构,查询语句的示例代码

Jul 17 Python

Python气泡提示与标签的实现

Apr 01 Python

python计算Content-MD5并获取文件的Content-MD5值方式

Apr 03 Python

numpy矩阵数值太多不能全部显示的解决

May 14 Python

如何通过python计算圆周率PI

Nov 11 Python

快速解决pymongo操作mongodb的时区问题

Dec 05 Python

Selenium的使用详解

Oct 19 #Python

Python爬取成语接龙类网站

Oct 19 #Python

将Django项目部署到CentOs服务器中

Oct 18 #Python

python中将zip压缩包转为gz.tar的方法

Oct 18 #Python

Python 忽略warning的输出方法

Oct 18 #Python

解决python通过cx_Oracle模块连接Oracle乱码的问题

Oct 18 #Python

解决python3捕获cx_oracle抛出的异常错误问题

Oct 18 #Python