编程 Python

Python爬虫破解登陆哔哩哔哩的方法

Posted in Python on November 17, 2020

写在前面

对于一名找不到工作的爬虫菜鸡来说，登录这一块肯定是个比较大的难题。
从今天开始，准备一点点对大型网站逐个进行登录破解，以此提升自己的爬虫水平。

环境搭建

Python 3.7.7环境，Mac电脑测试
Python内置库：base64、hashlib、time、urllib
第三方库：rsa、requests

PC端登陆

全部代码：

class Bilibili_For_PC():
  """Log in to Bilibili using the PC-client parameters of the passport API.

  Keyword arguments passed to the constructor are stored verbatim as
  instance attributes before the HTTP session and the endpoint settings
  are created.
  """

  def __init__(self, **kwargs):
    for key, value in kwargs.items():
      setattr(self, key, value)
    self.session = requests.Session()
    self.__initialize()

  def login(self, username, password, crack_captcha_func=None, **kwargs):
    """Log in with a username and password.

    Args:
      username: Account name; URL-quoted before it is put in the form body.
      password: Plain-text password. It is prefixed with the hash returned
        by the getKey endpoint, RSA-encrypted with the returned public
        key and base64-encoded before being sent.
      crack_captcha_func: Accepted for interface compatibility only. This
        implementation never calls it; captcha images are always posted
        to the service at ``self.crack_captcha_url``.
      **kwargs: Optional settings.
        ``proxies`` is merged into ``self.session.proxies``.
        ``max_captcha_attempts`` bounds how many captcha-assisted login
        attempts are made (default 10, ``None`` removes the bound).

    Returns:
      A tuple ``(infos_return, session)``. ``infos_return`` is a dict
      holding ``'username'`` plus every key of the login response JSON;
      ``session`` is ``self.session`` with the login cookies stored.

    Raises:
      RuntimeError: If the server answers with code -629 (wrong username
        or password), with any other unexpected code, or if a captcha is
        still requested after ``max_captcha_attempts`` attempts.
    """
    # Apply proxies when the caller supplied them.
    self.session.proxies.update(kwargs.get('proxies', {}))
    max_captcha_attempts = kwargs.get('max_captcha_attempts', 10)
    appkey = '1d8b6e7d45233436'
    is_need_captcha = False
    captcha_attempts = 0
    while True:
      # An empty captcha yields exactly the form body of a captcha-free login.
      captcha = ''
      if is_need_captcha:
        # Stop instead of hammering the login endpoint forever.
        if max_captcha_attempts is not None and captcha_attempts >= max_captcha_attempts:
          raise RuntimeError('Account -> %s, fail to login, captcha still required after %d attempts' % (username, captcha_attempts))
        captcha_attempts += 1
        captcha_img = self.session.get(self.captcha_url, headers=self.captcha_headers).content
        data = {'image': base64.b64encode(captcha_img).decode('utf-8')}
        captcha = self.session.post(self.crack_captcha_url, json=data).json()['message']
      # Fetch the hash and the RSA public key used to encrypt the password.
      data = {
        'appkey': appkey,
        'sign': self.__calcSign('appkey={}'.format(appkey)),
      }
      response_json = self.session.post(self.getkey_url, data=data).json()
      key_hash = response_json['data']['hash']
      pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(response_json['data']['key'].encode('utf-8'))
      encrypted_password = base64.b64encode(rsa.encrypt('{}{}'.format(key_hash, password).encode(), pub_key))
      # Build the form body by hand: the sign is the MD5 of this exact string.
      data = "access_key=&actionKey=appkey&appkey={}&build=6040500&captcha={}&challenge=&channel=bili&cookies=&device=pc&password={}&permission=ALL&seccode=&subid=1&ts={}&username={}&validate=".format(
        appkey,
        urllib.parse.quote_plus(str(captcha)),
        urllib.parse.quote_plus(encrypted_password),
        int(time.time()),
        urllib.parse.quote_plus(username),
      )
      data = "{}&sign={}".format(data, self.__calcSign(data))
      response = self.session.post(self.login_url, data=data, headers=self.login_headers)
      response_json = response.json()
      code = response_json['code']
      # Login succeeded.
      if code == 0 and response_json['data']['status'] == 0:
        for cookie in response_json['data']['cookie_info']['cookies']:
          # The domain must be '.bilibili.com'; a cookie stored for
          # '.bilibili' never domain-matches a bilibili.com host and
          # would not be sent on later requests.
          self.session.cookies.set(cookie['name'], cookie['value'], domain='.bilibili.com')
        print('[INFO]: Account -> %s, login successfully' % username)
        infos_return = {'username': username}
        infos_return.update(response_json)
        return infos_return, self.session
      # The server asks for a captcha: retry with one.
      elif code == -105:
        is_need_captcha = True
      # Wrong username or password.
      elif code == -629:
        raise RuntimeError('Account -> %s, fail to login, username or password error' % username)
      # Any other error.
      else:
        raise RuntimeError(response_json.get('message'))

  def __calcSign(self, param, salt="560c52ccd288fed045859ed18bffd973"):
    """Return the hex MD5 digest of ``param`` immediately followed by ``salt``."""
    sign = hashlib.md5('{}{}'.format(param, salt).encode('utf-8'))
    return sign.hexdigest()

  def __initialize(self):
    """Set the request headers and endpoint URLs used by ``login``."""
    # Headers for the login request.
    self.login_headers = {'Content-type': 'application/x-www-form-urlencoded'}
    # Headers for downloading the captcha image.
    self.captcha_headers = {'Host': 'passport.bilibili.com'}
    # Endpoint that returns the hash and RSA public key.
    self.getkey_url = 'https://passport.bilibili.com/api/oauth2/getKey'
    # Login endpoint.
    self.login_url = 'https://passport.bilibili.com/api/v3/oauth2/login'
    # Captcha image endpoint.
    self.captcha_url = 'https://passport.bilibili.com/captcha'
    # Captcha recognition service, taken from https://github.com/Hsury/Bilibili-Toolkit
    self.crack_captcha_url = 'https://bili.dev:2233/captcha'
    # Every request of the session carries this User-Agent.
    self.session.headers.update({'User-Agent': "Mozilla/5.0 BiliDroid/5.51.1 (bbcallen@gmail.com)"})

移动端登陆

移动端与PC端类似：接口URL和请求头完全相同，区别只在于 appkey、签名用的盐值（salt）以及登录参数中的 device、mobi_app、platform 字段，在此不过多介绍。
全部代码：

class Bilibili_For_Mobile():
  """Log in to Bilibili using the mobile-client parameters of the passport API.

  Keyword arguments passed to the constructor are stored verbatim as
  instance attributes before the HTTP session and the endpoint settings
  are created.
  """

  def __init__(self, **kwargs):
    for key, value in kwargs.items():
      setattr(self, key, value)
    self.session = requests.Session()
    self.__initialize()

  def login(self, username, password, crack_captcha_func=None, **kwargs):
    """Log in with a username and password.

    Args:
      username: Account name; URL-quoted before it is put in the form body.
      password: Plain-text password. It is prefixed with the hash returned
        by the getKey endpoint, RSA-encrypted with the returned public
        key and base64-encoded before being sent.
      crack_captcha_func: Accepted for interface compatibility only. This
        implementation never calls it; captcha images are always posted
        to the service at ``self.crack_captcha_url``.
      **kwargs: Optional settings.
        ``proxies`` is merged into ``self.session.proxies``.
        ``max_captcha_attempts`` bounds how many captcha-assisted login
        attempts are made (default 10, ``None`` removes the bound).

    Returns:
      A tuple ``(infos_return, session)``. ``infos_return`` is a dict
      holding ``'username'`` plus every key of the login response JSON;
      ``session`` is ``self.session`` with the login cookies stored.

    Raises:
      RuntimeError: If the server answers with code -629 (wrong username
        or password), with any other unexpected code, or if a captcha is
        still requested after ``max_captcha_attempts`` attempts.
    """
    # Apply proxies when the caller supplied them.
    self.session.proxies.update(kwargs.get('proxies', {}))
    max_captcha_attempts = kwargs.get('max_captcha_attempts', 10)
    appkey = 'bca7e84c2d947ac6'
    is_need_captcha = False
    captcha_attempts = 0
    while True:
      # An empty captcha yields exactly the form body of a captcha-free login.
      captcha = ''
      if is_need_captcha:
        # Stop instead of hammering the login endpoint forever.
        if max_captcha_attempts is not None and captcha_attempts >= max_captcha_attempts:
          raise RuntimeError('Account -> %s, fail to login, captcha still required after %d attempts' % (username, captcha_attempts))
        captcha_attempts += 1
        captcha_img = self.session.get(self.captcha_url, headers=self.captcha_headers).content
        data = {'image': base64.b64encode(captcha_img).decode('utf-8')}
        captcha = self.session.post(self.crack_captcha_url, json=data).json()['message']
      # Fetch the hash and the RSA public key used to encrypt the password.
      data = {
        'appkey': appkey,
        'sign': self.__calcSign('appkey={}'.format(appkey)),
      }
      response_json = self.session.post(self.getkey_url, data=data).json()
      key_hash = response_json['data']['hash']
      pub_key = rsa.PublicKey.load_pkcs1_openssl_pem(response_json['data']['key'].encode('utf-8'))
      encrypted_password = base64.b64encode(rsa.encrypt('{}{}'.format(key_hash, password).encode(), pub_key))
      # Build the form body by hand: the sign is the MD5 of this exact string.
      data = "access_key=&actionKey=appkey&appkey={}&build=6040500&captcha={}&challenge=&channel=bili&cookies=&device=phone&mobi_app=android&password={}&permission=ALL&platform=android&seccode=&subid=1&ts={}&username={}&validate=".format(
        appkey,
        urllib.parse.quote_plus(str(captcha)),
        urllib.parse.quote_plus(encrypted_password),
        int(time.time()),
        urllib.parse.quote_plus(username),
      )
      data = "{}&sign={}".format(data, self.__calcSign(data))
      response = self.session.post(self.login_url, data=data, headers=self.login_headers)
      response_json = response.json()
      code = response_json['code']
      # Login succeeded.
      if code == 0 and response_json['data']['status'] == 0:
        for cookie in response_json['data']['cookie_info']['cookies']:
          # The domain must be '.bilibili.com'; a cookie stored for
          # '.bilibili' never domain-matches a bilibili.com host and
          # would not be sent on later requests.
          self.session.cookies.set(cookie['name'], cookie['value'], domain='.bilibili.com')
        print('[INFO]: Account -> %s, login successfully' % username)
        infos_return = {'username': username}
        infos_return.update(response_json)
        return infos_return, self.session
      # The server asks for a captcha: retry with one.
      elif code == -105:
        is_need_captcha = True
      # Wrong username or password.
      elif code == -629:
        raise RuntimeError('Account -> %s, fail to login, username or password error' % username)
      # Any other error.
      else:
        raise RuntimeError(response_json.get('message'))

  def __calcSign(self, param, salt="60698ba2f68e01ce44738920a0ffe768"):
    """Return the hex MD5 digest of ``param`` immediately followed by ``salt``."""
    sign = hashlib.md5('{}{}'.format(param, salt).encode('utf-8'))
    return sign.hexdigest()

  def __initialize(self):
    """Set the request headers and endpoint URLs used by ``login``."""
    # Headers for the login request.
    self.login_headers = {
      'Content-type': 'application/x-www-form-urlencoded'
    }
    # Headers for downloading the captcha image.
    self.captcha_headers = {
      'Host': 'passport.bilibili.com'
    }
    # Endpoint that returns the hash and RSA public key.
    self.getkey_url = 'https://passport.bilibili.com/api/oauth2/getKey'
    # Login endpoint.
    self.login_url = 'https://passport.bilibili.com/api/v3/oauth2/login'
    # Captcha image endpoint.
    self.captcha_url = 'https://passport.bilibili.com/captcha'
    # Captcha recognition service, taken from https://github.com/Hsury/Bilibili-Toolkit
    self.crack_captcha_url = 'https://bili.dev:2233/captcha'
    # Every request of the session carries this User-Agent.
    self.session.headers.update({'User-Agent': "Mozilla/5.0 BiliDroid/5.51.1 (bbcallen@gmail.com)"})

到此，这篇关于Python爬虫破解登录哔哩哔哩的方法的文章就介绍到这了。更多相关Python爬虫破解登录的内容，请搜索三水点靠木以前的文章或继续浏览下面的相关文章，希望大家以后多多支持三水点靠木！

Python爬虫破解登陆哔哩哔哩的方法

- Author -

꧁༺北海以北的等待༻꧂

声明：登载此文出于传递更多信息之目的，并不意味着赞同其观点或证实其描述。

Python 相关文章推荐

Python常用库推荐

Dec 04 Python

python添加模块搜索路径方法

Sep 11 Python

Python分析学校四六级过关情况

Nov 22 Python

python实现Floyd算法

Jan 03 Python

实例讲解Python爬取网页数据

Jul 08 Python

python输出电脑上所有的串口名的方法

Jul 02 Python

OpenCV里的imshow()和Matplotlib.pyplot的imshow()的实现

Nov 25 Python

python爬虫添加请求头代码实例

Dec 28 Python

python GUI库图形界面开发之PyQt5多行文本框控件QTextEdit详细使用方法实例

Feb 28 Python

python实现梯度法 python最速下降法

Mar 24 Python

解决Django no such table: django_session的问题

Apr 07 Python

Python自动发送和收取邮件的方法

Aug 12 Python

appium+python自动化配置(adk、jdk、node.js)

Nov 17 #Python

python调用百度API实现人脸识别

Nov 17 #Python

详解利用python识别图片中的条码（pyzbar）及条码图片矫正和增强

Nov 17 #Python

详解Pytorch显存动态分配规律探索

Nov 17 #Python

Python调用ffmpeg开源视频处理库，批量处理视频

Nov 16 #Python

python tkinter实现连连看游戏

Nov 16 #Python

详解python os.path.exists判断文件或文件夹是否存在

Nov 16 #Python