Python实现网页截图(PyQT5)过程解析


Posted in Python on August 12, 2019

方案说明

功能要求:实现网页加载后将页面截取成长图片

涉及模块:PyQT5 PIL

逻辑说明:

1:完成窗口设置,利用PyQT5 QWebEngineView加载网页地址,待网页加载完成后,调用check_page;

class MainWindow(QMainWindow):
  """Frameless main window that hosts the web view whose page is captured."""

  def __init__(self, parent=None):
    """Set the window title and window flags; no page is loaded yet."""
    super().__init__(parent)
    self.temp_height = 0
    self.setWindowTitle('易哈佛')
    # Hide the minimize / maximize buttons.
    self.setWindowFlag(Qt.WindowMinMaxButtonsHint, False)
    # self.setWindowFlag(Qt.WindowStaysOnTopHint, True)  # keep the window on top
    # Draw the window without a frame.
    self.setWindowFlag(Qt.FramelessWindowHint, True)

  def urlScreenShot(self, url):
    """Load ``url`` in a new web view and call check_page once loading finishes."""
    view = QWebEngineView()
    self.browser = view
    view.load(QUrl(url))
    self.setGeometry(self.chose_screen())
    view.loadFinished.connect(self.check_page)
    self.setCentralWidget(view)

  def get_page_size(self):
    """Return (width, height) of the page contents and cache them on the window."""
    contents = self.browser.page().contentsSize()
    page_width, page_height = contents.width(), contents.height()
    self.set_height = page_height
    self.set_width = page_width
    return page_width, page_height

  def chose_screen(self):
    """Return a 750x1370 rectangle at the top-left of the first screen that can hold it.

    Falls back to a rectangle at the origin (0, 0) when no screen's available
    area is strictly larger than the target size.
    """
    target_w, target_h = 750, 1370
    desktop = QApplication.desktop()
    for index in range(desktop.screenCount()):
      available = desktop.availableGeometry(index)
      if available.width() <= target_w or available.height() <= target_h:
        continue
      return QRect(available.left(), available.top(), target_w, target_h)
    return QRect(0, 0, target_w, target_h)
if __name__ == '__main__':
  # Create the Qt application, show the main window and run the event loop.
  app = QApplication(sys.argv)
  win = MainWindow()
  win.show()
  # Propagate the event loop's return code as the process exit status.
  # app.exit() only asks a running event loop to stop, so calling it after
  # exec_() has already returned would drop the code.
  sys.exit(app.exec_())

2:收集页面高度,并计算分次截屏的次数和余量高度;实例化图片合并工具,设置定时器,超时信号发出后,执行exe_command;

def check_page(self):
    """Plan the capture after the page has loaded and start the capture timer.

    The page height is split into whole window heights (``self.page``) plus a
    remainder (``self.over_flow_size``). A ScreenShotMerge helper is created
    for that plan and a timer with a 400 interval is started whose timeout
    runs exe_command.
    """
    _, p_height = self.get_page_size()
    self.page, self.over_flow_size = divmod(p_height, self.height())
    if self.page == 0:
        # The page is shorter than the window: a single screenshot already
        # shows all of it, so there must be no remainder strip. Leaving the
        # remainder set would capture and merge a duplicated strip.
        self.page = 1
        self.over_flow_size = 0
    self.ssm = ScreenShotMerge(self.page, self.over_flow_size)
    self.timer = QTimer(self)
    self.timer.timeout.connect(self.exe_command)
    self.timer.setInterval(400)
    self.timer.start()

3:exe_command用来控制截图次数,并在每次截图完成后控制网页向下滚动一个窗口的高度;所有的页面都已截取时,完成图片合并。

def exe_command(self):
    """Run one capture step; connected to the capture timer's timeout.

    While ``self.page`` is positive a screenshot is taken and the page is
    scrolled. When it reaches zero the remainder strip is captured if
    ``self.over_flow_size`` is positive. Once it is negative the timer is
    stopped, the images are merged and the window is closed. ``self.page``
    is decremented on every call.
    """
    if self.page < 0:
        # Everything has been captured: finish up.
        self.timer.stop()
        self.ssm.image_merge()
        self.close()
    elif self.page > 0:
        # Full pages remain: capture the current view, then scroll down.
        self.screen_shot()
        self.run_js()
    elif self.over_flow_size > 0:
        # Full pages are done; capture the remainder once.
        self.screen_shot()
    self.page = self.page - 1
  def run_js(self):
    script = """
      var scroll = function (dHeight) {
      var t = document.documentElement.scrollTop
      var h = document.documentElement.scrollHeight
      dHeight = dHeight || 0
      var current = t + dHeight
      if (current > h) {
        window.scrollTo(0, document.documentElement.clientHeight)
       } else {
        window.scrollTo(0, current)
       }
      }
    """
    command = script + '\n scroll({})'.format(self.height())
    self.browser.page().runJavaScript(command)

4:screen_shot在每次截图完成后将图片保存,并将图片对象交由图片合并工具保存到列表中。

def screen_shot(self):
    """Grab the web view's window, save it as temp.png and hand it to the merger.

    The file is written into the merger's ``root_path`` and then registered
    through ``self.ssm.add_im``.
    """
    primary = QApplication.primaryScreen()
    snapshot = primary.grabWindow(int(self.browser.winId()))
    target = '{}/temp.png'.format(self.ssm.root_path)
    snapshot.save(target)
    self.ssm.add_im(target)

5:截图合并工具,在每次截图完成后将图片对象保存,完成余量截图的重绘和截图的合并。

class ScreenShotMerge():
  """Collect the screenshots of one page and stitch them into one long image.

  Screenshots are registered in capture order with add_im; image_merge then
  stacks them top to bottom and writes ``temp/merge.png`` next to this file.
  """

  # Pixels trimmed from the right edge when several screenshots are merged.
  # Presumably the width of the vertical scrollbar -- TODO confirm.
  SCROLLBAR_WIDTH = 15

  def __init__(self, page, over_flow_size):
    """Store the capture plan and prepare the output directory.

    page: number of screenshots that are kept whole; the one added after
      them is cropped to the remainder.
    over_flow_size: height in pixels kept from the bottom of that last
      screenshot.
    """
    self.im_list = []
    self.page = page
    self.over_flow_size = over_flow_size
    self.get_path()

  def get_path(self):
    """Create the ``temp`` directory beside this file and set root_path / save_path."""
    self.root_path = Path(__file__).parent.joinpath('temp')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    self.root_path.mkdir(parents=True, exist_ok=True)
    self.save_path = self.root_path.joinpath('merge.png')

  def add_im(self, path):
    """Load the screenshot at ``path``, save a numbered copy and keep it in im_list.

    Once ``page`` images have been collected, the next one is treated as the
    remainder and cropped by reedit_image.
    """
    if len(self.im_list) == self.page:
      im = self.reedit_image(path)
    else:
      im = Image.open(path)
    numbered = self.root_path.joinpath('{}.png'.format(len(self.im_list) + 1))
    im.save(numbered)
    self.im_list.append(im)

  def get_new_size(self):
    """Return (widest width, summed height) of the collected images; (0, 0) if none."""
    max_width = max((img.size[0] for img in self.im_list), default=0)
    total_height = sum(img.size[1] for img in self.im_list)
    return max_width, total_height

  def image_merge(self, ):
    """Stack the collected images vertically and save the result to save_path.

    With more than one image the canvas is SCROLLBAR_WIDTH pixels narrower
    than the widest image; a single image keeps its full width.

    Raises:
      ValueError: if no image has been added.
    """
    if not self.im_list:
      raise ValueError('no screenshots to merge')
    canvas_width, canvas_height = self.get_new_size()
    if len(self.im_list) > 1:
      canvas_width -= self.SCROLLBAR_WIDTH
    # Blank white canvas. Pillow documents a tuple (one value per band) as the
    # colour for multi-band modes; the bare integer 255 is not that.
    new_img = Image.new('RGB', (canvas_width, canvas_height), (255, 255, 255))
    y = 0
    for img in self.im_list:
      new_img.paste(img, (0, y))
      y += img.size[1]
    new_img.save(self.save_path)
    print('截图成功:', self.save_path)

  def reedit_image(self, path):
    """Open the image at ``path`` and return only its bottom over_flow_size pixels."""
    obj = Image.open(path)
    width, height = obj.size
    return obj.crop((0, height - self.over_flow_size, width, height))

截图功能完整代码

#!/usr/bin/env python
# -*- coding:UTF-8 -*-
# Author:Leslie-x
import sys
from PyQt5.QtCore import *
from PyQt5.QtWidgets import *
from PyQt5.QtWebEngineWidgets import *
from PIL import Image
from pathlib import Path
class ScreenShotMerge():
  """Collect the screenshots of one page and stitch them into one long image.

  Screenshots are registered in capture order with add_im; image_merge then
  stacks them top to bottom and writes ``temp/merge.png`` next to this file.
  """

  # Pixels trimmed from the right edge when several screenshots are merged.
  # Presumably the width of the vertical scrollbar -- TODO confirm.
  SCROLLBAR_WIDTH = 15

  def __init__(self, page, over_flow_size):
    """Store the capture plan and prepare the output directory.

    page: number of screenshots that are kept whole; the one added after
      them is cropped to the remainder.
    over_flow_size: height in pixels kept from the bottom of that last
      screenshot.
    """
    self.im_list = []
    self.page = page
    self.over_flow_size = over_flow_size
    self.get_path()

  def get_path(self):
    """Create the ``temp`` directory beside this file and set root_path / save_path."""
    self.root_path = Path(__file__).parent.joinpath('temp')
    # exist_ok avoids the check-then-create race of a separate exists() test.
    self.root_path.mkdir(parents=True, exist_ok=True)
    self.save_path = self.root_path.joinpath('merge.png')

  def add_im(self, path):
    """Load the screenshot at ``path``, save a numbered copy and keep it in im_list.

    Once ``page`` images have been collected, the next one is treated as the
    remainder and cropped by reedit_image.
    """
    if len(self.im_list) == self.page:
      im = self.reedit_image(path)
    else:
      im = Image.open(path)
    numbered = self.root_path.joinpath('{}.png'.format(len(self.im_list) + 1))
    im.save(numbered)
    self.im_list.append(im)

  def get_new_size(self):
    """Return (widest width, summed height) of the collected images; (0, 0) if none."""
    max_width = max((img.size[0] for img in self.im_list), default=0)
    total_height = sum(img.size[1] for img in self.im_list)
    return max_width, total_height

  def image_merge(self, ):
    """Stack the collected images vertically and save the result to save_path.

    With more than one image the canvas is SCROLLBAR_WIDTH pixels narrower
    than the widest image; a single image keeps its full width.

    Raises:
      ValueError: if no image has been added.
    """
    if not self.im_list:
      raise ValueError('no screenshots to merge')
    canvas_width, canvas_height = self.get_new_size()
    if len(self.im_list) > 1:
      canvas_width -= self.SCROLLBAR_WIDTH
    # Blank white canvas. Pillow documents a tuple (one value per band) as the
    # colour for multi-band modes; the bare integer 255 is not that.
    new_img = Image.new('RGB', (canvas_width, canvas_height), (255, 255, 255))
    y = 0
    for img in self.im_list:
      new_img.paste(img, (0, y))
      y += img.size[1]
    new_img.save(self.save_path)
    print('截图成功:', self.save_path)

  def reedit_image(self, path):
    """Open the image at ``path`` and return only its bottom over_flow_size pixels."""
    obj = Image.open(path)
    width, height = obj.size
    return obj.crop((0, height - self.over_flow_size, width, height))
class MainWindow(QMainWindow):
  """Frameless window that loads a URL in a web view and captures it as one long image.

  After the page has loaded it is photographed one window height at a time
  from a timer, scrolling between shots, and the parts are merged by
  ScreenShotMerge.
  """

  def __init__(self, parent=None):
    """Set the window title and window flags; no page is loaded yet."""
    super(MainWindow, self).__init__(parent)
    self.setWindowTitle('易哈佛')
    self.temp_height = 0
    self.setWindowFlag(Qt.WindowMinMaxButtonsHint, False)  # no minimize / maximize buttons
    # self.setWindowFlag(Qt.WindowStaysOnTopHint, True)  # keep the window on top
    self.setWindowFlag(Qt.FramelessWindowHint, True)  # frameless window

  def urlScreenShot(self, url):
    """Load ``url`` in a new web view and call check_page once loading finishes."""
    self.browser = QWebEngineView()
    self.browser.load(QUrl(url))
    self.setGeometry(self.chose_screen())
    self.browser.loadFinished.connect(self.check_page)
    self.setCentralWidget(self.browser)

  def get_page_size(self):
    """Return (width, height) of the page contents and cache them on the window."""
    size = self.browser.page().contentsSize()
    self.set_height = size.height()
    self.set_width = size.width()
    return size.width(), size.height()

  def chose_screen(self):
    """Return a 750x1370 rectangle at the top-left of the first screen that can hold it.

    Falls back to a rectangle at the origin (0, 0) when no screen's available
    area is strictly larger than the target size.
    """
    width, height = 750, 1370
    desktop = QApplication.desktop()
    for i in range(desktop.screenCount()):
      rect = desktop.availableGeometry(i)
      if rect.width() > width and rect.height() > height:
        return QRect(rect.left(), rect.top(), width, height)
    return QRect(0, 0, width, height)

  def check_page(self):
    """Plan the capture after the page has loaded and start the capture timer.

    The page height is split into whole window heights (``self.page``) plus a
    remainder (``self.over_flow_size``). A ScreenShotMerge helper is created
    for that plan and a timer with a 400 interval is started whose timeout
    runs exe_command.
    """
    _, p_height = self.get_page_size()
    self.page, self.over_flow_size = divmod(p_height, self.height())
    if self.page == 0:
      # The page is shorter than the window: a single screenshot already
      # shows all of it, so there must be no remainder strip. Leaving the
      # remainder set would capture and merge a duplicated strip.
      self.page = 1
      self.over_flow_size = 0
    self.ssm = ScreenShotMerge(self.page, self.over_flow_size)
    self.timer = QTimer(self)
    self.timer.timeout.connect(self.exe_command)
    self.timer.setInterval(400)
    self.timer.start()

  def exe_command(self):
    """Run one capture step; connected to the capture timer's timeout.

    While ``self.page`` is positive a screenshot is taken and the page is
    scrolled. When it reaches zero the remainder strip is captured if
    ``self.over_flow_size`` is positive. Once it is negative the timer is
    stopped, the images are merged and the window is closed. ``self.page``
    is decremented on every call.
    """
    if self.page > 0:
      self.screen_shot()
      self.run_js()
    elif self.page < 0:
      self.timer.stop()
      self.ssm.image_merge()
      self.close()
    elif self.over_flow_size > 0:
      self.screen_shot()
    self.page -= 1

  def run_js(self):
    """Scroll the loaded page down by one window height.

    Builds a JavaScript snippet that defines a ``scroll`` helper and calls it
    with ``self.height()``, then runs it in the web view's page.
    """
    script = """
      var scroll = function (dHeight) {
      var t = document.documentElement.scrollTop
      var h = document.documentElement.scrollHeight
      dHeight = dHeight || 0
      var current = t + dHeight
      if (current > h) {
        window.scrollTo(0, document.documentElement.clientHeight)
       } else {
        window.scrollTo(0, current)
       }
      }
    """
    command = script + '\n scroll({})'.format(self.height())
    self.browser.page().runJavaScript(command)

  def screen_shot(self):
    """Grab the web view's window, save it as temp.png and hand it to the merger."""
    screen = QApplication.primaryScreen()
    pix = screen.grabWindow(int(self.browser.winId()))
    name = '{}/temp.png'.format(self.ssm.root_path)
    pix.save(name)
    self.ssm.add_im(name)

if __name__ == '__main__':
  # Page to capture.
  url = 'http://blog.sina.com.cn/lm/rank/focusbang//'
  app = QApplication(sys.argv)
  win = MainWindow()
  win.urlScreenShot(url)
  win.show()
  # Propagate the event loop's return code as the process exit status.
  # app.exit() only asks a running event loop to stop, so calling it after
  # exec_() has already returned would drop the code.
  sys.exit(app.exec_())

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持三水点靠木。

Python 相关文章推荐
python使用Berkeley DB数据库实例
Sep 26 Python
使用Python脚本将绝对url替换为相对url的教程
Apr 24 Python
python转换字符串为摩尔斯电码的方法
Jul 06 Python
python发送HTTP请求的方法小结
Jul 08 Python
Python学习小技巧之利用字典的默认行为
May 20 Python
如何使用 Pylint 来规范 Python 代码风格(来自IBM)
Apr 06 Python
Python从使用线程到使用async/await的深入讲解
Sep 16 Python
Django中间件基础用法详解
Jul 18 Python
Python爬虫 批量爬取下载抖音视频代码实例
Aug 16 Python
python 爬取古诗文存入mysql数据库的方法
Jan 08 Python
Keras实现将两个模型连接到一起
May 23 Python
Python内置类型集合set和frozenset的使用详解
Apr 26 Python
python实现知乎高颜值图片爬取
Aug 12 #Python
python3 enum模块的应用实例详解
Aug 12 #Python
Python一键查找iOS项目中未使用的图片、音频、视频资源
Aug 12 #Python
django+echart数据动态显示的例子
Aug 12 #Python
Flask框架学习笔记之使用Flask实现表单开发详解
Aug 12 #Python
Flask框架学习笔记之表单基础介绍与表单提交方式
Aug 12 #Python
python内存管理机制原理详解
Aug 12 #Python
You might like
php curl请求信息和返回信息设置代码实例
2015/04/27 PHP
为你的 Laravel 验证器加上多验证场景的实现
2020/04/07 PHP
javascript 构建一个xmlhttp对象池合理创建和使用xmlhttp对象
2010/01/15 Javascript
25个优雅的jQuery Tooltip插件推荐
2011/05/25 Javascript
JS实现点击按钮后框架内载入不同网页的方法
2015/05/05 Javascript
javascript实现网页子页面遍历回调的方法(涉及 window.frames、递归函数、函数上下文)
2015/07/27 Javascript
jquery mobile开发常见问题分析
2016/01/21 Javascript
jQuery Ajax传值到Servlet出现乱码问题的解决方法
2016/10/09 Javascript
基于node.js依赖express解析post请求四种数据格式
2017/02/13 Javascript
webpack-dev-server远程访问配置方法
2018/02/22 Javascript
vue cli 3.0 使用全过程解析
2018/06/14 Javascript
vue+vue-router转场动画的实例代码
2018/09/01 Javascript
详解如何搭建mpvue框架搭配vant组件库的小程序项目
2019/05/16 Javascript
在JavaScript中使用严格模式(Strict Mode)
2019/06/13 Javascript
前端Electron新手入门教程详解
2019/06/21 Javascript
es6数组的flat(),flatMap()函数用法实例分析
2020/04/18 Javascript
Vue基于iview table展示图片实现点击放大
2020/08/05 Javascript
Django跨域请求CSRF的方法示例
2018/11/11 Python
Python实现随机创建电话号码的方法示例
2018/12/07 Python
python基于gevent实现并发下载器代码实例
2019/11/01 Python
python构造函数init实例方法解析
2020/01/19 Python
Python AutoCAD 系统设置的实现方法
2020/04/01 Python
django ObjectDoesNotExist 和 DoesNotExist的用法
2020/07/09 Python
如何在scrapy中捕获并处理各种异常
2020/09/28 Python
国际领先的学术出版商:Springer
2017/01/11 全球购物
洗煤厂厂长岗位职责
2014/01/03 职场文书
共产党员公开承诺书范文
2014/03/28 职场文书
房屋继承公证书
2014/04/10 职场文书
竞选卫生委员演讲稿
2014/04/28 职场文书
秋天的怀念教学反思
2014/04/28 职场文书
珍惜时间演讲稿
2014/05/14 职场文书
民主生活会对照检查材料思想汇报
2014/09/27 职场文书
2014年教研员工作总结
2014/12/23 职场文书
企业催款函范本
2015/06/24 职场文书
部分武汉产收音机展览
2022/04/07 无线电
css3手动实现pc端横向滚动
2022/06/21 HTML / CSS