python3+PyQt5实现支持多线程的页面索引器应用程序


Posted in Python on April 20, 2018

本文通过Python3+PyQt5实现了《Python Qt GUI快速编程》第19章的页面索引器应用程序例子。

/home/yrd/eric_workspace/chap19/walker_ans.py

#!/usr/bin/env python3

import codecs
import html.entities
import re
import sys
from PyQt5.QtCore import (QMutex, QThread,pyqtSignal,Qt)

class Walker(QThread):
    """Worker thread that indexes the words found in a batch of HTML files.

    Each file is read as UTF-8 (undecodable bytes are ignored), stripped of
    tags, has its character entities decoded, is lower-cased and split into
    words.  Acceptable words are recorded in the shared ``filenamesForWords``
    mapping; a word whose file set has grown beyond
    ``COMMON_WORDS_THRESHOLD`` is moved to the shared ``commonWords`` set
    instead.  Every access to the two shared containers is guarded by
    ``lock``.

    Signals:
        finished(bool, int): emitted at the end of run() with the
            ``completed`` flag and this walker's ``index``.
        indexed(str, int): emitted after each file has been indexed, with
            the file name and this walker's ``index``.
    """

    # NOTE: this attribute reuses the name of QThread's built-in finished()
    # signal, so ``walker.finished`` refers to this (bool, int) signal.
    finished = pyqtSignal(bool, int)
    indexed = pyqtSignal(str, int)
    # A word whose file set is already larger than this is treated as common.
    COMMON_WORDS_THRESHOLD = 250
    # Only words whose length lies in [MIN_WORD_LEN, MAX_WORD_LEN] are indexed.
    MIN_WORD_LEN = 3
    MAX_WORD_LEN = 25
    # Words starting or ending with one of these characters are skipped.
    INVALID_FIRST_OR_LAST = frozenset("0123456789_")
    STRIPHTML_RE = re.compile(r"<[^>]*?>", re.IGNORECASE|re.MULTILINE)
    ENTITY_RE = re.compile(r"&(\w+?);|&#(\d+?);")
    SPLIT_RE = re.compile(r"\W+", re.IGNORECASE|re.MULTILINE)

    def __init__(self, index, lock, files, filenamesForWords,
                 commonWords, parent=None):
        """Store the work description; nothing is processed until start().

        Args:
            index: Identifier of this walker, passed back in both signals.
            lock: Object providing lockForRead(), lockForWrite() and
                unlock(); it guards ``filenamesForWords`` and
                ``commonWords``.
            files: Iterable of file names to index.
            filenamesForWords: Shared mapping of word -> set of file names.
                It must create an empty set for an unseen word on item
                access (for example ``collections.defaultdict(set)``).
            commonWords: Shared set of words considered too common to index.
            parent: Optional parent QObject.
        """
        super(Walker, self).__init__(parent)
        self.index = index
        self.lock = lock
        self.files = files
        self.filenamesForWords = filenamesForWords
        self.commonWords = commonWords
        self.stopped = False
        # Guards ``stopped``, which is written by stop() and read by the
        # worker loop through isStopped().
        self.mutex = QMutex()
        self.completed = False

    def stop(self):
        """Ask the worker loop to return at its next check point."""
        # Acquire before the try block so that unlock() only ever runs on a
        # mutex that has actually been locked.
        self.mutex.lock()
        try:
            self.stopped = True
        finally:
            self.mutex.unlock()

    def isStopped(self):
        """Return True once stop() has been called."""
        self.mutex.lock()
        try:
            return self.stopped
        finally:
            self.mutex.unlock()

    def run(self):
        """Thread entry point: index the files, then emit ``finished``."""
        self.processFiles()
        self.stop()
        self.finished.emit(self.completed, self.index)

    def processFiles(self):
        """Index every file in ``self.files`` unless stopped first.

        ``self.completed`` is set to True only when the loop runs to the end
        without being interrupted by stop().  Files that cannot be read are
        reported on stderr and skipped.
        """
        for fname in self.files:
            if self.isStopped():
                return
            text = self._readText(fname)
            if text is None:
                continue
            if self.isStopped():
                return
            words = self._extractWords(text)
            if self.isStopped():
                return
            self._indexWords(fname, words)
            self.indexed.emit(fname, self.index)
        self.completed = True

    @staticmethod
    def _charFromEntity(match):
        """Return the character for one ENTITY_RE match, or "" if unknown."""
        text = match.group(match.lastindex)
        if text.isdigit():
            try:
                return chr(int(text))
            except (ValueError, OverflowError):
                # Numeric reference outside the valid code point range
                # (e.g. "&#99999999;"): drop it, as for unknown names.
                return ""
        codepoint = html.entities.name2codepoint.get(text)
        return chr(codepoint) if codepoint is not None else ""

    def _readText(self, fname):
        """Return the decoded content of *fname*, or None if it is unreadable."""
        try:
            with open(fname, "r", encoding="utf-8", errors="ignore") as fh:
                return fh.read()
        except EnvironmentError as e:
            sys.stderr.write("Error: {0}\n".format(e))
            return None

    def _extractWords(self, text):
        """Return the set of indexable, not-yet-common words found in *text*."""
        text = self.STRIPHTML_RE.sub("", text)
        text = self.ENTITY_RE.sub(self._charFromEntity, text)
        invalid = self.INVALID_FIRST_OR_LAST
        candidates = {
            word for word in self.SPLIT_RE.split(text.lower())
            if (self.MIN_WORD_LEN <= len(word) <= self.MAX_WORD_LEN
                and word[0] not in invalid
                and word[-1] not in invalid)
        }
        # One read lock for the whole file rather than one per word.
        self.lock.lockForRead()
        try:
            return {word for word in candidates
                    if word not in self.commonWords}
        finally:
            self.lock.unlock()

    def _indexWords(self, fname, words):
        """Record *fname* against each of *words* in the shared index."""
        for word in words:
            # The write lock is taken per word so that readers can
            # interleave with a long indexing run.
            self.lock.lockForWrite()
            try:
                if word in self.commonWords:
                    # Another walker promoted this word since it was
                    # filtered; do not re-create its index entry.
                    continue
                files = self.filenamesForWords[word]
                if len(files) > self.COMMON_WORDS_THRESHOLD:
                    del self.filenamesForWords[word]
                    self.commonWords.add(word)
                else:
                    files.add(str(fname))
            finally:
                self.lock.unlock()


/home/yrd/eric_workspace/chap19/pageindexer_ans.pyw

#!/usr/bin/env python3

import collections
import os
import sys
from PyQt5.QtCore import (QDir, QReadWriteLock, QMutex,Qt)
from PyQt5.QtWidgets import (QApplication, QDialog, QFileDialog, QFrame,
        QHBoxLayout, QLCDNumber, QLabel, QLineEdit, QListWidget,
        QPushButton, QVBoxLayout)
import walker_ans as walker


def isAlive(qobj):
    """Return True if the C++ object wrapped by *qobj* still exists.

    ``sip.unwrapinstance()`` raises RuntimeError when the underlying C++
    object has already been deleted (for example after ``deleteLater()``
    has run), which is reported here as False.
    """
    try:
        # PyQt5 5.11 and later ship sip as a private PyQt5 submodule.
        from PyQt5 import sip
    except ImportError:
        # Older PyQt5 releases use the stand-alone top-level sip module.
        import sip
    try:
        sip.unwrapinstance(qobj)
    except RuntimeError:
        return False
    return True


class Form(QDialog):
    """Dialog that indexes the HTML files under a directory by word.

    The user picks a directory; the ``.htm``/``.html`` files found beneath
    it are handed out in batches to ``walker.Walker`` threads which fill
    the shared ``filenamesForWords`` and ``commonWords`` containers.  While
    indexing runs, the user can look up a word and see which files
    contain it.
    """

    # Number of files handed to each Walker thread.
    FILES_PER_WALKER = 1000

    def __init__(self, parent=None):
        """Build the widgets and layouts and connect the UI signals."""
        super(Form, self).__init__(parent)

        # Serialises the status/LCD/list widget updates and ``fileCount``.
        # NOTE(review): the slots using it appear to run on the GUI thread
        # only, so this mutex may be redundant -- confirm before removing.
        self.mutex = QMutex()
        self.fileCount = 0
        # word -> set of file names containing it; shared with the walkers.
        self.filenamesForWords = collections.defaultdict(set)
        # Words that the walkers decided are too common to index.
        self.commonWords = set()
        # Guards ``filenamesForWords`` and ``commonWords``.
        self.lock = QReadWriteLock()
        self.path = QDir.homePath()

        pathLabel = QLabel("Indexing path:")
        self.pathLabel = QLabel()
        self.pathLabel.setFrameStyle(QFrame.StyledPanel|QFrame.Sunken)
        self.pathButton = QPushButton("Set &Path...")
        self.pathButton.setAutoDefault(False)
        findLabel = QLabel("&Find word:")
        self.findEdit = QLineEdit()
        findLabel.setBuddy(self.findEdit)
        commonWordsLabel = QLabel("&Common words:")
        self.commonWordsListWidget = QListWidget()
        commonWordsLabel.setBuddy(self.commonWordsListWidget)
        filesLabel = QLabel("Files containing the &word:")
        self.filesListWidget = QListWidget()
        filesLabel.setBuddy(self.filesListWidget)
        filesIndexedLabel = QLabel("Files indexed")
        self.filesIndexedLCD = QLCDNumber()
        self.filesIndexedLCD.setSegmentStyle(QLCDNumber.Flat)
        wordsIndexedLabel = QLabel("Words indexed")
        self.wordsIndexedLCD = QLCDNumber()
        self.wordsIndexedLCD.setSegmentStyle(QLCDNumber.Flat)
        commonWordsLCDLabel = QLabel("Common words")
        self.commonWordsLCD = QLCDNumber()
        self.commonWordsLCD.setSegmentStyle(QLCDNumber.Flat)
        self.statusLabel = QLabel("Click the 'Set Path' "
                                  "button to start indexing")
        self.statusLabel.setFrameStyle(QFrame.StyledPanel|QFrame.Sunken)

        topLayout = QHBoxLayout()
        topLayout.addWidget(pathLabel)
        topLayout.addWidget(self.pathLabel, 1)
        topLayout.addWidget(self.pathButton)
        topLayout.addWidget(findLabel)
        topLayout.addWidget(self.findEdit, 1)
        leftLayout = QVBoxLayout()
        leftLayout.addWidget(filesLabel)
        leftLayout.addWidget(self.filesListWidget)
        rightLayout = QVBoxLayout()
        rightLayout.addWidget(commonWordsLabel)
        rightLayout.addWidget(self.commonWordsListWidget)
        middleLayout = QHBoxLayout()
        middleLayout.addLayout(leftLayout, 1)
        middleLayout.addLayout(rightLayout)
        bottomLayout = QHBoxLayout()
        bottomLayout.addWidget(filesIndexedLabel)
        bottomLayout.addWidget(self.filesIndexedLCD)
        bottomLayout.addWidget(wordsIndexedLabel)
        bottomLayout.addWidget(self.wordsIndexedLCD)
        bottomLayout.addWidget(commonWordsLCDLabel)
        bottomLayout.addWidget(self.commonWordsLCD)
        bottomLayout.addStretch()
        layout = QVBoxLayout()
        layout.addLayout(topLayout)
        layout.addLayout(middleLayout)
        layout.addLayout(bottomLayout)
        layout.addWidget(self.statusLabel)
        self.setLayout(layout)

        # Walker threads of the current run and, at the same positions,
        # whether each of them has reported that it finished.
        self.walkers = []
        self.completed = []
        self.pathButton.clicked.connect(self.setPath)
        self.findEdit.returnPressed.connect(self.find)
        self.setWindowTitle("Page Indexer")

    def _setStatus(self, text):
        """Show *text* in the status label while holding ``self.mutex``."""
        self.mutex.lock()
        try:
            self.statusLabel.setText(text)
        finally:
            self.mutex.unlock()

    def stopWalkers(self):
        """Stop every running walker, wait for it, and forget all walkers."""
        # Ask all threads to stop first so that they wind down in parallel,
        # then wait for each one.  isAlive() skips wrappers whose C++ object
        # has already been deleted.
        for thread in self.walkers:
            if isAlive(thread) and thread.isRunning():
                thread.stop()
        for thread in self.walkers:
            if isAlive(thread) and thread.isRunning():
                thread.wait()
        self.walkers = []
        self.completed = []

    def setPath(self):
        """Let the user choose a directory and start indexing it.

        Any indexing already in progress is stopped first.  The HTML files
        found are handed to new walker threads in batches of
        ``FILES_PER_WALKER``.
        """
        self.stopWalkers()
        self.pathButton.setEnabled(False)
        path = QFileDialog.getExistingDirectory(self,
                "Choose a Path to Index", self.path)
        if not path:
            self.statusLabel.setText("Click the 'Set Path' "
                                     "button to start indexing")
            self.pathButton.setEnabled(True)
            return
        self.statusLabel.setText("Scanning directories...")
        QApplication.processEvents() # Needed for Windows
        self.path = QDir.toNativeSeparators(path)
        self.findEdit.setFocus()
        self.pathLabel.setText(self.path)
        self.statusLabel.clear()
        self.filesListWidget.clear()
        # The index is rebuilt from scratch below, so the common words of
        # the previous run must not stay on screen either.
        self.commonWordsListWidget.clear()
        self.fileCount = 0
        self.filenamesForWords = collections.defaultdict(set)
        self.commonWords = set()

        batch = []
        index = 0
        started = False
        for root, dirs, fnames in os.walk(str(self.path)):
            for name in fnames:
                if not name.endswith((".htm", ".html")):
                    continue
                batch.append(os.path.join(root, name))
                if len(batch) == self.FILES_PER_WALKER:
                    self.processFiles(index, batch)
                    batch = []
                    index += 1
                    started = True
        if batch:
            self.processFiles(index, batch)
            started = True
        if not started:
            self.finishedIndexing()
            self.statusLabel.setText(
                    "No HTML files found in the given path")

    def processFiles(self, index, files):
        """Create, register and start a walker thread for *files*.

        Args:
            index: Position of the new walker in ``self.walkers`` and
                ``self.completed``; the walker passes it back in its
                signals.
            files: List of file names the walker should index.
        """
        thread = walker.Walker(index, self.lock, files,
                self.filenamesForWords, self.commonWords, self)
        thread.indexed[str,int].connect(self.indexed)
        thread.finished[bool,int].connect(self.finished)
        thread.finished.connect(thread.deleteLater)
        self.walkers.append(thread)
        self.completed.append(False)
        thread.start()
        thread.wait(300) # Needed for Windows

    def find(self):
        """Look up the first word in the find box and list its files."""
        # split() copes with leading/trailing blanks and with input that
        # consists of whitespace only (which yields no terms at all).
        terms = str(self.findEdit.text()).split()
        if not terms:
            self._setStatus("Enter a word to find in files")
            return
        word = terms[0].lower()
        self.mutex.lock()
        try:
            self.statusLabel.clear()
            self.filesListWidget.clear()
        finally:
            self.mutex.unlock()
        self.lock.lockForRead()
        try:
            isCommon = word in self.commonWords
            # .get() is used so that a lookup never adds an empty entry to
            # the defaultdict; the copy is taken while the lock is held.
            files = set(self.filenamesForWords.get(word, ()))
        finally:
            self.lock.unlock()
        if isCommon:
            self._setStatus("Common words like '{0}' "
                            "are not indexed".format(word))
            return
        if not files:
            self._setStatus("No indexed file contains "
                            "the word '{0}'".format(word))
            return
        names = [QDir.toNativeSeparators(name) for name in
                 sorted(files, key=str.lower)]
        self.mutex.lock()
        try:
            self.filesListWidget.addItems(names)
            self.statusLabel.setText(
                    "{0} indexed files contain the word '{1}'".format(
                    len(names), word))
        finally:
            self.mutex.unlock()

    def indexed(self, fname, index):
        """Slot for Walker.indexed: count the file and refresh the display.

        The LCDs are refreshed on every 25th file; the common-words list is
        refreshed on every 101st file that is not also a 25th one.
        """
        self.mutex.lock()
        try:
            self.statusLabel.setText(fname)
            self.fileCount += 1
            count = self.fileCount
        finally:
            self.mutex.unlock()
        if count % 25 == 0:
            self.lock.lockForRead()
            try:
                indexedWordCount = len(self.filenamesForWords)
                commonWordCount = len(self.commonWords)
            finally:
                self.lock.unlock()
            self.mutex.lock()
            try:
                self.filesIndexedLCD.display(count)
                self.wordsIndexedLCD.display(indexedWordCount)
                self.commonWordsLCD.display(commonWordCount)
            finally:
                self.mutex.unlock()
        elif count % 101 == 0:
            self.lock.lockForRead()
            try:
                words = self.commonWords.copy()
            finally:
                self.lock.unlock()
            self.mutex.lock()
            try:
                self.commonWordsListWidget.clear()
                self.commonWordsListWidget.addItems(sorted(words))
            finally:
                self.mutex.unlock()

    def finished(self, completed, index):
        """Slot for Walker.finished: note that walker *index* is done.

        When every walker of the current run has reported in (or when there
        are no walkers registered any more), the status shows "Finished"
        and the final counts are displayed.
        """
        if self.walkers:
            # Ignore an index that does not belong to the current run
            # instead of raising IndexError.
            if 0 <= index < len(self.completed):
                self.completed[index] = True
            if not all(self.completed):
                return
        self._setStatus("Finished")
        self.finishedIndexing()

    def reject(self):
        """Cancel indexing if it is still running; otherwise close."""
        if not all(self.completed):
            self.stopWalkers()
            self.finishedIndexing()
        else:
            self.accept()

    def closeEvent(self, event=None):
        """Stop all walker threads before the dialog closes."""
        self.stopWalkers()

    def finishedIndexing(self):
        """Show the final counts and re-enable the 'Set Path' button."""
        self.filesIndexedLCD.display(self.fileCount)
        self.wordsIndexedLCD.display(len(self.filenamesForWords))
        self.commonWordsLCD.display(len(self.commonWords))
        self.pathButton.setEnabled(True)
        QApplication.processEvents() # Needed for Windows


# Launch the dialog only when this file is run as a script, and hand the
# event loop's return value to the interpreter as the process exit status.
if __name__ == "__main__":
    app = QApplication(sys.argv)
    form = Form()
    form.show()
    sys.exit(app.exec_())

运行结果:

python3+PyQt5实现支持多线程的页面索引器应用程序

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持三水点靠木。

Python 相关文章推荐
在Django的上下文中设置变量的方法
Jul 20 Python
详解Python中类的定义与使用
Apr 11 Python
Python 12306抢火车票脚本 Python京东抢手机脚本
Feb 06 Python
Python pyinotify模块实现对文档的实时监控功能方法
Oct 13 Python
在pycharm中python切换解释器失败的解决方法
Oct 29 Python
使用PM2+nginx部署python项目的方法示例
Nov 07 Python
Python判断对象是否相等及eq函数的讲解
Feb 25 Python
python批量图片处理简单示例
Aug 06 Python
在PyTorch中Tensor的查找和筛选例子
Aug 18 Python
使用python 将图片复制到系统剪贴中
Dec 13 Python
Python如何在单元测试中给对象打补丁
Aug 03 Python
浅析python字符串前加r、f、u、l 的区别
Jan 24 Python
python3+PyQt5+Qt Designer实现扩展对话框
Apr 20 #Python
pandas获取groupby分组里最大值所在的行方法
Apr 20 #Python
pandas多级分组实现排序的方法
Apr 20 #Python
Python PyQt4实现QQ抽屉效果
Apr 20 #Python
Python在groupby分组后提取指定位置记录方法
Apr 20 #Python
PyQt实现界面翻转切换效果
Apr 20 #Python
python3+PyQt5+Qt Designer实现堆叠窗口部件
Apr 20 #Python
You might like
在Windows系统下使用PHP生成Word文档的教程
2015/07/03 PHP
js 小数取整的函数
2010/05/10 Javascript
js正则表达exec与match的区别说明
2014/01/29 Javascript
php实例分享之实现显示网站运行时间
2014/05/20 Javascript
JQuery实现展开关闭层的方法
2015/02/17 Javascript
javascript中获取class的简单实现
2016/07/12 Javascript
IntersectionObserver API 详解篇
2016/12/11 Javascript
详解node HTTP请求客户端 - Request
2017/05/05 Javascript
Vue路由跳转问题记录详解
2017/06/15 Javascript
Vue父组件调用子组件事件方法
2018/02/23 Javascript
vue-cli 3.x配置跨域代理的实现方法
2019/04/12 Javascript
详解es6新增数组方法简便了哪些操作
2019/05/09 Javascript
vue如何自动化打包测试环境和正式环境的dist/test文件
2019/06/06 Javascript
Vue+ElementUI table实现表格分页
2019/12/14 Javascript
解决elementui表格操作列自适应列宽
2020/12/28 Javascript
Python实现在matplotlib中两个坐标轴之间画一条直线光标的方法
2015/05/20 Python
Python的条件语句与运算符优先级详解
2015/10/13 Python
编写Python爬虫抓取豆瓣电影TOP100及用户头像的方法
2016/01/20 Python
Linux上安装Python的PIL和Pillow库处理图片的实例教程
2016/06/23 Python
Python删除Java源文件中全部注释的实现方法
2017/08/30 Python
Python基于pygame模块播放MP3的方法示例
2017/09/30 Python
Python 2/3下处理cjk编码的zip文件的方法
2019/04/26 Python
在python中,使用scatter绘制散点图的实例
2019/07/03 Python
Django之PopUp的具体实现方法
2019/08/31 Python
Tensorflow 自定义loss的情况下初始化部分变量方式
2020/01/06 Python
在python中修改.properties文件的操作
2020/04/08 Python
美国高端牛仔品牌:Silver Jeans
2019/12/12 全球购物
印度第一网上礼品店:IGP.com
2020/02/06 全球购物
澳大利亚有机化妆品网上商店:The Well Store
2020/02/20 全球购物
印刷工程专业应届生求职信
2013/09/29 职场文书
生物学学生自我评价
2014/01/17 职场文书
注册资产评估专业求职信
2014/07/16 职场文书
自我管理的活动方案
2014/08/25 职场文书
单位工作证明范文
2014/09/14 职场文书
商务宴请邀请函范文
2015/02/02 职场文书
党章学习心得体会2016
2016/01/14 职场文书