python3+PyQt5实现支持多线程的页面索引器应用程序


Posted in Python on April 20, 2018

本文通过Python3+PyQt5实现了《Python Qt GUI快速编程》第19章的页面索引器应用程序例子。

/home/yrd/eric_workspace/chap19/walker_ans.py

#!/usr/bin/env python3

import codecs
import html.entities
import re
import sys
from PyQt5.QtCore import (QMutex, QThread,pyqtSignal,Qt)

class Walker(QThread):
    """Worker thread that indexes the words found in a batch of HTML files.

    Each file is read as UTF-8 (undecodable bytes ignored), stripped of
    tags, has its character entities decoded, is lower-cased and split into
    words.  Acceptable words are recorded in the shared
    ``filenamesForWords`` mapping; a word found in more than
    ``COMMON_WORDS_THRESHOLD`` files is moved to the shared ``commonWords``
    set instead.  Access to both shared containers is guarded by ``lock``.
    """

    # Emitted once at the end of run() with (completed, index).
    # NOTE: this name shadows QThread's own ``finished`` signal on Walker
    # instances, and it is emitted while run() is still executing.
    finished = pyqtSignal(bool, int)
    # Emitted after each file has been indexed with (filename, index).
    indexed = pyqtSignal(str, int)

    COMMON_WORDS_THRESHOLD = 250
    MIN_WORD_LEN = 3
    MAX_WORD_LEN = 25
    INVALID_FIRST_OR_LAST = frozenset("0123456789_")
    STRIPHTML_RE = re.compile(r"<[^>]*?>", re.IGNORECASE|re.MULTILINE)
    ENTITY_RE = re.compile(r"&(\w+?);|&#(\d+?);")
    SPLIT_RE = re.compile(r"\W+", re.IGNORECASE|re.MULTILINE)

    def __init__(self, index, lock, files, filenamesForWords,
                 commonWords, parent=None):
        """Store the shared state this walker operates on.

        index             -- identifier echoed back in both signals
        lock              -- read/write lock guarding the two shared
                             containers (must offer lockForRead(),
                             lockForWrite() and unlock())
        files             -- iterable of file names to index
        filenamesForWords -- shared mapping word -> set of file names; it is
                             indexed with unseen words, so it is expected to
                             create missing entries (e.g. defaultdict(set))
        commonWords       -- shared set of words excluded from the index
        parent            -- optional QObject parent
        """
        super(Walker, self).__init__(parent)
        self.index = index
        self.lock = lock
        self.files = files
        self.filenamesForWords = filenamesForWords
        self.commonWords = commonWords
        self.stopped = False
        self.mutex = QMutex()  # guards self.stopped
        self.completed = False

    def stop(self):
        """Ask the walker to stop at its next checkpoint."""
        # Acquire before entering ``try`` so that ``finally`` never unlocks
        # a mutex that was not actually taken.
        self.mutex.lock()
        try:
            self.stopped = True
        finally:
            self.mutex.unlock()

    def isStopped(self):
        """Return True once stop() has been called."""
        self.mutex.lock()
        try:
            return self.stopped
        finally:
            self.mutex.unlock()

    def run(self):
        """Thread entry point: index the files, then emit ``finished``."""
        self.processFiles()
        self.stop()
        self.finished.emit(self.completed, self.index)

    def processFiles(self):
        """Index every file in ``self.files``.

        Checks for a stop request between the read, parse and index stages
        of each file and returns early if one was made.  ``self.completed``
        is set to True only when every file has been visited.  Unreadable
        files are reported on stderr and skipped.
        """
        for fname in self.files:
            if self.isStopped():
                return
            text = self._readText(fname)
            if text is None:
                continue
            if self.isStopped():
                return
            words = self._extractWords(text)
            if self.isStopped():
                return
            self._indexWords(words, fname)
            self.indexed.emit(fname, self.index)
        self.completed = True

    @staticmethod
    def _charFromEntity(match):
        """Return the character for an ENTITY_RE match, or "" if unknown."""
        text = match.group(match.lastindex)
        if text.isdigit():
            try:
                return chr(int(text))
            except (ValueError, OverflowError):
                # Out-of-range code point (e.g. "&#99999999;") or a digit
                # character int() cannot parse: drop the reference rather
                # than let the exception terminate the thread.
                return ""
        codepoint = html.entities.name2codepoint.get(text)
        return chr(codepoint) if codepoint is not None else ""

    @staticmethod
    def _readText(fname):
        """Return the contents of ``fname``, or None if it cannot be read."""
        try:
            with codecs.open(fname, "r", "UTF8", "ignore") as fh:
                return fh.read()
        except EnvironmentError as e:
            sys.stderr.write("Error: {0}\n".format(e))
            return None

    def _extractWords(self, text):
        """Return the set of indexable, not-yet-common words in ``text``."""
        text = self.STRIPHTML_RE.sub("", text)
        text = self.ENTITY_RE.sub(self._charFromEntity, text)
        text = text.lower()
        words = set()
        for word in self.SPLIT_RE.split(text):
            if not (self.MIN_WORD_LEN <= len(word) <= self.MAX_WORD_LEN):
                continue
            if (word[0] in self.INVALID_FIRST_OR_LAST or
                    word[-1] in self.INVALID_FIRST_OR_LAST):
                continue
            if word in words:
                # Already accepted for this file; no need to take the
                # shared lock again.
                continue
            self.lock.lockForRead()
            try:
                isCommon = word in self.commonWords
            finally:
                self.lock.unlock()
            if not isCommon:
                words.add(word)
        return words

    def _indexWords(self, words, fname):
        """Record ``fname`` against each word, promoting over-used words."""
        for word in words:
            self.lock.lockForWrite()
            try:
                if word in self.commonWords:
                    # Another walker may have declared the word common since
                    # _extractWords() looked; do not re-create its entry.
                    continue
                files = self.filenamesForWords[word]
                if len(files) > self.COMMON_WORDS_THRESHOLD:
                    del self.filenamesForWords[word]
                    self.commonWords.add(word)
                else:
                    files.add(str(fname))
            finally:
                self.lock.unlock()


/home/yrd/eric_workspace/chap19/pageindexer_ans.pyw

#!/usr/bin/env python3

import collections
import os
import sys
from PyQt5.QtCore import (QDir, QReadWriteLock, QMutex,Qt)
from PyQt5.QtWidgets import (QApplication, QDialog, QFileDialog, QFrame,
        QHBoxLayout, QLCDNumber, QLabel, QLineEdit, QListWidget,
        QPushButton, QVBoxLayout)
import walker_ans as walker


def isAlive(qobj):
    """Return True if the wrapped object behind ``qobj`` still exists.

    ``sip.unwrapinstance()`` raises RuntimeError when the underlying
    object has already been deleted; that is reported as False.
    """
    # PyQt5 >= 5.11 ships its own private copy as ``PyQt5.sip``; older
    # releases use the top-level ``sip`` module.
    try:
        from PyQt5 import sip
    except ImportError:
        import sip
    try:
        sip.unwrapinstance(qobj)
    except RuntimeError:
        return False
    return True


class Form(QDialog):
    """Dialog that indexes the HTML files below a directory.

    The chosen directory is scanned for ``.htm``/``.html`` files, which are
    handed in batches to ``walker.Walker`` threads.  The walkers fill the
    shared ``filenamesForWords`` mapping and ``commonWords`` set (both
    guarded by ``self.lock``); the dialog shows progress and lets the user
    look up which files contain a word.
    """

    # Maximum number of files handed to a single walker thread.
    FILES_PER_WALKER = 1000

    def __init__(self, parent=None):
        """Build the widgets and layouts and connect the signals."""
        super(Form, self).__init__(parent)

        # NOTE(review): self.mutex is only ever taken inside this dialog's
        # own methods/slots; it looks redundant if they all run in the GUI
        # thread -- confirm before removing.
        self.mutex = QMutex()
        self.fileCount = 0
        self.filenamesForWords = collections.defaultdict(set)
        self.commonWords = set()
        self.lock = QReadWriteLock()  # guards the two containers above
        self.path = QDir.homePath()
        pathLabel = QLabel("Indexing path:")
        self.pathLabel = QLabel()
        self.pathLabel.setFrameStyle(QFrame.StyledPanel|QFrame.Sunken)
        self.pathButton = QPushButton("Set &Path...")
        self.pathButton.setAutoDefault(False)
        findLabel = QLabel("&Find word:")
        self.findEdit = QLineEdit()
        findLabel.setBuddy(self.findEdit)
        commonWordsLabel = QLabel("&Common words:")
        self.commonWordsListWidget = QListWidget()
        commonWordsLabel.setBuddy(self.commonWordsListWidget)
        filesLabel = QLabel("Files containing the &word:")
        self.filesListWidget = QListWidget()
        filesLabel.setBuddy(self.filesListWidget)
        filesIndexedLabel = QLabel("Files indexed")
        self.filesIndexedLCD = QLCDNumber()
        self.filesIndexedLCD.setSegmentStyle(QLCDNumber.Flat)
        wordsIndexedLabel = QLabel("Words indexed")
        self.wordsIndexedLCD = QLCDNumber()
        self.wordsIndexedLCD.setSegmentStyle(QLCDNumber.Flat)
        commonWordsLCDLabel = QLabel("Common words")
        self.commonWordsLCD = QLCDNumber()
        self.commonWordsLCD.setSegmentStyle(QLCDNumber.Flat)
        self.statusLabel = QLabel("Click the 'Set Path' "
                                  "button to start indexing")
        self.statusLabel.setFrameStyle(QFrame.StyledPanel|QFrame.Sunken)

        topLayout = QHBoxLayout()
        topLayout.addWidget(pathLabel)
        topLayout.addWidget(self.pathLabel, 1)
        topLayout.addWidget(self.pathButton)
        topLayout.addWidget(findLabel)
        topLayout.addWidget(self.findEdit, 1)
        leftLayout = QVBoxLayout()
        leftLayout.addWidget(filesLabel)
        leftLayout.addWidget(self.filesListWidget)
        rightLayout = QVBoxLayout()
        rightLayout.addWidget(commonWordsLabel)
        rightLayout.addWidget(self.commonWordsListWidget)
        middleLayout = QHBoxLayout()
        middleLayout.addLayout(leftLayout, 1)
        middleLayout.addLayout(rightLayout)
        bottomLayout = QHBoxLayout()
        bottomLayout.addWidget(filesIndexedLabel)
        bottomLayout.addWidget(self.filesIndexedLCD)
        bottomLayout.addWidget(wordsIndexedLabel)
        bottomLayout.addWidget(self.wordsIndexedLCD)
        bottomLayout.addWidget(commonWordsLCDLabel)
        bottomLayout.addWidget(self.commonWordsLCD)
        bottomLayout.addStretch()
        layout = QVBoxLayout()
        layout.addLayout(topLayout)
        layout.addLayout(middleLayout)
        layout.addLayout(bottomLayout)
        layout.addWidget(self.statusLabel)
        self.setLayout(layout)

        self.walkers = []    # Walker threads of the current run
        self.completed = []  # completed[i] is True once walker i finished
        self.pathButton.clicked.connect(self.setPath)
        self.findEdit.returnPressed.connect(self.find)
        self.setWindowTitle("Page Indexer")

    def stopWalkers(self):
        """Stop every running walker, wait for it, and forget them all."""
        # Request all stops first so the walkers wind down in parallel,
        # then wait for each one.
        for thread in self.walkers:
            if isAlive(thread) and thread.isRunning():
                thread.stop()
        for thread in self.walkers:
            if isAlive(thread) and thread.isRunning():
                thread.wait()
        self.walkers = []
        self.completed = []

    def setPath(self):
        """Ask for a directory and start indexing the HTML files below it."""
        self.stopWalkers()
        self.pathButton.setEnabled(False)
        path = QFileDialog.getExistingDirectory(self,
                "Choose a Path to Index", self.path)
        if not path:
            self.statusLabel.setText("Click the 'Set Path' "
                                     "button to start indexing")
            self.pathButton.setEnabled(True)
            return
        self.statusLabel.setText("Scanning directories...")
        QApplication.processEvents() # Needed for Windows
        self.path = QDir.toNativeSeparators(path)
        self.findEdit.setFocus()
        self.pathLabel.setText(self.path)
        self.statusLabel.clear()
        self.filesListWidget.clear()
        # Drop the previous run's common words as well; otherwise they stay
        # on screen until (and unless) indexed() next refreshes the list.
        self.commonWordsListWidget.clear()
        self.fileCount = 0
        self.filenamesForWords = collections.defaultdict(set)
        self.commonWords = set()
        batch = []
        index = 0
        for root, dirs, fnames in os.walk(str(self.path)):
            for name in fnames:
                if not name.endswith((".htm", ".html")):
                    continue
                batch.append(os.path.join(root, name))
                if len(batch) == self.FILES_PER_WALKER:
                    self.processFiles(index, batch)
                    batch = []  # rebind: the walker keeps the old list
                    index += 1
        if batch:
            self.processFiles(index, batch)
            index += 1
        if index == 0:
            # No walker was started, so nothing will ever call finished().
            self.finishedIndexing()
            self.statusLabel.setText(
                    "No HTML files found in the given path")

    def processFiles(self, index, files):
        """Create, register and start a walker for ``files``."""
        thread = walker.Walker(index, self.lock, files,
                self.filenamesForWords, self.commonWords, self)
        thread.indexed[str,int].connect(self.indexed)
        # Walker.finished is the walker's own (bool, int) signal, emitted
        # from run() shortly before the thread exits.  self.finished is
        # connected first so it can wait for the thread before the deferred
        # deletion requested by deleteLater takes place.
        thread.finished[bool,int].connect(self.finished)
        thread.finished.connect(thread.deleteLater)
        self.walkers.append(thread)
        self.completed.append(False)
        thread.start()
        thread.wait(300) # Needed for Windows

    def find(self):
        """List the indexed files containing the first word in the find box."""
        words = str(self.findEdit.text()).lower().split()
        if not words:
            # Covers both an empty box and whitespace-only input (which
            # used to raise IndexError on ``split()[0]``).
            self.mutex.lock()
            try:
                self.statusLabel.setText("Enter a word to find in files")
            finally:
                self.mutex.unlock()
            return
        word = words[0]
        self.mutex.lock()
        try:
            self.statusLabel.clear()
            self.filesListWidget.clear()
        finally:
            self.mutex.unlock()
        self.lock.lockForRead()
        try:
            isCommon = word in self.commonWords
            if isCommon:
                files = set()
            else:
                files = self.filenamesForWords.get(word, set()).copy()
        finally:
            self.lock.unlock()
        if isCommon:
            message = ("Common words like '{0}' "
                       "are not indexed".format(word))
            names = []
        elif not files:
            message = ("No indexed file contains "
                       "the word '{0}'".format(word))
            names = []
        else:
            names = [QDir.toNativeSeparators(name) for name in
                     sorted(files, key=str.lower)]
            message = "{0} indexed files contain the word '{1}'".format(
                    len(names), word)
        self.mutex.lock()
        try:
            if names:
                self.filesListWidget.addItems(names)
            self.statusLabel.setText(message)
        finally:
            self.mutex.unlock()

    def indexed(self, fname, index):
        """Slot for Walker.indexed: show progress for one more file."""
        self.mutex.lock()
        try:
            self.statusLabel.setText(fname)
            self.fileCount += 1
            count = self.fileCount
        finally:
            self.mutex.unlock()
        if count % 25 == 0:
            # Refresh the counters every 25 files.
            self.lock.lockForRead()
            try:
                indexedWordCount = len(self.filenamesForWords)
                commonWordCount = len(self.commonWords)
            finally:
                self.lock.unlock()
            self.mutex.lock()
            try:
                self.filesIndexedLCD.display(count)
                self.wordsIndexedLCD.display(indexedWordCount)
                self.commonWordsLCD.display(commonWordCount)
            finally:
                self.mutex.unlock()
        elif count % 101 == 0:
            # Refresh the common-words list less often than the counters.
            self.lock.lockForRead()
            try:
                words = self.commonWords.copy()
            finally:
                self.lock.unlock()
            self.mutex.lock()
            try:
                self.commonWordsListWidget.clear()
                self.commonWordsListWidget.addItems(sorted(words))
            finally:
                self.mutex.unlock()

    def finished(self, completed, index):
        """Slot for Walker.finished: note walker ``index`` as done.

        When every walker of the current run has reported (or no walkers
        are registered any more) the status becomes "Finished" and the
        final counts are displayed.  ``completed`` is not used.
        """
        if self.walkers:
            if 0 <= index < len(self.walkers):
                thread = self.walkers[index]
                # The signal is emitted from inside run(); make sure the
                # thread has really exited before the deleteLater()
                # connected to the same signal destroys the QThread.
                if isAlive(thread):
                    thread.wait()
                self.completed[index] = True
            if not all(self.completed):
                return
        self.mutex.lock()
        try:
            self.statusLabel.setText("Finished")
        finally:
            self.mutex.unlock()
        self.finishedIndexing()

    def reject(self):
        """Cancel indexing if it is still running; otherwise close the dialog."""
        if not all(self.completed):
            self.stopWalkers()
            self.finishedIndexing()
        else:
            self.accept()

    def closeEvent(self, event=None):
        """Stop the walkers before the dialog closes."""
        self.stopWalkers()

    def finishedIndexing(self):
        """Show the final counts and common words; re-enable 'Set Path'."""
        self.filesIndexedLCD.display(self.fileCount)
        self.wordsIndexedLCD.display(len(self.filenamesForWords))
        self.commonWordsLCD.display(len(self.commonWords))
        # indexed() only refreshes the list occasionally, so bring it in
        # line with the LCD now that the run is over.
        self.commonWordsListWidget.clear()
        self.commonWordsListWidget.addItems(sorted(self.commonWords))
        self.pathButton.setEnabled(True)
        QApplication.processEvents() # Needed for Windows


# Start the GUI only when executed as a script, and hand the event loop's
# exit status back to the operating system.
if __name__ == "__main__":
    app = QApplication(sys.argv)
    form = Form()
    form.show()
    sys.exit(app.exec_())

运行结果:

python3+PyQt5实现支持多线程的页面索引器应用程序

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持三水点靠木。

Python 相关文章推荐
python调用cmd命令行制作刷博器
Jan 13 Python
Python新手们容易犯的几个错误总结
Apr 01 Python
python实现类之间的方法互相调用
Apr 29 Python
python接口自动化(十六)--参数关联接口后传(详解)
Apr 16 Python
基于Python的PIL库学习详解
May 10 Python
python实践项目之监控当前联网状态详情
May 23 Python
django多对多表的创建,级联删除及手动创建第三张表
Jul 25 Python
python 实现多线程下载m3u8格式视频并使用ffmpeg合并
Nov 15 Python
django rest framework serializers序列化实例
May 13 Python
全面介绍python中很常用的单元测试框架unittest
Dec 14 Python
python解包概念及实例
Feb 17 Python
用Python的绘图库(matplotlib)绘制小波能量谱
Apr 17 Python
python3+PyQt5+Qt Designer实现扩展对话框
Apr 20 #Python
pandas获取groupby分组里最大值所在的行方法
Apr 20 #Python
pandas多级分组实现排序的方法
Apr 20 #Python
Python PyQt4实现QQ抽屉效果
Apr 20 #Python
Python在groupby分组后提取指定位置记录方法
Apr 20 #Python
PyQt实现界面翻转切换效果
Apr 20 #Python
python3+PyQt5+Qt Designer实现堆叠窗口部件
Apr 20 #Python
You might like
php中的四舍五入函数代码(floor函数、ceil函数、round与intval)
2014/07/14 PHP
PHP开发APP端微信支付功能
2017/02/17 PHP
<script defer> defer 是什么意思
2009/05/10 Javascript
Javascript 汉字字节判断
2009/08/01 Javascript
jquery ajax 局部无刷新更新数据的实现案例
2014/02/08 Javascript
jquery动态调整div大小使其宽度始终为浏览器宽度
2014/06/06 Javascript
全面解析Bootstrap中tooltip、popover的使用方法
2016/06/13 Javascript
Javascript 实现计算器时间功能详解及实例(二)
2017/01/08 Javascript
javascript使用btoa和atob来进行Base64转码和解码
2017/03/20 Javascript
webpack学习笔记之代码分割和按需加载的实例详解
2017/07/20 Javascript
微信小程序实现倒计时60s获取验证码
2020/04/17 Javascript
详解Vue中一种简易路由传参办法
2017/09/15 Javascript
原生JS实现多个小球碰撞反弹效果示例
2018/01/31 Javascript
详解angularjs跨页面传参遇到的一些问题
2018/11/01 Javascript
Vue移动端实现图片上传及超过1M压缩上传
2019/12/23 Javascript
Vue简单实现原理详解
2020/05/07 Javascript
JS数组的常用10种方法详解
2020/05/08 Javascript
基于js实现数组相邻元素上移下移
2020/05/19 Javascript
python实现百度关键词排名查询
2014/03/30 Python
Windows下安装python2.7及科学计算套装
2015/03/05 Python
python类和函数中使用静态变量的方法
2015/05/09 Python
JSON Web Tokens的实现原理
2017/04/02 Python
Python实现判断字符串中包含某个字符的判断函数示例
2018/01/08 Python
详解django中使用定时任务的方法
2018/09/27 Python
win7下 python3.6 安装opencv 和 opencv-contrib-python解决 cv2.xfeatures2d.SIFT_create() 的问题
2019/10/24 Python
python列表推导式入门学习解析
2019/12/02 Python
python 装饰器的实际作用有哪些
2020/09/07 Python
设计师家具购买和委托在线市场:Viyet
2016/11/16 全球购物
医学类导师推荐信范文
2013/11/19 职场文书
工程专业毕业生自荐信范文
2013/12/25 职场文书
低碳生活的宣传标语
2014/06/23 职场文书
个人对照检查剖析材料
2014/10/13 职场文书
欢迎词范文
2015/01/27 职场文书
2015年实习班主任工作总结
2015/04/23 职场文书
学生会主席任命书
2015/09/21 职场文书
导游词之云南丽江古城
2019/09/17 职场文书