编程 Python

python多进程读图提取特征存npy

Posted in Python on May 21, 2019

本文实例为大家分享了python多进程读图提取特征存npy的具体代码，供大家参考，具体内容如下

import multiprocessing
import os, time, random
import numpy as np
import cv2
import os
import sys
from time import ctime
import tensorflow as tf
 
# Script configuration.
# NOTE(review): these statements run at import time; if child processes
# re-import this module (e.g. the "spawn" start method), the directory listing
# and the np.save below would be repeated in each child - confirm.
image_dir = r"D:/sxl/处理图片/汉字分类/train10/"  # root image folder; each entry is walked as one class folder
data_type = 'test'
save_path = r'E:/sxl_Programs/Python/CNN/npy/'  # output folder for the .npy files
data_name = 'Img10'        # filename prefix of the saved .npy files

char_set = np.array(os.listdir(image_dir))   # names of the entries in image_dir (the class folders)
np.save(save_path+'ImgShuZi10.npy',char_set)   # persist the folder-name list; a name's index is its label
char_set_n = len(char_set)       # number of entries in the folder-name list

read_process_n = 1 # number of feature-extraction (consumer) processes
repate_n = 4   # number of random shifts per image
data_size = 1000000 # number of samples collected before one set of .npy files is written

shuffled = True  # whether to shuffle the samples
 
# Read an image from a path that may contain non-ASCII (e.g. Chinese) characters.
def cv_imread(file_path,type=0):
    """Decode the image file at ``file_path`` with OpenCV.

    The file bytes are loaded with ``np.fromfile`` and decoded with
    ``cv2.imdecode`` so that paths containing Chinese characters can be read.

    Args:
        file_path: Path of the image file.
        type: ``0`` (default) converts a 3-dimensional (multi-channel) result
            to single-channel grayscale; any other value returns the decoded
            image unchanged. The name shadows the builtin but is kept so
            existing callers keep working.

    Returns:
        The decoded image as a NumPy array.

    Raises:
        ValueError: If the file is empty or cannot be decoded as an image.
    """
    raw_bytes = np.fromfile(file_path, dtype=np.uint8)
    if raw_bytes.size == 0:
        raise ValueError('empty image file: %s' % file_path)

    # -1 asks OpenCV to decode the image as stored (no forced colour conversion).
    cv_img = cv2.imdecode(raw_bytes, -1)
    if cv_img is None:
        # imdecode signals failure by returning None instead of raising.
        raise ValueError('cannot decode image file: %s' % file_path)

    if type == 0 and len(cv_img.shape) == 3:
        cv_img = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
    return cv_img
 
# Shuffle several arrays with one shared permutation.
def ShuffledData(features,labels):
    """Randomly reorder ``features`` and ``labels`` while keeping them paired.

    One permutation of ``range(features.shape[0])`` is drawn from NumPy's
    global random state and applied to the first axis of both arrays.

    Args:
        features: Array whose first axis enumerates the samples; any number of
            dimensions (including 1-D and empty arrays) is accepted.
        labels: Array whose first axis matches ``features``.

    Returns:
        Tuple ``(shuffled_features, shuffled_labels)``.
    """
    permutation = np.random.permutation(features.shape[0])
    # Index the first axis only: `features[permutation, :]` would raise
    # IndexError for 1-D or empty inputs.
    shuffled_features = features[permutation]
    shuffled_labels = labels[permutation]
    return shuffled_features,shuffled_labels
 
# Purpose: simple grid feature (raw pixels of the size-normalised image).
# Requirements: 1. independent of the input image size; 2. the input image is
# assumed to be grayscale; 3. the only parameter is the input image.
# Returns: a 1 x 64*64 feature vector.
def GetFeature(image):
    """Return the pixels of ``image`` resized to 64x64 as a flat int16 vector.

    Args:
        image: Single-channel (grayscale) image array.

    Returns:
        1-D ``np.int16`` array of length 4096 in row-major order, i.e. element
        ``h * 64 + w`` holds pixel ``(h, w)`` of the resized image.

    Raises:
        ValueError: If the resized image is not 2-dimensional.
    """
    # Normalise the image size.
    resized = cv2.resize(image,(64,64))
    if resized.ndim != 2:
        raise ValueError('GetFeature expects a single-channel (grayscale) image')

    # Row-major flatten replaces the per-pixel Python loop and uses the row
    # width as the stride.
    return resized.astype(np.int16).reshape(-1)
 
# Code executed by the producer process: reads images and puts them on the queue.
def read_image_to_queue(queue):
    """Read every ``.jpg`` under each class folder and enqueue ``(image, label)``.

    Iterates over ``char_set``; for each folder name, walks
    ``image_dir/<name>`` recursively, loads each file whose name ends in
    ``.jpg`` with ``cv_imread`` (grayscale mode) and puts ``(image, label)`` on
    ``queue``, where ``label`` is the index of the folder name in ``char_set``.
    Files that cannot be read or decoded are reported and skipped.

    When reading ends - normally or because of an exception - one
    ``(None, -1)`` end-of-data marker is enqueued per consumer process
    (``read_process_n``) so that consumers blocked on ``queue.get()`` can stop.

    Args:
        queue: Queue shared with the feature-extraction processes.

    Returns:
        True once all folders have been processed.
    """
    print('Process to write: %s' % os.getpid())
    try:
        for j,dirname in enumerate(char_set): # dirname is a class folder name
            label = np.where(char_set==dirname)[0][0]  # index of the folder name in char_set
            print('序号：'+str(j),'读 '+dirname+' 文件夹...时间：',ctime() )
            for parent,_,filenames in os.walk(os.path.join(image_dir,dirname)):
                for filename in filenames:
                    if not filename.endswith('.jpg'):
                        continue
                    image_path = os.path.join(parent,filename)
                    try:
                        image = cv_imread(image_path,0)
                    except (OSError, ValueError, AttributeError, cv2.error) as err:
                        # One bad file must not end the whole run. AttributeError
                        # is included because a failed decode can surface as an
                        # attribute access on None.
                        print('skip unreadable image %s: %s' % (image_path, err))
                        continue
                    queue.put((image,label))
    finally:
        # Always send the markers; otherwise consumers wait forever.
        for _ in range(read_process_n):
            queue.put((None,-1))

    print('读图结束!')
    return True
  
# Code executed by each consumer process: extracts features and saves them.
def _save_batch(features, labels, lock, count):
    """Shuffle (if enabled), split 80/20 and save one batch as four .npy files.

    The shared counter is incremented under ``lock`` to obtain a file index
    that is unique across processes; the files are written outside the lock.
    """
    array_features = np.array(features)
    array_labels = np.array(labels)

    if shuffled:
        array_features,array_labels = ShuffledData(array_features,array_labels)

    # First 80% of the samples form the training set, the rest the test set.
    split_x = int(array_features.shape[0] * 0.8)
    train_data, test_data = np.split(array_features, [split_x], axis=0)
    train_labels, test_labels = np.split(array_labels, [split_x], axis=0)

    with lock:
        count.value += 1
        file_index = count.value

    suffix = '_'+str(file_index)+'.npy'
    outputs = [
        (data_name+'_features_train'+suffix, train_data),
        (data_name+'_labels_train'+suffix, train_labels),
        (data_name+'_features_test'+suffix, test_data),
        (data_name+'_labels_test'+suffix, test_labels),
    ]
    for file_name, data in outputs:
        np.save(save_path+file_name,data)
    for file_name, _ in outputs:
        print(os.getpid(),'save:',file_name)


def extract_feature(queue,lock,count):
    """Take images from the queue, extract features and save them in batches.

    Items are ``(image, label)`` tuples; a label of ``-1`` marks the end of the
    data. Features and labels are buffered, and the buffer is saved when an
    item arrives while it already holds at least ``data_size`` samples, or
    when the end marker arrives. An empty buffer is never saved.

    Args:
        queue: First-in-first-out queue filled by the image-reading process.
        lock: Lock guarding ``count`` while the file index is incremented.
        count: Shared integer counter used to number the saved files.
    """
    print('Process %s start reading...' % os.getpid())

    features = [] # extracted feature vectors
    labels = [] # labels matching `features`
    while True:
        image,label = queue.get()
        finished = label == -1

        if features and (finished or len(features) >= data_size):
            _save_batch(features, labels, lock, count)
            features.clear()
            labels.clear()

        if finished:
            break

        # Extract the feature vector from the grayscale image.
        features.append(GetFeature(image))
        labels.append(label)
        # NOTE: the original file carried a commented-out random-shift
        # augmentation step at this point; it stays disabled.

    print('Process %s is done!' % os.getpid())
 
if __name__=='__main__':
    # Entry point: start one image-reading process and `read_process_n`
    # feature-extraction processes, wait for all of them, report elapsed time.
    time_start = time.time()

    # Shared objects are created in the parent and passed to the children.
    image_queue = multiprocessing.Queue(maxsize=1000)  # bounded image queue
    lock = multiprocessing.Lock()                      # guards the file counter
    count = multiprocessing.Value('i',0)               # shared file counter

    # Producer: reads images into the queue.
    write_sub_process = multiprocessing.Process(target=read_image_to_queue, args=(image_queue,))

    # Consumers: take images from the queue and extract features.
    read_sub_processes = [
        multiprocessing.Process(target=extract_feature, args=(image_queue,lock,count))
        for _ in range(read_process_n)
    ]

    # Producer first, then the consumers - for both start and join.
    all_processes = [write_sub_process] + read_sub_processes
    for process in all_processes:
        process.start()
    for process in all_processes:
        process.join()

    time_end = time.time()
    time_h = (time_end-time_start)/3600
    print('用时：%.6f 小时'% time_h)
    print ("读图提取特征存npy,运行结束！")

以上就是本文的全部内容，希望对大家的学习有所帮助，也希望大家多多支持三水点靠木。

python多进程读图提取特征存npy

- Author -

业余狙击手19

声明：登载此文出于传递更多信息之目的，并不意味着赞同其观点或证实其描述。

Python 相关文章推荐

python通过exifread模块获得图片exif信息的方法

Mar 16 Python

用Python制作检测Linux运行信息的工具的教程

Apr 01 Python

python实现用户登陆邮件通知的方法

Jul 09 Python

浅谈Python数据类型判断及列表脚本操作

Nov 04 Python

python3操作mysql数据库的方法

Jun 23 Python

详解Python 实现元胞自动机中的生命游戏(Game of life)

Jan 27 Python

Python基于多线程操作数据库相关问题分析

Jul 11 Python

python os.path模块常用方法实例详解

Sep 16 Python

python发送告警邮件脚本

Sep 17 Python

python栈的基本定义与使用方法示例【初始化、赋值、入栈、出栈等】

Oct 24 Python

Python OpenCV视频截取并保存实现代码

Nov 30 Python

Python统计文本词汇出现次数的实例代码

Feb 27 Python

Python中使用pypdf2合并、分割、加密pdf文件的代码详解

May 21 #Python

python+selenium实现简历自动刷新的示例代码

May 20 #Python

图文详解python安装Scrapy框架步骤

May 20 #Python

Python配置虚拟环境图文步骤

May 20 #Python

Python检测数据类型的方法总结

May 20 #Python

Python中的引用知识点总结

May 20 #Python

Python函数和模块的使用总结

May 20 #Python

You might like

站长助手-网站web在线管理程序 v1.0 下载

2007/05/12 PHP

FirePHP 推荐一款PHP调试工具

2011/04/23 PHP

php使用MySQL保存session会话的方法

2015/06/18 PHP

jQuery 扩展对input的一些操作方法

2009/10/30 Javascript

基于jQuery实现的水平和垂直居中的div窗口

2011/08/08 Javascript

由Javascript实现的页面日历

2011/11/04 Javascript

js常用代码段整理

2011/11/30 Javascript

JS上传前预览图片实例

2013/03/25 Javascript

深入了解Node.js中的一些特性

2014/09/25 Javascript

javascript input输入框模糊提示功能的实现

2017/09/25 Javascript

基于JavaScript实现前端数据多条件筛选功能

2020/08/19 Javascript

web前端vue之vuex单独一文件使用方式实例详解

2018/01/11 Javascript

在Vue项目中引入JQuery-ui插件的讲解

2019/01/27 jQuery

jquery实现广告上下滚动效果

2021/03/04 jQuery

python使用sorted函数对列表进行排序的方法

2015/04/04 Python

简单理解Python中的装饰器

2015/07/31 Python

Python 中 list 的各项操作技巧

2017/04/13 Python

Python设计模式之中介模式简单示例

2018/01/09 Python

Python Numpy计算各类距离的方法

2019/07/05 Python

解决Python命令行下退格,删除,方向键乱码(亲测有效)

2020/01/16 Python

Python识别html主要文本框过程解析

2020/02/18 Python

Python虚拟环境venv用法详解

2020/05/25 Python

HTML5本地存储localStorage、sessionStorage基本用法、遍历操作、异常处理等

2014/05/08 HTML / CSS

La Redoute英国官网：法国时尚品牌

2017/04/27 全球购物

香港卓悦化妆品官网：BONJOUR

2017/09/21 全球购物

英国健身仓库：Bodybuilding Warehouse

2019/03/06 全球购物

美国名牌香水折扣网站：Hottperfume

2021/02/10 全球购物

什么是典型的软件三层结构？软件设计为什么要分层？软件分层有什么好处？

2012/03/14 面试题

介绍一下你对SOA的认识

2016/04/24 面试题

工商治理实习生的自我评价

2014/01/15 职场文书

《宿建德江》教学反思

2014/04/23 职场文书

村抢险救灾方案

2014/05/09 职场文书

法人委托书的范本格式

2014/09/11 职场文书

学校开学标语

2014/10/06 职场文书

家庭贫困证明

2015/06/16 职场文书

2015年“我们的节日·中秋节”活动总结

2015/07/30 职场文书