Posted in Python onMay 21, 2019
本文实例为大家分享了python多进程读图提取特征存npy的具体代码,供大家参考,具体内容如下
import multiprocessing import os, time, random import numpy as np import cv2 import os import sys from time import ctime import tensorflow as tf image_dir = r"D:/sxl/处理图片/汉字分类/train10/" #图像文件夹路径 data_type = 'test' save_path = r'E:/sxl_Programs/Python/CNN/npy/' #存储路径 data_name = 'Img10' #npy文件名 char_set = np.array(os.listdir(image_dir)) #文件夹名称列表 np.save(save_path+'ImgShuZi10.npy',char_set) #文件夹名称列表 char_set_n = len(char_set) #文件夹列表长度 read_process_n = 1 #进程数 repate_n = 4 #随机移动次数 data_size = 1000000 #1个npy大小 shuffled = True #是否打乱 #可以读取带中文路径的图 def cv_imread(file_path,type=0): cv_img=cv2.imdecode(np.fromfile(file_path,dtype=np.uint8),-1) # print(file_path) # print(cv_img.shape) # print(len(cv_img.shape)) if(type==0): if(len(cv_img.shape)==3): cv_img = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY) return cv_img #多个数组按同一规则打乱数据 def ShuffledData(features,labels): ''' @description:随机打乱数据与标签,但保持数据与标签一一对应 ''' permutation = np.random.permutation(features.shape[0]) shuffled_features = features[permutation,:] #多维 shuffled_labels = labels[permutation] #1维 return shuffled_features,shuffled_labels #函数功能:简单网格 #函数要求:1.无关图像大小;2.输入图像默认为灰度图;3.参数只有输入图像 #返回数据:1x64*64维特征 def GetFeature(image): #图像大小归一化 image = cv2.resize(image,(64,64)) img_h = image.shape[0] img_w = image.shape[1] #定义特征向量 feature = np.zeros(img_h*img_w,dtype=np.int16) for h in range(img_h): for w in range(img_w): feature[h*img_h+w] = image[h,w] return feature # 写数据进程执行的代码: def read_image_to_queue(queue): print('Process to write: %s' % os.getpid()) for j,dirname in enumerate(char_set): # dirname 是文件夹名称 label = np.where(char_set==dirname)[0][0] #文件夹名称对应的下标序号 print('序号:'+str(j),'读 '+dirname+' 文件夹...时间:',ctime() ) for parent,_,filenames in os.walk(os.path.join(image_dir,dirname)): for filename in filenames: if(filename[-4:]!='.jpg'): continue image = cv_imread(os.path.join(parent,filename),0) # cv2.imshow(dirname,image) # cv2.waitKey(0) queue.put((image,label)) for i in range(read_process_n): queue.put((None,-1)) print('读图结束!') return True # 读数据进程执行的代码: def extract_feature(queue,lock,count): ''' @description:从队列中取出图片进行特征提取 @queue:先进先出队列 lock:锁,在计数时上锁,防止冲突 count:计数 ''' print('Process %s start reading...' % os.getpid()) global data_n features = [] #存放提取到的特征 labels = [] #存放标签 flag = True #标志着进程是否结束 while flag: image,label = queue.get() #从队列中获取图像和标签 if len(features) >= data_size or label == -1: #特征数组的长度大于指定长度,则开始存储 array_features = np.array(features) #转换成数组 array_labels = np.array(labels) array_features,array_labels = ShuffledData(array_features,array_labels) #打乱数据 lock.acquire() # 锁开始 # 拆分数据为训练集,测试集 split_x = int(array_features.shape[0] * 0.8) train_data, test_data = np.split(array_features, [split_x], axis=0) # 拆分特征数据集 train_labels, test_labels = np.split(array_labels, [split_x], axis=0) # 拆分标签数据集 count.value += 1 #下标计数加1 str_features_name_train = data_name+'_features_train_'+str(count.value)+'.npy' str_labels_name_train = data_name+'_labels_train_'+str(count.value)+'.npy' str_features_name_test = data_name+'_features_test_'+str(count.value)+'.npy' str_labels_name_test = data_name+'_labels_test_'+str(count.value)+'.npy' lock.release() # 锁释放 np.save(save_path+str_features_name_train,train_data) np.save(save_path+str_labels_name_train,train_labels) np.save(save_path+str_features_name_test,test_data) np.save(save_path+str_labels_name_test,test_labels) print(os.getpid(),'save:',str_features_name_train) print(os.getpid(),'save:',str_labels_name_train) print(os.getpid(),'save:',str_features_name_test) print(os.getpid(),'save:',str_labels_name_test) features.clear() labels.clear() if label == -1: break # 获取特征向量,传入灰度图 feature = GetFeature(image) features.append(feature) labels.append(label) # # 随机移动4次 # for itime in range(repate_n): # rMovedImage = randomMoveImage(image) # feature = SimpleGridFeature(rMovedImage) # 简单网格 # features.append(feature) # labels.append(label) print('Process %s is done!' % os.getpid()) if __name__=='__main__': time_start = time.time() # 开始计时 # 父进程创建Queue,并传给各个子进程: image_queue = multiprocessing.Queue(maxsize=1000) #队列 lock = multiprocessing.Lock() #锁 count = multiprocessing.Value('i',0) #计数 #将图写入队列进程 write_sub_process = multiprocessing.Process(target=read_image_to_queue, args=(image_queue,)) read_sub_processes = [] #读图子线程 for i in range(read_process_n): read_sub_processes.append( multiprocessing.Process(target=extract_feature, args=(image_queue,lock,count)) ) # 启动子进程pw,写入: write_sub_process.start() # 启动子进程pr,读取: for p in read_sub_processes: p.start() # 等待进程结束: write_sub_process.join() for p in read_sub_processes: p.join() time_end=time.time() time_h=(time_end-time_start)/3600 print('用时:%.6f 小时'% time_h) print ("读图提取特征存npy,运行结束!")
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持三水点靠木。
python多进程读图提取特征存npy
- Author -
业余狙击手19声明:登载此文出于传递更多信息之目的,并不意味着赞同其观点或证实其描述。
Reply on: @reply_date@
@reply_contents@