python分割文件的常用方法


Posted in Python on November 01, 2014

本文为大家整理了一些比较好用的python分割文件的方法,这些方法非常简单实用,分享给大家供大家参考。具体如下:

例子1 指定分割文件大小

配置文件 config.ini:

[global]

#原文件存放目录

dir1=F:\work\python\3595\pyserver\test

#新文件存放目录

dir2=F:\work\python\3595\pyserver\test1

python 代码如下:

#!/usr/bin/python

# -*- coding: utf-8 -*-

import os,sys,ConfigParser

class file_openate(object):
    """Split every file of a source directory into fixed-size pieces.

    The source directory (``dir1``) and the destination directory (``dir2``)
    are read from the ``[global]`` section of ``config.ini`` in the current
    working directory.  Each piece is written to ``dir2`` under the name
    ``<name>-<n><ext>``, where ``<n>`` counts from 1.
    """

    def __init__(self):
        """Load ``dir1`` and ``dir2`` from ``config.ini`` as unicode paths."""
        dir_config = ConfigParser.ConfigParser()
        # ``with`` closes the config file even when parsing fails.
        with open('config.ini', "rb") as file_config:
            dir_config.readfp(file_config)
        self.dir1 = unicode(str(dir_config.get("global", "dir1")), 'utf8')
        self.dir2 = unicode(str(dir_config.get("global", "dir2")), 'utf8')

    def file_list(self, chunk_size=None):
        """Split each regular file in ``dir1`` into pieces stored in ``dir2``.

        chunk_size -- piece size in bytes.  When omitted (the original
                      behaviour) the size is asked for on the console.

        Raises ValueError when an explicit ``chunk_size`` is not positive.
        """
        if chunk_size is None:
            chunk_size = self._ask_chunk_size()
        elif chunk_size <= 0:
            raise ValueError("chunk_size must be a positive number of bytes")

        for filename in os.listdir(self.dir1):
            source_path = os.path.join(self.dir1, filename)
            if not os.path.isfile(source_path):
                # Sub-directories cannot be opened and split like files.
                continue
            self._split_one(source_path, filename, chunk_size)
            print(u"分割成功" + filename)

    def _ask_chunk_size(self):
        """Prompt until a positive integer is typed and return it."""
        print(u"软件有不确认性,前期使用最好先备份,以免发生数据丢失,确认备份后,请输入要分割的字节大小,按b来计算")
        while 1:
            input_name = raw_input("number:")
            # A size of 0 would never consume the file, so reject it too.
            if input_name.isdigit() and int(input_name) > 0:
                return int(input_name)
            print(u"请输入正确的数字,请重新输入")

    def _split_one(self, source_path, filename, chunk_size):
        """Write ``source_path`` to ``dir2`` as numbered pieces of ``chunk_size`` bytes."""
        name, ext = os.path.splitext(filename)
        chu_number = 0
        # Binary mode keeps the bytes intact on every platform; full paths
        # are used so the process working directory is never changed.
        with open(source_path, 'rb') as f:
            while 1:
                piece = f.read(chunk_size)
                if not piece:
                    break
                chu_number = chu_number + 1
                piece_path = os.path.join(
                    self.dir2, name + '-' + str(chu_number) + ext)
                # 'wb' (not append) so that a re-run replaces old pieces
                # instead of growing them.
                with open(piece_path, 'wb') as new_f:
                    new_f.write(piece)

# Build the splitter; its constructor reads the directories from config.ini.
file_name=file_openate()

# Ask for the piece size and split the files found in the source directory.
file_name.file_list()

例子2,按行数分割文件

#!/usr/bin/env python

#--*-- coding:utf-8 --*--

import os

class SplitFiles():
    """Split a text file into part files holding a fixed number of lines."""

    def __init__(self, file_name, line_count=200):
        """Store the source file name and the number of lines per part.

        file_name  -- path of the file to split
        line_count -- number of lines written to each part file
        """
        self.file_name = file_name
        self.line_count = line_count

    def split_file(self):
        """Read the source file and write it out in parts of ``line_count`` lines.

        A message is printed when the source file does not exist; an IOError
        raised while reading or writing is printed instead of propagated.
        """
        if not (self.file_name and os.path.exists(self.file_name)):
            print("%s is not a validate file" % self.file_name)
            return

        try:
            with open(self.file_name) as f:
                temp_content = []
                part_num = 1
                for line in f:
                    if len(temp_content) >= self.line_count:
                        # Current part is full: flush it and start the next.
                        self.write_file(part_num, temp_content)
                        part_num += 1
                        temp_content = []
                    temp_content.append(line)
                # Write whatever is left once the source is exhausted.
                self.write_file(part_num, temp_content)
        except IOError as err:
            print(err)

    def get_part_file_name(self, part_num):
        """Return the path of part ``part_num``.

        Parts live in a ``temp_part_file`` directory next to the source
        file; the directory is created on first use.
        """
        temp_path = os.path.dirname(self.file_name)  # directory of the source
        part_dir = os.path.join(temp_path, "temp_part_file")
        if not os.path.exists(part_dir):
            os.makedirs(part_dir)
        return os.path.join(part_dir, "temp_file_" + str(part_num) + ".part")

    def write_file(self, part_num, *line_content):
        """Write the given line sequences to the file of part ``part_num``.

        Each positional argument after ``part_num`` is an iterable of lines;
        they are written in order.  An IOError is printed, not raised.
        """
        part_file_name = self.get_part_file_name(part_num)
        try:
            with open(part_file_name, "w") as part_file:
                for lines in line_content:
                    part_file.writelines(lines)
        except IOError as err:
            print(err)

if __name__ == "__main__":
    # Demo run: split the sample file with the default lines-per-part setting.
    SplitFiles(r"F:\multiple_thread_read_file.txt").split_file()

上面只是进行了分割,如果我们还要把文件合并回去怎么办呢?下面这个例子可以同时实现分割与合并,大家一起看看。

例子3, 分割文件与合并函数

#!/usr/bin/python

##########################################################################

# split a file into a set of parts; join.py puts them back together;

# this is a customizable version of the standard unix split command-line 

# utility; because it is written in Python, it also works on Windows and

# can be easily modified; because it exports a function, its logic can 

# also be imported and reused in other applications;

##########################################################################

      

import sys, os

kilobytes = 1024
megabytes = kilobytes * 1000
chunksize = int(1.4 * megabytes)   # default: roughly a floppy


def split(fromfile, todir, chunksize=chunksize):
    """Split ``fromfile`` into numbered part files stored in ``todir``.

    fromfile  -- path of the file to split (read in binary mode)
    todir     -- directory for the parts; created when missing, otherwise
                 every entry already in it is deleted first
    chunksize -- maximum number of bytes per part

    Returns the number of parts written; they are named ``part0001``,
    ``part0002``, ...  Errors are not caught here: the caller handles them.
    """
    # Open the source first so that a bad path fails before todir is emptied.
    with open(fromfile, 'rb') as source:       # use binary mode on Windows
        if not os.path.exists(todir):
            os.mkdir(todir)                    # make dir, read/write parts
        else:
            for fname in os.listdir(todir):    # delete any existing files
                os.remove(os.path.join(todir, fname))
        partnum = 0
        while 1:
            chunk = source.read(chunksize)     # get next part <= chunksize
            if not chunk:                      # eof = empty result from read
                break
            partnum = partnum + 1
            filename = os.path.join(todir, 'part%04d' % partnum)
            with open(filename, 'wb') as fileobj:
                fileobj.write(chunk)
    assert partnum <= 9999                     # join sort fails if 5 digits
    return partnum

     

if __name__ == '__main__':
    # Command-line / double-click driver for split().
    if len(sys.argv) == 2 and sys.argv[1] == '-help':
        print('Use: split.py [file-to-split target-dir [chunksize]]')
    else:
        try:
            read_line = raw_input              # Python 2 console input
        except NameError:
            read_line = input                  # same function on Python 3

        if len(sys.argv) < 3:
            interactive = 1
            fromfile = read_line('File to be split? ')       # input if clicked
            todir = read_line('Directory to store part files? ')
        else:
            interactive = 0
            fromfile, todir = sys.argv[1:3]                  # args in cmdline
            if len(sys.argv) == 4:
                chunksize = int(sys.argv[3])
        absfrom, absto = map(os.path.abspath, [fromfile, todir])
        print('Splitting %s to %s by %s' % (absfrom, absto, chunksize))

        try:
            parts = split(fromfile, todir, chunksize)
        except Exception:
            # Report the failure but let Ctrl-C / SystemExit through.
            print('Error during split:')
            print('%s %s' % (sys.exc_info()[0], sys.exc_info()[1]))
        else:
            print('Split finished: %s parts are in %s' % (parts, absto))
        if interactive:
            read_line('Press Enter key')       # pause if clicked

join_file.py
 

#!/usr/bin/python

##########################################################################

# join all part files in a dir created by split.py, to recreate file.  

# This is roughly like a 'cat fromdir/* > tofile' command on unix, but is 

# more portable and configurable, and exports the join operation as a 

# reusable function.  Relies on sort order of file names: must be same 

# length.  Could extend split/join to popup Tkinter file selectors.

##########################################################################

      

import os, sys

readsize = 1024


def join(fromdir, tofile):
    """Concatenate every file in ``fromdir`` into ``tofile``.

    fromdir -- directory holding the part files; they are joined in sorted
               name order, so the names must sort in the intended order
    tofile  -- path of the file to (re)create, written in binary mode

    Errors are not caught here: the caller handles them.
    """
    # List the parts before creating the output, so a bad fromdir does not
    # leave behind (or truncate) the target file.
    parts = sorted(os.listdir(fromdir))
    target = os.path.abspath(tofile)
    with open(tofile, 'wb') as output:
        for filename in parts:
            filepath = os.path.join(fromdir, filename)
            if os.path.abspath(filepath) == target:
                # The output file lives in fromdir: never read it back in.
                continue
            with open(filepath, 'rb') as fileobj:
                while 1:
                    filebytes = fileobj.read(readsize)
                    if not filebytes:
                        break
                    output.write(filebytes)

      

if __name__ == '__main__':
    # Command-line / double-click driver for join().
    if len(sys.argv) == 2 and sys.argv[1] == '-help':
        print('Use: join.py [from-dir-name to-file-name]')
    else:
        try:
            read_line = raw_input              # Python 2 console input
        except NameError:
            read_line = input                  # same function on Python 3

        if len(sys.argv) != 3:
            interactive = 1
            fromdir = read_line('Directory containing part files? ')
            tofile = read_line('Name of file to be recreated? ')
        else:
            interactive = 0
            fromdir, tofile = sys.argv[1:]
        absfrom, absto = map(os.path.abspath, [fromdir, tofile])
        print('Joining %s to make %s' % (absfrom, absto))

        try:
            join(fromdir, tofile)
        except Exception:
            # Report the failure but let Ctrl-C / SystemExit through.
            print('Error joining files:')
            print('%s %s' % (sys.exc_info()[0], sys.exc_info()[1]))
        else:
            print('Join complete: see %s' % absto)
        if interactive:
            read_line('Press Enter key')       # pause if clicked

希望本文所述对大家的Python程序设计有所帮助。

Python 相关文章推荐
python基础教程之基本数据类型和变量声明介绍
Aug 29 Python
linux下python抓屏实现方法
May 22 Python
python MySQLdb使用教程详解
Mar 20 Python
matlab中实现矩阵删除一行或一列的方法
Apr 04 Python
windows下pycharm安装、创建文件、配置默认模板
Jul 31 Python
对Python 3.5拼接列表的新语法详解
Nov 08 Python
33个Python爬虫项目实战(推荐)
Jul 08 Python
python PIL/cv2/base64相互转换实例
Jan 09 Python
python GUI库图形界面开发之PyQt5窗口控件QWidget详细使用方法
Feb 26 Python
python属于跨平台语言码
Jun 09 Python
python 实现的截屏工具
May 08 Python
PyTorch 实现L2正则化以及Dropout的操作
May 27 Python
跟老齐学Python之通过Python连接数据库
Oct 28 #Python
Python对象体系深入分析
Oct 28 #Python
Python中类的继承代码实例
Oct 28 #Python
Python列表list数组array用法实例解析
Oct 28 #Python
python实现无证书加密解密实例
Oct 27 #Python
深入理解Python 代码优化详解
Oct 27 #Python
简单的Python抓taobao图片爬虫
Oct 26 #Python
You might like
PHP与SQL注入攻击[一]
2007/04/17 PHP
smarty模板中拼接字符串的方法
2014/02/14 PHP
PHP基于SPL实现的迭代器模式示例
2018/04/22 PHP
Laravel框架实现定时发布任务的方法
2018/08/16 PHP
50个优秀经典PHP算法大集合 附源码
2020/08/26 PHP
javascript iframe中打开文件,并检测iframe存在否
2008/12/28 Javascript
javascript 跳转代码集合
2009/12/03 Javascript
潜说js对象和数组
2011/05/25 Javascript
JavaScript String.replace函数参数实例说明
2013/06/06 Javascript
JS实现可改变列宽的table实例
2013/07/02 Javascript
jQuery制作简洁的多级联动Select下拉框
2014/12/23 Javascript
javascript实现ecshop搜索框键盘上下键切换控制
2015/03/18 Javascript
轻量级网页遮罩层jQuery插件用法实例
2015/07/31 Javascript
JS实现仿腾讯微博无刷新删除微博效果代码
2015/10/16 Javascript
Google 地图API资料整理及详细介绍
2016/08/06 Javascript
折叠菜单及选择器的运用
2017/02/03 Javascript
Webpack中雪碧图插件使用详解
2018/05/25 Javascript
实例详解ztree在vue项目中使用并且带有搜索功能
2018/08/24 Javascript
浅析Vue中拆分视图层代码的5点建议
2019/08/15 Javascript
js 根据对象数组中的属性进行排序实现代码
2019/09/12 Javascript
es6数组的flat(),flatMap()函数用法实例分析
2020/04/18 Javascript
python实现爬取千万淘宝商品的方法
2015/06/30 Python
简单讲解Python编程中namedtuple类的用法
2016/06/21 Python
Python中操作mysql的pymysql模块详解
2016/09/13 Python
Python自动化运维之IP地址处理模块详解
2017/12/10 Python
解决python 无法加载downsample模型的问题
2018/10/25 Python
Django中的AutoField字段使用
2020/05/18 Python
意大利大型购物中心:Oliviero.it
2017/10/19 全球购物
GOLFINO英国官网:高尔夫服装
2020/04/11 全球购物
Linux内核的同步机制是什么?主要有哪几种内核锁
2016/07/11 面试题
怎样有效的进行自我评价
2013/10/06 职场文书
公证委托书模板
2014/04/03 职场文书
关于分班的感言
2015/08/04 职场文书
Pytorch distributed 多卡并行载入模型操作
2021/06/05 Python
详解Oracle数据库中自带的所有表结构(sql代码)
2021/11/20 Oracle
Java中的随机数Random
2022/03/17 Java/Android