拆分大型文件时出现空块

try: f = open(self.__filename, 'rb') except (OSError, IOError), e: raise FileSplitterException, str(e) bname = (os.path.split(self.__filename))[1] fsize = os.path.getsize(self.__filename) self.__chunksize = int(float(fsize)/float(self.__numchunks)) chunksz = self.__chunksize total_bytes = 0 for x in range(self.__numchunks): chunkfilename = bname + '-' + str(x+1) + self.__postfix if x == self.__numchunks - 1: chunksz = fsize - total_bytes try: print 'Writing file',chunkfilename data = f.read(chunksz) total_bytes += len(data) chunkf = file(chunkfilename, 'wb') chunkf.write(data) chunkf.close() except (OSError, IOError), e: print e continue except EOFError, e: print e break

2条回答

网友

1楼 · 编辑于 2024-10-01 04:50:02

你的问题不够清楚，因为你没有提供一个最小、完整且可验证的示例（Minimal, Complete, and Verifiable example），所以我不知道你的代码到底出了什么问题。不过，在根据我对缺失部分的猜测补全/模拟了相关代码之后，我想我写出了完全符合你要求的东西。

import os

class FileSplitterException(Exception):
    """Error type used to report file-splitting failures."""

class FileSplitter(object):
    """Split a file into consecutively numbered chunk files of a fixed size.

    Chunk files are named ``<source path without extension>-<index>.<postfix>``
    with the index starting at 0, so they are written alongside the source
    file. Every chunk holds ``chunksize`` bytes except possibly the last one,
    which holds the remainder.
    """

    def __init__(self, filename, chunksize):
        """Store the file to split and the size of each chunk.

        Args:
            filename: Path of an existing regular file.
            chunksize: Maximum number of bytes per chunk; must be positive.

        Raises:
            FileSplitterException: If ``filename`` is not an existing file.
            ValueError: If ``chunksize`` is not positive.
        """
        if not os.path.isfile(filename):
            raise FileSplitterException(
                "File: {!r} does not exist".format(filename))
        if chunksize <= 0:
            # Zero would divide by zero in split(); a negative size would
            # silently produce no chunks at all.
            raise ValueError(
                "chunksize must be positive, got {!r}".format(chunksize))
        self._filename = filename
        self._postfix = 'chunk'
        self._chunksize = chunksize

    def split(self):
        """Write the chunk files for the configured source file.

        Raises:
            FileSplitterException: If a read returns no data before the
                number of chunks computed from the file size has been
                written.
        """
        bname = os.path.splitext(self._filename)[0]
        fsize = os.path.getsize(self._filename)
        # Round up so that a trailing partial chunk gets its own file.
        chunks, partial = divmod(fsize, self._chunksize)
        if partial:
            chunks += 1

        with open(self._filename, 'rb') as infile:
            for i in range(chunks):
                # Read before creating the output file so that an unexpected
                # EOF does not leave an empty chunk file behind.
                data = infile.read(self._chunksize)
                if not data:
                    raise FileSplitterException('unexpected EOF encountered')
                chunk_filename = '{}-{}.{}'.format(bname, i, self._postfix)
                with open(chunk_filename, 'wb') as outfile:
                    outfile.write(data)

if __name__ == '__main__':
    import glob

    # Demo driver: split a sample file, then list the chunk files found on disk.
    source_path = 'big_file.txt'
    piece_size = 1 * 1024 * 1024  # 1 MiB (1,048,576 bytes)

    print('splitting {} into {:,} sized chunks'.format(source_path, piece_size))

    splitter = FileSplitter(source_path, piece_size)
    splitter.split()

    print('chunk files written:')
    # Chunk names share the source path minus its extension, so glob on that.
    stem = os.path.splitext(source_path)[0]
    pattern = stem + '-*.' + splitter._postfix
    for piece in sorted(glob.glob(pattern)):
        print('  {}: size: {:,}'.format(piece, os.path.getsize(piece)))

网友

2楼 · 编辑于 2024-10-01 04:50:02

问题中的代码似乎侧重于生成指定数量的块，而不是生成 50MB 大小的文件。

以下代码会生成 50MB 大小的文件。

import os


try:
    f = open('big.txt', 'rb')
except (OSError, IOError), e:
    raise FileSplitterException, str(e)

bname = (os.path.split('big.txt'))[1]

chunksz = 50 * 1000 * 1000 # metric MB - use 1024 * 1024 for binary MB (MiB)

counter = 0

while True:
    chunkfilename = bname + '-' + str(counter+1) + '.foo'

    try:
        print 'Writing file',chunkfilename
        data = f.read(chunksz)
        if not data:
            # We have reached the end of the file, end the script.
            break
        chunkf = file(chunkfilename, 'wb')
        chunkf.write(data)
        chunkf.close()
    except (OSError, IOError), e:
        print e
        continue
    except EOFError, e:
        print e
        break
    counter += 1

按照现代 Python 的标准，这段代码的某些方面属于糟糕的风格——例如没有使用上下文管理器来打开文件——但我没有改动这些内容，以防提问者（OP）运行的是旧版 Python（如 2.5）。

相关问题更多 >

编程相关推荐

热门问题

热门文章