创建和写入文件时的内存错误（MemoryError）

# Generates Bertha.DAT: a flat binary dump of a 1-D float array holding
# `girth` records of four values each -- the constant 4 followed by three
# random values drawn from [-100, 100).
import numpy as np
import subprocess as subp
from array import array

keepData = 1

if(not keepData):
    subp.call(['rm', 'Bertha.DAT'])  # removes previous file if present

girth = int(8e6)  # number of final rows
girthier = girth*4  # total number of values: four per row
bigger_tim = np.zeros(girthier)  # initial array

File = 'Bertha.DAT'

# for loop that inputs values in the style of the input
for ii in range(0,girth):
    tiny_tim = 100*(2*np.random.rand(1,3)-1)
    bigger_tim[ii*4]=4
    bigger_tim[ii*4+1]=tiny_tim[0,0]
    bigger_tim[ii*4+2]=tiny_tim[0,1]
    bigger_tim[ii*4+3]=tiny_tim[0,2]

# NOTE(review): the collapsed original called `line.tofile(bid)`, but no
# `line` is defined anywhere in the visible script; writing the filled array
# is presumably what was intended -- confirm against the original post.
with open(File, 'wb') as bid:
    bigger_tim.tofile(bid)  # writes into file

Traceback (most recent call last): File "<ipython-input-1-cb13d37b70b9>", line 1, in <module> runfile('D:/WinPython-32bit-2.7.6.3/Big_Bertha.py', wdir='D:/WinPython-32bit-2.7.6.3') File "D:\WinPython-32bit-2.7.6.3\python-2.7.6\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 540, in runfile execfile(filename, namespace) File "D:/WinPython-32bit-2.7.6.3/Big_Bertha.py", line 19, in <module> bigger_tim = np.zeros(girthier) #initial array MemoryError

Traceback (most recent call last): File "<ipython-input-1-268052dcc4e8>", line 1, in <module> runfile('D:/WinPython-32bit-2.7.6.3/binaryReader.py', wdir='D:/WinPython-32bit-2.7.6.3') File "D:\WinPython-32bit-2.7.6.3\python-2.7.6\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 540, in runfile execfile(filename, namespace) File "D:/WinPython-32bit-2.7.6.3/binaryReader.py", line 14, in <module> Bertha = np.fromfile('Bertha.DAT',dtype = float,count = -1, sep = "") MemoryError

2条回答

网友

1楼 · 编辑于 2024-09-27 23:27:51

您遇到的错误是因为 Python 无法再分配更多内存。

在第二个示例中，您正在分配一个包含3200万行和4列的numpy表。使用通常的双精度浮点数，仅此一项就是 1 GiB。np.fromfile 这一行需要加载一个非常大的文件，因为文件长度应该与 Matrix 相匹配，也就是说，文件中至少有 1 GiB 的数据。

合计为：1 GiB + 1 GiB = 2 GiB，这是32位 Python 的上限。（这就是2400万行没问题的原因。）这就是从文件加载数据时抛出错误的原因。此外，2 GiB 并不是用户数据的限额，而是整个进程的总限额，因此实际可用的空间可能要少得多。

您可以做以下几件事：

不要创建空表。从文件中加载数据并将其重塑为所需的形状（四列，尽可能多的行）：
m = np.fromfile("Bertha.DAT").reshape(-1,4)
使用其他数据类型而不是float（8字节的双精度浮点型）。如果没有遇到精度问题，请使用“float32”（或“f4”）。但是，您不能更改np.fromfile中的数据类型，因为它也决定了文件中的数据类型和顺序。
使用64位python。如果你处理的是大数据，那就是最好的方法。在某些情况下，它会消耗更多的内存（有时甚至会多出很多），但如果您的计算机有大量RAM，即使是非常大的表也能很好地工作。

如果您想查看对象占用了多少内存，sys模块有一个很好的函数sys.getsizeof，例如sys.getsizeof(Bertha)。

在你的代码中有一些风格方面的问题你可能需要修正。一个是命名变量，它们应该是小写的（类名大写）。对于这类信息，通读PEP 8建议非常有用。（名称Matrix无论如何都有点不幸，因为有一种东西叫做numpy.matrix。）

另一件引起我注意的事情是，您正在使用for循环迭代numpy数组。这通常是一个警告信号，表明某件事做得很慢。确实需要这样做的情况很少，通常都有非常简洁和快速的方法来操作数组。

网友

2楼 · 编辑于 2024-09-27 23:27:51

最大的问题是，你最多只需要在内存中保存四行数据，却试图把所有的东西都保存在内存中。下面这段快速而粗糙的代码只用一个刚启动的Python2.7解释器就能完成任务。

#!python2

import sqlite3

def make_narrow_file(rows, path):
    """Write a single-column text file of hexadecimal integers.

    Writes ``hex(i)`` for every ``i`` from 0 up to ``rows - 1`` to *path*,
    one value per line, replacing any existing file at that path.

    rows -- number of lines to write
    path -- destination file path
    """
    # xrange only exists on Python 2; fall back to range on Python 3, where
    # range is already lazy. Either way no full index list is built.
    try:
        lazy_range = xrange
    except NameError:
        lazy_range = range
    with open(path, 'w') as outf:
        for i in lazy_range(rows):
            outf.write(hex(i) + '\n')

def widen_file(inpath, outpath):
    """Transform the single column in inpath to four columns in outpath.

    Reads *inpath* line by line, strips trailing whitespace from each line,
    and writes every four consecutive values to *outpath* as one
    space-separated line. Only the current group of up to four values is
    held in memory.

    A trailing group of fewer than four lines is not written.

    inpath  -- path of the one-value-per-line input file
    outpath -- path of the four-column output file (overwritten)
    """
    # Both files are managed by `with`, so the input handle is released even
    # if reading or writing raises part-way through.
    with open(inpath) as inf, open(outpath, 'w') as outf:
        compose = []
        for line in inf:
            compose.append(line.rstrip())
            if len(compose) == 4:
                outf.write(' '.join(compose) + '\n')
                compose = []

# But flat files are an inconvenient way for dealing with massive data.
# Put another way, flat ascii files are degenerate databases, so we'll use
# the sqlite database which is built into Python.

def create_database(db_path):
    """Create a fresh, empty table ``wide`` with columns a, b, c, d.

    Any existing ``wide`` table in the database at *db_path* is dropped
    first, so calling this again discards previously stored rows.

    db_path -- path of the sqlite database file
    """
    conn = sqlite3.connect(db_path)
    try:
        c = conn.cursor()
        c.execute('drop table if exists wide')
        c.execute('create table wide (a, b, c, d)')
        conn.commit()
    finally:
        # Release the connection even when one of the statements fails.
        conn.close()

def fill_database(inpath, db_path):
    """Transform the single column of data in inpath to four columns in
    db_path.

    Reads *inpath* line by line, strips trailing whitespace from each line,
    and inserts every four consecutive values as one row of the table
    ``wide``, which must already exist in the database. All inserts are
    committed together once the whole file has been read.

    A trailing group of fewer than four lines is not inserted.

    inpath  -- path of the one-value-per-line input file
    db_path -- path of the sqlite database file
    """
    with open(inpath) as inf:
        conn = sqlite3.connect(db_path, isolation_level='DEFERRED')
        try:
            cur = conn.cursor()
            compose = []
            for line in inf:
                compose.append(line.rstrip())
                if len(compose) == 4:
                    cur.execute('insert into wide values(?, ?, ?, ?)', compose)
                    compose = []
            conn.commit()
        finally:
            # The original never closed the connection; close it on success
            # and on failure alike.
            conn.close()

if __name__ == '__main__':
    # Flat-file route: write the one-column file, then regroup it into a
    # four-column text file.
    narrow_path = 'bertha.dat'
    make_narrow_file(int(2e8), narrow_path)
    widen_file(narrow_path, 'berthaw.dat')

    # Database route: load the same one-column file into a sqlite table.
    db_path = 'berthaw.db'
    create_database(db_path)
    fill_database(narrow_path, db_path)

相关问题更多 >

编程相关推荐

热门问题

热门文章