我尝试使用cython代码的例子来读取文件cythonized reading
from libc.stdio cimport *
cdef extern from "stdio.h":
#FILE * fopen ( const char * filename, const char * mode )
FILE *fopen(const char *, const char *)
#int fclose ( FILE * stream )
int fclose(FILE *)
#ssize_t getline(char **lineptr, size_t *n, FILE *stream);
ssize_t getline(char **, size_t *, FILE *)
def read_file(filename):
filename_byte_string = filename.encode("UTF-8")
cdef char* fname = filename_byte_string
cdef FILE* cfile
cfile = fopen(fname, "rb")
if cfile == NULL:
raise IOError(2, "No such file or directory: '%s'" % filename)
cdef char * line = NULL
cdef size_t l = 0
cdef ssize_t read
while True:
read = getline(&line, &l, cfile)
if read == -1: break
yield line
fclose(cfile)
return []
我将此函数用于:
^{pr2}$在读了大量的csv文件后,我得到了“分段错误(核心转储)”。这个函数“read_file”以与python的标准open(filename,'rb')相同的方式读取文件,但速度要快得多。我还尝试了C++扩展,使用这样的代码:
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <sstream>
std::vector<std::string> readFile(std::string fileName)
{
std::string line;
std::vector<std::string> fileStrings;
std::ifstream myfile(fileName.c_str());
if(!myfile)
{
std::cout<<"Error opening output file"<< std::endl;
return fileStrings;
}
while (std::getline(myfile, line))
{
if(line.empty()) continue;
std::istringstream is(line);
fileStrings.push_back(is.str());
}
return fileStrings;
}
在我的pyx文件中:
from cython.operator cimport dereference, preincrement
cdef extern from "read_file.h":
vector[string] readFile(string user_name) nogil
def readFileIterator(string fileName):
file_lines_list=[]
fileLines = readFile(fileName)
iterator = fileLines.begin()
while iterator != fileLines.end():
file_lines_list.append(dereference(iterator))
preincrement(iterator)
return iter(file_lines_list)
这样的方式似乎并不等同于标准python的读取,因为在读取csv(带有非标准符号的东西)的过程中,我还有很多额外的错误。 所以问题是:在cython中读取文件的正确方式是什么?cython可以作为纯python中的标准读取接口?为什么会出现“分段错误”?在
目前没有回答
相关问题 更多 >
编程相关推荐