我有几个分销商,提供他们的库存水平与CSV文件我。其中一些分销商有多达3个文件的信息。这些文件相当大,有170000多行数据。你知道吗
我要做的是编写一个程序,允许我将这些数据重新组织成一个新的CSV文件,这样每个分发服务器都会有一个按我所希望的方式组织的文件。你知道吗
下面是我在代码方面的一些小细节,但不要太过技术化:
step 1
Open file1
for row1 in file1
Grab partnumber from row[1]
step 2
open file2
for row2 in file2
if partnumber == row2[2]
grab data from row[4]
break
然后对我想要提取的每一个数据重复步骤2。我看到的问题是程序运行得非常快,直到它深入到数据中,因为它仍然读取每一行数据,即使我已经从文本行中收集了数据。我甚至想过,如果我开始删除行数据时,我完成了它,但我想也许有一个替代我不知道。任何帮助都会很好。你知道吗
def PartSearch():
Partexists = "N"
global SelectedObject
with open(eval("file"+str(SelectedFile))) as f:
r2 = csv.reader(f, delimiter = eval("file"+str(SelectedFile)+"Del"))
for row2 in r2:
if int(SelectedFile) == 1:
if str(row2[int(file1PartNumber)]) == str(PartNumberobject):
Partexists= "Y"
SelectedObject = row2[int(SelectedCol)]
break
if int(SelectedFile) == 2:
if row2[int(file2PartNumber)] == PartNumberobject:
Partexists= "Y"
SelectedObject = row2[int(SelectedCol)]
break
if int(SelectedFile) == 3:
if row2[int(file3PartNumber)] == PartNumberobject:
Partexists= "Y"
SelectedObject = row2[int(SelectedCol)]
break
if int(SelectedFile) == 4:
if row2[int(file4PartNumber)] == PartNumberobject:
Partexists= "Y"
SelectedObject = row2[int(SelectedCol)]
break
if Partexists != "Y":
SelectedObject = "X"
with open("C:\\Python34\\Python34\\Distributors\\ListOfDistributors.txt") as f:
r3 = csv.reader(f, delimiter = "\t")
for row3 in r3:
distributor = row3[0]
with open("C:\\Python34\\Python34\\Distributors\\"+distributor+"files.txt") as f:
r4 = csv.reader(f, delimiter = "\t")
totalRows = sum(1 for _ in f)
i = totalRows
if totalRows == 1:
with open("C:\\Python34\\Python34\\Distributors\\"+distributor+"files.txt") as f:
r4 = csv.reader(f, delimiter = "\t")
for row4 in r4:
file1 = row4[1]
file1Del = row4[2]
file1titles = row4[3]
file1titles = row4[3]
file1PartNumber = row4[4]
if totalRows == 2:
with open("C:\\Python34\\Python34\\Distributors\\"+distributor+"files.txt") as f:
r4 = csv.reader(f, delimiter = "\t")
for row4 in r4:
if i == 2:
file1 = row4[1]
file1Del = row4[2]
file1titles = row4[3]
file1titles = row4[3]
file1PartNumber = row4[4]
if i == 1:
file2 = row4[1]
file2Del = row4[2]
file2titles = row4[3]
file2titles = row4[3]
file2PartNumber = row4[4]
i = i-1
if totalRows == 3:
with open("C:\\Python34\\Python34\\Distributors\\"+distributor+"files.txt") as f:
r4 = csv.reader(f, delimiter = "\t")
for row4 in r4:
if i == 3:
file1 = row4[1]
file1Del = row4[2]
file1titles = row4[3]
file1titles = row4[3]
file1PartNumber = row4[4]
if i == 2:
file2 = row4[1]
file2Del = row4[2]
file2titles = row4[3]
file2titles = row4[3]
file2PartNumber = row4[4]
if i == 1:
file3 = row4[1]
file3Del = row4[2]
file3titles = row4[3]
file3titles = row4[3]
file3PartNumber = row4[4]
i = i-1
if totalRows == 4:
with open("C:\\Python34\\Python34\\Distributors\\"+distributor+"files.txt") as f:
r4 = csv.reader(f, delimiter = "\t")
for row4 in r4:
if i == 4:
file1 = row4[1]
file1Del = row4[2]
file1titles = row4[3]
file1titles = row4[3]
file1PartNumber = row4[4]
if i == 3:
file2 = row4[1]
file2Del = row4[2]
file2titles = row4[3]
file2titles = row4[3]
file2PartNumber = row4[4]
if i == 2:
file3 = row4[1]
file3Del = row4[2]
file3titles = row4[3]
file3titles = row4[3]
file3PartNumber = row4[4]
if i == 1:
file4 = row4[1]
file4Del = row4[2]
file4titles = row4[3]
file4titles = row4[3]
file4PartNumber = row4[4]
i = i-1
with open("C:\\Python34\\Python34\\Distributors\\"+distributor+"structure.txt") as f:
r5 = csv.reader(f, delimiter = "\t")
i=1
for row5 in r5:
if i == 1:
DistributorName = row5[0]
PartNumberFile = row5[2]
PartNumberCol = row5[3]
AltPartNumberFile = row5[5]
AltPartNumberCol = row5[6]
VendorPartNumberFile = row5[8]
VendorPartNumberCol = row5[9]
AltVendorPartNumberFile = row5[11]
AltVendorPartNumberCol = row5[12]
DescriptionFile = row5[14]
DescriptionCol = row5[15]
BrandFile = row5[17]
BrandCol = row5[18]
CostFile = row5[20]
CostCol = row5[21]
RetailFile = row5[23]
RetailCol = row5[24]
StatusFile = row5[26]
StatusCol = row5[27]
WeightFile = row5[29]
WeightCol = row5[30]
if i == 2:
if row5[2] == 0:
NumofOnedaywarehouse = row5[2]
if row5[2] == 1:
NumofOnedaywarehouse = row5[2]
Oneday1WarehouseFile = row5[4]
Oneday1WarehouseCol = row5[5]
if row5[2] == 2:
NumofOnedaywarehouse = row5[2]
Oneday1WarehouseFile = row5[4]
Oneday1WarehouseCol = row5[5]
Oneday2WarehouseFile = row5[7]
Oneday2WarehouseCol = row5[8]
if row5[2] == 3:
NumofOnedaywarehouse = row5[2]
Oneday1WarehouseFile = row5[4]
Oneday1WarehouseCol = row5[5]
Oneday2WarehouseFile = row5[7]
Oneday2WarehouseCol = row5[8]
Oneday3WarehouseFile = row5[10]
Oneday3WarehouseCol = row5[11]
if i == 3:
if row5[2] == 0:
NumofTwodaywarehouse = row5[2]
if row5[2] == 1:
NumofTwodaywarehouse = row5[2]
Twoday1WarehouseFile = row5[4]
Twoday1WarehouseCol = row5[5]
if row5[2] == 2:
NumofTwodaywarehouse = row5[2]
Twoday1WarehouseFile = row5[4]
Twoday1WarehouseCol = row5[5]
Twoday2WarehouseFile = row5[7]
Twoday2WarehouseCol = row5[8]
if row5[2] == 3:
NumofTwodaywarehouse = row5[2]
Twoday1WarehouseFile = row5[4]
Twoday1WarehouseCol = row5[5]
Twoday2WarehouseFile = row5[7]
Twoday2WarehouseCol = row5[8]
Twoday3WarehouseFile = row5[10]
Twoday3WarehouseCol = row5[11]
if i == 4:
if row5[2] == 0:
NumofThreedaywarehouse = row5[2]
if row5[2] == 1:
NumofThreedaywarehouse = row5[2]
Threeday1WarehouseFile = row5[4]
Threeday1WarehouseCol = row5[5]
if row5[2] == 2:
NumofThreedaywarehouse = row5[2]
Threeday1WarehouseFile = row5[4]
Threeday1WarehouseCol = row5[5]
Threeday2WarehouseFile = row5[7]
Threeday2WarehouseCol = row5[8]
if row5[2] == 3:
NumofThreedaywarehouse = row5[2]
Threeday1WarehouseFile = row5[4]
Threeday1WarehouseCol = row5[5]
Threeday2WarehouseFile = row5[7]
Threeday2WarehouseCol = row5[8]
Threeday3WarehouseFile = row5[10]
Threeday3WarehouseCol = row5[11]
if i == 5:
if row5[2] == 0:
NumofFourdaywarehouse = row5[2]
if row5[2] == 1:
NumofFourdaywarehouse = row5[2]
Fourday1WarehouseFile = row5[4]
Threeday1WarehouseCol = row5[5]
if row5[2] == 2:
NumofFourdaywarehouse = row5[2]
Fourday1WarehouseFile = row5[4]
Fourday1WarehouseCol = row5[5]
Fourday2WarehouseFile = row5[7]
Fourday2WarehouseCol = row5[8]
if row5[2] == 3:
NumofFourdaywarehouse = row5[2]
Fourday1WarehouseFile = row5[4]
Fourday1WarehouseCol = row5[5]
Fourday2WarehouseFile = row5[7]
Fourday2WarehouseCol = row5[8]
Fourday3WarehouseFile = row5[10]
Fourday3WarehouseCol = row5[11]
if i == 6:
if row5[2] == 0:
NumofFivedaywarehouse = row5[2]
if row5[2] == 1:
NumofFivedaywarehouse = row5[2]
Fiveday1WarehouseFile = row5[4]
Fiveday1WarehouseCol = row5[5]
if row5[2] == 2:
NumofFivedaywarehouse = row5[2]
Fiveday1WarehouseFile = row5[4]
Fiveday1WarehouseCol = row5[5]
Fiveday2WarehouseFile = row5[7]
Fiveday2WarehouseCol = row5[8]
if row5[2] == 3:
NumofFivedaywarehouse = row5[2]
Fiveday1WarehouseFile = row5[4]
Fiveday1WarehouseCol = row5[5]
Fiveday2WarehouseFile = row5[7]
Fiveday2WarehouseCol = row5[8]
Fiveday3WarehouseFile = row5[10]
Fiveday3WarehouseCol = row5[11]
i = i+1
"""print(file1Del)
PartNumberFile = 1
PartNumberCol = 1
CostFile = 2
CostCol = 2
SelectedFile = PartNumberFile
SelectedCol = PartNumberCol
number = 1"""
#Program to grab Part Number
with open(file1) as f:
r = csv.reader(f, delimiter = file1Del)
if file1titles == "Y":
file=r.__next__()
for row in r:
PartNumberobject = row[int(file1PartNumber)]
"start of data collection, save variables as SelectedFile and SelectedCol. Run PartSearch() then save Variable SelectedObject"
SelectedFile = PartNumberFile
SelectedCol = PartNumberCol
PartSearch()
FPartNumber = SelectedObject
SelectedFile = AltPartNumberFile
SelectedCol = AltPartNumberCol
PartSearch()
FAltPartNumber = SelectedObject
SelectedFile = VendorPartNumberFile
SelectedCol = VendorPartNumberCol
PartSearch()
FVendorPartNumber = SelectedObject
SelectedFile = AltVendorPartNumberFile
SelectedCol = AltVendorPartNumberCol
PartSearch()
FAltVendorPartNumber = SelectedObject
SelectedFile = DescriptionFile
SelectedCol = DescriptionCol
PartSearch()
FDescription = SelectedObject
SelectedFile = BrandFile
SelectedCol = BrandCol
PartSearch()
FBrand = SelectedObject
SelectedFile = CostFile
SelectedCol = CostCol
PartSearch()
FCost = SelectedObject
SelectedFile = RetailFile
SelectedCol = RetailCol
PartSearch()
FRetail = SelectedObject
SelectedFile = StatusFile
SelectedCol = StatusCol
PartSearch()
FStatus = SelectedObject
SelectedFile = WeightFile
SelectedCol = WeightCol
PartSearch()
FWeight = SelectedObject
print(DistributorName, PartNumberobject, FAltPartNumber, FVendorPartNumber, FAltVendorPartNumber, FDescription, FBrand, FCost, FRetail, FStatus, FWeight)
既然你没有提供细节,我只能非常笼统地回答。你知道吗
考虑到您只想使用很少的csv文件,并且它们的大小大约为170k行,我假设这些数据适合内存。如果是这种情况,并且您想使用python(一个非常合理的选择),我强烈建议您投入一些时间来学习pandas。Pandas为您提供了大量处理表格数据的选项,包括强大的筛选或数据库样式的合并和联接操作。你知道吗
当你问一个更具体的问题时,我相信我们可以提供进一步的帮助。你知道吗
如果零件文件足够小,可以放入内存中,可以通过将其加载到字典(高效、快速的访问数据结构)中来加快速度。当您在
file2
中循环时,您正在查找一行row[2] == partnumber
,然后(可能)使用row[4]
,因此row[2]
作为键,row[4]
作为值的字典将使查找非常快:然后,不要每次都重新打开该文件,只需执行以下操作:
编辑:您还可以做一些其他事情来改进代码:
使用
True
和False
表示布尔值,而不是字符串“Y”和“N”。你知道吗将数组拆分为变量时,可以更轻松地执行以下操作:
您需要重复大量代码来处理一行、两行、三行和四行的情况。考虑在这里使用循环和列表。它还可以让你摆脱那种
eval
。这看起来像是无谓的吹毛求疵,但不太重复的代码更容易改进。你知道吗
相关问题 更多 >
编程相关推荐