解析大文本文件,提取数据并将其存储在CSV文件中。。T

2024-05-17 02:35:58 发布

您现在位置:Python中文网/ 问答频道 /正文

我有一个大的日志文件(比如1-3gb),我需要解析、提取数据并将其保存在CSV文件中。在

文本文件数据

  *    D:40035FC8 wr-long         00000008 \\core0\Global\u4TimeHiCnt         1.000us
  *    D:40027C5C rd-byte               00 *core0\Global\Ypf_OILL_OilLvlOn   20.342us
  *    D:40010044 rd-word             0FE2 *l\u2SAD_OILLVS_RecoveryCounter    0.160us
  *    D:40010044 wr-word             0FE1 *l\u2SAD_OILLVS_RecoveryCounter    0.040us
  *    D:40035FC8 wr-long         00000008 \\core0\Global\u4TimeHiCnt         1.000us

我必须提取位于最后一个\之后的变量名,然后是读写数以及数据类型,并将其存储在CSV文件中。在

CSV文件结果

^{pr2}$

问题是花了太多时间。你能看看所附的代码和建议的方法,使它更快。在

import string
import sys
import time

MyFile = open("C:\\Users\\AEC_FULL\\Saravanan\\Workspace\\Trace32Log_Parser\\core1_sram_ReadWrite.txt")#core0_sram_ReadWrite_rawdata

GeneratedFile = open(str(("C:\\Users\\AEC_FULL\\Saravanan\\Workspace\\Trace32Log_Parser\\")+'ParsedOutput.csv'),'w')

try:

    MyVariableList = []
    TimeStartTest       = time.time()        #Starting Time     

    GeneratedFile.write('\nVariable')
    GeneratedFile.write(', Datatype')
    GeneratedFile.write(', CORE 0')
    GeneratedFile.write(',, CORE 1')
    GeneratedFile.write(',, CORE X')

    GeneratedFile.write('\n,, Read ')
    GeneratedFile.write(', Write ')

    GeneratedFile.write(', Read ')
    GeneratedFile.write(', Write ')

    GeneratedFile.write(', Read ')
    GeneratedFile.write(', Write ')                            
    GeneratedFile.write('\n')      



    for CurrentLine in MyFile:

        NoofSpaces = 0

        if CurrentLine.find('\\') != -1:
            MyVariable     = CurrentLine[CurrentLine.rfind('\\')+1:].split(' ')[0]             
        elif CurrentLine.find('*\\') != -1:
            MyVariable     = CurrentLine[CurrentLine.rfind('*\\')+1:].split(' ')[0]             
        elif CurrentLine.find('*') != -1:                
            MyVariable     = CurrentLine[CurrentLine.rfind('*')+1:].split(' ')[0]    

        VariableFound = 0            
        MyVariableList.sort()

        Lowerbound = 0
        Upperbound = len(MyVariableList)-1

        while Lowerbound <= Upperbound and VariableFound == 0:
            middle_pos = (Lowerbound+Upperbound) // 2
            if MyVariableList[middle_pos] < MyVariable:
                Lowerbound = middle_pos + 1
            elif MyVariableList[middle_pos] > MyVariable:
                Upperbound = middle_pos - 1
            else:
                VariableFound = 1        

        if VariableFound == 0:            
            MyVariableList.append(MyVariable) 
            try:
                MyFile1 = open("C:\\Users\\AEC_FULL\\Saravanan\\Workspace\\Trace32Log_Parser\\core1_sram_ReadWrite.txt")#core0_sram_ReadWrite_rawdata

                Core0_ReadCount  = 0
                Core0_WriteCount = 0
                Core1_ReadCount  = 0
                Core1_WriteCount = 0
                CoreX_ReadCount  = 0
                CoreX_WriteCount = 0                    
                for CurrentLine1 in MyFile1:

                    if CurrentLine1.find(MyVariable) != -1:
                        ##   CORE 0  ##
                        if CurrentLine1.find("0\\Global") != -1:
                            DataType        = CurrentLine1.split('         ')[0].split('-')[1]
                            DataOperation   = CurrentLine1.split('         ')[0].split('-')[0].split(' ')[-1]
                            if DataOperation == 'rd':
                                Core0_ReadCount = Core0_ReadCount + 1
                            elif DataOperation == 'wr':
                                Core0_WriteCount = Core0_WriteCount + 1                                              
                        ##   CORE 1  ##                        
                        elif CurrentLine1.find("1\\Global") != -1:                                
                            DataType        = CurrentLine1.split('         ')[0].split('-')[1]
                            DataOperation   = CurrentLine1.split('         ')[0].split('-')[0].split(' ')[-1]
                            if DataOperation == 'rd':
                                Core1_ReadCount = Core1_ReadCount + 1
                            elif DataOperation == 'wr':
                                Core1_WriteCount = Core1_WriteCount + 1                            
                        ##   CORE X  ##                        
                        else:
                            DataType        = CurrentLine1.split('         ')[0].split('-')[1]
                            DataOperation   = CurrentLine1.split('         ')[0].split('-')[0].split(' ')[-1]
                            if DataOperation == 'rd':
                                CoreX_ReadCount =  CoreX_ReadCount + 1
                            elif DataOperation == 'wr':
                                CoreX_WriteCount = CoreX_WriteCount + 1

                GeneratedFile.write('\n %s' %MyVariable)
                GeneratedFile.write(', %s' %DataType)                            
                GeneratedFile.write(', %d' %Core0_ReadCount)
                GeneratedFile.write(', %d' %Core0_WriteCount)             
                GeneratedFile.write(', %d' %Core1_ReadCount)
                GeneratedFile.write(', %d' %Core1_WriteCount)  
                GeneratedFile.write(', %d' %CoreX_ReadCount)
                GeneratedFile.write(', %d' %CoreX_WriteCount)                            
                GeneratedFile.write('\n')                                                    


            finally:
                MyFile1.close()            

except:

    print sys.exc_info()       

finally:

    GeneratedFile.close()  
    MyFile.close()
    TimeStopTest       = time.time()
    print str(int((TimeStopTest - TimeStartTest)/60))

Tags: ifcore0wrwritesplitelifcorexreadcount