我有一个脚本,完整代码可以在 Code Review(此处)找到。
我去掉了字符串拼接,改用 list 的 append 来代替,如下所示。
旧代码示例
def comment_line(self, line):
    """Write ``line`` to the output file as a ``//`` comment (old version).

    Stores ``line`` on ``self.line`` and writes ``"//" + line + "\n"``
    straight to ``self.outputfile``.
    """
    self.line = line
    line_to_write = ""
    line_to_write += "//" + self.line + "\n"  # This is the line that I have changed
    self.outputfile.write(line_to_write)
示例新代码
def comment_line(self, line):
    """Queue ``line`` as a ``//`` comment in ``self.output_list`` (new version).

    Stores ``line`` on ``self.line`` and appends ``"//" + line + "\n"`` to the
    in-memory list instead of writing to a file.
    """
    self.line = line
    self.output_list.append("//" + self.line + "\n")  # This is the line that I have changed
完整代码见 Code Review(此处)。
然后我用下面的脚本运行代码,测量执行时间:
import subprocess
import sys
import time

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() follows the wall clock and can jump.
start = time.perf_counter()
# Launch the script with the current interpreter and an argument list, so the
# run does not depend on shell parsing or on a file association for ".py".
t = subprocess.run([sys.executable, 'python_file.py', 'input_file.asn']).returncode
print('It took', time.perf_counter() - start, 'seconds.')
if t != 0:
    # A failed run makes the timing meaningless; say so instead of hiding it.
    print('python_file.py exited with status', t)
新旧脚本占用的时间如下
Old script took 0.019999980926513672 seconds.
>>> ================================ RESTART ================================
>>>
New script took 0.2999999523162842 seconds.
完整的新代码:
import re
from collections import deque
import sys
import inflection
class Convert(object):
    '''Translate brace-delimited ``Name ::= ...`` definitions into C++ enums.

    The converter reads a text file, collects generated text fragments in
    ``output_list`` and finally writes them to a ``.hpp`` file whose name is
    derived from the input file name.  ``generate_output`` is the entry
    point; the other public methods are the steps it uses.
    '''

    # A single line that both opens and closes a brace, e.g. ``a { b } c``.
    _INLINE_BRACES = re.compile(r'(.*)\{(.*)\}(.*)')
    # ``::=`` on one line with an opening brace starting the next line.
    _DEFINITION_START = re.compile(r'::=(.*)\n\{')
    _OPEN_BRACE = re.compile(r'{')
    _CLOSE_BRACE = re.compile(r'\}')
    _DIGITS = re.compile(r'\d+')

    def __init__(self):
        '''Create a converter with empty bookkeeping lists and no output.'''
        self.plist = []   # indices of lines that start a definition (``::=`` then ``{``)
        self.slist = []   # indices of other lines that open a brace
        self.tlist = []   # indices of lines that close a brace
        self.llist = []   # per closing brace: index of the last member line before it
        self.lines = []
        self.line = None
        self.index = None
        self.open_braces = []
        self.close_braces = []
        self.outputfile = None
        self.i = None
        self.open_brace_list = []
        self.close_brace_list = []
        self.file_name = None
        self.split_character = None
        self.length = None
        # None until the first enum is opened, then True inside / False outside.
        self.enumvariable_flag = None
        self.inner_variable_prefix = ""
        self.output_list = []

    @staticmethod
    def _is_enumerator(line):
        '''Return True if ``line`` looks like an enum member.

        A member is a line whose second space-separated token contains a
        number, or whose first token equals the part of the second token
        before its first ``-`` (case-insensitively).
        '''
        try:
            tokens = line.strip().split(' ')
            second = tokens[1]
        except (AttributeError, IndexError):
            # Non-string input or a single-token line: not a member.
            return False
        if Convert._DIGITS.findall(second):
            return True
        return tokens[0].lower() == second.split('-')[0].lower()

    def start_tag(self, split_character, line):
        '''Open an enum named after the text of ``line`` before ``split_character``.

        Appends ``enum E<Name>//<line>``, a newline and ``{`` to the output
        and marks the converter as being inside an enum.
        '''
        self.split_character = split_character
        self.line = line
        self.output_list.append("enum E")
        raw_name = self.line.split(self.split_character)[0].replace('-', '_')
        self.inner_variable_prefix = inflection.camelize(
            inflection.underscore(raw_name).lower()).strip()
        self.output_list.append(self.inner_variable_prefix)
        self.output_list.append("//" + self.line)
        self.output_list.append("\n")
        self.output_list.append("{\n")
        self.enumvariable_flag = True

    def end_tag(self, line):
        '''Close the current enum with ``};`` and leave enum mode.'''
        self.line = line
        self.output_list.append("};\n")
        self.enumvariable_flag = False

    def comment_line(self, line):
        '''Append ``line`` to the output as a ``//`` comment.'''
        self.line = line
        self.output_list.append("//" + self.line + "\n")

    def handle_comment(self, line):
        '''Comment out ``line`` if it starts with ``--`` or has inline braces.

        Other lines produce no output; ``self.line`` is updated either way.
        '''
        self.line = line
        if line.strip().startswith("--") or self._INLINE_BRACES.search(line):
            self.output_list.append(" ")
            self.output_list.append("//" + self.line + "\n")

    def handle_inner_element(self, line, index):
        '''Emit one enum member for ``line``, or comment the line out.

        A member with a numeric second token becomes ``e<Prefix><Name> = <n>``.
        Otherwise, if the first token matches the start of the second token,
        a member without a value is emitted; any other line is commented out.
        A trailing comma is added unless ``index`` is recorded in ``llist``
        as the last member of its block.  Errors are printed, not raised.
        '''
        self.line = line
        self.index = index
        if self.output_list[-1] != " ":
            self.output_list.append(" ")
        try:
            tokens = self.line.strip().split(' ')
            try:
                value = self._DIGITS.findall(tokens[1])[0]
                self.output_list.append("e")
                self.output_list.append(self.inner_variable_prefix)
                self.output_list.append(
                    inflection.camelize(tokens[0].replace('-', '_')))
                self.output_list.append(" = ")
                self.output_list.append(value)
                if self.index not in self.llist:
                    self.output_list.append(",")
                self.output_list.append("\n")
            except Exception:
                # No numeric value available: fall back to a value-less
                # member, or keep the line as a comment.
                if tokens[0].lower() == tokens[1].split('-')[0].lower():
                    self.output_list.append("e")
                    self.output_list.append(self.inner_variable_prefix)
                    self.output_list.append(
                        inflection.camelize(tokens[0].replace('-', '_').lower()))
                    if self.index not in self.llist:
                        self.output_list.append(",")
                else:
                    self.output_list.append("//")
                    self.output_list.append(self.line)
                self.output_list.append("\n")
        except Exception as exception:
            print(exception)

    def generate_lists(self, length, lines):
        '''Scan ``lines`` and record where definitions and braces occur.

        Fills ``plist``, ``slist``, ``tlist`` and ``llist`` (see ``__init__``)
        and returns ``(plist, slist, tlist)``.  ``length`` is the number of
        entries in ``lines``.
        '''
        self.length = length
        self.lines = lines
        inside_block = False
        last_member = None
        for index, line in enumerate(self.lines):
            # A definition start spans two lines, so look at this line
            # together with the next one when there is a next one.
            if index < (self.length - 1):
                val = str(line) + "\n" + str(self.lines[index + 1])
            else:
                val = str(line)
            if self._DEFINITION_START.search(val) and not self._INLINE_BRACES.search(val):
                self.plist.append(index)
                inside_block = True
            else:
                val = str(line)
            if self._OPEN_BRACE.search(val) and not self._INLINE_BRACES.search(val):
                if index not in self.plist:
                    self.slist.append(index)
                inside_block = True
            if self._CLOSE_BRACE.search(val) and not self._INLINE_BRACES.search(val):
                self.tlist.append(index)
                self.llist.append(last_member)
                inside_block = False
            elif inside_block and self._is_enumerator(line):
                last_member = index
        return self.plist, self.slist, self.tlist

    def add_sub_element(self, open_brace_list, close_brace_list):
        '''Emit one enum for every nested brace block of a definition.

        ``open_brace_list`` and ``close_brace_list`` hold the line indices of
        the definition's opening and closing braces; the first opening brace
        belongs to the outer definition and is skipped.
        '''
        self.open_brace_list = open_brace_list
        self.close_brace_list = close_brace_list
        self.enumvariable_flag = False
        for i in range(1, len(self.open_brace_list)):
            for index, line in enumerate(self.lines):
                # The line just above the opening brace names the nested enum.
                if index == self.open_brace_list[i] - 1:
                    self.start_tag(' ', line)
                in_block = (index <= self.close_brace_list[i - 1]) and \
                    (index > self.open_brace_list[i])
                if in_block and self.enumvariable_flag:
                    self.handle_comment(line)
                    if self.line.strip().startswith("}"):
                        self.end_tag(line)
                if self.enumvariable_flag and in_block and \
                        not self.line.strip().startswith(("--", "{")):
                    self.handle_inner_element(line, index)

    def braces_line_no(self, i):
        '''Return the brace line indices that belong to definition ``i``.

        These are the entries of ``slist`` / ``tlist`` after ``plist[i]`` and
        before the next definition start; for the last definition everything
        after ``plist[i]`` is used.  Returns ``(open_braces, close_braces)``.
        '''
        self.i = i
        remaining_slist = [a for a in self.slist if a > self.plist[self.i]]
        remaining_tlist = [a for a in self.tlist if a > self.plist[self.i]]
        try:
            self.open_braces = [b for b in remaining_slist if b < self.plist[self.i + 1]]
        except IndexError:
            # No following definition: keep everything that is left.
            self.open_braces = remaining_slist
        try:
            self.close_braces = [b for b in remaining_tlist if b < self.plist[self.i + 1]]
        except IndexError:
            self.close_braces = remaining_tlist
        return self.open_braces, self.close_braces

    def _emit_parent_member(self, line, more_follow):
        '''Emit the member of the outer enum that introduces a nested block.

        ``more_follow`` adds ``", "`` after the member.  Lines that are not
        recognised as a member are commented out instead.
        '''
        tokens = line.strip().split(' ')
        try:
            value = self._DIGITS.findall(tokens[1])[0]
            self.output_list.append("e")
            self.output_list.append(self.inner_variable_prefix)
            self.output_list.append(inflection.camelize(
                inflection.underscore(tokens[0].replace('-', '_')).lower()))
            self.output_list.append(" = ")
            self.output_list.append(value)
            if more_follow:
                self.output_list.append(", ")
            self.output_list.append("\n")
        except Exception:
            if tokens[0].lower() == tokens[1].split('-')[0].lower():
                self.output_list.append("e")
                self.output_list.append(self.inner_variable_prefix)
                self.output_list.append(inflection.camelize(
                    inflection.underscore(tokens[0].replace('-', '_')).lower()))
                if more_follow:
                    self.output_list.append(", ")
            else:
                self.output_list.append("//")
                self.output_list.append(line)
            self.output_list.append("\n")

    def generate_output(self, file_name):
        '''Convert the file ``file_name`` and write the result to a ``.hpp`` file.

        The output path is ``file_name`` with its extension replaced by
        ``.hpp``.  Blank lines of the input are ignored.  The output file is
        only created after the whole input has been processed, so a failure
        while reading or converting does not leave an open or empty file.
        '''
        import os.path  # local import keeps this block self-contained

        self.file_name = file_name
        # splitext only strips the final extension, so directory names or
        # base names containing dots (``./x.asn``, ``a.b.asn``) stay intact.
        output_file_name = os.path.splitext(self.file_name)[0] + ".hpp"
        with open(self.file_name) as f_in:
            stripped = (line.strip() for line in f_in)
            self.lines = [line for line in stripped if line]
        length = len(self.lines)
        self.plist, self.slist, self.tlist = self.generate_lists(length, self.lines)
        for i in range(len(self.plist)):
            self.open_braces, self.close_braces = self.braces_line_no(i)
            open_braces_qu = deque(self.open_braces)
            for index, line in enumerate(self.lines):
                # Lines outside any definition are passed through as comments.
                # NOTE(review): the nesting of the next three conditions was
                # reconstructed from a copy of this code whose indentation
                # had been lost; confirm it against the original script.
                if (not self.enumvariable_flag) and (self.tlist[-1] != self.close_braces[-1]):
                    if (index > self.close_braces[-1]) and \
                            (index < self.slist[self.slist.index(self.open_braces[-1]) + 1] - 1):
                        self.comment_line(line)
                    elif self.enumvariable_flag is None and (index < self.plist[0]):
                        self.comment_line(line)
                elif self.close_braces[-1] == self.tlist[-1] and index > self.tlist[-1]:
                    self.comment_line(line)

                if index == self.plist[i]:
                    self.start_tag('::=', line)
                elif len(self.open_braces) == 1 and len(self.close_braces) == 1 and \
                        self.enumvariable_flag:
                    # Flat definition: a single brace pair, no nested blocks.
                    self.handle_comment(line)
                    if self.line.strip().startswith("}"):
                        self.end_tag(line)
                    if self.enumvariable_flag and \
                            not line.strip().startswith(("--", "{")):
                        self.handle_inner_element(line, index)
                elif self.enumvariable_flag and (len(self.open_braces) > 1) and \
                        (len(open_braces_qu) > 1):
                    # Definition with nested blocks: the outer enum only gets
                    # the lines that sit directly above a nested brace.
                    if self.output_list[-1] != " ":
                        self.output_list.append(" ")
                    try:
                        if index == open_braces_qu[1] - 1:
                            self._emit_parent_member(line, len(open_braces_qu) > 2)
                            open_braces_qu.popleft()
                            if len(open_braces_qu) == 1:
                                # Outer enum complete; now emit the nested ones.
                                self.end_tag(line)
                                open_braces_qu.popleft()
                                self.add_sub_element(self.open_braces, self.close_braces)
                    except Exception as exception:
                        print(exception)
        with open(output_file_name, 'w') as out:
            self.outputfile = out
            out.writelines(self.output_list)
if __name__ == '__main__':
    # Exit with a usage hint instead of an IndexError traceback when the
    # input file argument is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: {} INPUT_FILE".format(sys.argv[0]))
    INPUT_FILE_NAME = sys.argv[1]
    CON_OBJ = Convert()
    CON_OBJ.generate_output(INPUT_FILE_NAME)
第二个示例同时包含字符串拼接(`"//"+self.line+"\n"`)和向列表追加元素。这本身并不能解释为什么它突然慢了这么多;我的猜测是这个列表包含了很多元素。向很长的列表追加元素可能代价很高,因为 Python 最终必须复制整个列表。
在原始代码中,您只是创建一个短字符串并把它写入缓冲区(最终刷新到文件系统)。这种操作相对廉价。如果向一个包含数百万个元素的列表追加元素,Python 最终会用完底层数据结构中的空间,必须把它复制到一个更大的数据结构中。在又添加 N 个元素之后,它还得再这样做一次。
除此之外,你用来测量时间的代码并不可靠:这样测量时,后台任务会造成很大的影响。请使用 cdrake 建议的 `timeit` 模块,或者使用 shell 命令 `time`(`timeit` 会更准确)。
[编辑] 有三种策略:字符串拼接(SC)、列表追加(LA)和流式写入文件(STF)。
当您拼接的是短字符串,并且不会长时间保留它们时,SC 是高效的。字符串越长,SC 的效率就越低,因为每追加一次,Python 都必须复制整个字符串。
当您需要保留数据时,LA 是高效的。列表会预先分配 N 个槽位。只要槽位还没有用完,向列表追加元素就很廉价:只需占用一个空闲槽位。槽位用完时,列表的开销就会变大,因为 Python 必须复制整个列表。因此,它的效率比 SC 高一些,但最终也会遇到同样的潜在问题:追加得太多,复制所花的时间就会拖垮性能。
STF 指的是打开一个文件,并在数据生成时就把它写入该文件,内存中只保留少量数据。这对大量输出非常有效,因为可以避免复制已有的数据。缺点是,由于存在额外开销,它在数据量很小时并不高效。
结论:了解你的数据结构。没有哪一种结构在所有情况下都适用,它们各有优缺点。
相关问题 更多 >
编程相关推荐