<p>我尝试把代码拆分成若干个函数。
还有什么建议可以进一步改进这段代码吗？另外，如何把从网站上获取到的错误转储到一个新的 HTML 文件中？该文件应采用表格形式，列出含有错误的书籍和章节的详细信息。</p>
<p>更新后的代码如下：</p>
<pre><code>import urllib2, sys
from bs4 import BeautifulSoup
def get_details(link, index):
    """
    Fetch a listing page and return the name and link found in each table row.

    The first table on the page at `link` is scanned row by row. For every
    row, the anchor inside the td cell at position `index` supplies one entry.
    As used by this script:
    * index = 1 reads the details of books
    * index = 0 reads the details of chapters

    Rows that cannot supply an entry are skipped instead of raising
    IndexError / AttributeError: rows with no td cell at `index` (for example
    a header row built from th cells), cells without an anchor, and anchors
    without an href. A page that contains no table yields an empty list.

    Returns a list of [name, href] pairs in document order.
    """
    response = urllib2.urlopen(link)
    try:
        src = BeautifulSoup(response)
    finally:
        # The page is fully parsed at this point; release the connection.
        response.close()

    table = src.find('table')
    if table is None:
        return []

    details_list = []
    for row in table.find_all('tr'):
        cells = row.find_all('td')
        try:
            anchor = cells[index].a
        except IndexError:
            # Header or otherwise short row: nothing to read at this index.
            continue
        if anchor is None:
            continue
        href = anchor.get("href")
        if href is None:
            continue
        details_list.append([anchor.string, href])
    return details_list
def get_chapter_errors(chap_link):
    """
    Count the error outputs shown on a chapter page.

    The page at `chap_link` is fetched and every div whose class attribute is
    'output_subarea output_text output_error' is counted.

    Returns one of:
    * the number of such divs, when at least one is present
    * None, when the page contains none
    * the string "Page fetch error", when the page cannot be fetched; the
      exception is printed before returning
    """
    try:
        response = urllib2.urlopen(chap_link)
    except urllib2.URLError as e:
        # HTTPError is a subclass of URLError, so bad status codes and
        # unreachable hosts are both reported here instead of aborting the
        # whole crawl.
        print(e)
        return "Page fetch error"

    try:
        chp_src = BeautifulSoup(response)
    finally:
        # Parsing is done; release the connection.
        response.close()

    example_errors = chp_src.find_all(
        'div', {'class': 'output_subarea output_text output_error'})
    # A page without error outputs is reported as None rather than 0.
    return len(example_errors) or None
def main():
    """
    Crawl the book listing given on the command line and print an error report.

    sys.argv[1] is the URL of the page that lists the books. For every book
    the chapter list is fetched, each chapter is checked with
    get_chapter_errors, and the result is stored in a dictionary keyed by
    book id:

        {book_id: {'name': ..., 'url': ..., 'id': ...,
                   'chapters': [{'name': ..., 'url': ..., 'errors': ...}]}}

    The accumulated dictionary is printed after each book, followed by blank
    lines as a separator. Exits with a usage message when no URL is given.
    """
    if not sys.argv[1:]:
        sys.exit("usage: %s URL" % sys.argv[0])
    url = sys.argv[1]

    site = 'http://tbc-python.fossee.in'
    id_prefix = '/book-details'
    log_dict = {}

    for book_name, book_link in get_details(url, index=1):
        book_url = site + book_link

        # str.strip('/book-details') would remove any of those CHARACTERS
        # from both ends, mangling ids that start or end with one of them.
        # Drop the literal prefix first, then only the surrounding slashes.
        _id = book_link
        if _id.startswith(id_prefix):
            _id = _id[len(id_prefix):]
        _id = _id.strip('/')

        chapters = []
        for chap_name, chap_link in get_details(book_url, index=0):
            chap_url = site + chap_link
            chapters.append({'name': chap_name,
                             'url': chap_url,
                             'errors': get_chapter_errors(chap_url)})

        log_dict[_id] = {'name': book_name,
                         'url': book_url,
                         'id': _id,
                         'chapters': chapters}

        # NOTE(review): printed once per book as a progress dump; confirm this
        # is the intended placement rather than a single print at the end.
        print(log_dict)
        print("\n\n\n\n")
# Start the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
</code></pre>