我一直在尝试使用 Python 3 的 PyRSS2Gen 模块为站点生成 RSS。使用下面的代码,我已经能够生成 XML 文件,并且可以在 RSS 阅读器(Liferea)中打开该文件并读取内容。
但是,如果网站发布了一篇新文章,我该如何在保留 XML 文件中现有条目不变的同时,把新增的条目也写入该 XML 文件?
全文如下:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import urllib.request
import re
import datetime
import PyRSS2Gen
import itertools
def target_url():
    """Return the URL of the page that is scraped to build the feed.

    Returns:
        str: The (placeholder) address of the target page.
    """
    return "xxx"
def anti_anti_crawling():
    """Return the HTTP request headers sent with the page request.

    The only header set is a desktop-browser ``user-agent`` string.

    Returns:
        dict: A fresh mapping of header name to header value on every call.
    """
    return {
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
    }
def parse_response_to_html():
    """Download the target page and return its body as text.

    The request is built from ``target_url()`` and the headers returned by
    ``anti_anti_crawling()``.

    Returns:
        str: The response body decoded as UTF-8.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    request = urllib.request.Request(target_url(), headers=anti_anti_crawling())
    # The context manager closes the connection even if read() raises;
    # previously the response object was never closed.
    with urllib.request.urlopen(request) as response:
        raw_body = response.read()
    return raw_body.decode('utf-8')
def extract_elements_for_xml(html=None):
    """Extract the raw title, link, description and date strings from a page.

    Args:
        html: Page source to search. When omitted, the page is downloaded
            once via ``parse_response_to_html()``. All three patterns are
            run against that single copy, so the returned lists always
            describe the same snapshot of the page.

    Returns:
        tuple: ``(title_raw, link_raw, description_raw, pubDate_raw)``, each
        a list of strings. ``link_raw`` is a fixed placeholder of seven empty
        strings; no link extraction is implemented.
    """
    if html is None:
        # Fetch once and reuse, instead of one HTTP request per pattern.
        html = parse_response_to_html()

    # NOTE(review): the patterns are greedy and '.' does not cross newlines,
    # so each one presumably expects at most one match per source line --
    # confirm against the real page markup.
    title_search_pattern = re.compile(r'(?<=</em>).*(?=</h3>)')
    description_search_pattern = re.compile(r'(?<=<p><em>).*(?=</p>)')
    pubDate_search_pattern = re.compile(r'(?<=<span class="time">).*(?=</span>)')

    title_raw = title_search_pattern.findall(html)
    link_raw = ['', '', '', '', '', '', '']
    description_raw = description_search_pattern.findall(html)
    pubDate_raw = pubDate_search_pattern.findall(html)
    return title_raw, link_raw, description_raw, pubDate_raw
def list_generation(elements=None):
    """Combine the extracted columns into one row per feed entry.

    Args:
        elements: Optional ``(title, link, description, pubDate)`` tuple of
            sequences. When omitted, it is obtained from a single call to
            ``extract_elements_for_xml()``.

    Returns:
        list: One ``[title, link, description, pubDate]`` list per entry.
        ``zip`` stops at the shortest column, so the number of rows equals
        the length of the shortest input sequence.
    """
    if elements is None:
        # Extract once and unpack; calling the extractor separately for each
        # column would repeat the whole download for every field.
        elements = extract_elements_for_xml()
    title, link, description, pubDate = elements
    return [list(row) for row in zip(title, link, description, pubDate)]
def items_generation():
    """Build one ``PyRSS2Gen.RSSItem`` per row from ``list_generation()``.

    Each row is unpacked as ``(title, link, description, pubDate)`` and the
    values are passed to ``RSSItem`` unchanged.

    Returns:
        list: The ``RSSItem`` objects, in the order the rows were produced.
    """
    return [
        PyRSS2Gen.RSSItem(
            title=_title,
            link=_link,
            description=_description,
            pubDate=_pubDate,
        )
        for _title, _link, _description, _pubDate in list_generation()
    ]
def rss_generation():
    """Assemble the complete feed object.

    Returns:
        PyRSS2Gen.RSS2: A feed with placeholder channel metadata, the items
        from ``items_generation()``, and the current time as the build date.
    """
    # PyRSS2Gen renders datetimes with a literal "GMT" suffix, so the value
    # must be in UTC; a local-time now() would be mislabelled in the feed.
    build_time = datetime.datetime.now(datetime.timezone.utc)
    return PyRSS2Gen.RSS2(
        title="xxx",
        link="xxx.com",
        description="",
        lastBuildDate=build_time,
        items=items_generation(),
    )
xmlFileName = "xxxxx"
# Build the feed before opening the output file, so that a failed download
# does not leave a truncated file behind.
rss = rss_generation()
# Write the file as UTF-8 and declare the same encoding in the XML prolog.
# The library default declares iso-8859-1, which would not match a text-mode
# file opened in the platform encoding. The with-block closes and flushes it.
with open(xmlFileName, "w", encoding="utf-8") as xml_file:
    rss.write_xml(xml_file, encoding="utf-8")
谢谢你的帮助
目前没有回答
相关问题 更多 >
编程相关推荐