检查ElementTree节点是否为空失败

# Read every <pub> record from the cleaned XML file using minidom.
xmldoc = minidom.parse('pubsClean.xml')

# Look the <pub> elements up once: getElementsByTagName() walks the whole
# subtree, so repeating the call on every loop iteration is wasted work.
root = xmldoc.getElementsByTagName("root")[0]
pub_nodes = root.getElementsByTagName("pub")
pubs = [a.firstChild.data for a in pub_nodes]
num_pubs = len(pubs)
count = 0

while count < num_pubs:
    # get data from each <pub> tag
    temp_pub = pub_nodes[count]
    temp_ID = temp_pub.getElementsByTagName("ID")[0].firstChild.data
    temp_title = temp_pub.getElementsByTagName("title")[0].firstChild.data
    temp_year = temp_pub.getElementsByTagName("year")[0].firstChild.data
    temp_booktitle = temp_pub.getElementsByTagName("booktitle")[0].firstChild.data

    # handling no value: getElementsByTagName() returns a NodeList (never
    # None, and it has no .data attribute), so test whether the list is
    # non-empty. An empty <pages/> element has no firstChild, so guard that
    # as well before reading .data.
    page_nodes = temp_pub.getElementsByTagName("pages")
    if page_nodes and page_nodes[0].firstChild is not None:
        temp_pages = page_nodes[0].firstChild.data
    else:
        temp_pages = -1

    temp_authors = temp_pub.getElementsByTagName("authors")[0]
    temp_author_array = [a.firstChild.data for a in temp_authors.getElementsByTagName("author")]
    num_authors = len(temp_author_array)
    count = count + 1

# Clean test.xml into pubsClean.xml, then insert every <pub> record found in
# the cleaned file into the publications.db SQLite database.

#for execute command to work
import sqlite3
import xml.etree.ElementTree as ET
from xml.dom import minidom
#use this to clean the foreign characters
import re

con = sqlite3.connect("publications.db")
cur = con.cursor()


def anglicise(matchobj):
    """Return the replacement text for one matched character entity.

    The entity is replaced by the single character that follows its ``&``,
    e.g. ``&eacute;`` becomes ``e``.
    """
    # NOTE(review): the pattern used below always matches at least one letter
    # and a ';', so the matched text is never a bare '&' and this first branch
    # cannot fire. The pattern also matches predefined XML entities such as
    # '&amp;' (rewritten to 'a') -- confirm that is intended.
    if matchobj.group(0) == '&':
        return matchobj.group(0)
    else:
        return matchobj.group(0)[1]


outputFilename = 'pubsClean.xml'
entity_pattern = re.compile(r'&[a-zA-Z]+;')

with open('test.xml') as inXML, open(outputFilename, 'w') as outXML:
    outXML.write('<root>\n')
    for line in inXML:
        # Strip <sub> markup unconditionally. The previous guard
        # `line.find(...) or line.find(...)` was effectively always true
        # (str.find returns -1, which is truthy, when the tag is absent), and
        # str.replace is a no-op on lines without the tags.
        newLine = line.replace("<sub>", "").replace("</sub>", "")
        outXML.write(entity_pattern.sub(anglicise, newLine))
    outXML.write('\n</root>')

tree = ET.parse(outputFilename)
root = tree.getroot()

# Every <pub> element below the root. A single ElementTree parse is enough;
# the file no longer needs a second minidom parse just to count the records.
pubs = list(root.iter("pub"))
num_pubs = len(pubs)
count = 0

try:
    for count, pub in enumerate(pubs, start=1):
        # Search relative to the current <pub>. Searching from `root` always
        # returns the first publication's fields, so the same record would be
        # processed num_pubs times.
        temp_ID = pub.find(".//ID").text
        temp_title = pub.find(".//title").text
        temp_year = pub.find(".//year").text
        temp_booktitle = pub.find(".//booktitle").text

        # handling no value: findtext() returns None when <pages> is missing
        # and '' when it is present but empty; both fall back to -1.
        temp_pages = pub.findtext(".//pages") or -1

        temp_authors = pub.find(".//authors")
        temp_author_array = [a.text for a in temp_authors.findall(".//author")]

        # process results into sqlite
        cur.execute(
            "INSERT OR IGNORE INTO publication (id, ptitle) VALUES (?, ?)",
            (temp_ID, temp_title),
        )
        cur.execute(
            "INSERT OR IGNORE INTO journal (jtitle, pages, year, pub_id, pub_title) VALUES (?, ?, ?, ?, ?)",
            (temp_booktitle, temp_pages, temp_year, temp_ID, temp_title),
        )
        for author in temp_author_array:
            cur.execute(
                "INSERT OR IGNORE INTO authors (name, pub_id, pub_title) VALUES (?, ?, ?)",
                (author, temp_ID, temp_title),
            )
            cur.execute(
                "INSERT OR IGNORE INTO wrote (name, jtitle) VALUES (?, ?)",
                (author, temp_booktitle),
            )

    con.commit()
finally:
    # Release the database handle even when a record fails to load.
    con.close()

print("\nNumber of entries processed: ", count)

2条回答

网友

1楼 · 编辑于 2024-10-06 11:30:40

您可以使用attributes方法来获取类似字典的对象（Doc），然后查询字典：

if temp_pub.getElementsByTagName("pages").attributes.get('data'):

网友

2楼 · 编辑于 2024-10-06 11:30:40

正如错误消息所提示的，getElementsByTagName()既不返回单个节点，也不返回`None`，而是返回`NodeList`。所以您应该检查其长度，看看返回的列表是否包含任何项：

# getElementsByTagName() returns a NodeList, so check its length to see
# whether any <pages> element was found before indexing into it.
if len(temp_pub.getElementsByTagName("pages")) > 0:  
    temp_pages = temp_pub.getElementsByTagName("pages")[0].firstChild.data

或者您可以直接将列表传递给if，因为空列表是falsy：

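if temp_pub.getElementsByTagName("pages"):
    temp_pages = temp_pub.getElementsByTagName("pages")[0].firstChild.data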

附带说明，尽管这个问题的标题和标签写的是ElementTree，但代码表明您使用的是minidom，而不是ElementTree。使用ElementTree可以简化代码，例如：

# minidom: index into the returned NodeList, then read the text node's data
temp_ID = temp_pub.getElementsByTagName("ID")[0].firstChild.data
# ElementTree: find() returns the first matching element; .text is its text
temp_ID = temp_pub.find(".//ID").text
....
# minidom
temp_author_array = [a.firstChild.data for a in temp_authors.getElementsByTagName("author")]
# ElementTree: findall() returns every matching element (the method is
# spelled findall; Element has no find_all)
temp_author_array = [a.text for a in temp_authors.findall(".//author")]

相关问题更多 >

编程相关推荐

热门问题

热门文章