<p>你可以试试这样的方法:</p>
<pre><code>import MySQLdb
from lxml import etree
import config
def fast_iter(context, func, args=(), kwargs=None):
    """Run ``func`` on every parsed element, releasing each one afterwards.

    Based on Liza Daly's recipe:
    http://www.ibm.com/developerworks/xml/library/x-hiperfparse/

    Args:
        context: Iterable yielding ``(event, elem)`` pairs, such as the
            object returned by ``etree.iterparse``.
        func: Callable invoked as ``func(elem, *args, **kwargs)``.
        args: Extra positional arguments forwarded to ``func``.
        kwargs: Extra keyword arguments forwarded to ``func``; ``None``
            means no keyword arguments.
    """
    # Defaults are immutable/None on purpose: a literal [] or {} default is
    # created once and shared by every call of the function.
    if kwargs is None:
        kwargs = {}
    for _event, elem in context:
        func(elem, *args, **kwargs)
        # The element has been handled, so its content can be dropped.
        elem.clear()
        # Delete the siblings that precede this element from the parent so
        # that already-processed nodes do not pile up while parsing.
        while elem.getprevious() is not None:
            del elem.getparent()[0]
    del context
def _first_or_none(node, expr):
    """Return the first result of ``node.xpath(expr)``, or None if empty."""
    try:
        return node.xpath(expr)[0]
    except IndexError:
        return None


def extract_paper_elements(element, cursor):
    """Collect publisher and book rows from one Inventory element.

    Every value that cannot be found in the element is reported as ``None``
    instead of raising. The rows are currently only printed; the
    ``cursor.execute`` calls are left commented out as a dry run.

    Args:
        element: Element exposing ``attrib`` and ``xpath`` (an Inventory
            node when called through ``main``).
        cursor: Database cursor intended for the INSERT statements; unused
            while the ``execute`` calls stay commented out.

    Raises:
        KeyError: If ``element`` or a BookDetail node has no ``ID`` attribute.
    """
    pub = {
        'InventoryID': element.attrib['ID'],
        # A missing PublisherClass yields None rather than an IndexError.
        'PublisherClassID': _first_or_none(element, 'PublisherClass/@ID'),
    }
    for key in ('Name', 'Type', 'ID'):
        pub[key] = _first_or_none(
            element,
            'PublisherClass/Publisher/PublisherDetails/{k}/text()'.format(k=key))
    sql = '''INSERT INTO Publishers (InventoryID, PublisherClassID, Name, Type, ID)
             VALUES (%s, %s, %s, %s, %s)
          '''
    args = [pub.get(key) for key in
            ('InventoryID', 'PublisherClassID', 'Name', 'Type', 'ID')]
    print(args)
    # cursor.execute(sql,args)
    for bookdetail in element.xpath('descendant::BookList/Listing/Book/BookDetail'):
        pub['BookDetailID'] = bookdetail.attrib['ID']
        for key in ('BookName', 'Author', 'Pages', 'ISBN'):
            pub[key] = _first_or_none(bookdetail, '{k}/text()'.format(k=key))
        sql = '''INSERT INTO Books
                 (PublisherID, BookDetailID, Name, Author, Pages, ISBN)
                 VALUES (%s, %s, %s, %s, %s, %s)
              '''
        # pub['ID'] still holds the publisher's ID collected above; it is
        # what links each book row to its publisher.
        args = [pub.get(key) for key in
                ('ID', 'BookDetailID', 'BookName', 'Author', 'Pages', 'ISBN')]
        # cursor.execute(sql,args)
        print(args)
def main():
    """Stream ``book.xml`` and pass each Inventory element to the extractor.

    Opens a MySQL connection using the settings in ``config``, runs
    ``extract_paper_elements`` on every Inventory element through
    ``fast_iter``, and commits once the whole file has been processed.
    The cursor and the connection are closed even when processing fails;
    in that case nothing is committed.
    """
    # tag='Inventory' restricts the reported elements to Inventory nodes.
    context = etree.iterparse("book.xml", events=("end",), tag='Inventory')
    connection = MySQLdb.connect(
        host=config.HOST, user=config.USER,
        passwd=config.PASS, db=config.MYDB)
    try:
        cursor = connection.cursor()
        try:
            fast_iter(context, extract_paper_elements, args=(cursor,))
        finally:
            cursor.close()
        # Reached only if every element was processed without an exception.
        connection.commit()
    finally:
        connection.close()


if __name__ == '__main__':
    main()
</code></pre>
<ol>
<li>不要使用<code>fast_iter2</code>。<a href="http://www.ibm.com/developerworks/xml/library/x-hiperfparse/" rel="nofollow">原始的<code>fast_iter</code></a>把通用的工具函数
与特定的处理函数
(<code>extract_paper_elements</code>)分离开来。<code>fast_iter2</code>将两者混合在一起,
导致代码无法复用。</li>
<li>如果在<code>etree.iterparse("book.xml",
events=("end",), tag='Inventory')</code>中设置<code>tag</code>参数,则处理函数
<code>extract_paper_elements</code>将只看到<code>Inventory</code>元素。</li>
<li>给定一个Inventory元素,您可以使用<code>xpath</code>方法深入其中,
提取所需的数据。</li>
<li><code>fast_iter</code>增加了<code>args</code>和<code>kwargs</code>参数,这样<code>cursor</code>
就可以传递给<code>extract_paper_elements</code>。</li>
</ol>