使用LXML和XPath搜索XML表

import xml.etree.ElementTree as ET

# Print one row per polling place in the media-feed XML:
#   polling-place Id, polling-place Name, district Id, district name.
tree = ET.parse('file.xml')
root = tree.getroot()

# The document declares a default namespace, so ElementTree exposes every
# un-prefixed tag as "{namespace-uri}LocalName". Every lookup must carry it.
TagSlug = '{http://www.aec.gov.au/xml/schema/mediafeed}'

# Path from a <PollingDistrict> down to each of its <PollingPlace> elements.
PLACE_PATH = TagSlug + 'PollingPlaces/' + TagSlug + 'PollingPlace'

for district in root.findall(TagSlug + 'PollingDistrict'):
    DID = district.find(TagSlug + 'PollingDistrictIdentifier')
    if DID is None:
        # Without an identifier there is nothing meaningful to report.
        continue

    # <Name> is a child of <PollingDistrictIdentifier>, not of the district.
    Name = DID.find(TagSlug + 'Name')
    district_name = Name.text if Name is not None else None

    for place in district.iterfind(PLACE_PATH):
        PP = place.find(TagSlug + 'PollingPlaceIdentifier')
        if PP is None:
            continue
        print(PP.get('Id'), PP.get('Name'), DID.get('Id'), district_name)

<PollingDistrictList Created="2018-10-30T12:01:21.043" xmlns="http://www.aec.gov.au/xml/schema/mediafeed" xmlns:eml="urn:oasis:names:tc:evs:schema:eml" xmlns:ds="http://www.w3.org/2000/09/xmldsig#" xmlns:xal="urn:oasis:names:tc:ciq:xsdschema:xAL:2.0" xmlns:xnl="urn:oasis:names:tc:ciq:xsdschema:xNL:2.0" xmlns:ts="urn:oasis:names:tc:evs:schema:eml:ts" xmlns:xs="http://www.w3.org/2001/XMLSchema-instance"> <TransactionId>4C59F7F3-2405-4443-8A1F-3F2BEF6E07C4</TransactionId> <eml:EventIdentifier Id="12122"> <eml:EventName>State Election 2018</eml:EventName> </eml:EventIdentifier> <PollingDistrict> <PollingDistrictIdentifier Id="10153"> <Name>Albert Park District</Name> </PollingDistrictIdentifier> <PollingPlaces> <PollingPlace> <PollingPlaceIdentifier Id="13133" Name="Bridport" /> <WheelchairAccess>None</WheelchairAccess> </PollingPlace> <PollingPlace> <PollingPlaceIdentifier Id="13987" Name="Kerferd South" /> <WheelchairAccess>None</WheelchairAccess> </PollingPlace> <PollingPlace> <PollingPlaceIdentifier Id="13504" Name="Middle Park" /> <WheelchairAccess>None</WheelchairAccess> </PollingPlace> </PollingPlaces> </PollingDistrict> <PollingDistrict> <PollingDistrictIdentifier = .... et cetera

1条回答

网友

1楼 · 发布于 2024-09-30 22:26:37

我用下面的代码解决了这个问题，并加了注释，方便你看清每一步在做什么。

    # Print one row per polling place found in the polling-locations XML file:
    #   polling-place Id, polling-place Name, district Id, district name.
    import os  # Needed to change the working directory
    os.chdir('C:/XMLDataLocation')  # The XML file is opened relative to this directory
    import lxml
    from lxml import etree
    import xml.etree.ElementTree as ET  # Parser actually used below
    import requests  # Not used by this script yet
    tree = ET.parse('State2018MediaFilePollingLocations.xml')  # Load the file
    root = tree.getroot()


    # ElementTree exposes namespaced tags as "{namespace-uri}LocalName", so this
    # prefix has to be prepended to every tag name that is looked up.
    TagSlug = '{http://www.aec.gov.au/xml/schema/mediafeed}'

    # Path from a PollingDistrict down to each PollingPlace it contains. Matching
    # by tag (instead of iterating every child of PollingPlaces) skips any
    # unexpected sibling elements and yields nothing when PollingPlaces is absent.
    PollingPlacePath = TagSlug+'PollingPlaces/'+TagSlug+'PollingPlace'

    PollingDistricts = root.findall(TagSlug+'PollingDistrict')  # Level 0 (root) -> level 1
    for PollingDistrict in PollingDistricts:
        DistrictID = PollingDistrict.find(TagSlug+'PollingDistrictIdentifier')
        if DistrictID is None:
            # No identifier element: nothing to report for this district.
            continue
        Name = DistrictID.find(TagSlug+'Name')  # District name is a child of the identifier
        DistrictName = Name.text if Name is not None else None
        for PollingPlace in PollingDistrict.iterfind(PollingPlacePath):
            PPID = PollingPlace.find(TagSlug+'PollingPlaceIdentifier')  # Carries booth Id and Name
            if PPID is None:
                continue
            print(PPID.get('Id'), PPID.get('Name'), DistrictID.get('Id'), DistrictName)

相关问题更多 >

编程相关推荐

热门问题

热门文章