Python使用ElementTree解析XML数据

2024-10-03 21:33:36 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在为我的Discord机器人开发一个模块,该模块从URL获取数据并将其整理到嵌入消息(embed)中。我花了几个小时尝试不同的方法,终于让它显示出我需要的内容。现在,当我把URL从 xml.php 换成 xml2.php(我需要更多的数据)时,出现了问题:代码不再工作了

import xml.etree.ElementTree as ET
import requests


# Fetch the ATC bookings for the LJLA FIR. The timeout keeps the script from
# hanging indefinitely if the server never answers, and raise_for_status()
# surfaces HTTP errors instead of trying to parse an error page as XML.
response = requests.get('http://vatbook.euroutepro.com/xml.php?fir=LJLA', timeout=10)
response.raise_for_status()

# Parse the raw bytes rather than response.text so the XML parser decodes the
# document with the encoding declared in the XML itself, not a guessed charset.
tree = ET.fromstring(response.content)

# Testing what is displayed
for atcs in tree:
    callsign = atcs.find('callsign')
    name = atcs.find('name')
    time_start = atcs.find('time_start')
    time_end = atcs.find('time_end')
    # Children of the root without a <callsign> are not bookings; skip them.
    if callsign is not None:
        print(f"{name.text} booked {callsign.text} from {time_start.text} to {time_end.text}")

输出:

Mirza Ibrahimovic booked LJLJ_TWR from 2020-05-19 1800 to 2020-05-19 2100
Mirza Ibrahimovic booked LJLJ_APP from 2020-05-19 1800 to 2020-05-19 2100

我的问题是,只要我用第二个URL替换第一个URL,我的代码就不会显示任何内容。有什么想法吗?


Tags: 模块 to text name from import time xml
2条回答

我发现我忘了添加代码的一小部分以使其运行

因此,我的解决方案如下:

import xml.etree.ElementTree as ET

import requests

# Callsigns of ATC positions (plain and "_T_" variants of each position).
# NOTE(review): this list is not referenced by the code shown below —
# presumably it is meant for filtering bookings by callsign; confirm with the author.
atc = ["ADR_CTR", "ADR_W_CTR", "ADR_U_CTR", "ADR_E_CTR", "LDZO_CTR", "LJLA_CTR", "LYBA_CTR", "LWSS_CTR", "LAAA_CTR", "LQSB_CTR", "LJLJ_TWR", "LJLJ_APP", "LJLJ_GND", "LJMB_TWR", "LJMB_APP", "LJPZ_TWR", "LJPZ_APP", "LDZA_APP", "LDZA_TWR", "LDZA_GND", "LDDU_TWR", "LDDU_APP", "LDSP_TWR", "LDSP_APP", "LDPL_TWR", "LDPL_APP", "LDRI_TWR", "LDZD_TWR", "LDZD_APP", "LDOS_TWR", "LDOS_APP", "LYBE_APP",
       "LYBE_TWR", "LYBE_GND", "LYTV_TWR", "LYPG_TWR", "LYPG_APP", "LYNI_TWR", "LYNI_APP", "LATI_APP", "LATI_TWR", "LATI_GND", "LWSK_TWR", "LWSK_APP", "LWSK_GND", "LWOH_TWR", "BKPR_TWR", "BKPR_APP", "LQSA_TWR", "LQSA_GND", "LQSA_APP", "LQMO_TWR", "LQMO_APP", "LQBK_TWR", "LQBK_APP", "LQTZ_TWR", "LQTZ_APP", "LYUZ_TWR", "LYUZ_APP", "LYKV_APP", "LYKV_TWR", "LDZO_T_CTR", "LJLA_T_CTR", "LYBA_T_CTR", 
       "LWSS_T_CTR", "LAAA_T_CTR", "LQSB_T_CTR", "LJLJ_T_TWR", "LJLJ_T_APP", "LJLJ_T_GND", "LJMB_T_TWR", "LJMB_T_APP", "LJPZ_T_TWR", "LJPZ_T_APP", "LDZA_T_APP", "LDZA_T_TWR", "LDZA_T_GND", "LDDU_T_TWR", "LDDU_T_APP", "LDSP_T_TWR", "LDSP_T_APP", "LDPL_T_TWR", "LDPL_T_APP", "LDRI_T_TWR", "LDZD_T_TWR", "LDZD_T_APP", "LDOS_T_TWR", "LDOS_T_APP", "LYBE_T_APP",
       "LYBE_T_TWR", "LYBE_T_GND", "LYTV_T_TWR", "LYPG_T_TWR", "LYPG_T_APP", "LYNI_T_TWR", "LYNI_T_APP", "LATI_T_APP", "LATI_T_TWR", "LATI_T_GND", "LWSK_T_TWR", "LWSK_T_APP", "LWSK_T_GND", "LWOH_T_TWR", "BKPR_T_TWR", "BKPR_T_APP", "LQSA_T_TWR", "LQSA_T_GND", "LQSA_T_APP", "LQMO_T_TWR", "LQMO_T_APP", "LQBK_T_TWR", "LQBK_T_APP", "LQTZ_T_TWR", "LQTZ_T_APP", "LYUZ_T_TWR", "LYUZ_T_APP", "LYKV_T_APP", "LYKV_T_TWR"]

# Fetch the bookings from the xml2.php endpoint. The timeout keeps the script
# from hanging indefinitely, and raise_for_status() surfaces HTTP errors
# instead of trying to parse an error page as XML.
response = requests.get('http://vatbook.euroutepro.com/xml2.php?fir=', timeout=10)
response.raise_for_status()

# Parse the raw bytes rather than response.text so the XML parser decodes the
# document with the encoding declared in the XML itself, not a guessed charset.
tree = ET.fromstring(response.content)

# Testing what is displayed
# In this document the bookings are children of an <atcs> element under the
# root; iterfind() simply yields nothing if that element is absent, whereas
# iterating over the result of find('atcs') would raise TypeError on None.
for atcs in tree.iterfind('atcs/*'):
    callsign = atcs.find('callsign')
    name = atcs.find('name')
    time_start = atcs.find('time_start')
    time_end = atcs.find('time_end')
    if callsign is not None:
        # The print must be indented under the `if`; otherwise the snippet
        # fails with an IndentationError before it runs at all.
        print(f"{name.text} booked {callsign.text} from {time_start.text} to {time_end.text}")

因为两个URL返回的XML结构不同,可以考虑有条件地检查是否存在 `<atc>` 节点,然后将动态的搜索路径传递给 `iterfind`。下面使用内置的urllib模块从URL解析XML:

from urllib.request import urlopen
import xml.etree.ElementTree as ET

def vatbook_parse(url):
    """Print every ATC booking found in the XML document at *url*.

    Two document layouts are handled: ``<atc>`` elements directly under the
    root, and ``<booking>`` elements nested under an ``<atcs>`` element.
    One line is printed per booking. Bookings that lack any of the ``name``,
    ``callsign``, ``time_start`` or ``time_end`` children are skipped instead
    of raising ``AttributeError``.

    Args:
        url: Address of the XML document; opened with
            ``urllib.request.urlopen``.
    """
    # Read and parse the whole document first so the connection is closed
    # before any output is produced.
    with urlopen(url) as f:
        root = ET.parse(f).getroot()

    # CONDITIONALLY SET SEARCH PATH
    path = './/atcs/booking' if root.find('atc') is None else './/atc'

    for booking in root.iterfind(path):
        fields = [booking.find(tag) for tag in ('name', 'callsign', 'time_start', 'time_end')]
        # An incomplete record cannot be formatted; skip it rather than crash.
        if any(field is None for field in fields):
            continue
        name, callsign, time_start, time_end = fields
        print(f"{name.text} booked {callsign.text} from {time_start.text} to {time_end.text}")

第一个URL

vatbook_parse('http://vatbook.euroutepro.com/xml.php?fir=LJLA')

# Mirza Ibrahimovic booked LJLJ_APP from 2020-05-19 18:00:00 to 2020-05-19 21:00:00
# Mirza Ibrahimovic booked LJLJ_TWR from 2020-05-19 18:00:00 to 2020-05-19 21:00:00

第二个URL

vatbook_parse('http://vatbook.euroutepro.com/xml2.php?fir=LJLA')

# Mirza Ibrahimovic booked LJLJ_APP from 2020-05-19 18:00:00 to 2020-05-19 21:00:00
# Mirza Ibrahimovic booked LJLJ_TWR from 2020-05-19 18:00:00 to 2020-05-19 21:00:00

相关问题 更多 >