从HTML表格构建Python字典（dict）

import requests
from bs4 import BeautifulSoup

# Scrape the IMDb awards page and build a dict that maps
# "<award name>_<award description>" to the outcome text.
#
# NOTE(review): the original paste had lost all line breaks and indentation;
# the loop nesting below is reconstructed from the statement order and should
# be confirmed against the author's real script.

url = 'https://www.imdb.com/title/tt8579674/awards'
response = requests.get(url)  # `requests` was used without being imported
html_soup = BeautifulSoup(response.text, 'html.parser')

award_list = []
award_dict = {}  # was written to without ever being created

for table in html_soup.find_all('table', {'class': 'awards'}):
    for tr in table.find_all('tr'):
        # The outcome cell text is split once on the first space: the first
        # token is stored as the result, the remainder as the award name.
        # Both values are kept from the previous row when a row has no
        # outcome cell.
        for title_award_outcome in tr.find_all('td', {'class': 'title_award_outcome'}):
            outcome_parts = title_award_outcome.get_text(separator=' ', strip=True).split(' ', 1)
            result = outcome_parts[0]
            award_name = outcome_parts[1]

        for award_description in tr.find_all('td', {'class': 'award_description'}):
            # NOTE(review): `[0]` keeps only the first word of the description
            # (text before the first space) - confirm this is intended.
            award_description = award_description.get_text(separator=' ', strip=True).split(' ', 1)[0]
            award = award_name + '_' + award_description

        award_dict[award] = result

award_list.append(award_dict)
print(award_list)

[{'Golden Globe_Best Motion Picture - Drama': 'Winner', 'Golden Globe_Best Original Score - Motion Picture': 'Nominee', 'Golden Globe_Best Original Score - Motion Picture': 'Nominee', 'BAFTA Film Award_Best Director': 'Nominee', 'BAFTA Film Award_Outstanding British Film of the Year': 'Nominee', etc, etc, etc}]

1条回答

网友

1楼 · 发布于 2024-10-06 17:21:28

要创建所需的字典，可以参考以下示例：

from pprint import pprint

import requests
from bs4 import BeautifulSoup

# Scrape the awards page and print a dict that maps
# "<award name>_<category description>" to the outcome text.

url = 'https://www.imdb.com/title/tt8579674/awards'

# Fail loudly on an HTTP error instead of parsing an error page, and do not
# hang forever on a stalled connection.
response = requests.get(url, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# (outcome text, award name) -> list of category descriptions
out = {}
# List belonging to the most recent header cell; None until one is seen.
current = None
for td in soup.select('.awards td'):
    outcome, cat = td.select_one('.title_award_outcome b'), td.select_one('.award_category')
    if outcome and cat:
        key = (outcome.get_text(strip=True), cat.get_text(strip=True))
        # setdefault keeps the categories already collected when the same
        # (outcome, award) pair shows up again later on the page; plain
        # assignment would replace them with a new empty list.
        current = out.setdefault(key, [])
        continue

    if current is None:
        # Description cell seen before any header cell: nothing to attach to.
        continue

    # Drop the links so the first remaining child is the description text.
    for a in td.select('a'):
        a.extract()

    first = td.contents[0] if td.contents else None
    # Only text nodes (str subclasses) carry a description; skip empty cells
    # and cells whose first child is a tag.
    if isinstance(first, str):
        current.append(first.strip())

# transform the dict to desired structure:
out2 = {}
for (result, award), descriptions in out.items():
    for description in descriptions:
        out2['{}_{}'.format(award, description)] = result

# print it
pprint(out2)

输出：

{'AACTA International Award_Best Direction': 'Nominee',
 'AFCA Award_Best Cinematography': 'Winner',
 'AFCA Award_Best Film Editing': 'Nominee',
 'AFCA Award_Best Score': 'Winner',
 'AFCC Award_Best Cinematography': 'Winner',
 'AFCC Award_Best Original Score': 'Winner',
 'AFCC Award_Top Ten Films': 'Nominee',
 'AFI Award_Movie of the Year': 'Winner',
 'ALFS Award_British/Irish Actor of the Year': 'Nominee',
 'ALFS Award_British/Irish Film of the Year': 'Nominee',
 'ALFS Award_Director of the Year': 'Nominee',
 'ALFS Award_Film of the Year': 'Nominee',

...and so on.

相关问题更多 >

编程相关推荐

热门问题

热门文章