HTTP错误404：未找到——BeautifulSoup与Python

# Scrape the "Achieving the Dream" course catalog page and store one JSON
# record per course in ./json/atd-lumen.json.
import json
import os
from urllib.request import Request, urlopen

from bs4 import BeautifulSoup as soup

atd_url = 'https://courses.lumenlearning.com/catalog/achievingthedream'

# Opening up connection and grabbing page; the request identifies itself
# with a desktop-browser User-Agent header.
res = Request(
    atd_url,
    headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'},
)

# The context manager closes the connection even when read() raises.
with urlopen(res) as uClient:
    page_html = uClient.read()

# HTML parsing
page_soup = soup(page_html, "html.parser")

# Grabs info for each textbook: one <div class="book-info"> per entry.
# find_all is the current spelling of the deprecated findAll alias.
containers = page_soup.find_all("div", {"class": "book-info"})

data = []
for container in containers:
    item = {}
    item['type'] = "Course"
    item['title'] = container.h2.text
    item['author'] = container.p.text
    item['link'] = container.p.a["href"]
    item['source'] = "Achieving the Dream Courses"
    # Same value as the catalog URL fetched above.
    item['base_url'] = atd_url
    data.append(item)  # add the item to the list

# Make sure the output directory exists before opening the file in it.
os.makedirs("./json", exist_ok=True)

# ensure_ascii=False emits non-ASCII characters verbatim, so the file
# encoding is pinned to UTF-8 instead of relying on the platform default.
with open("./json/atd-lumen.json", "w", encoding="utf-8") as writeJSON:
    json.dump(data, writeJSON, ensure_ascii=False)

1条回答

网友

1楼 · 发布于 2024-05-20 13:36:29

改用 requests 库，这样就可以了：

import requests

# Opening up connection and grabbing page.
# The timeout keeps the call from blocking indefinitely when the server
# never answers; requests applies no timeout unless one is given.
# NOTE: unlike urlopen, requests.get does not raise on 4xx/5xx responses;
# inspect response.status_code if the status matters to the caller.
response = requests.get(
    atd_url,
    headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'},
    timeout=30,
)

# HTML parsing
page_soup = soup(response.content, "html.parser")

相关问题更多 >

编程相关推荐

热门问题

热门文章