使用BeautifulSoup下载图像

# Download every <img> that has both "alt" and "src" attributes from URL
# into default_dir, naming each file after the last path segment of its URL.
import os
import urllib.request
from urllib.parse import urljoin, urlsplit

from bs4 import BeautifulSoup

URL = "http://twitter.com/banprada/statuses/829102430017187841"
list_dir = "D:\\"
default_dir = os.path.join(list_dir, "Pictures_neu")

# open(..., "wb") does not create missing folders, so make sure the
# destination exists before the first image is written.
os.makedirs(default_dir, exist_ok=True)

opener = urllib.request.build_opener()
urllib.request.install_opener(opener)

# Close the HTTP response deterministically and name the parser explicitly
# so the result does not depend on which parsers happen to be installed.
with urllib.request.urlopen(URL) as page:
    soup = BeautifulSoup(page.read(), "html.parser")

imgs = soup.find_all("img", {"alt": True, "src": True})
for img in imgs:
    # "src" may be relative or protocol-relative; resolve it against the page.
    img_url = urljoin(URL, img["src"])

    # Build the file name from the URL path only, so a query string
    # ("?size=large") does not end up in the name on disk.
    name = os.path.basename(urlsplit(img_url).path)
    if not name:
        # Nothing usable to name the file after (e.g. URL ends with "/").
        continue
    filename = os.path.join(default_dir, name)

    # Context managers close both the response and the file even if the
    # download or the write raises.
    with opener.open(img_url) as img_data, open(filename, "wb") as f:
        f.write(img_data.read())

1条回答

网友

1楼 · 发布于 2024-10-01 11:27:02

下面是使用 Selenium + requests 仅获取所提到的那张图像的方法：

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait as wait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import requests

# Open the tweet in a headless browser, switch into the iframe whose id
# starts with "xdm_default", read the first <img> inside it and save that
# image to disk.
link = 'https://twitter.com/banprada/statuses/829102430017187841'
# NOTE(review): PhantomJS is unmaintained and recent Selenium releases no
# longer ship a driver for it -- confirm against the installed Selenium
# version, or swap in a headless Chrome/Firefox driver.
driver = webdriver.PhantomJS()
try:
    driver.get(link)
    # Wait up to 10 seconds for the frame to load, then switch into it.
    wait(driver, 10).until(EC.frame_to_be_available_and_switch_to_it((By.XPATH, "//iframe[starts-with(@id, 'xdm_default')]")))
    # Locator-based lookup; equivalent to the legacy find_element_by_tag_name.
    image_src = driver.find_element(By.TAG_NAME, 'img').get_attribute('src')
finally:
    # Always shut the browser down, even if the wait or the lookup fails.
    driver.quit()

# Fail loudly on an HTTP error instead of saving an error page as the image,
# and do not hang forever on an unresponsive server.
reply = requests.get(image_src, timeout=30)
reply.raise_for_status()
response = reply.content
with open('C:\\Users\\You\\Desktop\\Image.jpeg', 'wb') as f:
    f.write(response)

如果您想获取页面上各个 iframe 中的所有图像（不包括初始页面源码中的图像，那些图像用您自己的代码即可获取）：

^{pr2}$

注意，您可以使用any ^{}

相关问题更多 >

编程相关推荐

热门问题

热门文章