从谷歌图片搜索下载图片？

import os import sys import time from urllib import FancyURLopener import urllib2 import simplejson # Define search term searchTerm = "parrot" # Replace spaces ' ' in search term for '%20' in order to comply with request searchTerm = searchTerm.replace(' ','%20') # Start FancyURLopener with defined version class MyOpener(FancyURLopener): version = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11' myopener = MyOpener() # Set count to 0 count= 0 for i in range(0,10): # Notice that the start changes for each iteration in order to request a new set of images for each loop url = ('https://ajax.googleapis.com/ajax/services/search/images?' + 'v=1.0& q='+searchTerm+'&start='+str(i*10)+'&userip=MyIP') print url request = urllib2.Request(url, None, {'Referer': 'testing'}) response = urllib2.urlopen(request) # Get results using JSON results = simplejson.load(response) data = results['responseData'] dataInfo = data['results'] # Iterate for each result and get unescaped url for myUrl in dataInfo: count = count + 1 my_url = myUrl['unescapedUrl'] myopener.retrieve(myUrl['unescapedUrl'],str(count)+'.jpg')

3条回答

网友

1楼 · 编辑于 2024-05-17 07:01:15

谷歌不赞成他们的API，抓取Google很复杂，所以我建议改用Bing API：

https://datamarket.azure.com/dataset/5BA839F1-12CE-4CCE-BF57-A49D98D29A44

谷歌不是那么好，微软也不是那么邪恶

网友

2楼 · 编辑于 2024-05-17 07:01:15

我修改了代码。现在代码可以为给定的查询下载100个图像，并且图像是完全高分辨率的，即正在下载的原始图像。

我正在使用urllib2&Beautiful soup下载图像

from bs4 import BeautifulSoup
import requests
import re
import urllib2
import os
import cookielib
import json

def get_soup(url,header):
    return BeautifulSoup(urllib2.urlopen(urllib2.Request(url,headers=header)),'html.parser')


query = raw_input("query image")# you can change the query for the image  here
image_type="ActiOn"
query= query.split()
query='+'.join(query)
url="https://www.google.co.in/search?q="+query+"&source=lnms&tbm=isch"
print url
#add the directory for your image here
DIR="Pictures"
header={'User-Agent':"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36"
}
soup = get_soup(url,header)


ActualImages=[]# contains the link for Large original images, type of  image
for a in soup.find_all("div",{"class":"rg_meta"}):
    link , Type =json.loads(a.text)["ou"]  ,json.loads(a.text)["ity"]
    ActualImages.append((link,Type))

print  "there are total" , len(ActualImages),"images"

if not os.path.exists(DIR):
            os.mkdir(DIR)
DIR = os.path.join(DIR, query.split()[0])

if not os.path.exists(DIR):
            os.mkdir(DIR)
###print images
for i , (img , Type) in enumerate( ActualImages):
    try:
        req = urllib2.Request(img, headers={'User-Agent' : header})
        raw_img = urllib2.urlopen(req).read()

        cntr = len([i for i in os.listdir(DIR) if image_type in i]) + 1
        print cntr
        if len(Type)==0:
            f = open(os.path.join(DIR , image_type + "_"+ str(cntr)+".jpg"), 'wb')
        else :
            f = open(os.path.join(DIR , image_type + "_"+ str(cntr)+"."+Type), 'wb')


        f.write(raw_img)
        f.close()
    except Exception as e:
        print "could not load : "+img
        print e

我希望这对你有帮助

网友

3楼 · 编辑于 2024-05-17 07:01:15

对于Google Image Search API is deprecated，您需要使用Google Custom Search来实现您想要的目标。要获取图像，您需要执行以下操作：

import urllib2
import simplejson
import cStringIO

fetcher = urllib2.build_opener()
searchTerm = 'parrot'
startIndex = 0
searchUrl = "http://ajax.googleapis.com/ajax/services/search/images?v=1.0&q=" + searchTerm + "&start=" + startIndex
f = fetcher.open(searchUrl)
deserialized_output = simplejson.load(f)

这将给您4个结果，作为JSON，您需要通过递增API请求中的startIndex来迭代地获得结果。

要获取图像，需要使用cStringIO这样的库。

例如，要访问第一个图像，需要执行以下操作：

imageUrl = deserialized_output['responseData']['results'][0]['unescapedUrl']
file = cStringIO.StringIO(urllib.urlopen(imageUrl).read())
img = Image.open(file)

相关问题更多 >

编程相关推荐

热门问题

热门文章