我是Python新手,正在尝试进行一些web抓取。我正在尝试从一个关于游戏《皇室战争》(Clash Royale)的网站上获取诸如牌组名称、用户名、圣水费用和卡牌的信息。我获取数据,然后将其保存到项目目录中名为“data”的文件夹中。文件创建得很好,但是每个.json文件中都只有空括号[]。我不知道我做错了什么。任何帮助都将不胜感激。谢谢!代码如下:
from bs4 import BeautifulSoup
import requests
import uuid
import json
import os.path
from multiprocessing.dummy import Pool as Threadpool
def _row_to_record(row):
    """Map one result-row <div> to the dict of fields we keep."""
    return {
        'Deck Name': row.find('a').text,
        'User': row.find('td', {'class': 'user center'}).text,
        'Elixir Cost': row.find('span', {'class': 'elixir_cost'}).text,
        'Card': row.find('span', {'class': None}).text,
    }


def getdata(url):
    """Scrape deck listings from *url* and dump them to a JSON file.

    Collects deck name, user, elixir cost and card text from every
    result row on the page and writes the list to
    Data/data_<uuid>.json.  A page with no matching rows produces an
    empty JSON list ([]).
    """
    save_path = r'/Users/crazy4byu/PycharmProjects/Final/Data'
    html = requests.get(url).text
    soup = BeautifulSoup(html, 'html5lib')

    clashlist = []
    # NOTE(review): empty [] output means neither find_all below matched
    # anything -- the page content is likely rendered by JavaScript, so
    # requests only sees the bare HTML shell.  Verify by printing `html`
    # and confirm the class names against the live markup.
    for row in soup.find_all('div', {'class': 'row result'}):
        clashlist.append(_row_to_record(row))

    # BUG FIX: the original second loop iterated `deck in decks` but read
    # every field from `clashr`, the leftover variable of the first loop,
    # so each record here duplicated the first loop's last row (or raised
    # NameError when the first loop matched nothing).  Use the loop
    # variable itself.
    for row in soup.find_all('div', {'class': ' row result'}):
        clashlist.append(_row_to_record(row))

    # uuid1() gives each dump a unique filename so parallel workers never
    # clobber each other's output.
    out_name = 'data_' + str(uuid.uuid1()) + '.json'
    with open(os.path.join(save_path, out_name), 'w') as outfile:
        json.dump(clashlist, outfile)
if __name__ == '__main__':
    # First page, then the numbered pages 20, 30, ..., 980.
    urls = [r'http://clashroyaledeckbuilder.com/clashroyale/deckViewer/highestRated']
    urls.extend(
        r'http://clashroyaledeckbuilder.com/clashroyale/deckViewer/highestRated' + str(page)
        for page in range(20, 990, 10)
    )
    # NOTE(review): the page number is appended with no '/' separator,
    # yielding '...highestRated20' -- confirm that matches the site's
    # real URL scheme.

    # Fan the downloads out across 25 threads (multiprocessing.dummy
    # gives a thread-backed Pool, suitable for I/O-bound work).
    pool = Threadpool(25)
    pool.map(getdata, urls)
    pool.close()
    pool.join()
目前没有回答
相关问题 更多 >
编程相关推荐