Python网络抓取无效

2024-09-30 06:12:49 发布

您现在位置:Python中文网/ 问答频道 /正文

我是Python新手,正在尝试做一些网页抓取(web scraping)。我想从一个关于游戏《皇室战争》(Clash Royale)的网站上获取牌组名称、用户名、圣水消耗(Elixir Cost)和卡牌等信息。我抓取到数据后,将其写入项目目录中名为“data”的文件夹。文件都能正常创建,但是每个.json文件里都只有一对空括号[]。我不知道自己哪里做错了。任何帮助都将不胜感激。谢谢!代码如下:

from bs4 import BeautifulSoup
import requests
import uuid
import json
import os.path
from multiprocessing.dummy import Pool as Threadpool


def _deck_record(row):
    """Build one output record from a single result-row element."""
    # Key order is kept as in the original so the emitted JSON is unchanged.
    return {
        'Deck Name': row.find('a').text,
        'User': row.find('td', {'class': 'user center'}).text,
        'Elixir Cost': row.find('span', {'class': 'elixir_cost'}).text,
        # First <span> that carries no class attribute at all.
        'Card': row.find('span', {'class': None}).text,
    }


def getdata(url):
    """Scrape one deck-listing page and dump the extracted decks to JSON.

    The page at ``url`` is downloaded and parsed with the ``html5lib``
    parser. Every ``<div>`` matched by the class string ``'row result'``,
    followed by every ``<div>`` matched by ``' row result'`` (leading
    space), contributes one record with the keys ``Deck Name``, ``User``,
    ``Elixir Cost`` and ``Card``. The records are written as a JSON list to
    a new ``data_<uuid1>.json`` file in the hard-coded output directory.

    Args:
        url: Address of the page to scrape.

    Returns:
        None. The output is the file on disk; a file containing ``[]``
        means neither selector matched anything on the downloaded page.

    Raises:
        AttributeError: If a matched row lacks one of the expected child
            elements (``find`` returns ``None`` in that case).
    """
    save_path = r'/Users/crazy4byu/PycharmProjects/Final/Data'

    # NOTE(review): no timeout is passed, so a stalled server blocks the
    # calling worker thread indefinitely -- confirm whether that is intended.
    html = requests.get(url).text
    soup = BeautifulSoup(html, 'html5lib')

    records = [
        _deck_record(row)
        for row in soup.find_all('div', {'class': 'row result'})
    ]
    # Second pass for the leading-space variant of the class string. Each
    # record is now built from its own row; previously this pass re-read the
    # last row of the first pass (or hit a NameError when the first pass
    # matched nothing).
    records.extend(
        _deck_record(row)
        for row in soup.find_all('div', {'class': ' row result'})
    )

    # uuid1 gives every call its own file name, so concurrent workers never
    # write to the same file.
    file_name = 'data_' + str(uuid.uuid1()) + '.json'
    with open(os.path.join(save_path, file_name), 'w') as outfile:
        json.dump(records, outfile)


if __name__ == '__main__':
    base_url = r'http://clashroyaledeckbuilder.com/clashroyale/deckViewer/highestRated'

    # The bare listing address first, then the same address with
    # 20, 30, ..., 980 appended directly to it.
    urls = [base_url] + [base_url + str(offset) for offset in range(20, 990, 10)]

    # Thread-backed pool of 25 workers; each URL is handled by getdata.
    workers = Threadpool(25)
    workers.map(getdata, urls)

    workers.close()
    workers.join()

Tags: path text in import json for find urls

热门问题