我想用scrapy刮取的数据打印一个合适的表格

# the output that i get {'EXG': (['17.00', '10.00', '90.00', '9.00', '13.00', '17.00', '16.00', '43.00', '125.00', '16.00', '11.00', '150.00', '17.00', '24.00', '15.00', '24.00', '21.00', '36.00', '270.00', '280.00'],), 'G': ['8.00', '5.00', '38.00', '2.00', '6.00', '7.00', '6.00', '20.00', '40.00', '7.00', '5.00', '70.00', '6.00', '12.00', '7.00', '12.00', '10.00', '15.00', '120.00', '140.00'], 'company': (['Milton Bradley', 'Lowell', 'Milton Bradley', 'Transogram', 'Milton Bradley', 'Transogram', 'Standard Toykraft', 'Ideal', 'Game Gems', 'Milton Bradley', 'Parker Brothers', 'CPC', 'Parker Brothers', 'Whitman', 'Ideal', 'Transogram', 'King Features', 'Westinghouse', 'Parker Brothers', 'Parker Brothers'],), 'mnm': (['26.00', '19.00', '195.00', '15.00', '30.00', '29.00', '31.00', '65.00', '204.00', '25.00', '22.00', '250.00', '27.00', '42.00', '23.00', '37.00', '40.00', '57.00', '415.00', '435.00'],), 'rarity': ([],), 'title': (['Beat the Clock', 'Beat the Clock', 'Beatles - Flip Your Wig', 'Ben Casey M.D.', 'Bermuda Triangle', 'Betsy Ross and the Flag', 'Beverly Hillbillies', 'Beware the Spider', 'Bewitched', 'Bewitched - Stymie Card Game', 'Bionic Woman', 'Blade Runner', 'Blondie', 'Blondie - Playing Card Game', 'Blondie - Sunday Funnies', 'Blondie - The Hurry Scurry Game', "Blondie and Dagwood's Race for the Office", 'Blondie Goes to Leisureland', 'Boom or Bust', 'Boom or Bust'],), 'year': (['1969', '1954', '1964', '1961', '1976', '1961', '1963', '1980', '1965', '1964', '1976', '1982', '1969', '1941', '1972', '1966', '1950', '1935', '1951', '1959'],)}

# the output that i want! {"EXG": ["17.00"], "MNM": ["26.00"], "year": ["1969"], "company": ["Milton Bradley"], "Title": ["Beat the Clock"] } {"EXG": ["10.00"], "MNM": ["19.00"], "year": ["1954"], "company": ["Lowell"], "Title": ["Beat the Clock"] } and then so on for all values.

import scrapy

from ..items import RarityItem


class RarityScraper(scrapy.Spider):
    """Spider that collects the game table columns of a rarityguide.com page.

    NOTE: this is the code from the question. It fills a single item with
    whole columns (one list per field) rather than producing one item per
    table row, which is the behaviour the question asks about.
    """

    name = "rarity"
    start_urls = [
        "http://www.rarityguide.com/cbgames_view.php?FirstRecord=21"
    ]

    def parse(self, response):
        """Yield the shared item once for every ``form`` element found.

        Each field holds every text node matched by its column selector
        inside the form, not the value of a single row.
        """
        forms = response.css("form")
        # One item instance is created up front and reused for every form.
        item = RarityItem()
        # Field name -> selector of the table column it is read from.
        column_selectors = {
            "title": "td:nth-child(2)::text",
            "company": "td:nth-child(3)::text",
            "year": "td:nth-child(4)::text",
            "rarity": "td:nth-child(5)::text",
            "mnm": "td:nth-child(6)::text",
            "EXG": "td:nth-child(7)::text",
            "G": "td:nth-child(8)::text",
        }
        for form in forms:
            for field, selector in column_selectors.items():
                values = form.css(selector).extract()
                # Every field except "G" is stored as a 1-tuple wrapping the
                # list; "G" is stored as the plain list.
                item[field] = values if field == "G" else (values,)
            yield item

2条回答

网友

1楼 · 编辑于 2024-07-05 14:15:09

您需要遍历表中的每一行，并分别处理行数据。由于所有行的长度相同，您可以使用列表解包将数据写入dict项：

def parse(self, response):
    """Yield one dict per data row of the table inside the page's form.

    Args:
        response: the downloaded page; only its ``css()`` method is used.

    Yields:
        dict: keys ``title``, ``company``, ``year``, ``mnm``, ``EXG``, ``G``
        (text of the row's cells) and ``rarity`` (the ``alt`` text of the
        image in the row, or ``""`` when there is none).

    Raises:
        ValueError: if a row has cell text but not exactly eight text nodes.
    """
    table = response.css(
        "form table")

    for row in table.css("tr"):
        cells = row.css("td::text").extract()
        # Rows without any <td> text (e.g. a header row) cannot be unpacked
        # into the eight targets below, so skip them instead of crashing.
        if not cells:
            continue
        i = {}
        # First and fifth text nodes are not kept; a different cell count
        # still raises ValueError so layout changes are noticed.
        _, i["title"], i["company"], i["year"], _, i["mnm"], i["EXG"], i["G"] = cells
        # Attributes are selected with ::attr(name); "::alt" is not a valid
        # pseudo-element and makes the selector raise an error.
        i["rarity"] = row.css("td img::attr(alt)").extract_first("")
        yield i

网友

2楼 · 编辑于 2024-07-05 14:15:09

如果所有列表长度相同，则在此行之后

G = contents.css("td:nth-child(8)::text").extract()：

添加此代码段：

from itertools import zip_longest

# Combine the i-th entry of every column list into one dict per table row.
# zip_longest pads a shorter column (for example an empty `rarity` list)
# with None instead of raising IndexError the way positional indexing does.
arr = [
    {
        'Title': row_title, 'company': row_company, 'year': row_year,
        'rarity': row_rarity, 'MNM': row_mnm, 'EXG': row_exg, 'G': row_g,
    }
    for row_title, row_company, row_year, row_rarity, row_mnm, row_exg, row_g
    in zip_longest(title, company, year, rarity, mnm, EXG, G)
]

然后键入以下内容：

# Print each row dict on its own line.
for row in arr:
    print(row)

以查看输出的数组。

相关问题更多 >

编程相关推荐

热门问题

热门文章