我需要从Scrapy结果中获取python字典的列表
HTML/JS代码包含以下部分:
window.Matches = [
{
matchId: 402404,
leagueId: 1087,
leagueName: "אלוף האלופים",
leagueURL: "https://www.one.co.il/Soccer/league/1087",
isLeagueLinkable: true,
round: 1,
roundName: "- משחק 1",
date: new Date("2020-08-08T20:15:00"),
isTimeSeted: true,
isHaveScore:true,
dateDay: "שבת",
homeId: 22,
homeName: "הפועל ב\"ש",
homeURL: "https://www.one.co.il/Soccer/team/22",
homeScore: 0,
guestId: 3,
guestName: "מכבי ת\"א",
guestURL: "https://www.one.co.il/Soccer/team/3",
guestScore: 2,
arenaURL: "https://www.one.co.il/Article/20-21/1,1087,3,0/364600.html"
},{
matchId: 402405,
leagueId: 1087,
leagueName: "אלוף האלופים",
leagueURL: "https://www.one.co.il/Soccer/league/1087",
isLeagueLinkable: true,
round: 2,
roundName: "- גומלין",
date: new Date("2020-08-13T20:30:00"),
isTimeSeted: true,
isHaveScore:true,
dateDay: "חמישי",
homeId: 3,
homeName: "מכבי ת\"א",
homeURL: "https://www.one.co.il/Soccer/team/3",
homeScore: 2,
guestId: 22,
guestName: "הפועל ב\"ש",
guestURL: "https://www.one.co.il/Soccer/team/22",
guestScore: 0,
arenaURL: "https://www.one.co.il/Article/20-21/1,1087,3,0/364952.html"
}];
我试着做了以下几点:
import scrapy
import json
class One(scrapy.Spider):
name = "one"
start_urls = [
"https://www.one.co.il/Soccer/team/3/"
]
def parse(self, response):
resp = response.xpath('/html/head/script[3]/text()').extract_first()
yield{
'game': resp
}
但结果看起来像一个巨大的不可读字符串,如下所示:
[
{"game": "\r\n window.Matches = [\r\n {\r\n matchId: 402404,\r\n leagueId: 1087,\r\n leagueName: \"\u05d0\u05dc\u05d5\u05e3 \u05d4\u05d0\u05dc\u05d5\u05e4\u05d9\u05dd\",\r\n leagueURL: \"https://www.one.co.il/Soccer/league/1087\",\r\n isLeagueLinkable: true,\r\n round: 1,\r\n roundName: \"- \u05de\u05e9\u05d7\u05e7 1\",\r\n date: new Date(\"2020-08-08T20:15:00\"),\r\n isTimeSeted: true,\r\n isHaveScore:true,\r\n dateDay: \"\u05e9\u05d1\u05ea\",\r\n homeId: 22,\r\n homeName: \"\u05d4\u05e4\u05d5\u05e2\u05dc \u05d1\\\"\u05e9\",\r\n homeURL: \"https://www.one.co.il/Soccer/team/22\",\r\n homeScore: 0,\r\n guestId: 3,\r\n guestName: \"\u05de\u05db\u05d1\u05d9 \u05ea\\\"\u05d0\",\r\n guestURL: \"https://www.one.co.il/Soccer/team/3\",\r\n guestScore: 2,\r\n arenaURL: \"https://www.one.co.il/Article/20-21/1,1087,3,0/364600.html\"\r\n },{\r\n matchId: 402405,\r\n leagueId: 1087,\r\n leagueName: \"\u05d0\u05dc\u05d5\u05e3 \u05d4\u05d0\u05dc\u05d5\u05e4\u05d9\u05dd\",\r\n leagueURL: \"https://www.one.co.il/Soccer/league/1087\",\r\n isLeagueLinkable: true,\r\n round: 2,\r\n roundName: \"- \u05d2\u05d5\u05de\u05dc\u05d9\u05df\",\r\n date: new Date(\"2020-08-13T20:30:00\"),\r\n isTimeSeted: true,\r\n isHaveScore:true,\r\n dateDay: \"\u05d7\u05de\u05d9\u05e9\u05d9\",\r\n homeId: 3,\r\n homeName: \"\u05de\u05db\u05d1\u05d9 \u05ea\\\"\u05d0\",\r\n homeURL: \"https://www.one.co.il/Soccer/team/3\",\r\n homeScore: 2,\r\n guestId: 22,\r\n guestName: \"\u05d4\u05e4\u05d5\u05e2\u05dc \u05d1\\\"\u05e9\",\r\n guestURL: \"https://www.one.co.il/Soccer/team/22\",\r\n guestScore: 0,\r\n arenaURL: \"https://www.one.co.il/Article/20-21/1,1087,3,0/364952.html\"\r\n },{\r\n matchId: 405477,\r\n leagueId: 22,\r\n leagueName: \"\u05de\u05d5\u05e7\u05d3\u05de\u05d5\u05ea \u05dc\u05d9\u05d2\u05ea \u05d4\u05d0\u05dc\u05d5\u05e4\u05d5\u05ea\",\r\n leagueURL: \"\",\r\n isLeagueLinkable: false,\r\n round: 1,\r\n roundName: \"- \u05e1\u05d9\u05d1\u05d5\u05d1 \u05e8\u05d0\u05e9\u05d5\u05df\",\r\n date: new Date(\"2020-08-19T20:00:00\"),\r\n isTimeSeted: true,\r\n isHaveScore:true,\r\n dateDay: \"\u05e8\u05d1\u05d9\u05e2\u05d9\",\r\n homeId: 3,\r\n homeName: \"\u05de\u05db\u05d1\u05d9 \u05ea\\\"\u05d0\",\r\n homeURL: \"\",\r\n homeScore: 2,\r\n guestId: 5590,\r\n guestName: \"\u05e8\u05d9\u05d2\u05d4\",\r\n guestURL: \"\",\r\n guestScore: 0,\r\n arenaURL: \"https://www.one.co.il/Article/20-21/1,1,3,63766/365369.html\"\r\n },{\r\n matchId: 406083,\r\n leagueId: 667,\r\n leagueName: \"\u05d2\u05d1\u05d9\u05e2 \u05d4\u05d8\u05d5\u05d8\u05d5\",\r\n leagueURL: \"https://www.one.co.il/Soccer/league/667\",\r\n isLeagueLinkable: true,\r\n round: 4,\r\n roundName: \"- \u05d2\u05de\u05e8\",\r\n date: new Date(\"2020-08-22T20:30:00\"),\r\n isTimeSeted: true,\r\n isHaveScore:true,\r\n dateDay: \"\u05e9\u05d1\u05ea\",\r\n homeId: 17,\r\n homeName: \"\u05d1\u05e0\u05d9 \u05e1\u05db\u05e0\u05d9\u05df\",\r\n homeURL: \"https://www.one.co.il/Soccer/team/17\",\r\n homeScore: 0,\r\n guestId: 3,\r\n guestName: \"\u05de\u05db\u05d1\u05d9 \u05ea\\\"\u05d0\",\r\n guestURL: \"https://www.one.co.il/Soccer/team/3\",\r\n guestScore: 2,\r\n arenaURL: \"https://www.one.co.il/Article/20-21/1,667,3,0/365537.html\"\r\n },{\r\n matchId: 406275,\r\n leagueId: 22,\r\n leagueName: \"\u05de\u05d5\u05e7\u05d3\u05de\u05d5\u05ea \u05dc\u05d9\u05d2\u05ea \u05d4\u05d0\u05dc\u05d5\u05e4\u05d5\u05ea\",\r\n leagueURL: \"\",\r\n isLeagueLinkable: false,\r\n round: 2,\r\n roundName: \"\u05e1\u05d9\u05d1\u05d5\u05d1 \u05e9\u05e0\u05d9\",\r\n date: new Date(\"2020-08-26T19:00:00\"),\r\n isTimeSeted: true,\r\n isHaveScore:true,\r\n dateDay: \"\u05e8\u05d1\u05d9\u05e2\u05d9\",\r\n homeId: 5415,\r\n homeName: \"\u05e1\u05d5\u05d3\u05d5\u05d1\u05d4\",\r\n homeURL: \"\",\r\n homeScore: 0,\r\n guestId: 3,\r\n guestName: \"\u05de\u05db\u05d1\u05d9 \u05ea\\\"\u05d0\",\r\n guestURL: \"\",\r\n guestScore: 3,\r\n arenaURL: \"https://www.one.co.il/Article/20-21/1,22,3,0/365806.html\"\r\n },{\r\n matchId: 402498,\r\n leagueId: 1,\r\n leagueName: \"\u05dc\u05d9\u05d2\u05ea \u05d4\u05e2\u05dc\",\r\n leagueURL: \"https://www.one.co.il/Soccer/league/1\",\r\n isLeagueLinkable: true,\r\n round: 1,\r\n roundName: \"\u05de\u05d7\u05d6\u05d5\u05e8 1\",\r\n date: new Date(\"2020-08-30T21:00:00\"),\r\n isTimeSeted: true,\r\n isHaveScore:true,\r\n dateDay: \"\u05e8\u05d0\u05e9\u05d5\u05df\",\r\n homeId: 3,\r\n homeName: \"\u05de\u05db\u05d1\u05d9 \u05ea\\\"\u05d0\",\r\n homeURL: \"https://www.one.co.il/Soccer/team/3\",\r\n homeScore: 1,\r\n guestId: 10,\r\n guestName: \"\u05de\u05db\u05d1\u05d9 \u05e4\\\"\u05ea\",\r\n guestURL: \"https://www.one.co.il/Soccer/team/10\",\r\n guestScore: 2,\r\n arenaURL: \"https://www.one.co.il/Article/20-21/1,1,1,0/366079.html\"\r\n },{\r\n matchId: 402504,\r\n leagueId: 1,\r\n leagueName: \"\u05dc\u05d9\u05d2\u05ea \u05d4\u05e2\u05dc\",\r\n leagueURL: \"https://www.one.co.il/Soccer/league/1\",\r\n isLeagueLinkable: true,\r\n round: 2,\r\n roundName: \"\u05de\u05d7\u05d6\u05d5\u05e8 2\",\r\n date: new Date(\"2020-09-12T20:30:00\"),\r\n isTimeSeted: true,\r\n isHaveScore:true,\r\n dateDay: \"\u05e9\u05d1\u05ea\",\r\n homeId: 11,\r\n homeName: \"\u05d1\u05e0\u05d9 \u05d9\u05d4\u05d5\u05d3\u05d4\",\r\n homeURL: \"https://www.one.co.il/Soccer/team/11\",\r\n homeScore: 2,\r\n guestId: 3,\r\n guestName: \"\u05de\u05db\u05d1\u05d9 \u05ea\\\"\u05d0\",\r\n guestURL: \"https://www.one.co.il/Soccer/team/3\",\r\n guestScore: 2,\r\n arenaURL: \"https://www.one.co.il/Article/20-21/1,1,3,0/366927.html\"\r\n },{\r\n matchId: 408093,\r\n leagueId: 22,\r\n leagueName: \"\u05de\u05d5\u05e7\u05d3\u05de\u05d5\u05ea \u05dc\u05d9\u05d2\u05ea \u05d4\u05d0\u05dc\u05d5\u05e4\u05d5\u05ea\",\r\n leagueURL: \"\",\r\n isLeagueLinkable: false,\r\n round: 3,\r\n roundName: \"- \u05e1\u05d9\u05d1\u05d5\u05d1 \u05e9\u05dc\u05d9\u05e9\u05d9\",\r\n date: new Date(\"2020-09-16T20:00:00\"),\r\n isTimeSeted: true,\r\n isHaveScore:true,\r\n dateDay: \"\u05e8\u05d1\u05d9\u05e2\u05d9\",\r\n homeId: 3,\r\n homeName: \"\u05de\u05db\u05d1\u05d9 \u05ea\\\"\u05d0\",\r\n homeURL: \"\",\r\n homeScore: 1,\r\n guestId: 5580,\r\n guestName: \"\u05d3\u05d9\u05e0\u05de\u05d5 \u05d1\u05e8\u05e1\u05d8\",\r\n guestURL: \"\",\r\n guestScore: 0,\r\n arenaURL: \"https://www.one.co.il/Article/20-21/1,1,3,0/367232.html\"\r\n },{\r\n matchId: 409037,\r\n leagueId: 22,\r\n leagueName: \"\u05de\u05d5\u05e7\u05d3\u05de\u05d5\u05ea \u05dc\u05d9\u05d2\u05ea \u05d4\u05d0\u05dc\u05d5\u05e4\u05d5\u05ea\",\r\n leagueURL: \"\",\r\n isLeagueLinkable: false,\r\n round: 4,\r\n roundName: \"- \u05e4\u05dc\u05d9\u05d9\u05d0\u05d5\u05e3 \u05de\u05e9\u05d7\u05e7 1\",\r\n date: new Date(\"2020-09-22T22:00:00\"),\r\n isTimeSeted: true,\r\n isHaveScore:true,\r\n dateDay: \"\u05e9\u05dc\u05d9\u05e9\u05d9\",\r\n homeId: 3,\r\n homeName: \"\u05de\u05db\u05d1\u05d9 \u05ea\\\"\u05d0\",\r\n homeURL: \"\",\r\n homeScore: 1,\r\n guestId: 5120,\r\n guestName: \"\u05e8\u05d3 \u05d1\u05d5\u05dc \u05d6\u05dc\u05e6\u05d1\u05d5\u05e8\u05d2\",\r\n guestURL: \"\",\r\n guestScore: 2,\r\n arenaURL: \"https://www.one.co.il/Article/20-21/1,22,3,0/367637.html\"
有谁能告诉我如何从这些细节中创建一个包含scrapy的有效python词典列表吗
[{
matchId: 402404,
leagueId: 1087,
leagueName: "אלוף האלופים",
leagueURL: "https://www.one.co.il/Soccer/league/1087",
isLeagueLinkable: true,
round: 1,
roundName: "- משחק 1",
date: new Date("2020-08-08T20:15:00"),
isTimeSeted: true,
isHaveScore:true,
dateDay: "שבת",
homeId: 22,
homeName: "הפועל ב\"ש",
homeURL: "https://www.one.co.il/Soccer/team/22",
homeScore: 0,
guestId: 3,
guestName: "מכבי ת\"א",
guestURL: "https://www.one.co.il/Soccer/team/3",
guestScore: 2,
arenaURL: "https://www.one.co.il/Article/20-21/1,1087,3,0/364600.html"
},{
matchId: 402405,
leagueId: 1087,
leagueName: "אלוף האלופים",
leagueURL: "https://www.one.co.il/Soccer/league/1087",
isLeagueLinkable: true,
round: 2,
roundName: "- גומלין",
date: new Date("2020-08-13T20:30:00"),
isTimeSeted: true,
isHaveScore:true,
dateDay: "חמישי",
homeId: 3,
homeName: "מכבי ת\"א",
homeURL: "https://www.one.co.il/Soccer/team/3",
homeScore: 2,
guestId: 22,
guestName: "הפועל ב\"ש",
guestURL: "https://www.one.co.il/Soccer/team/22",
guestScore: 0,
arenaURL: "https://www.one.co.il/Article/20-21/1,1087,3,0/364952.html"
}];
提前谢谢
你已经完成了大部分工作。您所需要做的就是使用regex获取确切的变量值,然后由于它是javascript,所以不能使用
json.loads()
directy。相反,您需要将其作为js进行评估下面是经过轻微修改的代码版本,应该可以使用:
这将为您提供变量的精确值。现在你只需要评估一下。为此,我将使用
js2py
库顺便说一句,在XPath中使用索引不是一个好的尝试
Contains
会更安全相关问题 更多 >
编程相关推荐