擅长:python、mysql、java
<pre><code>import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
class SoccerwaySpider(scrapy.Spider):
    """Scrape a match score from a Soccerway team page.

    Requests every URL in ``start_urls`` and yields one item of the form
    ``{'score': <text>}`` taken from the first score link found inside the
    ``table.matches`` element of the response.
    """

    name = "Soccerway"
    start_urls = ['https://fr.soccerway.com/teams/france/olympique-de-marseille/890/']
    # Export every yielded item to Soccerway.json using the JSON feed format.
    custom_settings = {"FEEDS": {"Soccerway.json": {"format": "json"}}}

    def start_requests(self):
        """Yield one request per start URL, sent with a browser-like User-Agent."""
        # NOTE(review): presumably set because the site rejects Scrapy's
        # default User-Agent -- confirm before removing.
        headers = {
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:48.0) Gecko/20100101 Firefox/48.0'
        }
        for url in self.start_urls:
            yield scrapy.Request(url, headers=headers, callback=self.parse)

    def parse(self, response):
        """Yield the first score found in the matches table of ``response``.

        Yields nothing (and logs a warning) when no score link is present,
        instead of failing on a missing value.
        """
        score = (
            response.css("table.matches")
            .css('td.score-time.score')
            .css('a::text')
            .get()
        )
        # .get() returns None when the selector matches nothing (layout
        # change, blocked request, no played matches); stripping None would
        # raise TypeError inside the callback.
        if score is None:
            self.logger.warning("No score found on %s", response.url)
            return
        yield {
            'score': score.strip(),
        }
if __name__ == "__main__":
    # Run the spider in-process using the settings of the enclosing Scrapy
    # project (if any).
    process = CrawlerProcess(get_project_settings())
    # Pass the spider class rather than its name: a name has to be resolved
    # through the project's spider loader, which fails when this file is run
    # as a standalone script outside a Scrapy project.
    process.crawl(SoccerwaySpider)
    # Blocks here until the crawl has finished.
    process.start()
</code></pre>
<p>Soccerway.json:</p>
<pre><code>[
{"score": "2 - 2"}
]
</code></pre>