使用PyQt5解析JS动态页面

2024-09-27 04:21:48 发布

您现在位置:Python中文网/ 问答频道 /正文

我试图用PyQt5解析js动态页面。执行后,我得到一个错误:进程结束,退出代码为-1073741819(0xC0000005)。代码如下:

import sys
import requests
from bs4 import BeautifulSoup
from PyQt5.QtWebEngineWidgets import QWebEnginePage
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
import pandas as pd

class Page(QWebEnginePage):
    """Load a URL in a headless web engine page and capture the rendered HTML.

    Constructing a ``Page`` blocks until the page has finished loading and
    its HTML (after JavaScript has run) has been delivered; the markup is
    then available as ``self.html``.

    Args:
        url: Address of the page to load, as a string.

    Attributes:
        app: The process-wide ``QApplication`` driving the event loop.
        html: The rendered HTML, or ``''`` if none was delivered.
    """

    # Strong reference to the single QApplication. Qt permits only one
    # application object per process, and it must outlive every Page, so it
    # is pinned on the class rather than owned by individual instances.
    _shared_app = None

    def __init__(self, url):
        # Reuse the existing application if there is one. Building a fresh
        # QApplication for every Page is the likely cause of the access
        # violation (0xC0000005) seen when a second Page is created.
        app = QApplication.instance()
        if app is None:
            app = QApplication(sys.argv)
        Page._shared_app = app
        self.app = app
        QWebEnginePage.__init__(self)
        self.html = ''
        self.loadFinished.connect(self._on_load_finished)
        self.load(QUrl(url))
        # Run the event loop until Callable() calls quit().
        self.app.exec_()

    def _on_load_finished(self):
        """Request the page's HTML once loading has finished."""
        # toHtml() is asynchronous: it returns nothing and hands the markup
        # to the callback, so its return value must not be stored in
        # self.html.
        self.toHtml(self.Callable)
        print('Load finished')

    def Callable(self, html_str):
        """Store the delivered HTML and stop the event loop.

        Args:
            html_str: The page's HTML as passed in by ``toHtml``.
        """
        self.html = html_str
        self.app.quit()


# Root of the site being scraped; hrefs found on its pages are appended to it.
url = 'https://www.racingpost.com'

# Render the JavaScript-driven landing page, then parse the resulting markup.
page = Page(url)
soup = BeautifulSoup(page.html, 'html.parser')
courses = soup.find_all('a', {'class': 'hidden-sm-down rh-cardsMatrix__courseTitle ui-link'})
for course in courses:
    course_href = course.get('href')
    if not course_href:
        # An anchor without an href cannot be followed; concatenating None
        # onto the base URL would raise TypeError.
        continue
    CurrentCourse = course.text.strip()
    Courses_URL = url + course_href
    # A timeout keeps one unresponsive connection from hanging the run.
    response1 = requests.get(Courses_URL, timeout=30)
    soup1 = BeautifulSoup(response1.text, 'html.parser')
    horses = soup1.find_all('a', {'class': 'RC-runnerName'})
    for horse in horses:
        horse_href = horse.get('href')
        if not horse_href:
            continue
        horses_url = url + horse_href
        # The horse page is loaded through Page so its JavaScript runs
        # before the markup is parsed.
        page1 = Page(horses_url)
        soup2 = BeautifulSoup(page1.html, 'html.parser')
        data = soup2.find_all('a', {'class': 'ui-link ui-link_table js-popupLink'})

您能帮我重写或调整这段代码,以便获取我想要的正确信息吗?


Tags: 代码 from import self app url def html

热门问题