import sys
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
from PyQt5.QtWebEngineWidgets import QWebEngineView
from bs4 import BeautifulSoup
class Render(QWebEngineView):
    """Load a page, fill in and submit its form via JavaScript, and keep the result HTML.

    Constructing an instance is blocking: the constructor starts the Qt event
    loop and only returns after the loop has been asked to quit. Afterwards
    ``html`` holds the HTML of the page reached by submitting the form, or
    ``None`` if the initial page could not be loaded.

    The ``loadFinished`` signal is expected to fire twice: once for the
    initial page and once for the page produced by the form submission.
    ``first_pass`` tells the two notifications apart.
    """

    def __init__(self, url):
        """Load ``url``, drive the form submission, and block until done.

        Args:
            url: Address of the page containing the form, as a string.
        """
        # Qt allows only one application object per process, so reuse an
        # existing one instead of unconditionally creating another. The
        # application must exist before any widget is constructed.
        app = QApplication.instance()
        if app is None:
            app = QApplication(sys.argv)
        self.app = app
        super().__init__()
        self.html = None
        self.first_pass = True
        self.loadFinished.connect(self._load_finished)
        self.load(QUrl(url))
        # Runs the event loop; returns once quit() is called from a callback.
        self.app.exec_()

    def _load_finished(self, result):
        """Dispatch a ``loadFinished`` notification to the matching stage.

        Args:
            result: Success flag delivered by the ``loadFinished`` signal.
        """
        if self.first_pass:
            self.first_pass = False
            if not result:
                # Without the initial page there is no form to submit, so no
                # second notification would ever arrive. Stop the event loop
                # rather than blocking forever; ``html`` stays ``None``.
                self.app.quit()
                return
            self._first_finished()
        else:
            # Capture whatever the submission produced, as the page content
            # is still readable regardless of the reported flag.
            self._second_finished()

    def _first_finished(self):
        """Fill in the form on the freshly loaded page and submit it."""
        scripts = (
            "document.getElementById('ctl00_txt_stock_code').value = '5';",
            "document.getElementById('ctl00_sel_DateOfReleaseFrom_y').value='1999';",
            # presumably a function defined by the page itself -- confirm
            "preprocessMainForm();",
            "document.forms[0].submit();",
        )
        page = self.page()
        for script in scripts:
            page.runJavaScript(script)

    def _second_finished(self):
        """Request the current page's HTML; it is delivered to ``callable``."""
        self.page().toHtml(self.callable)

    def callable(self, data):
        """Store the delivered HTML and stop the event loop.

        Args:
            data: The HTML passed to this callback by ``toHtml``.
        """
        self.html = data
        self.app.quit()
# Page holding the search form that Render fills in and submits.
url = "http://www3.hkexnews.hk/listedco/listconews/advancedsearch/search_active_main.aspx"

# Constructing Render runs the Qt event loop, so this call blocks until the
# loop has been quit; the captured markup is then available as web.html.
web = Render(url)

# Parse the captured markup and print the text of every element whose
# class attribute is "news", one per line.
soup = BeautifulSoup(web.html, 'html.parser')
for news in soup.find_all(attrs={'class': 'news'}):
    print(news.get_text())
输出:
Voting Rights and Capital
Next Day Disclosure Return
NOTICE OF REDEMPTION AND CANCELLATION OF LISTING
THIRD INTERIM DIVIDEND FOR 2018
Notification of Transactions by Persons Discharging Managerial Responsibilities
Next Day Disclosure Return
THIRD INTERIM DIVIDEND FOR 2018
Monthly Return of Equity Issuer on Movements in Securities for the month ended 31 October 2018
Voting Rights and Capital
PUBLICATION OF BASE PROSPECTUS SUPPLEMENT
3Q 2018 EARNINGS RELEASE AUDIO WEBCAST AND CONFERENCE CALL
3Q EARNINGS RELEASE - HIGHLIGHTS
Scrip Dividend Circular
2018 Third Interim Dividend; Scrip Dividend
THIRD INTERIM DIVIDEND FOR 2018 SCRIP DIVIDEND ALTERNATIVE
NOTIFICATION OF MAJOR HOLDINGS
EARNINGS RELEASE FOR THIRD QUARTER 2018
NOTIFICATION OF MAJOR HOLDINGS
Monthly Return of Equity Issuer on Movements in Securities for the month ended 30 September 2018
THIRD INTERIM DIVIDEND FOR 2018; DIVIDEND ON PREFERENCE SHARES
您有多种选择:
1)您可以使用 Selenium。首先安装 Selenium。
然后下载一个浏览器驱动程序:https://sites.google.com/a/chromium.org/chromedriver/downloads (根据您的操作系统,您可能需要指定驱动程序的位置)。
(此处原有的 Selenium 代码示例在页面提取时丢失。)
2)或者将 PyQt 与 QWebEngineView 一起使用。
在 Ubuntu 上安装 PyQt:
(此处原有的安装命令在页面提取时丢失。)
或者在其他操作系统上(需要 64 位版本的 Python):
基本思路是:先加载包含表单的第一个页面,然后通过运行 JavaScript 填写表单并提交。loadFinished() 信号会被触发两次——第二次是由提交表单引起的——因此可以用 if 语句来区分这两次调用。
输出:
或者,您可以使用 scrapy-splash:https://github.com/scrapy-plugins/scrapy-splash
或者使用 Requests-HTML:https://html.python-requests.org/ 。
但我不确定如何用后两种方法来填写表单。
更新:补充了如何读取下一页:
相关问题 更多 >
编程相关推荐