擅长:Python、MySQL、Java
<p>您可以先读取 PDF,再在各页文本中搜索您要查找的内容:</p>
<pre class="lang-py prettyprint-override"><code># pip install pyPDF2
import io
import requests
import PyPDF2
# Download a PDF, extract the text of every page, and print the text of each
# page that mentions at least one of the search terms.
URI = "https://www.environment.gov.au/system/files/resources/7f15bfc1-ed3d-40b6-a177-c81349028ef6/files/aust-national-guidelines-whale-dolphin-watching-2017.pdf"

# Time out rather than hang on a dead connection, and fail loudly on an HTTP
# error so that an error page is never handed to the PDF parser.
r = requests.get(URI, timeout=30)
r.raise_for_status()

# Extract the text inside the `with` block, before the in-memory buffer closes.
with io.BytesIO(r.content) as f:
    # PdfReader / .pages / .extract_text() replace PdfFileReader / getPage() /
    # extractText(), which were removed in PyPDF2 3.0.
    reader = PyPDF2.PdfReader(f)
    num_pages = len(reader.pages)
    # One text entry per page, in document order.
    data = [page.extract_text() for page in reader.pages]

# Terms to look up. Keep them lowercase: page text is lowercased before the
# comparison, so an uppercase term here would never match.
search_words = {"orca", "killer whale", "humpback"}

# Collect the text of every page containing at least one term. `any` stops at
# the first hit, so a page that matches several terms is listed only once.
wanted_page = []
for page_contents in data:
    lowered = page_contents.lower()  # lowercase once per page, not once per term
    if any(word in lowered for word in search_words):
        wanted_page.append(page_contents)

print(wanted_page)
</code></pre>