擅长:Python、MySQL、Java
<p>通过 r/learnpython 找到的解决方案:</p>
<pre><code>SCRIPT_DIR = Path(sys.executable).parent  # directory that contains sys.executable
def getDataFromPdf():
    """Collect account numbers from ``records.pdf`` into ``results``.

    The PDF is looked up in ``SCRIPT_DIR``. Text is extracted from every
    page, and each match of the account-number pattern (five digits, three
    digits and four digits, separated by hyphens) that is not already in
    ``results`` is appended to it. The resolved PDF path is printed before
    reading, and the final size of ``results`` is printed afterwards.

    NOTE(review): ``results`` is not defined in this block; presumably it is
    a module-level list - confirm against the rest of the file.
    """
    pdf_path = (SCRIPT_DIR / 'records.pdf').resolve()
    print(pdf_path)

    account_pattern = re.compile(r'\d\d\d\d\d-\d\d\d-\d\d\d\d')

    # Binary mode: the PDF reader consumes the raw byte stream.
    with open(pdf_path, 'rb') as pdf_stream:
        pdf_reader = PyPDF2.PdfFileReader(pdf_stream)
        for page_index in range(pdf_reader.numPages):
            page_text = pdf_reader.getPage(page_index).extractText()
            for account in account_pattern.findall(page_text):
                # Keep only the first occurrence of each account number.
                if account in results:
                    continue
                results.append(account)

    total = len(results)
    print(str(total) + " account numbers pulled from PDF")
</code></pre>