[Python][Adobe Acrobat]将PDF另存为Excel问题

2024-09-30 01:25:40 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在使用 Adobe Acrobat Pro 和 Python 将 PDF 转换为 Excel。最近,该程序可以打开 Adobe Acrobat,但无法将 PDF 保存为 Excel 文件。Acrobat 弹出一条错误消息:

The specified file could not be written to. It may be in use.

我不知道是什么问题


import win32com.client, win32com.client.makepy, os, winerror, pandas as pd, errno, re
from win32com.client.dynamic import ERRORS_BAD_CONTEXT



class Acrobat_scan_automation:
    """Convert a PDF to an Excel workbook by driving Adobe Acrobat over COM.

    The source PDF is read from the ``source`` sub-folder and the workbook is
    written to the ``output`` sub-folder; both are resolved relative to the
    working directory selected in ``__init__``.
    """

    # Project folder used when the caller does not supply one.
    DEFAULT_DIRECTORY = r'D:\Desktop\Desktop_2\1.Python_project\ACROBAT_SCAN_AUTOMATION'

    def __init__(self, directory_path=None):
        """Select the working directory and set the default file names.

        Args:
            directory_path: Folder containing the ``source`` and ``output``
                sub-folders. Defaults to ``DEFAULT_DIRECTORY``.

        Raises:
            OSError: If the directory cannot be entered.
        """
        if directory_path is None:
            directory_path = self.DEFAULT_DIRECTORY
        self.directory_path = directory_path
        # The folder names below are relative, so the process must run from
        # the project directory for os.path.abspath to resolve them correctly.
        os.chdir(self.directory_path)
        self.folder_name = {
            "output": r'output',
            "source": r'source',
        }
        self.excel_file = r'output.xlsx'
        self.output_csv = r'output_1.csv'
        self.pdf_file = r'arabic.pdf'

    def pdf2excel(self, pdf_file, excel_file):
        """Export ``pdf_file`` to ``excel_file`` through Acrobat's JS bridge.

        Args:
            pdf_file: File name of the PDF inside the ``source`` folder.
            excel_file: File name of the workbook to create inside the
                ``output`` folder.

        Returns:
            None. Failures derived from ``Exception`` are printed rather than
            raised, so callers never see them; ``KeyboardInterrupt`` and
            ``SystemExit`` propagate.
        """
        adobe = None
        avDoc = None
        opened = False
        try:
            src = os.path.abspath(os.path.join(self.folder_name['source'], pdf_file))
            print('[INFO] abspath=', src)
            export_file = os.path.abspath(os.path.join(self.folder_name['output'], excel_file))
            # Make sure the target folder exists before Acrobat is asked to
            # write into it.
            os.makedirs(os.path.dirname(export_file), exist_ok=True)

            # Register E_NOTIMPL once only; the list is module-global in
            # win32com and would otherwise grow on every call.
            if winerror.E_NOTIMPL not in ERRORS_BAD_CONTEXT:
                ERRORS_BAD_CONTEXT.append(winerror.E_NOTIMPL)
            win32com.client.makepy.GenerateFromTypeLibSpec('Acrobat')
            adobe = win32com.client.DispatchEx('AcroExch.App')
            avDoc = win32com.client.DispatchEx('AcroExch.AVDoc')
            print('[INFO] Open', src)
            if not avDoc.Open(src, src):
                raise OSError('Acrobat could not open ' + src)
            opened = True
            pdDoc = avDoc.GetPDDoc()
            print('[INFO] pdDoc=', pdDoc)
            jsObject = pdDoc.GetJSObject()
            print('[INFO] jObject=', jsObject)
            jsObject.SaveAs(export_file, 'com.adobe.acrobat.xlsx')

        except Exception as e:
            # Best-effort conversion: report the failure and carry on.
            print(str(e))

        finally:
            # No ``return`` here: returning from ``finally`` would discard
            # any exception still propagating (e.g. KeyboardInterrupt).
            self._release_acrobat(adobe, avDoc, opened)

    @staticmethod
    def _release_acrobat(adobe, avDoc, opened):
        """Close the document (if opened) and ask Acrobat to exit.

        NOTE(review): a document left open by an earlier run is a likely
        cause of Acrobat's "file may be in use" error - confirm on the
        affected machine.
        """
        if avDoc is not None and opened:
            try:
                avDoc.Close(True)  # True: close without prompting to save
            except Exception as e:
                print('[WARN] could not close document:', e)
        if adobe is not None:
            try:
                adobe.Exit()
            except Exception as e:
                print('[WARN] could not exit Acrobat:', e)

    def main(self):
        """Convert the default PDF to the default Excel file."""
        print('[INFO] Start using acrobat')
        self.pdf2excel(self.pdf_file, self.excel_file)


# Script entry point: build the converter and run it with its default file
# names only when this file is executed directly, not when it is imported.
if __name__ == '__main__':
    Program = Acrobat_scan_automation()
    Program.main()

enter image description here


Tags: path self info src client output pdf os

热门问题