使用 pywin32 控制 Adobe Acrobat 时出现“未实现”异常

2条回答

网友

1楼 · 编辑于 2024-09-30 01:23:43

Blish，这个帖子包含了您所需解决方案的关键：https://mail.python.org/pipermail/python-win32/2002-March/000260.html

我承认上面的帖子不太容易找到（可能是因为内容年代久远，谷歌给它的排名很低？）。

具体来说，应用这封邮件中建议的修改即可让代码正常运行：https://mail.python.org/pipermail/python-win32/2002-March/000265.html

作为参考，下面是完整的代码，不需要您手动修补 dynamic.py（这段代码应该可以开箱即用）：

# Finds all files under ROOT_INPUT_PATH matching INPUT_FILE_EXTENSION and extracts their text into ROOT_OUTPUT_PATH, keeping each input's base name but replacing its extension with OUTPUT_FILE_EXTENSION
from win32com.client import Dispatch
from win32com.client.dynamic import ERRORS_BAD_CONTEXT

import winerror

# try importing scandir and, if found, use it, as it is orders of magnitude faster than the stock os.walk
try:
    from scandir import walk
except ImportError:
    from os import walk

import fnmatch

import sys
import os

# Module-level defaults; all four are overwritten from sys.argv when the file is run as a script.
ROOT_INPUT_PATH = None  # directory tree that is walked for input files
ROOT_OUTPUT_PATH = None  # directory that receives the converted files
INPUT_FILE_EXTENSION = "*.pdf"  # fnmatch pattern used to select input files
OUTPUT_FILE_EXTENSION = ".txt"  # suffix appended to each input's base name

def acrobat_extract_text(f_path, f_path_out, f_basename, f_ext):
    """Export one PDF as accessible text through Acrobat's COM automation interface.

    The document is opened in Acrobat, saved through the JavaScript bridge's
    ``SaveAs`` with the "com.adobe.acrobat.accesstext" conversion ID, and then
    closed again -- also when the export itself fails.

    Args:
        f_path: path of the PDF to open.
        f_path_out: directory the output file is written to.
        f_basename: output file name without extension.
        f_ext: output extension, concatenated directly onto ``f_basename``
            (so it should carry its own leading dot, e.g. ".txt").

    Raises:
        AssertionError: if Acrobat reports that the file could not be opened.
    """
    avDoc = Dispatch("AcroExch.AVDoc")  # Connect to Adobe Acrobat

    # Open the input file (as a pdf); the path is passed for both arguments.
    ret = avDoc.Open(f_path, f_path)
    # FIXME: Documentation says "-1 if the file was opened successfully, 0 otherwise", but this is a bool in practise?
    # Raised explicitly (rather than via `assert`) so the check survives `python -O`.
    if not ret:
        raise AssertionError("Acrobat could not open {!r}".format(f_path))

    try:
        pdDoc = avDoc.GetPDDoc()
        try:
            dst = os.path.join(f_path_out, ''.join((f_basename, f_ext)))

            # Adobe documentation says "For that reason, you must rely on the documentation to know what functionality is available through the JSObject interface. For details, see the JavaScript for Acrobat API Reference"
            jsObject = pdDoc.GetJSObject()

            # Here you can save as many other types by using, for instance: "com.adobe.acrobat.xml"
            jsObject.SaveAs(dst, "com.adobe.acrobat.accesstext")
        finally:
            pdDoc.Close()
    finally:
        # We want this to close Acrobat, as otherwise Acrobat is going to refuse processing any further files
        # after a certain threshold of open files are reached (for example 50 PDFs). Doing it in `finally`
        # keeps a failed export from leaking an open document towards that threshold.
        avDoc.Close(True)

if __name__ == "__main__":
    # Expected argv: <script name>, <script_file_input_path>, <script_file_input_extension>, <script_file_output_path>, <script_file_output_extension>
    #$ python get.txt.from.multiple.pdf.py 'C:\input' '*.pdf' 'C:\output' '.txt'
    # Checked explicitly (rather than via `assert`) so the validation survives `python -O`.
    if len(sys.argv) != 5:
        sys.exit(
            "expected 4 arguments: <input_path> <input_pattern> <output_path> <output_extension>; got {!r}".format(sys.argv)
        )

    ROOT_INPUT_PATH = sys.argv[1]
    INPUT_FILE_EXTENSION = sys.argv[2]
    ROOT_OUTPUT_PATH = sys.argv[3]
    OUTPUT_FILE_EXTENSION = sys.argv[4]

    # Lazily yields tuples of schema (path_to_file, filename_without_extension)
    # for every file under the input root whose name matches the pattern.
    matching_files = (
        (os.path.join(_root, filename), os.path.splitext(filename)[0])
        for _root, _dirs, _files in walk(ROOT_INPUT_PATH)
        for filename in fnmatch.filter(_files, INPUT_FILE_EXTENSION)
    )

    # patch ERRORS_BAD_CONTEXT as per https://mail.python.org/pipermail/python-win32/2002-March/000265.html
    # The list is mutated in place, so no `global` statement is needed (it is a no-op at module level anyway).
    ERRORS_BAD_CONTEXT.append(winerror.E_NOTIMPL)

    for filename_with_path, filename_without_extension in matching_files:
        # Single-argument print(...) behaves the same on Python 2 and Python 3.
        print("Processing '{}'".format(filename_without_extension))
        acrobat_extract_text(filename_with_path, ROOT_OUTPUT_PATH, filename_without_extension, OUTPUT_FILE_EXTENSION)

我已经在 WinPython x64 2.7.6.3 和 Acrobat X Pro 上测试过。

网友

2楼 · 编辑于 2024-09-30 01:23:43

makepy.py 是 win32com Python 包附带的脚本。

运行它即可完成安装，把 Python “连接”到 Windows 中的 COM/OLE 对象。下面是我以前用来与 Excel 交互并在其中执行一些操作的代码节选。此示例获取当前工作簿中工作表 1 的名称。如果出现异常，它会自动运行 makepy：

import win32com;
import win32com.client;
from win32com.client import selecttlb;

def attachExcelCOM():
    """Run makepy for every registered type library whose description matches Microsoft Excel.

    Each matching library description is handed to ``makepy.py -d`` in a
    child process. The child's exit status is ignored and nothing is returned.
    """
    # Local imports: the import block of this excerpt does not bring these names in.
    import os
    import re
    import subprocess
    import sys

    # makepy.py lives in the win32com.client package directory, so locate it next to the
    # installed package instead of relying on a hard-coded C:\Python25 installation.
    # NOTE(review): sys.executable is assumed to be a regular python interpreter -- confirm for embedded hosts.
    makepy_script = os.path.join(os.path.dirname(win32com.client.__file__), 'makepy.py')
    excel_desc = re.compile('^Microsoft.*Excel.*', re.IGNORECASE)

    for tl in selecttlb.EnumTlbs():
        if excel_desc.match(tl.desc):
            # Argument list instead of a shell string: a description containing
            # quotes or other shell metacharacters is passed through intact.
            subprocess.call([sys.executable, makepy_script, '-d', tl.desc])

def getSheetName(sheetNum):
    """Attach to Excel, look up a workbook by index and ask the user to confirm it.

    If the first attempt to reach Excel raises, the makepy support code is
    regenerated through attachExcelCOM() and the lookup is tried once more;
    a second failure terminates the process with exit status -1.

    Args:
        sheetNum: index passed to ``Workbooks.Item``.

    Returns:
        ``(wb, wsName)`` when the user answers "Y" (case-insensitive);
        ``None`` when the name is 'PERSONAL.XLS' or the user answers anything else.
    """
    import sys  # local import: the import block of this excerpt does not include it

    def _open_workbook():
        # Dispatch and lookup are retried as one unit, so they are kept together.
        xl = win32com.client.Dispatch("Excel.Application")
        return xl.Workbooks.Item(sheetNum)

    try:
        wb = _open_workbook()
    except Exception as detail:
        print('There was a problem attaching to Excel, refreshing connect config...')
        # Report the concrete exception type; the builtin Exception class itself carries no information.
        print('%s: %s' % (type(detail).__name__, detail))
        attachExcelCOM()
        try:
            wb = _open_workbook()
        except Exception:
            # `except Exception` (not a bare except) so Ctrl-C / SystemExit are not
            # misreported as an Excel connection failure.
            print('Could not attach to Excel...')
            sys.exit(-1)

    wsName = wb.Name
    if wsName == 'PERSONAL.XLS':
        return None

    print('The target worksheet is:')
    print('       %s' % (wsName,))
    # Prompt without a trailing newline so the answer is typed on the same line.
    sys.stdout.write('Is this correct? [Y/N]')
    sys.stdout.flush()
    # str.strip() replaces string.strip(), which needs an extra import and no longer exists in Python 3.
    answer = sys.stdin.readline().strip().upper()
    if answer != 'Y':
        print('Sheet not identified correctly.')
        return None
    return (wb, wsName)

#   Main
import sys  # the import block of this excerpt does not include it

# NOTE(review): sheetNum is not defined anywhere in this excerpt; it must be assigned before this point.
sheetInfo = getSheetName(sheetNum)
if sheetInfo is None:  # identity test is the correct check for None
    print('Sheet not found')
    sys.exit(-1)
else:
    (wb, wsName) = sheetInfo

相关问题更多 >

编程相关推荐

热门问题

热门文章

使用 pywin32 控制 Adobe Acrobat 时出现“未实现”异常

相关问题 更多 >

编程相关推荐

热门问题

热门文章

相关问题更多 >