用PyPDF2去除PDF中的水印

from PyPDF2 import PdfFileReader, PdfFileWriter
from PyPDF2.pdf import ContentStream
from PyPDF2.generic import TextStringObject, NameObject
from PyPDF2.utils import b_

# The watermark says SAMPLE on it so I've tried different capitalization cases
wm_text = 'Sample'
replace_with = ''
# I'm hoping to just replace the SAMPLE watermark with nothing so a space could suffice

# Load PDF into pyPDF
source = PdfFileReader(open('input.pdf', "rb"))
output = PdfFileWriter()

# For each page
for page in range(source.getNumPages()):
    # Get the current page and its contents
    page = source.getPage(page)
    content_object = page["/Contents"].getObject()
    content = ContentStream(content_object, source)

    # Loop over all pdf elements
    for operands, operator in content.operations:

1条回答

网友

1楼 · 发布于 2024-10-02 02:37:01

下面是基于问题中的代码改写的函数，可以在 Python 3 中运行。

def removeWatermark(wm_text, inputFile, outputFile):
    """Write a copy of a PDF with matching watermark text blanked out.

    Each page's content stream is parsed. For every ``Tj`` operation whose
    first operand is a ``str`` starting with ``wm_text``, that operand is
    replaced by an empty text string. All pages, modified or not, are then
    written to ``outputFile``.

    Only ``Tj`` operations are examined; text emitted through any other
    operator is left untouched.

    Args:
        wm_text: Prefix identifying the watermark string to remove.
        inputFile: Path of the PDF to read.
        outputFile: Path the resulting PDF is written to.
    """
    from PyPDF4 import PdfFileReader, PdfFileWriter
    from PyPDF4.pdf import ContentStream
    from PyPDF4.generic import TextStringObject, NameObject
    from PyPDF4.utils import b_

    # Loop invariants: the operator token to look for and the page key that
    # receives the rewritten content stream.
    show_text = b_("Tj")
    contents_key = NameObject('/Contents')

    with open(inputFile, "rb") as f:
        # The file mode belongs to open(); the reader only needs the stream.
        source = PdfFileReader(f)
        output = PdfFileWriter()

        for page_number in range(source.getNumPages()):
            page = source.getPage(page_number)

            # A page without a content stream has nothing to scan; copy it
            # through unchanged instead of failing on the missing key.
            if "/Contents" in page:
                content_object = page["/Contents"].getObject()
                content = ContentStream(content_object, source)

                for operands, operator in content.operations:
                    # Skip other operators and operations with no operand.
                    if operator != show_text or not operands:
                        continue

                    text = operands[0]
                    if isinstance(text, str) and text.startswith(wm_text):
                        operands[0] = TextStringObject('')

                page[contents_key] = content

            output.addPage(page)

        # Written while the input file is still open: the pages handed to
        # the writer were obtained from the reader backed by that file.
        with open(outputFile, "wb") as outputStream:
            output.write(outputStream)

# Example invocation: blank out text starting with `wm_text` in input.pdf
# and save the result as output.pdf.
wm_text, inputFile, outputFile = "wm_text", "input.pdf", "output.pdf"
removeWatermark(
    wm_text=wm_text,
    inputFile=inputFile,
    outputFile=outputFile,
)

相关问题更多 >

编程相关推荐

热门问题

热门文章