在执行程序之前,在GUI中显示用户选择的文件名

2024-05-06 01:54:51 发布

您现在位置:Python中文网/ 问答频道 /正文

在我的程序中,我试图让GUI显示我选择的PDF的名称,然后让程序实际执行它应该对该PDF执行的操作。现在,个人可以打开一个文件目录,选择一个文件,然后在GUI中按run按钮执行程序,但是用户看不到他们选择的文件的名称

我尝试过放置一个简单的“print”函数,但似乎不起作用,还添加了“selector.filename”ato lbl1,并将其作为自己的标签放置在不同的区域,包括几乎在所有代码的末尾,但是似乎不起作用

import os
import PyPDF2
import pandas
import webbrowser
import tkinter as tk
from tkinter import ttk
from tkinter import filedialog
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.tokenize import word_tokenize

#----Functions----#

#Method that a PDF that is read into the program goes through to eliminate any unwanted words or symbols#
def preprocess(text):
    #Filters out punctuation from paragraph witch becomes tokenized to words and punctuation#
    tokenizer = RegexpTokenizer(r'\w+')
    result = tokenizer.tokenize(text)

    #Makes all words lowercase#
    words = [item.lower() for item in result]

    #Removes all remaining tokens that are not alphabetic#
    result = [word for word in words if word.isalpha()]

    #Imports stopwords to be removed from paragraph#
    stop_words = set(stopwords.words("english"))

    #Removes the stop words from the paragraph#
    filtered_sent = []
    for w in result:
        if w not in stop_words:
            filtered_sent.append(w)

    #Return word to root word/chop-off derivational affixes#
    ps = PorterStemmer()
    stemmed_words = []
    for w in filtered_sent:
        stemmed_words.append(ps.stem(w))

    #Lemmatization, which reduces word to their base word, which is linguistically correct lemmas#
    lem = WordNetLemmatizer()
    lemmatized_words = ' '.join([lem.lemmatize(w,'n') and lem.lemmatize(w,'v') for w in filtered_sent])

    #Re-tokenize lemmatized words string#
    tokenized_word = word_tokenize(lemmatized_words)
    return tokenized_word

#Wraps two functions inside an object which allows both functions to use filename#
class PDFSelector:
    #Creates global variable 'filename'#
    def __init(self):
        self.filename = ""

    #Allows user to select PDF to use in program#
    def select_PDF(self):
        #Opens file directory to select a file, and shows both folders and PDF files only#
        self.filename = filedialog.askopenfilename(initialdir = "/", title = "Select file", filetypes = (("pdf files", "*.pdf"), ("all files", "*.*")))
        print(self.filename)

    #Method for PDF to run through to convert it into text, then print it out in a browser#
    def run_program(self):    
        #Loads in PDF into program#
        PDF_file = open(self.filename, 'rb')
        read_pdf = PyPDF2.PdfFileReader(PDF_file)

        #Determines number of pages in PDF file and sets the document content to 'null'#
        number_of_pages = read_pdf.getNumPages()
        doc_content = ""

        #Extract text from the PDF file#
        for i in range(number_of_pages):
            page = read_pdf.getPage(0)
            page_content = page.extractText()
            doc_content += page_content

        #Turns the text drawn from the PDF file into data the remaining code can understand#
        tokenized_words = preprocess(doc_content)

        #Determine frequency of words tokenized + lemmatized text#
        from nltk.probability import FreqDist
        fdist = FreqDist(tokenized_words)
        final_list = fdist.most_common(len(fdist))

        #Organize data into two columns and export the data to an html that automatically opens#
        df = pandas.DataFrame(final_list, columns = ["Word", "Frequency"])
        df.to_html('word_frequency.html')
        webbrowser.open('file://' + os.path.realpath('word_frequency.html'))      

#----Main----#

#Creates an instance of the wrapped functions to use the GUI#        
selector = PDFSelector()

#Creats the GUI that will be used to select inputs#
window = tk.Tk()
window.geometry("385x130")
window.resizable(0, 0)
window.title("Word Frequency Program")

#Code literally just to make the GUI look better#
lblfilla = tk.Label(window, text = "   ").grid(row = 0, column = 0)
lblfillb = tk.Label(window, text = "   ").grid(row = 0, column = 1)
lblfillc = tk.Label(window, text = "   ").grid(row = 0, column = 2)
lblfilld = tk.Label(window, text = "   ").grid(row = 0, column = 3)
lblfille = tk.Label(window, text = "   ").grid(row = 0, column = 4)
lblfillf = tk.Label(window, text = "   ").grid(row = 1, column = 0)
lblfillg = tk.Label(window, text = "   ").grid(row = 2, column = 0)
lblfillh = tk.Label(window, text = "   ").grid(row = 3, column = 0)
lblfilli = tk.Label(window, text = "   ").grid(row = 4, column = 0)

#Just a simple label on the GUI#
lbl1 = tk.Label(window, text = "File Selected: ").grid(row = 1, column = 1)

#Label asking for input to determine number of words to be displayed in the data table# (NOT IMPLEMENTED YET)
lbl2 = tk.Label(window, text = "Number of Words: ").grid(row = 2, column = 1)
lbl2a = tk.Entry(window).grid(row = 2, column = 2, columnspan = 2)

#Calls the select_PDF method to choose a PDF for the program to read#
button1 = ttk.Button(window, text = "Select File", command = selector.select_PDF).grid(row = 1, column = 4)

#Quits out of the program when certain button clicked#
button2 = ttk.Button(window, text = "Quit", command = window.quit).grid(row = 3, column = 2)

#Button to make the program execute#
button3 = ttk.Button(window, text = "Run", command = selector.run_program).grid(row = 3, column = 3)

lbl4 = tk.Label(window, text = selector.filename).grid(row = 1, column = 2)


window.mainloop()
window.destroy()

从现在起,在选择PDF文件后,它应该显示在GUI中“file selected:”旁边


Tags: thetotextinfromimportpdfcolumn