在执行程序之前，在GUI中显示用户选择的文件名

import os
import webbrowser
import tkinter as tk
from tkinter import ttk
from tkinter import filedialog
from tkinter import messagebox

import pandas
import PyPDF2
from nltk.corpus import stopwords
from nltk.probability import FreqDist
from nltk.stem import PorterStemmer  # no longer used below; import kept on purpose
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.tokenize import RegexpTokenizer
from nltk.tokenize import word_tokenize


#----Functions----#

def preprocess(text):
    """Turn raw text into a list of cleaned, lemmatized word tokens.

    Steps: tokenize on word characters (dropping punctuation), lowercase,
    keep purely alphabetic tokens, remove English stop words, lemmatize each
    word as a noun and then as a verb, and finally re-tokenize the result.

    Args:
        text: The raw text to process.

    Returns:
        A list of word tokens ready for frequency counting.
    """
    # r'\w+' keeps runs of word characters, which discards punctuation.
    tokens = RegexpTokenizer(r'\w+').tokenize(text)

    # Lowercase everything, then drop tokens containing digits/underscores.
    words = [token.lower() for token in tokens]
    words = [word for word in words if word.isalpha()]

    # A set gives O(1) membership tests while filtering.
    stop_words = set(stopwords.words("english"))
    filtered = [word for word in words if word not in stop_words]

    # Chain the noun and verb lemmatizers. The original wrote
    # `lemmatize(w, 'n') and lemmatize(w, 'v')`, which always evaluates to
    # the verb lemma alone because a non-empty string is truthy.
    lem = WordNetLemmatizer()
    lemmatized = ' '.join(
        lem.lemmatize(lem.lemmatize(word, 'n'), 'v') for word in filtered
    )

    # Re-tokenize the joined string, as the original code did.
    return word_tokenize(lemmatized)


class PDFSelector:
    """Holds the chosen PDF path so selecting and running can share it.

    Attributes:
        filename: Full path of the selected PDF, or "" when none is selected.
        filename_var: Optional object with a ``set(str)`` method (for example
            a ``tk.StringVar``) that is updated with the selected file's name
            so a bound widget can display it.
    """

    def __init__(self, filename_var=None):
        """Start with no file selected.

        Args:
            filename_var: Optional variable to mirror the selected file name
                into; pass a ``tk.StringVar`` bound to a label to show it.
        """
        self.filename = ""
        self.filename_var = filename_var

    def select_PDF(self):
        """Ask the user to pick a PDF and remember the chosen path."""
        # Offer PDF files first, with an "all files" fallback filter.
        chosen = filedialog.askopenfilename(
            initialdir="/",
            title="Select file",
            filetypes=(("pdf files", "*.pdf"), ("all files", "*.*")),
        )
        if not chosen:
            # Dialog was cancelled: keep whatever was selected before.
            return

        self.filename = chosen
        print(self.filename)
        if self.filename_var is not None:
            # Only the base name is shown; the window is small and fixed-size.
            self.filename_var.set(os.path.basename(chosen))

    def run_program(self):
        """Count word frequencies in the selected PDF and show them.

        Extracts the text of every page, preprocesses it, writes the
        word/frequency table to ``word_frequency.html`` in the current
        working directory, and opens that file in the web browser.
        Shows a warning and does nothing if no file has been selected.
        """
        if not self.filename:
            messagebox.showwarning(
                "No file selected",
                "Please select a PDF file before running.",
            )
            return

        # The context manager closes the file even if extraction fails.
        with open(self.filename, 'rb') as pdf_file:
            read_pdf = PyPDF2.PdfFileReader(pdf_file)
            number_of_pages = read_pdf.getNumPages()
            # Read page i (the original always re-read page 0). Joining with
            # a newline keeps words at page boundaries from being fused.
            doc_content = "\n".join(
                read_pdf.getPage(i).extractText()
                for i in range(number_of_pages)
            )

        tokenized_words = preprocess(doc_content)

        # most_common() with no argument returns every word, most frequent first.
        final_list = FreqDist(tokenized_words).most_common()

        # Two-column table exported to HTML and opened automatically.
        df = pandas.DataFrame(final_list, columns=["Word", "Frequency"])
        df.to_html('word_frequency.html')
        webbrowser.open('file://' + os.path.realpath('word_frequency.html'))


#----Main----#

# The root window must exist before any tk variable is created.
window = tk.Tk()
window.geometry("385x130")
window.resizable(0, 0)
window.title("Word Frequency Program")

# Variable bound to the label that displays the selected file's name.
filename_var = tk.StringVar(master=window)
selector = PDFSelector(filename_var)

# Blank spacer labels along the top row and left column, purely for layout.
filler_cells = [(0, col) for col in range(5)] + [(row, 0) for row in range(1, 5)]
for row, col in filler_cells:
    tk.Label(window, text=" ").grid(row=row, column=col)

# Static caption next to the selected file name.
tk.Label(window, text="File Selected: ").grid(row=1, column=1)

# Shows the selected file name; tk refreshes it whenever filename_var changes.
tk.Label(window, textvariable=filename_var).grid(row=1, column=2, columnspan=2)

# Input for the number of words to display in the table (NOT IMPLEMENTED YET).
tk.Label(window, text="Number of Words: ").grid(row=2, column=1)
tk.Entry(window).grid(row=2, column=2, columnspan=2)

# Opens the file dialog to choose the PDF to analyse.
ttk.Button(window, text="Select File", command=selector.select_PDF).grid(row=1, column=4)

# Leaves the main loop, after which the window is destroyed below.
ttk.Button(window, text="Quit", command=window.quit).grid(row=3, column=2)

# Runs the word-frequency analysis on the selected PDF.
ttk.Button(window, text="Run", command=selector.run_program).grid(row=3, column=3)

window.mainloop()
window.destroy()

1条回答

网友

1楼 · 发布于 2024-05-06 01:54:51

我建议将标签 lbl1 的 textvariable 选项设置为一个 StringVar。这样您就可以在 select_PDF() 中设置该变量的值，tk 会自动为您更新 GUI。

请参阅这个回答（answer）

相关问题更多 >

编程相关推荐

热门问题

热门文章