
2024-10-01 04:50:55 发布

您现在位置:Python中文网/ 问答频道 /正文


TypeError                                 Traceback (most recent call last)
<ipython-input-36-4ae1bb3ffd5e> in <module>
----> 1 text2wordlengthPDF("R095-Big-data-vrije-veilige-samenleving.txt")

<timed exec> in text2wordlengthPDF(text)

TypeError: cannot unpack non-iterable int object


def text2wordlengthPDF(text):
    '''Attempt to build a word-length histogram for the text file at path `text`.

    The file is read, split into tokens on runs of non-word characters with
    re.split, and the distinct tokens are counted with Counter. A dict keyed
    by word length is then initialised with zeros.

    NOTE(review): as written, the final loop raises TypeError because it
    subscripts the int key `k`, and the function has no return statement,
    so no histogram is ever produced.
    '''

    # .read() returns the whole file content as one string.
    # NOTE(review): the file object is never explicitly closed.
    tokens = re.split(r'\W+', open(text, "r").read())
    tokens_counter = Counter(tokens)

    # List the distinct lengths of the Counter keys, in first-seen order.
    wordlength = list(dict.fromkeys([len(w) for w in tokens_counter ]))

    # Create dictionary with word length as key and occurrence count as value.
    dict_histogram = {i:0 for i in wordlength}
    for k,v in dict_histogram.items():
        # NOTE(review): the parenthesised expression is a generator object,
        # which is always truthy, so this condition never filters anything.
        if (k == len(w) for w in tokens_counter):
            # NOTE(review): `k` is an int key, so `k[v]` raises TypeError;
            # `= +1` assigns positive one rather than incrementing.
            k[v] = +1


# run and plot    
#pdf= text2wordlengthPDF(linktopdf())
#pdfS= pd.Series(pdf).sort_index()

#pdfS[pdfS>5].plot(kind='bar' ) #plot only the wordlengths occurring more than 5 times.
#print(pdf)

#This is where I run my code with the input text

Tags: and, the, text, in, for, input, is, with
1楼 · 发布于 2024-10-01 04:50:55


# Loop quoted from the question; this is the part that fails.
for k,v in dict_histogram.items():
    if (k == len(w) for w in tokens_counter):
        # k is an int key of dict_histogram, so subscripting it raises TypeError.
        k[v] = +1

这段代码没有意义:k 是键(即每个单词的长度),而不是字典,因此不能写成 k[v]。(此外,您的本意可能是 k[v] += 1,而不是 k[v] = +1。)


# Variant that updates the dict entry instead of subscripting the int key.
for k,v in dict_histogram.items():
    # NOTE(review): the parenthesised generator expression is always truthy,
    # so this condition does not actually compare k with any word length.
    if (k == len(w) for w in tokens_counter):
        # NOTE(review): v is the entry's own current value, so this adds the
        # value to itself rather than adding a token count.
        dict_histogram[k] += v



import pprint
import re
from collections import Counter

def text2wordlengthPDF(text):
    r"""Build, print and return the word-length histogram of a text file.

    The file at path `text` is read as UTF-8 and tokenized with re.split on
    the regex \W+ (runs of non-word characters). Every token occurrence is
    counted under its length.

    Args:
        text: Path of the text file to read.

    Returns:
        A dict mapping each word length to the number of tokens of that
        length in the file. re.split yields an empty string when the text
        starts or ends with a separator; those are counted under length 0.
    """
    # A context manager closes the file handle even if reading fails.
    with open(text, "r", encoding="utf8") as handle:
        tokens = re.split(r'\W+', handle.read())

    # Counting lengths directly is equivalent to counting distinct tokens
    # first and then summing their occurrences per length.
    dict_histogram = dict(Counter(len(token) for token in tokens))

    pprint.pprint(sorted(dict_histogram.items()), compact=True)
    return dict_histogram



[(0, 2), (1, 57262), (2, 54080), (3, 95251), (4, 132448), (5, 29969),
 (6, 62938), (7, 46593), (8, 23929), (9, 14645), (10, 12943), (11, 10708),
 (12, 2940), (13, 2742), (14, 1807), (15, 827), (16, 312), (17, 17965),
 (18, 91), (19, 118), (20, 147), (21, 24), (22, 35), (23, 7), (24, 13), (25, 1),
 (26, 24), (28, 1), (29, 24), (34, 1)]


相关问题 更多 >