“invalid literal for int() with base 10”(以 10 为基数的 int() 的无效字面量)到底是什么意思?

2024-09-27 07:24:10 发布

您现在位置:Python中文网/ 问答频道 /正文

我正在写一个电影评论情感分析程序,运行代码时遇到一个错误:invalid literal for int() with base 10(以 10 为基数的 int() 的字面量无效)。代码会读取一个单独的文本文件,其中包含电影评论及其分数。这部电影很棒。谢谢你的帮助!编辑:错误出现在第 38 行:score = int(lineSplits[0].strip())

import re
class WordStatistic:
    """Running score statistics for a single word seen in rated reviews.

    Attributes:
        keyword: the word being tracked.
        averageScore: mean of all scores recorded for the word so far.
        occurences: number of scores recorded for the word so far.
    """

    def __init__(self, keyword, averageScore = 0, occurences = 0):
        """Create the statistics entry for ``keyword``.

        Args:
            keyword: the word being tracked.
            averageScore: initial mean score (defaults to 0).
            occurences: number of scores already folded into the mean
                (defaults to 0).
        """
        self.keyword = keyword
        self.averageScore = averageScore
        self.occurences = occurences

    def getWord(self):
        """Return the tracked word."""
        return self.keyword

    def getAverageScore(self):
        """Return the current mean score of the word."""
        return self.averageScore

    def getOccurences(self):
        """Return how many scores have been recorded for the word."""
        return self.occurences

    def addNewScore(self, newScore):
        """Fold ``newScore`` into the running mean and bump the count."""
        # Rebuild the sum from the stored mean so no score list is kept.
        scoreSum = self.averageScore * self.occurences + newScore
        self.occurences += 1
        self.averageScore = scoreSum / self.occurences

    def printWordStatistic(self):
        """Print the word, its occurrence count and its average score."""
        print ("Word          : ", self.keyword)
        print ("Occurences    : ", self.occurences)
        # The average line must show the mean, not the occurrence count.
        print ("Average Score : ", self.averageScore, "\n\n")
# "Teaching" the code: build per-word score statistics from rated reviews.
# wordDictionary maps each word to its WordStatistic entry.
wordDictionary = {}
# The context manager closes the file even if reading fails.
with open("movieReviews.txt", 'r') as fileInstance:
    fileText = fileInstance.read()

# formatting and splitting
# Each non-empty chunk must carry an integer score on its first line; the
# remaining lines of the chunk are treated as the review text.
reviewSplits = fileText.split("movieReviews")
for review in reviewSplits:
    review = review.strip()
    if review == "":
        continue
    lineSplits = review.split("\n")
    scoreText = lineSplits[0].strip()
    try:
        score = int(scoreText)
    except ValueError as err:
        # Same exception type as before, but the message now shows which
        # line was expected to be a bare integer score.
        raise ValueError(
            "expected an integer score on the first line of a review, "
            "got {!r}".format(scoreText)
        ) from err
    for line in lineSplits[1:]:
        for word in re.split("\t| ", line):
            if word == "":
                continue
            # If it is already present, then update the score and count
            # Otherwise just add the new entry to the dictionary
            if word in wordDictionary:
                wordDictionary[word].addNewScore(score)
            else:
                wordDictionary[word] = WordStatistic(word, score, 1)
# print the stats of the words
def printAllWordStatistic(wordDictionary):
    """Print every statistics entry stored in ``wordDictionary``.

    Args:
        wordDictionary: mapping whose values expose printWordStatistic().
    """
    entries = wordDictionary.values()
    for entry in entries:
        entry.printWordStatistic()
# rating the actual review
def calculateAverageOfReview(review, wordStatistics=None):
    """Rate a review as the mean of the average scores of its known words.

    Args:
        review: review text; words may be separated by spaces, tabs or
            newlines.
        wordStatistics: optional mapping from word to an object exposing
            getAverageScore(). Defaults to the module-level wordDictionary.

    Returns:
        The mean of the average scores of the words found in the mapping,
        or -1 when none of the review's words is known.
    """
    if wordStatistics is None:
        wordStatistics = wordDictionary
    # str.split() with no argument splits on any run of whitespace, which
    # covers the tab/newline normalisation (str.replace returns a new
    # string, so calling it without using the result has no effect).
    knownScores = [
        wordStatistics[word].getAverageScore()
        for word in review.split()
        if word in wordStatistics
    ]
    if not knownScores:
        return -1
    return sum(knownScores) / len(knownScores)
# getting user input and append multi lines of case of multi line review
# Interactive session: read a (possibly multi-line) review, rate it, and
# repeat until the user answers anything other than "Y".
while True:
    print("\nEnter a review : ")
    # Keep reading lines until the user submits an empty one.
    enteredLines = list(iter(input, ""))
    reviewText = '\n'.join(enteredLines)
    rating = calculateAverageOfReview(reviewText)
    if rating == -1:
        # -1 means none of the words in the review were recognised.
        print("Unable to rate the review")
    elif rating >= 2.50:
        print("Positive Review")
    else:
        print("Negative Review")
    answer = input("\nDo you want to continue ? (Y/N) : ")
    if answer != "Y":
        print("Quitting the session.")
        exit()

Tags: the in self if def keyword review word
1条回答
网友
1楼 · 发布于 2024-09-27 07:24:10

这意味着 int() 不知道如何处理 0-9 以外的字符。如果想从任意字符串中提取数字,可以使用正则表达式(regex),因此不要写成:

score = int(lineSplits[0].strip())

而是可以写成类似下面这样:

score = int(re.search(r"\d+", lineSplits[0]).group())

这将获取第一组数字。

相关问题 更多 >

    热门问题