Python TextBlob翻译问题

# Scrape a Spanish Wikipedia article, clean its words, print the TextBlob
# sentiment of the cleaned text and plot the 20 most frequent words.
import string
import urllib.request

import nltk
from bs4 import BeautifulSoup
from nltk import word_tokenize
from nltk.corpus import stopwords
from textblob import TextBlob, Word

nltk.download('stopwords')

# Download the page and reduce it to its text content.
response = urllib.request.urlopen('https://es.wikipedia.org/wiki/Valencia')
html = response.read()
soup = BeautifulSoup(html, 'html5lib')
text = soup.get_text(strip=True)

# Lower-case every token, strip ASCII punctuation, keep alphabetic tokens only.
table = str.maketrans('', '', string.punctuation)
tokens = [w.lower() for w in word_tokenize(text)]
stripped = [w.translate(table) for w in tokens]
words = [word for word in stripped if word.isalpha()]

# Drop Spanish stop words.
stop_words = set(stopwords.words('spanish'))
words = [w for w in words if w not in stop_words]

# Persist the cleaned words (each preceded by a space), then read them back.
with open('palabras.txt', 'w') as f:
    f.write("".join(" " + word for word in words))

with open('palabras.txt', 'r') as myfile:
    texto = myfile.read().replace('\n', '')

textFinal = TextBlob(texto)
print(textFinal.sentiment)

# Frequency distribution of the cleaned words, plotted for the top 20.
freq = nltk.FreqDist(words)
freq.plot(20, cumulative=False)

1条回答

网友

1楼 · 发布于 2024-09-30 04:38:57

可以试试 langdetect 这个包。先检测所抓取页面的语言，如果页面语言与翻译的目标语言相同，就跳过翻译。示例如下：

import string
import urllib.request

import nltk
from bs4 import BeautifulSoup
from langdetect import detect
from nltk import word_tokenize
from nltk.corpus import stopwords
from textblob import TextBlob, Word

# Scrape a Spanish Wikipedia article, clean its words, translate the cleaned
# text only when the detected page language differs from the target language,
# then print the TextBlob sentiment and plot the 20 most frequent words.

# The stop-word corpus is fetched here. word_tokenize also needs the "punkt"
# tokenizer models; uncomment the second call if they are not installed yet.
nltk.download("stopwords")
# nltk.download("punkt")

# The context manager closes the HTTP response once the body has been read,
# even if read() raises.
with urllib.request.urlopen("https://es.wikipedia.org/wiki/Valencia") as response:
    html = response.read()

soup = BeautifulSoup(html, "html5lib")
text = soup.get_text(strip=True)

# Detect the page language up front so the translation step can be skipped
# when it already matches the target language.
lang = detect(text)

# The page and the stop-word list are Spanish, so tokenize with the Spanish
# Punkt model instead of the English default.
tokens = [w.lower() for w in word_tokenize(text, language="spanish")]

# Strip ASCII punctuation from each token and keep purely alphabetic ones.
table = str.maketrans("", "", string.punctuation)
stripped = [w.translate(table) for w in tokens]
words = [word for word in stripped if word.isalpha()]

stop_words = set(stopwords.words("spanish"))
words = [w for w in words if w not in stop_words]

# Write every word preceded by a single space, in one call rather than one
# write per word.
with open("palabras.txt", "w", encoding="utf-8") as f:
    f.write("".join(" " + word for word in words))

with open("palabras.txt", "r", encoding="utf-8") as myfile:
    texto = myfile.read().replace("\n", "")

textFinal = TextBlob(texto)

# Translate only when the detected language differs from the target.
# NOTE(review): TextBlob.translate() appears to be deprecated/removed in recent
# TextBlob releases -- confirm it exists in the installed version.
translate_to = "es"
if lang != translate_to:
    textFinal = textFinal.translate(to=translate_to)

print(textFinal.sentiment)

# Frequency distribution of the cleaned words, plotted for the top 20.
freq = nltk.FreqDist(words)
freq.plot(20, cumulative=False)

相关问题更多 >

编程相关推荐

热门问题

热门文章