使用 BeautifulSoup 统计段落标签（<p>）中不重复单词的数量

import string
import urllib.request

from bs4 import BeautifulSoup


def getLength(url):
    """Return the number of unique words in the link-free paragraphs of a page.

    The page at ``url`` is downloaded and parsed with BeautifulSoup's
    ``html.parser``. Only ``<p>`` elements that contain no ``<a>`` tag are
    considered. Their text is lower-cased, ASCII punctuation
    (``string.punctuation``) is removed, and the result is split on
    whitespace; the distinct tokens across all such paragraphs are counted.

    Args:
        url: Address of the HTML page to analyse.

    Returns:
        The count of distinct words as an ``int`` (0 if no paragraph
        qualifies).

    Raises:
        Whatever ``urllib.request.urlopen`` raises for an unreachable or
        invalid URL (for example ``urllib.error.URLError``).
    """
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, 'html.parser')

    # Built once: a table that deletes every ASCII punctuation character.
    strip_punctuation = str.maketrans('', '', string.punctuation)

    unique_words = set()
    for paragraph in soup.find_all('p'):
        # Paragraphs that contain a link are skipped.
        if paragraph.find('a'):
            continue
        text = paragraph.get_text().lower().translate(strip_punctuation)
        unique_words.update(text.split())
    return len(unique_words)


getLength("https://en.wikipedia.org/wiki/Google")

import string
import urllib.request

from bs4 import BeautifulSoup


def getLength(url):
    """Return the number of unique words found in the ``<p>`` tags of a page.

    The page at ``url`` is downloaded and parsed with BeautifulSoup's
    ``html.parser``. The text of every ``<p>`` element is lower-cased, ASCII
    punctuation (``string.punctuation``) is removed, and the result is split
    on whitespace; the distinct tokens across all paragraphs are counted.

    Args:
        url: Address of the HTML page to analyse.

    Returns:
        The count of distinct words as an ``int`` (0 if the page has no
        paragraph text).

    Raises:
        Whatever ``urllib.request.urlopen`` raises for an unreachable or
        invalid URL (for example ``urllib.error.URLError``).
    """
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, 'html.parser')

    # Built once: a table that deletes every ASCII punctuation character.
    strip_punctuation = str.maketrans('', '', string.punctuation)

    # A set accumulates words from *all* paragraphs and discards repeats,
    # so the final size is the number of unique words rather than the word
    # count of a single paragraph.
    unique_words = set()
    for paragraph in soup.find_all('p'):
        text = paragraph.get_text().lower().translate(strip_punctuation)
        unique_words.update(text.split())
    return len(unique_words)


getLength("https://en.wikipedia.org/wiki/Google")

1条回答

网友

1楼 · 发布于 2024-05-02 13:38:32

import string
import urllib
import urllib.request

import numpy as np
from bs4 import BeautifulSoup
def getLength(url):
    """Return the number of unique words found in the ``<p>`` tags of a page.

    The page at ``url`` is downloaded and parsed with BeautifulSoup's
    ``html.parser``. The text of every ``<p>`` element is lower-cased, ASCII
    punctuation (``string.punctuation``) is removed, and the result is split
    on whitespace; the distinct tokens across all paragraphs are counted.

    Args:
        url: Address of the HTML page to analyse.

    Returns:
        The count of distinct words as an ``int`` (0 if the page has no
        paragraph text).

    Raises:
        Whatever ``urllib.request.urlopen`` raises for an unreachable or
        invalid URL (for example ``urllib.error.URLError``).
    """
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, 'html.parser')

    # Built once: a table that deletes every ASCII punctuation character.
    strip_punctuation = str.maketrans('', '', string.punctuation)

    # A set removes duplicates as words are added, so no separate
    # de-duplication pass is needed at the end.
    unique_words = set()
    for paragraph in soup.find_all('p'):
        text = paragraph.get_text().lower().translate(strip_punctuation)
        unique_words.update(text.split())
    return len(unique_words)


getLength("https://en.wikipedia.org/wiki/Google")

经过多次尝试，我最终写出了上面这段代码，它似乎在各种测试用例下都能正确运行。

相关问题更多 >

编程相关推荐

热门问题

热门文章