使用 BeautifulSoup 抓取网页并提取 ID 列表

import requests
from bs4 import BeautifulSoup


def get_html(url):
    """Get the content of the url, decoded as UTF-8 text."""
    response = requests.get(url)
    response.encoding = 'utf-8'
    return response.text


def get_pmid(html):
    """Print the text content of every ``body`` element found in html."""
    soup = BeautifulSoup(html, 'lxml')
    for texts in soup.select('body'):
        text = texts.get_text()
        # print() call form: the Python 2 ``print text`` statement is a
        # SyntaxError on Python 3.
        print(text)


url_ncbi = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=%22D-PANTOTHENIC+ACID%22&retmax=2000&usehistory=y&field=Title/Abstracts"
html = get_html(url_ncbi)
get_pmid(html)

1条回答

网友

1楼 · 发布于 2024-06-14 09:22:39

要获取所有ID标记，可以使用find_all()函数：

import requests
from bs4 import BeautifulSoup  


def get_html(url, timeout=30):
    """Fetch a URL with HTTP GET and return the body decoded as UTF-8.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body as text.

    Raises:
        requests.HTTPError: If the server replies with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on HTTP errors instead of handing an error page to the
    # parser as if it were real data.
    response.raise_for_status()
    # Force UTF-8 decoding regardless of what the response headers suggest.
    response.encoding = 'utf-8'
    return response.text


def get_pmid(html):
    """Return the text of every ``id`` tag in *html* as a list of strings.

    The markup is parsed with BeautifulSoup's ``lxml`` parser and the tags
    are returned in the order ``find_all`` yields them. Presumably these are
    the PubMed IDs of an esearch result -- confirm against the caller.
    """
    parsed = BeautifulSoup(html, 'lxml')
    return [tag.text for tag in parsed.find_all('id')]

# Search URL, split across adjacent literals for readability; the pieces
# concatenate to exactly the same string at compile time.
url_ncbi = (
    "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    "?db=pubmed"
    "&term=%22D-PANTOTHENIC+ACID%22"
    "&retmax=2000"
    "&usehistory=y"
    "&field=Title/Abstracts"
)

# Download the search result, pull out the text of every id tag, and show it.
html = get_html(url_ncbi)
all_ids = get_pmid(html)
print(all_ids)

输出：

['29737393', '29209902', '24632028', '23727638', '22536244', '22052867', '15371742', '12204559', '10885798', '16348362', '3096335', '3734807', '6247641', '6997858', '761345', '108510', '355840', '1003285', '4676550', '5804470', '6076800', '6076775', '6012920', '14091285']

相关问题更多 >

编程相关推荐

热门问题

热门文章