使用 Beautiful Soup 抓取数据并保存到 CSV 文件

from bs4 import BeautifulSoup
import urllib.request

# Pages to scrape; each one is fetched and parsed in turn.
urls = ['https://www.agtta.co.in/individuals.php']

for url in urls:
    # Send a browser-like User-Agent with the request.
    req = urllib.request.Request(
        url,
        headers={
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
        },
    )
    # The context manager closes the connection once the body has been parsed;
    # the timeout keeps a stalled server from hanging the script forever.
    with urllib.request.urlopen(req, timeout=30) as resp:
        soup = BeautifulSoup(
            resp,
            from_encoding=resp.info().get_param('charset'),
            features='html.parser',
        )

    # First <section class="b-branches"> on the page, or None when absent.
    scrape_data = soup.find('section', class_='b-branches')
    # Every text node that follows that section in document order; fall back
    # to an empty list instead of raising AttributeError on a missing section.
    to_list = scrape_data.find_all_next(string=True) if scrape_data is not None else []

2条回答

网友

1楼 · 编辑于 2024-05-06 04:14:48

以下是执行此操作的完整代码：

from bs4 import BeautifulSoup
import requests
import pandas as pd

# Browser-like User-Agent sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
}

# A timeout prevents hanging forever on a stalled connection, and
# raise_for_status() fails loudly on an HTTP error instead of silently
# parsing an error page into an empty CSV.
response = requests.get(
    'https://www.agtta.co.in/individuals.php',
    headers=headers,
    timeout=30,
)
response.raise_for_status()
r = response.text

soup = BeautifulSoup(r, 'html5lib')

# One <section class="b-branches"> per listed individual.
sections = soup.find_all('section', class_="b-branches")


def _after_label(text, label):
    """Return the text following the first ``label`` in ``text``, or None if absent."""
    parts = text.split(label)
    return parts[1] if len(parts) > 1 else None


names = []
phone_numbers = []
emails = []

for section in sections:
    # Name: text of the section's first <h3>; None when the tag is missing.
    heading = section.h3
    names.append(heading.text if heading is not None else None)

    # Phone: text after the "Mobile No " label in the first <p>; None when
    # the paragraph or the label is missing (previously an unhandled crash).
    paragraph = section.p
    phone_numbers.append(
        _after_label(paragraph.text, 'Mobile No ') if paragraph is not None else None
    )

    # Email: text after the "Email " label in the section's fourth <div>;
    # None when there are fewer than four <div>s or the label is missing.
    divs = section.find_all('div')
    emails.append(_after_label(divs[3].text, 'Email ') if len(divs) > 3 else None)

# The three lists are index-aligned: one entry per section in each.
details_dict = {
    "Names": names,
    "Phone Numbers": phone_numbers,
    "Emails": emails,
}
df = pd.DataFrame(details_dict)
df.to_csv("Details.csv", index=False)

输出：

希望这对你有帮助。

网友

2楼 · 编辑于 2024-05-06 04:14:48

from bs4 import BeautifulSoup
import urllib.request
import pandas as pd


# Pages to scrape; rows from all of them end up in a single CSV.
urls = ['https://www.agtta.co.in/individuals.php']
# Browser-like User-Agent sent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
}

# Collected across ALL urls. Resetting this (and writing the CSV) inside the
# loop would make each page overwrite the previous one's Details.csv.
result = []
for url in urls:
    req = urllib.request.Request(url, headers=headers)
    # Close the connection once parsed; the timeout avoids hanging forever.
    with urllib.request.urlopen(req, timeout=30) as resp:
        soup = BeautifulSoup(
            resp,
            from_encoding=resp.info().get_param('charset'),
            features='html.parser',
        )

    # One <section class="b-branches"> per listed individual.
    for individual in soup.find_all("section", {"class": "b-branches"}):
        # Each field falls back to "" when its element is missing.
        heading = individual.h3
        name = heading.text if heading is not None else ""

        phone_data = individual.find('p')
        phone = phone_data.text.replace("Mobile No", "").strip() if phone_data is not None else ""

        # ":-soup-contains" is the current spelling of the deprecated
        # ":contains" selector; select_one returns the first match or None.
        email_data = individual.select_one('div:-soup-contains("Email")')
        email = email_data.text.replace("Email", "").strip() if email_data is not None else ""

        result.append({"Name": name, "Phone": phone, "Email": email})

# Explicit columns keep the header row even when nothing was scraped.
output = pd.DataFrame(result, columns=["Name", "Phone", "Email"])
output.to_csv("Details.csv", index=False)

相关问题更多 >

编程相关推荐

热门问题

热门文章