如何修改这段代码，使其在循环的每次迭代中都写入CSV，而不是等到循环结束后才写入？

2024-09-27 00:12:25 发布

您现在位置:Python中文网/ 问答频道 /正文

我有一段代码，使用 pandas 创建 DataFrame，然后把结果输出到 CSV 文件。我希望它在循环的每次迭代中都把当前结果写入 CSV，这样即使中途出现错误（例如网络连接中断），我仍然能保留已经抓取到的那部分结果。

import requests
from googlesearch import search 
import csv
import pandas
from bs4 import BeautifulSoup
import numpy as np
import os
from datetime import datetime
import time
import os


# Scrape one IMDb page per film title and write all results to a CSV file.
# NOTE: the CSV is written exactly once, after the outer loop finishes, so any
# exception raised during scraping leaves no output file at all.

# Recorded at start-up; not read again anywhere in this script.
start_time = time.time()
# Parallel accumulators that later become the DataFrame columns.
emptyWebPageSet = []  # result URLs (appended for every search hit)
emptySetTitle = []    # page titles (appended only for HTTP 200 pages)
emptysetGenre = []    # genre lists (appended once per "subtext" div)
infoSet = []          # summary texts (appended only for HTTP 200 pages)
date = []             # unused in this script
colnames = ['title']
# header=None makes pandas treat the file's first line as data, not a header.
data = pandas.read_csv('D:/Desktop/imdbWebScrape/mediaDataForGenreScrape.csv', names=colnames, header=None)
my_list = data["title"]
my_list = list(my_list)
# Drop the first row (presumably the input file's own header line).
my_list = my_list[1:]
length = len(my_list)  # unused in this script
for film in my_list:
    # list.index returns the first match, so duplicate titles all report the
    # position of their first occurrence; it is also a linear scan per film.
    filmIndex = my_list.index(film) + 1
    query = film + " imdb"
    # NOTE(review): stop=1 presumably limits the search to a single result;
    # confirm against the googlesearch package documentation.
    for j in search(query, tld="co.in", num=10, stop=1, pause=2):

        page = requests.get(j)
        response = page.status_code
        if response == 200:
            soup = BeautifulSoup(page.content, "lxml")
            genreData = soup.find_all("div",{"class":"subtext"})
            # find() returns None when nothing matches, in which case the
            # .string access on the next line raises AttributeError.
            summaryText = soup.find("div", {"class":"summary_text"})
            summaryText = summaryText.string
            infoSet.append(summaryText)
            filmtitle = soup.find("h1")
            # Only the first child node of the <h1> is used as the title.
            filmtitle = filmtitle.contents[0].strip() 
            emptySetTitle.append(filmtitle)
            links = []  # unused
            genres = []
            for h in genreData:
                a = h.find_all('a')
                aLength = len(a)
                a1 = a[0]  # unused; raises IndexError if the div has no links
                # Collect the text of every link except the last one.
                for b in range(0,aLength - 1):
                    r = a[b].string
                    genres.append(r)
                print (str(filmIndex) + " " + str(filmtitle))
                # The same `genres` list object is appended once per div.
                emptysetGenre.append(genres)
        # Runs for every URL regardless of status code, so this list can grow
        # longer than the title/info/genre lists.
        emptyWebPageSet.append(j)
# First genre of each entry; raises IndexError if any genre list is empty.
lst1 = [item[0] for item in emptysetGenre]
# Second genre (or '') of each entry; computed but not used below.
lst2 = [i[1] if len(i) > 1 else '' for i in emptysetGenre]
# pandas requires all columns to have equal length; if the accumulators above
# diverged, this constructor raises ValueError.
df = pandas.DataFrame({"imdbPage": emptyWebPageSet,
                       "title": emptySetTitle,
                       "genre1": lst1,
                       "info":infoSet
                       })
# Single write of all rows at the very end of the run.
df.to_csv("movieDetails.csv", encoding='utf-8', index=False)

Tags: csv in from import pandas for time my
1条回答
网友
1楼 · 发布于 2024-09-27 00:12:25

把最后一部分（生成结果并写入 CSV 的代码）移到循环内部，这样每次迭代都会保存一次结果：

import requests
from googlesearch import search 
import csv
import pandas
from bs4 import BeautifulSoup
import numpy as np
import os
from datetime import datetime
import time
import os


# Scrape one IMDb page per film title and persist every result immediately.
#
# The header is written once before the loop, then each scraped page is
# appended to the CSV as its own row. If a later iteration raises (e.g. the
# connection drops), every row gathered so far is already on disk.
OUTPUT_CSV = "movieDetails.csv"
COLUMNS = ["imdbPage", "title", "genre1", "info"]


def _scrape_page(url):
    """Fetch *url* and return one output row as a dict keyed by COLUMNS.

    Fields that cannot be extracted (non-200 response, missing element) are
    left as empty strings so every row always has the same shape.

    Raises:
        requests.RequestException: propagated from ``requests.get`` when the
            request itself fails.
    """
    row = {"imdbPage": url, "title": "", "genre1": "", "info": ""}

    page = requests.get(url)
    if page.status_code != 200:
        return row

    soup = BeautifulSoup(page.content, "lxml")

    # Title: only the first child node of the <h1> is used; skip it unless it
    # is plain text (find() returns None when the page has no <h1>).
    title_tag = soup.find("h1")
    if title_tag is not None and title_tag.contents:
        first_node = title_tag.contents[0]
        if isinstance(first_node, str):
            row["title"] = first_node.strip()

    # Summary: .string is None when the div has more than one child node.
    summary_tag = soup.find("div", {"class": "summary_text"})
    if summary_tag is not None and summary_tag.string is not None:
        row["info"] = summary_tag.string

    # Genres: the text of every link in each "subtext" div except the last
    # link (presumably a non-genre link such as the release date -- confirm
    # against the page markup). Slicing is safe when a div has no links.
    genres = []
    for subtext in soup.find_all("div", {"class": "subtext"}):
        anchors = subtext.find_all("a")
        genres.extend(anchor.string for anchor in anchors[:-1])
    if genres and genres[0] is not None:
        row["genre1"] = genres[0]

    return row


colnames = ['title']
data = pandas.read_csv('D:/Desktop/imdbWebScrape/mediaDataForGenreScrape.csv', names=colnames, header=None)
# header=None makes pandas read the file's first line as data; drop that row
# (presumably the input file's own header line).
my_list = list(data["title"])[1:]

# Start a fresh output file containing only the header row.
pandas.DataFrame(columns=COLUMNS).to_csv(OUTPUT_CSV, encoding='utf-8', index=False)

# enumerate gives the true 1-based position, even for duplicate titles.
for filmIndex, film in enumerate(my_list, start=1):
    query = film + " imdb"
    for j in search(query, tld="co.in", num=10, stop=1, pause=2):
        row = _scrape_page(j)
        # Append this single row; to_csv opens and closes the file on each
        # call, so the row is on disk before the next request starts.
        pandas.DataFrame([row], columns=COLUMNS).to_csv(
            OUTPUT_CSV, mode="a", header=False, encoding='utf-8', index=False
        )
        print(str(filmIndex) + " " + str(row["title"]))

相关问题 更多 >

    热门问题