For循环把结果返回了两次

2024-10-17 00:30:17 发布

您现在位置:Python中文网/ 问答频道 /正文

除了一个问题之外,下面的代码运行得很好:运行之后,它会把每个URL的数据返回两次,而不是一次。有人能解释一下我哪里做错了吗?

import requests
import csv
from random import choice
import pandas as pd

# Endpoint template; {date} is substituted into both DateFrom and DateTo below.
url_template = "https://stats.nba.com/stats/leaguedashptstats?College=&Conference=&Country=&DateFrom={date}&DateTo={date}&Division=&DraftPick=&DraftYear=&GameScope=&Height=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PerMode=Totals&PlayerExperience=&PlayerOrTeam=Player&PlayerPosition=&PtMeasureType=SpeedDistance&Season=2017-18&SeasonSegment=&SeasonType=Regular+Season&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight="

# Accumulator that each per-URL result is appended to.
lineup_df = pd.DataFrame()

# Input file with a "Date" column (used in the URL) and a "Date2" column.
df = pd.read_csv('NBADates.csv')
# NOTE: the return value is discarded, so this call has no effect on df.
df.to_dict('series')

# One URL per row of the "Date" column.
url_list=[url_template.format(date=date) for date in df.loc[ : ,"Date"]]

for url in url_list:

    data = requests.get(url, headers={
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0',})

    # Column names and data rows of the first result set in the response.
    headers = data.json()['resultSets'][0]['headers']        
    stats = data.json()['resultSets'][0]['rowSet']

    stats_df = pd.DataFrame(stats, columns=headers)
    # NOTE: this builds a list holding one copy of stats_df per value in
    # df["Date2"], not a single frame tagged with this URL's own date.
    stats_df=[stats_df.assign(Date2=Date2) for Date2 in df.loc[ : ,"Date2"]]

    # Append to the big dataframe
    # (stats_df is a list here, so every copy built above is appended).
    lineup_df = lineup_df.append(stats_df, ignore_index=True)

lineup_df.to_csv("Stats.csv")

编辑:这是文件

print(df)

         Date       Date2
0  10%2F17%2F2017  10/17/2017
1  10%2F18%2F2017  10/18/2017

Tags: csv, to, in, import, url, df, for, data
1条回答
网友
1楼 · 发布于 2024-10-17 00:30:17

这就是问题所在:

stats_df=[stats_df.assign(Date2=Date2) for Date2 in df.loc[ : ,"Date2"]]

这一行会为输入文件中的每个Date2都生成一份stats_df的副本,所以每个URL的数据会按Date2的个数重复出现(示例文件有两行,因此是两次)。我想您只是想得到与刚才下载的URL相对应的那个Date2,而不是df中的每个Date2。可以用enumerate取得url在url_list中的索引,再用这个索引访问df的对应行。

import requests
import csv
from random import choice
import pandas as pd

# Endpoint template; {date} fills both DateFrom and DateTo, so each request
# covers exactly one day.
url_template = "https://stats.nba.com/stats/leaguedashptstats?College=&Conference=&Country=&DateFrom={date}&DateTo={date}&Division=&DraftPick=&DraftYear=&GameScope=&Height=&LastNGames=0&LeagueID=00&Location=&Month=0&OpponentTeamID=0&Outcome=&PORound=0&PerMode=Totals&PlayerExperience=&PlayerOrTeam=Player&PlayerPosition=&PtMeasureType=SpeedDistance&Season=2017-18&SeasonSegment=&SeasonType=Regular+Season&StarterBench=&TeamID=0&VsConference=&VsDivision=&Weight="

# Input file: "Date" is substituted into the URL, "Date2" is attached to the
# rows downloaded for that URL.
df = pd.read_csv('NBADates.csv')

# One URL per row of df, in row order.
url_list = [url_template.format(date=date) for date in df.loc[:, "Date"]]

# Per-URL frames are collected here and concatenated once at the end.
# DataFrame.append was removed in pandas 2.0, and it also re-copied the whole
# accumulated frame on every iteration.
frames = []

for index, url in enumerate(url_list):
    data = requests.get(
        url,
        headers={
            'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0',
        },
        # Without a timeout a stalled connection would hang the script forever.
        timeout=30,
    )
    # Surface HTTP errors directly instead of as a JSON decoding failure.
    data.raise_for_status()

    # Parse the response body once; both fields come from the first result set.
    result_set = data.json()['resultSets'][0]
    stats_df = pd.DataFrame(result_set['rowSet'], columns=result_set['headers'])

    # url_list was built row by row from df, so position `index` in url_list
    # is the same row of df. iloc is positional, so this holds even if df's
    # index labels are not 0..n-1.
    stats_df = stats_df.assign(Date2=df["Date2"].iloc[index])

    frames.append(stats_df)

# pd.concat raises on an empty list, so fall back to an empty frame when the
# input file had no rows.
lineup_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

lineup_df.to_csv("Stats.csv")

相关问题 更多 >