从字典创建数据帧,其中包含具有相同长度的数组

2024-10-17 08:27:01 发布

您现在位置:Python中文网/ 问答频道 /正文

我试图提取一些YouTube视频的细节,当我从字典中创建数据帧时,我遇到了这个错误。有人能帮我吗

def youtube_search(q, max_results=50,order="relevance", token=None, location=None, location_radius=None):

search_response = youtube.search().list(
q=q,
type="video",
pageToken=token,
order = order,
part="id,snippet", # Part signifies the different types of data you want 
maxResults=max_results,
location=location,
locationRadius=location_radius).execute()

title = []
channelId = []
channelTitle = []
categoryId = []
videoId = []
viewCount = []
likeCount = []
dislikeCount = []
commentCount = []
category = []
tags = []
videos = []

for search_result in search_response.get("items", []):
    if search_result["id"]["kind"] == "youtube#video":

        title.append(search_result['snippet']['title']) 
        videoId.append(search_result['id']['videoId'])

        response = youtube.videos().list(
            part='statistics, snippet',
            id=search_result['id']['videoId']).execute()

        channelId.append(response['items'][0]['snippet']['channelId'])
        channelTitle.append(response['items'][0]['snippet']['channelTitle'])
        categoryId.append(response['items'][0]['snippet']['categoryId'])                   
        viewCount.append(response['items'][0]['statistics']['viewCount'])
        likeCount.append(response['items'][0]['statistics']['likeCount'])
        dislikeCount.append(response['items'][0]['statistics']['dislikeCount'])
        
        
        
    if 'commentCount' in response['items'][0]['statistics'].keys():
        commentCount.append(response['items'][0]['statistics']['commentCount'])
    else:
        commentCount.append([])

    if 'tags' in response['items'][0]['snippet'].keys():
        tags.append(response['items'][0]['snippet']['tags'])
    else:
        tags.append([])
        #Not every video has likes/dislikes enabled so they won't appear in JSON response
        try:
            likeCount.append(response['items'][0]['statistics']['likeCount'])
        except:
   #Good to be aware of Channels that turn off their Likes
            print("Video titled {0}, on Channel {1} Likes Count is not available".format(stats['items'][0]['snippet']['title'],
                                                                                         stats['items'][0]['snippet']['channelTitle']))
            print(response['items'][0]['statistics'].keys())
    #Appends "Not Available" to keep dictionary values aligned
            likeCount.append("Not available")
            
        try:
            dislikeCount.append(response['items'][0]['statistics']['dislikeCount'])     
        except:
            #Good to be aware of Channels that turn off their Likes
            print("Video titled {0}, on Channel {1} Dislikes Count is not available".format(stats['items'][0]['snippet']['title'],
                                                                                            stats['items'][0]['snippet']['channelTitle']))
            print(response['items'][0]['statistics'].keys())
            dislikeCount.append("Not available")


#youtube_dict = {'tags':tags,'channelId': channelId,'channelTitle': channelTitle,'categoryId':categoryId,'title':title,'videoId':videoId,'viewCount':viewCount,'likeCount':likeCount,'dislikeCount':dislikeCount,'commentCount':commentCount,'favoriteCount':favoriteCount}
youtube_dict = {'tags':tags,'channelTitle': channelTitle,
                'title':title,'videoId':videoId,'viewCount':viewCount, 
                'likeCount':likeCount, 'dislikeCount':dislikeCount, 'commentCount':commentCount,  }

return youtube_dict
q = "covid19 vaccine"

test = youtube_search(q, max_results=100,order="relevance", token=None, location=None, location_radius=None)
import pandas as pd
df = pd.DataFrame(data=test)

df.head()

ValueError:数组的长度必须相同。我试图补充 df=pd.DataFrame.from_dict(data=test,orient='index'),但我也不工作,我遇到了另一个错误 TypeError:init()获得意外的关键字参数“orient”

任何帮助都将不胜感激


Tags: searchtitleyoutuberesponsetagsitemslocationsnippet