代码非常长。需要从中解析出 screen_name(用户名):
<script type="text/javascript" charset="utf-8" nonce="YjJmNTAwODgtODBmMy00YzQ5LWJhODItMmQwNTk0Yjg4MTI1">window.__INITIAL_STATE__={"optimist":[],"urt":{},"toasts":[],"needs_phone_verification":false,"normal_followers_count":2,"notifications":false,"pinned_tweet_ids_str":[],"profile_image_url_https":"https://pbs.twimg.com/profile_images/1174197230003208192/qK5cqalJ_normal.jpg","profile_interstitial_type":"","protected":false,"featureSwitch":{"config":{"2fa_multikey_management_enabled":{"value":false},"screen_name":"Vickson25435099","always_use_https":true,"use_cookie_personalization":false,"sleep_time":{"enabled":false,"end_time":null,"start_time":null},"geo_enabled":false,"language":"en","discoverable_by_email":true,"discoverable_by_mobile_phone":true,"personalized_trends":true,"allow_media_tagging":"none","allow_contributor_request":"all","allow_ads_personalization":true,"allow_logged_out_device_personalization":true,"allow_location_history_personalization":true,"allow_sharing_data_for_third_party_personalization":false,"allow_dms_from":"following","allow_dm_groups_from":"following","translator_type":"none","country_code":"us","nsfw_user":false,"nsfw_admin":false,"ranked_timeline_setting":1,"ranked_timeline_eligible":null,"address_book_live_sync_enabled":false,"universal_quality_filtering_enabled":"enabled","dm_receipt_setting":"all_disabled","alt_text_compose_enabled":null,"mention_filter":"unfiltered","allow_authenticated_periscope_requests":true,"protect_password_reset":false,"require_password_login":false,"requires_login_verification":false,"dm_quality_filter":"enabled","autoplay_disabled":false,"settings_metadata":{}},"fetchStatus":"loaded"},"dataSaver":{"dataSaverMode":false},"transient":{"dtabBarInfo":{"hide":false},"loginPromptShown":false,"lastViewedDmInboxPath":"/messages","themeFocus":""}},"devices":{"browserPush":{"fetchStatus":"none","pushNotificationsPrompt":{"dismissed":false,"fetchStatus":"none"},"subscribed":false,"supported":null},"devices":{"data"
:{"emails":[],"phone_numbers":[]},"fetchStatus":"none"},"notificationSettings":{"push_settings":{"error":null,"fetchStatus":"none"},"push_settings_template":{"template":{"settings":[]}},"sms_settings":{"error":null,"fetchStatus":"none"},"sms_settings_template":{"template":{"settings":[]}},"checkin_time":null}},"audio":{"conversationLookup":{}},"hashflags":{"fetchStatus":"none","hashflags":{}},"friendships":{"pendingFollowers":{"acceptedIds":[],"ids":[],"fetchStatus":{"bottom":"none","top":"none"},"hydratedIds":[]}},"homeTimeline":{"useLatest":false,"fetchStatus":"none"},"multiAccount":{"fetchStatus":"none","users":[],"badgeCounts":{},"addAccountFetchStatus":"none"},"badgeCount":{"unreadDMCount":0},"ocf_location":{"startLocation":{}},"navigation":{},"teams":{"fetchStatus":"none","teams":{}},"cardState":{},"promotedContent":{}};window.__META_DATA__={"env":"prod","isLoggedIn":true,"isRTL":false,"hasMultiAccountCookie":false,"uaParserTags":["m2","rweb","msw"],"serverDate":1614578006755,"sha":"9921d3a6d626dc45b0f5a65681ef95c891d815cd"};window.__PREFETCH_DATA__={"items":[{"key":"dataUsageSettings","payload":{"dataSaverMode":false}}],"timestamp":1614578006700};</script>
我尝试了下面这种方法:
import requests
import json
from bs4 import BeautifulSoup
# First attempt: fetch the page and pull the JSON assigned to
# window.__INITIAL_STATE__ out of the response text.
# NOTE(review): indentation was lost in this paste; the lines after `for` and
# `if` were presumably nested under them.
# The request is sent with only the URL: no cookies or headers are supplied.
x = requests.get('https://twitter.com/home')
b = BeautifulSoup(x.text, 'html.parser')
# Rebinds `b` (the parsed document) to each <script> tag. The lines below never
# read the tag; they split the whole response text instead.
for b in b.find_all('script'):
wis = x.text.split('window.__INITIAL_STATE__=')
# More than one piece means the marker was found in the response text.
if len(wis) > 1:
# Keeps the text after the marker up to the first ';'. A ';' inside any JSON
# string value would cut the payload short.
data = json.loads(wis[1].split(';')[0])
# Looks the key up at the top level of the parsed object.
# NOTE(review): in the sample pasted above, "screen_name" sits under
# "featureSwitch" -> "config", which would explain the KeyError. Confirm
# against the real page.
print(data["screen_name"])
结果:KeyError: 'screen_name'。下面这种方法也不起作用:
import requests
import json
# Second attempt: split the raw response text around the marker and parse it.
# The request is sent with only the URL: no cookies or headers are supplied.
x = requests.get('https://twitter.com/home')
# Index [0] is the text BEFORE the marker (or the whole page if the marker is
# absent), not the JSON that follows it. The part after the marker is [1].
html = x.text.split('window.__INITIAL_STATE__=')[0]
# Drops everything from the first ';</script>' onward.
html = html.split(';</script>')[0]
# NOTE(review): `html` here is presumably leading HTML markup rather than JSON,
# which is consistent with the reported "Expecting value: line 1 column 1 (char 0)".
data = json.loads(html)
# Looks the key up at the top level of the parsed object.
print(data['screen_name'])
结果
Traceback (most recent call last):
File "<string>", line 8, in <module>
File "/usr/lib/python3.8/json/__init__.py", line 357, in loads
return _default_decoder.decode(s)
File "/usr/lib/python3.8/json/decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/usr/lib/python3.8/json/decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
>
已使用完整的 HTML 源码更新了问题。
这样是无法获取
screen_name
的:只有当前已登录的用户才能获取该数据,您必须在 requests 请求中携带有效的 cookie。
顺便说一句,以上面的例子来说,页面中包含多个变量(JSON),您需要截取
window.__INITIAL_STATE__=
和 ,"devices"
之间的 JSON。
相关问题 更多 >
编程相关推荐