Web抓取内容类型:JSON

2024-09-26 22:43:48 发布

您现在位置:Python中文网/ 问答频道 /正文

我正试图从这个页面(here)获取职位的位置详细信息。
使用 BeautifulSoup 时,我得到的是空列表 []。问题在于我要抓取的数据并不在“查看网页源代码”(view page source)中。在开发者工具 > Network(网络)里,该请求的内容类型为 JSON。因此,我尝试了以下代码:

from bs4 import BeautifulSoup
import requests
import pandas as pd
import json

# Job-search page (jobsearch.ftl) on ngc.taleo.net; the search filters
# (location, radius, portal, ...) are carried in the query string.
url = 'https://ngc.taleo.net/careersection/ng_pro_intl_aujobs/jobsearch.ftl?lang=en_GB&location=756140022608&radiusType=K&searchExpanded=true&radius=1&portal=34140031600&_ga=2.197392303.1699610010.1604351575-1311873605.1579627290'

# Session object used to send the single POST request below.
s = requests.Session()

# Cookies sent with the request.
# NOTE(review): the '^%^' sequences look like Windows-cmd escaping left over
# from a "copy as cURL" export rather than real cookie values -- confirm.
cookies = {
    'locale': 'en-GB',
    '_gcl_au': '1.1.79711829.1614933155',
    '_ga': 'GA1.2.693390019.1614933178',
    '__atssc': 'google^%^3B1',
    '_gid': 'GA1.2.1213481278.1618077337',
    '__atuvc': '1^%^7C10^%^2C0^%^7C11^%^2C9^%^7C12^%^2C14^%^7C13^%^2C28^%^7C14',
    '__atuvs': '6071e67dc413e3d6001',
}

# Request headers sent with the POST; Content-Type declares a JSON body.
headers = {
    'Connection': 'keep-alive',
    'Pragma': 'no-cache',
    'Cache-Control': 'no-cache',
    # NOTE(review): this value looks truncated / cmd-escaped -- confirm.
    'sec-ch-ua': '^\\^Google',
    'tzname': 'Asia/Calcutta',
    'sec-ch-ua-mobile': '?0',
    'tz': 'GMT+05:30',
    'Content-Type': 'application/json',
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'User-Agent': '###MY USER AGENT HERE####',
    'X-Requested-With': 'XMLHttpRequest',
    'Origin': 'https://ngc.taleo.net',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-Mode': 'cors',
    'Sec-Fetch-Dest': 'empty',
    'Referer': 'https://ngc.taleo.net/careersection/ng_pro_intl_aujobs/jobsearch.ftl?lang=en_GB&location=756140022608&radiusType=K&searchExpanded=true&radius=1&portal=34140031600&_ga=2.197392303.1699610010.1604351575-1311873605.1579627290',
    'Accept-Language': 'en-US,en;q=0.9',
}

# Query parameters. NOTE: this tuple is never passed to s.post() below, so it
# has no effect on the request that is actually sent.
params = (
    ('lang', 'en_GB'),
    ('portal', '34140031600'),
)
# Request body. The literal is full of '^' and '\' characters and contains no
# double quotes at all, so the text sent to the server is not parseable JSON
# even though the Content-Type header above says application/json.
# NOTE(review): presumably cmd-escaped residue of a JSON document -- confirm.
data = '^{^\\^multilineEnabled^\\^:true,^\\^sortingSelection^\\^:^{^\\^sortBySelectionParam^\\^:^\\^3^\\^,^\\^ascendingSortingOrder^\\^:^\\^false^\\^^},^\\^fieldData^\\^:^{^\\^fields^\\^:^{^\\^KEYWORD^\\^:^\\^^\\^,^\\^LOCATION^\\^:^\\^756140022608^\\^,^\\^JOB_TITLE^\\^:^\\^^\\^^},^\\^valid^\\^:true^},^\\^filterSelectionParam^\\^:^{^\\^searchFilterSelections^\\^:^[^{^\\^id^\\^:^\\^POSTING_DATE^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^LOCATION^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^JOB_FIELD^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^JOB_TYPE^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^JOB_SCHEDULE^\\^,^\\^selectedValues^\\^:^[^]^}^]^},^\\^advancedSearchFiltersSelectionParam^\\^:^{^\\^searchFilterSelections^\\^:^[^{^\\^id^\\^:^\\^ORGANIZATION^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^LOCATION^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^JOB_FIELD^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^JOB_NUMBER^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^URGENT_JOB^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^EMPLOYEE_STATUS^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^STUDY_LEVEL^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^WILL_TRAVEL^\\^,^\\^selectedValues^\\^:^[^]^},^{^\\^id^\\^:^\\^JOB_SHIFT^\\^,^\\^selectedValues^\\^:^[^]^}^]^},^\\^pageNo^\\^:1^}'


# POST the body to the page URL and immediately decode the reply as JSON.
# If the response body is not JSON, .json() fails with a JSONDecodeError.
# After this line `response` holds the decoded payload, not the Response
# object, so it has no .status_code attribute.
response = s.post(url, headers=headers, cookies=cookies, data=data).json()

# Disabled debugging attempts, kept as posted.
#res_json = json.loads(response)
#print(response.status_code)

但在 response 这一行,我得到了如下错误:raise JSONDecodeError("Expecting value", s, err.value) from None json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

非常感谢您在这方面的帮助

不幸的是,我目前只能使用 requests 或其他流行的 Python 库

提前谢谢


Tags: https, import, id, json, true, lang, net, job
1条回答
网友
1楼 · 发布于 2024-09-26 22:43:48

如果需要发送 JSON 数据,可以在 Python 的 requests 模块中使用 json=data 参数。您还需要把数据整理成字典:

import requests

# REST endpoint the search request is posted to (not the HTML search page).
SEARCH_URL = "https://ngc.taleo.net/careersection/rest/jobboard/searchjobs"

# Ids of the filters sent, each with an empty selection, in request order.
BASIC_FILTER_IDS = (
    "POSTING_DATE",
    "LOCATION",
    "JOB_FIELD",
    "JOB_TYPE",
    "JOB_SCHEDULE",
)
ADVANCED_FILTER_IDS = (
    "ORGANIZATION",
    "LOCATION",
    "JOB_FIELD",
    "JOB_NUMBER",
    "URGENT_JOB",
    "EMPLOYEE_STATUS",
    "STUDY_LEVEL",
    "WILL_TRAVEL",
    "JOB_SHIFT",
)

# Query-string parameters of the search.
query = {
    "lang": "en_GB",
    "location": "756140022608",
    "radiusType": "K",
    "searchExpanded": "true",
    "radius": "1",
    "portal": "34140031600",
}

# Only the two time-zone headers are sent explicitly.
tz_headers = {
    "tzname": "Asia/Calcutta",
    "tz": "GMT+05:30",
}

# Request body as a plain dict; requests serialises it to JSON (and sets the
# Content-Type header) when it is passed through the `json=` argument.
payload = {
    "multilineEnabled": True,
    "sortingSelection": {
        "sortBySelectionParam": "3",
        # Kept as the string "false", not a boolean.
        "ascendingSortingOrder": "false",
    },
    "fieldData": {
        "fields": {
            "KEYWORD": "",
            "LOCATION": "756140022608",
            "JOB_TITLE": "",
        },
        "valid": True,
    },
    "filterSelectionParam": {
        "searchFilterSelections": [
            {"id": filter_id, "selectedValues": []}
            for filter_id in BASIC_FILTER_IDS
        ],
    },
    "advancedSearchFiltersSelectionParam": {
        "searchFilterSelections": [
            {"id": filter_id, "selectedValues": []}
            for filter_id in ADVANCED_FILTER_IDS
        ],
    },
    "pageNo": 1,
}

# A timeout keeps the script from blocking forever on an unresponsive server.
r = requests.post(
    SEARCH_URL,
    params=query,
    headers=tz_headers,
    json=payload,
    timeout=30,
)

# Fail with a clear HTTPError on 4xx/5xx instead of an opaque JSON decode
# error when the server answers with an error page.
r.raise_for_status()

print(r.json())

相关问题 更多 >

    热门问题