<pre><code>import time
import requests
from bs4 import BeautifulSoup
def _nth(items, index):
    """Return items[index], or an empty string when the list is too short."""
    return items[index] if len(items) > index else ""


# Fetch the search-results page and collect the unique job-posting URLs.
soup = BeautifulSoup(
    requests.get("https://www.bayt.com/en/international/jobs/executive-chef-jobs/").content,
    "lxml"
)
links = []
for a in soup.select("h2.m0.t-regular a"):
    # `links` stores absolute URLs, so the duplicate check must compare the
    # absolute URL too; comparing the bare href would never match.
    url = "https://www.bayt.com" + a['href']
    if url not in links:
        links.append(url)

# Flat accumulator: for every job, its link, description tag and <dd> values.
joineddd = []
for link in links:
    print(link)
    s = BeautifulSoup(requests.get(link).content, "lxml")

    # select_one() yields None when the page has no matching paragraph.
    jobdesc = s.select_one("div[class='card-content is-spaced'] p")
    if jobdesc is not None:
        print(jobdesc.text)

    # Labels (<dt>) of the job-details list. Postings do not all carry the
    # same number of entries, so every positional read goes through _nth().
    alldt = [dt.text for dt in s.select("div[class='card-content is-spaced'] dt")]
    dt_Job_location = _nth(alldt, 0)
    dt_Job_Company_Industry = _nth(alldt, 1)
    dt_Job_Company_Type = _nth(alldt, 2)
    dt_Job_Job_Role = _nth(alldt, 3)
    dt_Job_Employment_Type = _nth(alldt, 4)

    # Values (<dd>) of the same list, read with the same positional layout.
    alldd = [dd.text for dd in s.select("div[class='card-content is-spaced'] dd")]
    dd_job_location = _nth(alldd, 0)
    dd_job_Company_Industry = _nth(alldd, 1)
    dd_job_Company_Type = _nth(alldd, 2)
    dd_job_Job_Role = _nth(alldd, 3)
    dd_job_Employment_Type = _nth(alldd, 4)

    print(f"{dt_Job_location}:{dd_job_location}\n{dt_Job_Company_Industry}:{dd_job_Company_Industry}\n\n")
    print("-" * 80)

    # NOTE(review): extend() flattens the three items into one long list and
    # stores the description as a tag object (or None), not as text. This
    # keeps the original shape; switch to append() if one row per job is
    # what downstream code expects -- confirm.
    joineddd.extend([link, jobdesc, alldd])
</code></pre>
<p>数据帧:</p>
<pre><code>import pandas as pd
import numpy as np
# Two value lists of different lengths that become the frame's columns.
array1 = ["value1", "value2"]
array2 = ["value1"]

# Column name -> NumPy array holding that column's values.
df = {"A": np.array(array1), "B": np.array(array2)}

# Wrapping every column in a Series lets pandas align them on the index,
# so the shorter column is filled with NaN instead of raising an error.
_df = pd.DataFrame({name: pd.Series(values) for name, values in df.items()})
print(_df)

# Persist the frame without the index column.
_df.to_csv("filename.csv", index=False, encoding="utf-8")
</code></pre>
<p>输出:</p>
<pre><code> A B
0 value1 value1
1 value2 NaN
</code></pre>