关于使用 Selenium 和 Python 提取“文本”的问题

2024-09-30 10:34:09 发布

您现在位置:Python中文网/ 问答频道 /正文

我试图从这个页面的每个下拉菜单中收集列表数据。我可以使用 Selenium 和 Python 3.6 访问 'li' 标签部分并获取 'href' 数据,但问题是我无法获取每个列表项的文本数据。

我的代码如下:

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException

from bs4 import BeautifulSoup
from time import sleep


# Page whose dropdown menus are scraped below.
link = 'http://www.bobaedream.co.kr/cyber/CyberCar.php?gubun=I'
driver = webdriver.PhantomJS()
driver.set_window_size(1920, 1080)
driver.get(link)
# Fixed pause after navigation; presumably meant to let the page finish
# rendering -- TODO confirm (an explicit wait would be more reliable).
sleep(.75)

# NOTE(review): `soup` is never used again in this snippet. `page_source`
# is presumably already a decoded str, in which case `from_encoding` has no
# effect -- verify against the BeautifulSoup documentation.
soup = BeautifulSoup(driver.page_source, "html.parser", from_encoding='utf-8')

# Build (link text, href) pairs for every anchor inside #layer_maker,
# intending to skip the '전체' entry.
# NOTE(review): in the output listed below, every text field is '' and the
# '전체' row (href argument '%EC%A0%84%EC%B2%B4') is still present, so the
# `o.text != '전체'` filter never matches. Presumably `.text` yields only
# visible text and these dropdown items are not displayed -- confirm.
# NOTE(review): the href values come back percent-encoded, which is why the
# Korean names in the output look garbled.
manufacturers = [
    ('%s' % o.text, '%s' % o.get_attribute('href'))
    for o
    in driver.find_elements_by_css_selector("#layer_maker ul.list li a")
    if o.text != '전체']

# Print one (text, href) tuple per line.
for manufacturer in manufacturers:
    print(manufacturer) 

我的结果如下:

('', "javascript:selChange('maker', '0', '%EC%A0%84%EC%B2%B4');")
('', "javascript:selChange('maker', '1', 'BMW');")
('', "javascript:selChange('maker', '21', '%EB%B2%A4%EC%B8%A0');")
('', "javascript:selChange('maker', '32', '%EC%95%84%EC%9A%B0%EB%94%94');")
('', "javascript:selChange('maker', '44', '%ED%8F%AD%EC%8A%A4%EB%B0%94%EA%B2%90');")
('', "javascript:selChange('maker', '13', '%EB%A0%89%EC%84%9C%EC%8A%A4');")
('', "javascript:selChange('maker', '97', '%EB%AF%B8%EB%8B%88');")
('', "javascript:selChange('maker', '2', 'GM');")
('', "javascript:selChange('maker', '77', 'GMC');")
('', "javascript:selChange('maker', '5', '%EB%8B%9B%EC%82%B0');")
('', "javascript:selChange('maker', '6', '%EB%8B%A4%EC%9D%B4%ED%95%98%EC%93%B0');")
('', "javascript:selChange('maker', '7', '%EB%8B%B7%EC%A7%80');")
('', "javascript:selChange('maker', '9', '%EB%8F%84%EC%9A%94%ED%83%80');")
('', "javascript:selChange('maker', '10', '%EB%9E%80%EC%B9%98%EC%95%84');")
('', "javascript:selChange('maker', '11', '%EB%9E%8C%EB%B3%B4%EB%A5%B4%EA%B8%B0%EB%8B%88');")
('', "javascript:selChange('maker', '12', '%EB%9E%9C%EB%93%9C%EB%A1%9C%EB%B2%84');")
('', "javascript:selChange('maker', '14', '%EB%A1%9C%EB%B2%84');")
('', "javascript:selChange('maker', '15', '%EB%A1%9C%ED%84%B0%EC%8A%A4');")
('', "javascript:selChange('maker', '16', '%EB%A1%A4%EC%8A%A4%EB%A1%9C%EC%9D%B4%EC%8A%A4');")
('', "javascript:selChange('maker', '61', '%EB%A5%B4%EB%85%B8');")
('', "javascript:selChange('maker', '17', '%EB%A7%81%EC%BB%A8');")
('', "javascript:selChange('maker', '18', '%EB%A7%88%EC%84%B8%EB%9D%BC%ED%8B%B0');")
('', "javascript:selChange('maker', '19', '%EB%A7%88%EC%AF%94%EB%8B%A4');")
('', "javascript:selChange('maker', '1003', '%EB%A7%A5%EB%9D%BC%EB%A0%8C');")
('', "javascript:selChange('maker', '60', '%EB%A8%B8%ED%81%90%EB%A6%AC');")
('', "javascript:selChange('maker', '20', '%EB%AF%B8%EC%93%B0%EB%B9%84%EC%8B%9C');")
('', "javascript:selChange('maker', '82', '%EB%AF%B8%EC%AF%94%EC%98%A4%EC%B9%B4');")
('', "javascript:selChange('maker', '22', '%EB%B2%A4%ED%8B%80%EB%A6%AC');")
('', "javascript:selChange('maker', '23', '%EB%B3%BC%EB%B3%B4');")
('', "javascript:selChange('maker', '1009', '%EB%B6%81%EA%B8%B0%EC%9D%80%EC%83%81');")
('', "javascript:selChange('maker', '88', '%EB%B6%80%EA%B0%80%ED%8B%B0');")
('', "javascript:selChange('maker', '24', '%EB%B7%B0%EC%9D%B5');")
('', "javascript:selChange('maker', '99', '%EB%B9%84%EC%9D%B4%EC%8A%A4%EB%A7%8C');")
('', "javascript:selChange('maker', '25', '%EC%82%AC%EB%B8%8C');")
('', "javascript:selChange('maker', '94', '%EC%83%88%ED%84%B4');")
('', "javascript:selChange('maker', '29', '%EC%89%90%EB%B3%B4%EB%A0%88');")
('', "javascript:selChange('maker', '27', '%EC%8A%A4%EB%B0%94%EB%A3%A8');")
('', "javascript:selChange('maker', '28', '%EC%8A%A4%EC%A6%88%ED%82%A4');")
('', "javascript:selChange('maker', '103', '%EC%8A%A4%EC%B9%B4%EB%8B%88%EC%95%84');")
('', "javascript:selChange('maker', '93', '%EC%8A%A4%ED%8C%8C%EC%9D%B4%EC%BB%A4');")
('', "javascript:selChange('maker', '30', '%EC%8B%9C%ED%8A%B8%EB%A1%9C%EC%97%A5');")
('', "javascript:selChange('maker', '33', '%EC%95%8C%ED%8C%8C%EB%A1%9C%EB%A9%94%EC%98%A4');")
('', "javascript:selChange('maker', '62', '%EC%95%A0%EC%8A%A4%ED%84%B4%EB%A7%88%ED%8B%B4');")
('', "javascript:selChange('maker', '95', '%EC%96%B4%ED%81%90%EB%9D%BC');")
('', "javascript:selChange('maker', '34', '%EC%98%A4%ED%8E%A0');")
('', "javascript:selChange('maker', '1011', '%EC%98%A4%EC%8A%A4%ED%8B%B4');")
('', "javascript:selChange('maker', '35', '%EC%98%AC%EC%A6%88%EB%AA%A8%EB%B9%8C');")
('', "javascript:selChange('maker', '83', '%EC%9B%A8%EC%8A%A4%ED%8A%B8%ED%95%84%EB%93%9C');")
('', "javascript:selChange('maker', '36', '%EC%9D%B4%EC%8A%A4%EC%A6%88');")
('', "javascript:selChange('maker', '81', '%EC%9D%B8%ED%94%BC%EB%8B%88%ED%8B%B0');")
('', "javascript:selChange('maker', '37', '%EC%9E%AC%EA%B7%9C%EC%96%B4');")
('', "javascript:selChange('maker', '96', '%EC%A7%80%ED%94%84');")
('', "javascript:selChange('maker', '1006', '%ED%85%8C%EC%8A%AC%EB%9D%BC');")
('', "javascript:selChange('maker', '38', '%EC%BA%90%EB%94%9C%EB%9D%BD');")
('', "javascript:selChange('maker', '89', '%EC%BD%94%EB%8B%89%EC%84%B8%ED%81%AC');")
('', "javascript:selChange('maker', '39', '%ED%81%AC%EB%9D%BC%EC%9D%B4%EC%8A%AC%EB%9F%AC');")
('', "javascript:selChange('maker', '84', '%ED%8C%8C%EA%B0%80%EB%8B%88');")
('', "javascript:selChange('maker', '41', '%ED%8E%98%EB%9D%BC%EB%A6%AC');")
('', "javascript:selChange('maker', '42', '%ED%8F%AC%EB%93%9C');")
('', "javascript:selChange('maker', '43', '%ED%8F%AC%EB%A5%B4%EC%89%90');")
('', "javascript:selChange('maker', '1008', '%ED%8F%AC%ED%86%A4');")
('', "javascript:selChange('maker', '45', '%ED%8F%B0%ED%8B%B0%EC%95%85');")
('', "javascript:selChange('maker', '46', '%ED%91%B8%EC%A1%B0');")
('', "javascript:selChange('maker', '91', '%ED%94%BC%EC%8A%A4%EC%BB%A4');")
('', "javascript:selChange('maker', '47', '%ED%94%BC%EC%95%84%ED%8A%B8');")
('', "javascript:selChange('maker', '48', '%ED%97%88%EB%A8%B8');")
('', "javascript:selChange('maker', '50', '%ED%98%BC%EB%8B%A4');")
('', "javascript:selChange('maker', '76', '%ED%99%80%EB%8D%B4');")
('', "javascript:selChange('maker', '4', '%EA%B8%B0%ED%83%80 %EC%88%98%EC%9E%85%EC%B0%A8');") 

这是HTML源的捕获图像: enter image description here

我不明白为什么文本部分是空的,而且所有的韩文字符都变成了百分号编码的乱码(韩文字符是 javascript:selChange 调用中的第三个参数)。我想做的是补全文本部分,并把韩文字符还原成正确的形式。

请帮忙。


Tags: 数据 from import support driver selenium javascript b0
1条回答
网友
1楼 · 发布于 2024-09-30 10:34:09

尝试使用以下代码:

from urllib import parse

...
# Collect (label, decoded href) pairs for every anchor inside #layer_maker,
# skipping the '전체' entry.
#
# The label is read with get_attribute('text') rather than `.text`; per the
# output shown below this returns the anchor's label where `.text` returned
# an empty string (presumably because the dropdown items are not displayed).
# parse.unquote() turns the percent-encoded href back into readable Korean.
manufacturers = [
    (o.get_attribute('text'), parse.unquote(o.get_attribute('href')))
    for o
    in driver.find_elements_by_css_selector("#layer_maker ul.list li a")
    if o.get_attribute('text') != '전체']

# Print one (label, href) tuple per line.
for manufacturer in manufacturers:
    print(manufacturer)

输出:

('BMW', "javascript:selChange('maker', '1', 'BMW');")
('벤츠', "javascript:selChange('maker', '21', '벤츠');")
('아우디', "javascript:selChange('maker', '32', '아우디');")
('폭스바겐', "javascript:selChange('maker', '44', '폭스바겐');")
...

相关问题 更多 >

    热门问题