`Bluetooth: bluetooth_adapter_winrt.cc:1073 Getting Default Adapter failed`(获取默认适配器失败)—— Windows 虚拟机上的 Python 多进程(multiprocessing)程序

2024-10-02 08:22:11 发布

您现在位置:Python中文网/ 问答频道 /正文

我有一个使用 Python 和 Selenium 编写的爬虫程序:见下文(欢迎随意测试并留下评论/建议!)。它在我的 Mac(本地)上运行得很好,但提取完整数据预计需要大约 6 天。因此,我决定加入多进程(multiprocessing)来缩短时间。这在我的 Mac 上仍然可以完美运行,但当我尝试在 Windows 虚拟机(Azure D8s_v3)上运行它时,出现了以下错误:

DevTools listening on ws://127.0.0.1:56800/devtools/browser/de9e5088-9659-4604-b43f-8ea1fae02a66 [11728:11308:0805/085310.771:ERROR:device_event_log_impl.cc(214)] [08:53:10.782] Bluetooth: bluetooth_adapter_winrt.cc:1073 Getting Default Adapter failed.

你们在windows上运行时也会遇到错误吗?先谢谢你

# Jonathan Augustin

# BELOW IS THE LINK WE WOULD LIKE YOU TO SCRAPE AS A TEST OF YOUR ABILITY:
# Dixie State University : https://registration.dixie.edu/transfer-guide/

# Please write a python script to extract the “To” and “From” transfer information from the highlighted link.
# The output should be in .JSON format. We would also like you to send the python script as well.
# We want ALL of the transfer information “TO” Dixie State University, “FROM” every other institution in every state.

import requests
from bs4 import BeautifulSoup
import json
import time
from itertools import chain
import logging
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import pprint
import time
import progressbar
import threading
import multiprocessing

# Template for one transfer-equivalency record.  Key order is the order in
# which the fields appear in the JSON output.  Every field starts out empty
# except "to_school", which is always Dixie State University.
jsonClass = dict(
    # Sending institution and its primary course.
    from_school="",
    from_course_department="",
    from_course_code="",
    from_course_name="",
    from_course_credit_hours="",
    # Additional sending-side courses (stored as stringified lists).
    from_extra_department="",
    from_extra_code="",
    from_extra_name="",
    from_extra_credit_hours="",
    # Receiving institution and its primary course.
    to_school="Dixie State University",
    to_course_department="",
    to_course_code="",
    to_course_name="",
    to_course_credit_hours="",
    # Additional receiving-side courses (stored as stringified lists).
    to_extra_department="",
    to_extra_code="",
    to_extra_name="",
    to_extra_credit_hours="",
)

# Names of the fifty U.S. states, in alphabetical order.  A school is only
# scraped when the second ", "-separated part of its address is in this list.
states = [
    "Alabama", "Alaska", "Arizona", "Arkansas", "California",
    "Colorado", "Connecticut", "Delaware", "Florida", "Georgia",
    "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa",
    "Kansas", "Kentucky", "Louisiana", "Maine", "Maryland",
    "Massachusetts", "Michigan", "Minnesota", "Mississippi", "Missouri",
    "Montana", "Nebraska", "Nevada", "New Hampshire", "New Jersey",
    "New Mexico", "New York", "North Carolina", "North Dakota", "Ohio",
    "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island", "South Carolina",
    "South Dakota", "Tennessee", "Texas", "Utah", "Vermont",
    "Virginia", "Washington", "West Virginia", "Wisconsin", "Wyoming",
]
  

class searchPage(object):
    """One Chrome session on the Dixie State transfer-equivalency widget.

    Each instance owns its own webdriver.  ``updateJson`` only handles the
    schools whose position in the school list satisfies
    ``position % numthreads == number``, so several instances can divide the
    list between them without overlapping.
    """

    def __init__(self, number,
                 driver_path="/Users/jonathanaugustin/Desktop/chromedriver"):
        """Start Chrome and load the equivalency widget.

        Args:
            number: This worker's index (0-based); compared against
                ``position % numthreads`` in ``updateJson``.
            driver_path: Location of the chromedriver executable.  Defaults
                to the original author's macOS path; pass the local path
                when running on another machine (e.g. a Windows VM).
        """
        options = Options()
        self.number = number
        self.driver = webdriver.Chrome(driver_path, options=options)
        url = 'https://widgets.collegetransfer.net/EquivWidget?institution=2734&name=Dixie%20State%20University&theme=/Content/Themes/Selene/jquery-ui-1.8.17.custom.css&direction=receiver&zip=84770-3876'
        self.driver.get(url)

    def getSchools(self):
        """Scroll the school list to its end and return its entries.

        The ``schoolsbyname`` element is scrolled to the bottom repeatedly
        until the last 20 characters of its text stop changing between two
        consecutive passes (presumably the list lazy-loads more rows on
        scroll).

        Returns:
            The list of ``selectableContainer`` elements inside the school
            list.
        """
        previous_tail = 'initial'
        current_tail = ''
        while previous_tail != current_tail:
            previous_tail = current_tail
            schools = self.driver.find_element_by_id('schoolsbyname')
            self.driver.execute_script(
                'arguments[0].scrollTop = arguments[0].scrollHeight', schools)
            time.sleep(0.4)  # give the page time to append further rows
            current_tail = schools.text[-20:]
        return schools.find_elements_by_class_name('selectableContainer')

    @staticmethod
    def _fill_courses(record, container, prefix):
        """Store department/code/name of the courses in *container*.

        The first ``course`` element fills ``<prefix>_course_*``; any
        further ones are collected into lists and stored stringified under
        ``<prefix>_extra_*`` (empty string when there are none).
        """
        courses = container.find_elements_by_class_name('course')
        first_id = courses[0].find_element_by_class_name('courseId').text.split()
        record[prefix + "_course_department"] = first_id[0]
        record[prefix + "_course_code"] = first_id[1]
        record[prefix + "_course_name"] = (
            container.find_element_by_class_name('courseTitle').text)

        departments, codes, names = [], [], []
        for extra in courses[1:]:
            extra_id = extra.find_element_by_class_name('courseId').text.split()
            departments.append(extra_id[0])
            codes.append(extra_id[1])
            names.append(extra.find_element_by_class_name('courseTitle').text)

        has_extras = len(courses) > 1
        record[prefix + "_extra_department"] = str(departments) if has_extras else ""
        record[prefix + "_extra_code"] = str(codes) if has_extras else ""
        record[prefix + "_extra_name"] = str(names) if has_extras else ""

    @staticmethod
    def _credit_value(container):
        """Return the credit text of *container*, or None if not labelled.

        Looks at the first ``courseCreditsLine`` inside *container*; when
        its first ``span`` reads "Credits:" the text of the second span is
        returned.  Raises if the line or its spans are missing.
        """
        line = container.find_element_by_class_name('courseCreditsLine')
        spans = line.find_elements_by_tag_name('span')
        if spans[0].text == "Credits:":
            return spans[1].text
        return None

    def _fill_credits(self, record, panel, prefix):
        """Store the credit hours shown in one detail *panel* into *record*.

        Missing credit information is tolerated: the affected fields simply
        keep the values they already have in *record*.
        """
        # Function-scope import: selenium is already a dependency of this
        # file, and this keeps the exception handling narrow.
        from selenium.common.exceptions import WebDriverException

        details = panel.find_elements_by_class_name('courseDetailContainer')
        try:
            primary = self._credit_value(panel)
        except (WebDriverException, IndexError):
            # No usable credits line for the primary course: leave both the
            # primary and the extra credit fields untouched.
            return
        if primary is not None:
            record[prefix + "_course_credit_hours"] = primary

        if len(details) > 1:
            extras = []
            for detail in details[1:]:
                try:
                    value = self._credit_value(detail)
                except (WebDriverException, IndexError):
                    continue  # this extra course shows no credits
                if value is not None:
                    extras.append(value)
            record[prefix + "_extra_credit_hours"] = str(extras)
        else:
            record[prefix + "_extra_credit_hours"] = ""

    def updateJson(self, numthreads):
        """Scrape this worker's share of schools and append to dixie.json.

        For every school at a position with
        ``position % numthreads == self.number`` whose address names a U.S.
        state, each course equivalency is opened and written to
        ``dixie.json`` as an indented JSON object followed by a comma.

        Args:
            numthreads: Total number of workers sharing the school list.
        """
        schools = self.getSchools()
        with progressbar.ProgressBar(max_value=len(schools)) as bar:
            for position, school in enumerate(schools):
                bar.update(position)
                if position % numthreads != self.number:
                    continue

                # Only U.S. schools are wanted.  The state is expected as
                # the second ", "-separated part of the address; addresses
                # without one are skipped instead of raising IndexError.
                address_parts = school.find_element_by_class_name('address').text.split(', ')
                if len(address_parts) < 2 or address_parts[1] not in states:
                    continue
                sender_name = school.get_attribute("data-sendername")

                # First click: open the school's equivalency list.
                school.click()
                time.sleep(2)

                equiv_list = self.driver.find_element_by_id('equivcontent')
                equivalencies = equiv_list.find_elements_by_class_name('selectableContainer')
                for equivalency in equivalencies:
                    # Start from a fresh copy of the template so that fields
                    # missing on this row stay empty instead of inheriting
                    # the previous row's values.
                    record = dict(jsonClass)
                    record["from_school"] = sender_name

                    self._fill_courses(
                        record,
                        equivalency.find_element_by_class_name('equivSourceContainer'),
                        "from")
                    self._fill_courses(
                        record,
                        equivalency.find_element_by_class_name('equivTargetContainer'),
                        "to")

                    # Second click: open the detail view with credit hours.
                    equivalency.click()
                    time.sleep(2)

                    # First panel is the sending side, second the receiving.
                    self._fill_credits(
                        record,
                        self.driver.find_elements_by_class_name('courseListContainer')[0],
                        "from")
                    self._fill_credits(
                        record,
                        self.driver.find_elements_by_class_name('courseListContainer')[1],
                        "to")

                    # One write per record keeps object and separator
                    # together when several workers append to the same file.
                    with open("dixie.json", "a") as out:
                        out.write(json.dumps(record, indent=4) + ",")

                    # Leave the detail view again.
                    self.driver.find_element_by_id('detail').find_element_by_class_name('ui-corner-top').click()
                    time.sleep(2)

                # Back to the school list for the next school.
                self.driver.find_element_by_id('equivs').find_element_by_class_name('ui-state-default').click()
                time.sleep(2)

    def tearDown(self):
        """Shut the browser down.

        ``quit()`` is used rather than ``close()`` so that the chromedriver
        process is ended as well, not just the current window.
        """
        self.driver.quit()


def run_worker(index, total):
    """Scrape one worker's share of the schools in the current process.

    The ``searchPage`` (and therefore its webdriver) is created here, inside
    the child process, so that only two integers have to be passed to
    ``multiprocessing.Process``.  Under the "spawn" start method used on
    Windows the process arguments are pickled, which does not work for an
    object that owns a live browser session.

    Args:
        index: 0-based index of this worker.
        total: Total number of workers.
    """
    page = searchPage(index)
    try:
        page.updateJson(total)
    finally:
        # Always release the browser, even if scraping fails part-way.
        page.tearDown()


def finish_json_array(path):
    """Terminate the JSON array in *path* with "]".

    Records are appended as ``{...},`` so the file ends in a comma once the
    workers are done; that trailing comma is removed first, because ``,]``
    is not valid JSON.
    """
    with open(path, "rb+") as out:
        out.seek(0, 2)  # 2 == SEEK_END
        size = out.tell()
        if size > 1:
            out.seek(size - 1)
            if out.read(1) == b",":
                out.seek(size - 1)
                out.truncate()
        out.seek(0, 2)
        out.write(b"]")


if __name__ == "__main__":
    worker_count = 7
    output_path = "dixie.json"

    # Start a fresh output file containing only the opening bracket.
    with open(output_path, "w") as out:
        out.write("[")

    workers = [
        multiprocessing.Process(target=run_worker, args=(index, worker_count))
        for index in range(worker_count)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    finish_json_array(output_path)
          

Tags: to, text, name, from, self, by, element, find

热门问题