管理我的类（class）时遇到了困难

from lxml import html
import requests


class app_crawler:
    """Fetch a page and look up `a.name` hrefs inside `div.lockup-info`."""

    starturl = "https://itunes.apple.com/us/app/candy-crush-saga/id553834731?mt=8"

    def crawler(self):
        """Call get_app() on the start URL; the value it returns is not used."""
        self.get_app(self.starturl)

    def get_app(self, link):
        """Return the first matching href on the page at *link*.

        Returns None implicitly when the XPath query matches nothing.
        """
        page = requests.get(link)
        tree = html.fromstring(page.text)
        links = tree.xpath('//div[@class="lockup-info"]//*/a[@class="name"]/@href')
        for link in links:
            return link  # I wish to make this link penetrate through the App class but can't get any idea


class App(app_crawler):
    """Print name, developer and price for every URL held in `self.links`."""

    def __init__(self, link):
        # Only the URL passed in is stored; nothing found by get_app() ends up here.
        self.links = [link]

    def process_links(self):
        """Run get_item() on each stored link, in order."""
        for link in self.links:
            self.get_item(link)

    def get_item(self, url):
        """Download *url* and print the first name, developer and price nodes.

        Raises IndexError when any of the three XPath queries matches nothing.
        """
        page = requests.get(url)
        tree = html.fromstring(page.text)
        name = tree.xpath('//h1[@itemprop="name"]/text()')[0]
        developer = tree.xpath('//div[@class="left"]/h2/text()')[0]
        price = tree.xpath('//div[@itemprop="price"]/text()')[0]
        print(name, developer, price)


if __name__ == '__main__':
    parse = App(app_crawler.starturl)
    parse.crawler()
    parse.process_links()

2条回答

网友

1楼 · 编辑于 2024-05-20 00:38:00

您的代码有几个问题：

App继承自app_crawler，但是您提供了一个app_crawler实例给App.__init__。
App.__init__调用app_crawler.__init__，而不是super().__init__()。
不仅app_crawler.get_app实际上没有返回任何东西，它还创建了一个全新的App对象。

这会导致代码将app_crawler对象传递给requests.get，而不是url字符串。

代码中的封装太多了。

请考虑以下代码，它比您那段无法运行的代码更短、更清晰，也不需要多余地传递对象：

from lxml import html
import requests

class App:
    """Collect links from a start page and print details of each linked page.

    Typical use: create an instance with the start URL, call get_links() to
    fill `links`, then process_links() to print one line per link.
    """

    # Seconds to wait for a response; without it requests.get() can block forever.
    timeout = 10
    # Printed in place of a field whose XPath query matched nothing.
    missing = "N/A"

    def __init__(self, starturl):
        """Remember *starturl*; `links` stays empty until get_links() runs."""
        self.starturl = starturl
        self.links = []

    def get_links(self):
        """Download the start page and store every matching href in `links`."""
        page = requests.get(self.starturl, timeout=self.timeout)
        tree = html.fromstring(page.text)
        self.links = tree.xpath('//div[@class="lockup-info"]//*/a[@class="name"]/@href')

    def process_links(self):
        """Run get_docs() on each collected link, in order."""
        for link in self.links:
            self.get_docs(link)

    def get_docs(self, url):
        """Download *url* and print its name, developer and price.

        A field that cannot be found is printed as `missing` instead of
        aborting the whole run with an IndexError.
        """
        page = requests.get(url, timeout=self.timeout)
        tree = html.fromstring(page.text)
        name = self._first_text(tree, '//h1[@itemprop="name"]/text()')
        developer = self._first_text(tree, '//div[@class="left"]/h2/text()')
        price = self._first_text(tree, '//div[@itemprop="price"]/text()')
        print(name, developer, price)

    @classmethod
    def _first_text(cls, tree, query):
        """Return the first result of *query* on *tree*, or `missing` if none."""
        matches = tree.xpath(query)
        return matches[0] if matches else cls.missing

if __name__ == '__main__':
    # Script entry point: gather the links on the start page, then print
    # one line for each linked page.
    start_page = "https://itunes.apple.com/us/app/candy-crush-saga/id553834731?mt=8"
    scraper = App(start_page)
    scraper.get_links()
    scraper.process_links()

输出

Cookie Jam By Jam City, Inc. Free
Zombie Tsunami By Mobigame Free
Flow Free By Big Duck Games LLC Free
Bejeweled Blitz By PopCap Free
Juice Jam By Jam City, Inc. Free
Candy Crush Soda Saga By King Free
Bubble Witch 3 Saga By King Free
Candy Crush Jelly Saga By King Free
Farm Heroes Saga By King Free
Pet Rescue Saga By King Free

网友

2楼 · 编辑于 2024-05-20 00:38:00

我的代码应该写成这样：

from lxml import html
import requests

class app_crawler:
    """Grow a bounded list of page links, starting from `starturl`.

    `links` begins with the start URL; crawler() visits every entry and
    get_app() appends the hrefs found on each visited page until
    `max_links` entries are held.
    """

    starturl = "https://itunes.apple.com/us/app/candy-crush-saga/id553834731?mt=8"

    # Upper bound on len(self.links), start URL included.
    max_links = 5
    # Seconds to wait for a response; without it requests.get() can block forever.
    timeout = 10

    def __init__(self):
        """Seed `links` with the start URL."""
        self.links = [self.starturl]

    def crawler(self):
        """Visit every link, including those discovered along the way."""
        # Walk by index: get_app() appends to self.links while we advance, and
        # an explicit index makes that growth intentional rather than implicit.
        position = 0
        while position < len(self.links):
            self.get_app(self.links[position])
            position += 1

    def get_app(self, link):
        """Append hrefs found on the page at *link* until the cap is reached.

        Does nothing, and sends no request, when `links` is already full.
        Hrefs that are already queued are skipped.
        """
        if len(self.links) >= self.max_links:
            return
        page = requests.get(link, timeout=self.timeout)
        tree = html.fromstring(page.text)
        found = tree.xpath('//div[@class="lockup-info"]//*/a[@class="name"]/@href')
        for href in found:
            if len(self.links) >= self.max_links:
                break
            if href not in self.links:
                self.links.append(href)


class App(app_crawler):
    """Print name, developer and price for every link held by the crawler."""

    # Seconds to wait for a response; without it requests.get() can block forever.
    timeout = 10
    # Printed in place of a field whose XPath query matched nothing.
    missing = "N/A"

    def __init__(self):
        """Initialise the inherited crawler state."""
        super().__init__()

    def process_links(self):
        """Run get_item() on each link in `self.links`, in order."""
        for link in self.links:
            self.get_item(link)

    def get_item(self, url):
        """Download *url* and print its name, developer and price.

        A field that cannot be found is printed as `missing` instead of
        aborting the whole run with an IndexError.
        """
        page = requests.get(url, timeout=self.timeout)
        tree = html.fromstring(page.text)
        name = self._first_text(tree, '//h1[@itemprop="name"]/text()')
        developer = self._first_text(tree, '//div[@class="left"]/h2/text()')
        price = self._first_text(tree, '//div[@itemprop="price"]/text()')
        print(name, developer, price)

    @classmethod
    def _first_text(cls, tree, query):
        """Return the first result of *query* on *tree*, or `missing` if none."""
        matches = tree.xpath(query)
        return matches[0] if matches else cls.missing

if __name__ == '__main__':
    # Script entry point: build the scraper, let it collect links from the
    # start page, then print one line per collected link.
    app = App()
    app.crawler()
    app.process_links()

相关问题更多 >

编程相关推荐

热门问题

热门文章