无法从Python Scrapy方法写入全局变量

class QuotesSpider(scrapy.Spider):
    """Spider that extracts each page's <title> text and records it.

    The most recently parsed title is written both to the module-level
    ``currentTitle`` global and to the ``currentClassTitle`` class attribute.
    """

    name = "quotes"
    # Last title seen by parse(); keeps this placeholder until a page is parsed.
    currentClassTitle = 'unchanged class title'

    def start_requests(self):
        """Yield one request per configured URL, each handled by parse()."""
        urls = [
            # here goes my list of urls to scrape
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Extract the page title, print it, and store it in shared state."""
        # Select the text node directly instead of slicing the repr of the
        # selector result: fixed-offset slicing breaks when <title> carries
        # attributes or the text contains characters that repr() escapes.
        # A page without a title yields '' (same as the slicing did).
        title = response.xpath('/html/head/title/text()').get(default='')
        print(title)

        # Publish the title to the module-level global and, via update(),
        # to the class attribute.
        global currentTitle
        currentTitle = title
        QuotesSpider.update(title)

    @staticmethod
    def update(value):
        """Store *value* in the ``currentClassTitle`` class attribute.

        Declared static so it can be called on the class or on an instance.
        """
        QuotesSpider.currentClassTitle = value

def crawl():
    """Run QuotesSpider in a CrawlerProcess, then pause for two seconds."""
    settings = {
        'USER_AGENT': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
    }
    crawler_process = CrawlerProcess(settings)
    crawler_process.crawl(QuotesSpider)
    # start() blocks here until the crawling is finished.
    crawler_process.start()
    time.sleep(2)


def run_spider(spider):
    """Crawl *spider* in a separate Process and wait for it to finish.

    The worker reports its outcome through a queue: ``None`` when the
    reactor ran to completion, otherwise the exception it caught.

    Args:
        spider: the spider handed to ``CrawlerRunner.crawl``.

    Raises:
        Exception: whatever exception the worker put on the queue.
    """
    # NOTE(review): Process and Queue are presumably multiprocessing's. If so,
    # anything the spider assigns to globals or class attributes during the
    # crawl lives in the worker and is not visible to the caller -- confirm.
    def _crawl_in_worker(outcome_queue):
        try:
            crawl_done = crawler.CrawlerRunner().crawl(spider)
            # Stop the reactor whether the crawl succeeded or failed.
            crawl_done.addBoth(lambda _: reactor.stop())
            reactor.run()
            outcome_queue.put(None)
        except Exception as exc:
            outcome_queue.put(exc)

    outcome_queue = Queue()
    worker = Process(target=_crawl_in_worker, args=(outcome_queue,))
    worker.start()
    outcome = outcome_queue.get()
    worker.join()
    if outcome is None:
        return
    raise outcome

def on_snapshot(col_snapshot, changes, read_time):
    """Flag each snapshot document as crawled, run the spider, store a title.

    Args:
        col_snapshot: iterable of documents; each must expose ``id``.
        changes: not used by this function.
        read_time: not used by this function.
    """
    for doc in col_snapshot:
        print(u'{}'.format(doc.id))
        thisDoc_ref = db.collection(u'urls').document(doc.id)
        thisDoc_ref.update({u'capital': 'sample capital name'})
        thisDoc_ref.update({u'crawled': True})
        run_spider(QuotesSpider)
        # Just to ensure the crawler gets enough time to process, sleep after
        # triggering the spider. Not the best practice, but good enough for
        # testing functionality for the moment.
        sleep(5)
        print(QuotesSpider.currentClassTitle)  # I get 'unchanged class title'
        print(currentTitle)  # I get 'unchanged global title'
        # Intended to store the document's title in Firestore using the value
        # of currentTitle; this does not work because the title set by the
        # spider cannot be retrieved here.
        thisDoc_ref.update({u'title': currentTitle})

1条回答

网友

1楼 · 发布于 2024-05-03 06:52:32

如果想在类的方法中访问属于某个实例的变量，需要使用：

self.currentClassTitle

而使用 QuotesSpider.currentClassTitle 访问到的是类变量的默认值（即类尚未实例化时的值）。

更多信息请点击此处： https://www.geeksforgeeks.org/self-in-python-class/

相关问题更多 >

编程相关推荐

热门问题

热门文章