Posted in Python onApril 07, 2015
本文实例讲述了Python实现从脚本里运行scrapy的方法。分享给大家供大家参考。具体如下:
#!/usr/bin/python import os os.environ.setdefault('SCRAPY_SETTINGS_MODULE', 'project.settings') #Must be at the top before other imports from scrapy import log, signals, project from scrapy.xlib.pydispatch import dispatcher from scrapy.conf import settings from scrapy.crawler import CrawlerProcess from multiprocessing import Process, Queue class CrawlerScript(): def __init__(self): self.crawler = CrawlerProcess(settings) if not hasattr(project, 'crawler'): self.crawler.install() self.crawler.configure() self.items = [] dispatcher.connect(self._item_passed, signals.item_passed) def _item_passed(self, item): self.items.append(item) def _crawl(self, queue, spider_name): spider = self.crawler.spiders.create(spider_name) if spider: self.crawler.queue.append_spider(spider) self.crawler.start() self.crawler.stop() queue.put(self.items) def crawl(self, spider): queue = Queue() p = Process(target=self._crawl, args=(queue, spider,)) p.start() p.join() return queue.get(True) # Usage if __name__ == "__main__": log.start() """ This example runs spider1 and then spider2 three times. """ items = list() crawler = CrawlerScript() items.append(crawler.crawl('spider1')) for i in range(3): items.append(crawler.crawl('spider2')) print items
希望本文所述对大家的Python程序设计有所帮助。
Python实现从脚本里运行scrapy的方法
- Author -
pythoner声明:登载此文出于传递更多信息之目的,并不意味着赞同其观点或证实其描述。
Reply on: @reply_date@
@reply_contents@