"""Scrapy item pipelines: stamp crawl metadata, then export items to
JSON / CSV files, MongoDB, and Redis."""
import json
from datetime import datetime, timezone

import pymongo
import redis
from scrapy.exporters import CsvItemExporter, JsonItemExporter

from .settings import MONGO_HOST, MONGO_PORT, REDIS_HOST, REDIS_PORT


# Metadata pipeline (should run first in ITEM_PIPELINES).
class AqiDataPipeline(object):
    """Enrich every item with crawl metadata."""

    def process_item(self, item, spider):
        # Timezone-aware UTC timestamp; datetime.utcnow() is naive and
        # deprecated since Python 3.12.
        item['crawl_time'] = datetime.now(timezone.utc)
        # Record which spider produced the item.
        item['spider'] = spider.name
        return item


# JSON export pipeline.
class AqiJsonPipeline(object):
    """Write all items to aqi.json via Scrapy's JsonItemExporter."""

    def open_spider(self, spider):
        # Scrapy exporters require a binary-mode file object.
        self.file = open("aqi.json", 'wb')
        self.write = JsonItemExporter(self.file)
        self.write.start_exporting()

    def process_item(self, item, spider):
        self.write.export_item(item)
        return item

    def close_spider(self, spider):
        self.write.finish_exporting()
        self.file.close()


# CSV export pipeline.
# NOTE(review): "Vsc" looks like a typo for "Csv"; the class name is kept
# unchanged because settings.py references pipelines by dotted name.
class AqiVscPipeline(object):
    """Write all items to aqi.csv via Scrapy's CsvItemExporter."""

    def open_spider(self, spider):
        self.file = open("aqi.csv", 'wb')
        self.write = CsvItemExporter(self.file)
        self.write.start_exporting()

    def process_item(self, item, spider):
        self.write.export_item(item)
        return item

    def close_spider(self, spider):
        self.write.finish_exporting()
        self.file.close()


# MongoDB pipeline.
class AqiMongoPipeline(object):
    """Persist items into MongoDB (database 'Aqi', collection 'aqi')."""

    def open_spider(self, spider):
        self.client = pymongo.MongoClient(host=MONGO_HOST, port=MONGO_PORT)
        self.db = self.client['Aqi']
        self.collection = self.db['aqi']

    def process_item(self, item, spider):
        # Collection.insert() was deprecated in pymongo 3 and removed in
        # pymongo 4; insert_one() is the supported API.
        self.collection.insert_one(dict(item))
        return item

    def close_spider(self, spider):
        self.client.close()


# Redis pipeline.
class AqiRedisPipeline(object):
    """Push items onto the Redis list 'aqi'."""

    def open_spider(self, spider):
        self.client = redis.Redis(host=REDIS_HOST, port=REDIS_PORT)

    def process_item(self, item, spider):
        # redis-py 3+ rejects a raw dict argument (redis.exceptions.DataError),
        # so serialize to JSON first. default=str handles the non-JSON
        # datetime value that AqiDataPipeline adds under 'crawl_time'.
        self.client.lpush('aqi', json.dumps(dict(item), ensure_ascii=False, default=str))
        return item

    def close_spider(self, spider):
        # Release the underlying connection pool when the spider finishes.
        self.client.close()