-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathitems.py
More file actions
89 lines (70 loc) · 1.71 KB
/
items.py
File metadata and controls
89 lines (70 loc) · 1.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
#
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html
import scrapy
from abc import ABCMeta, abstractmethod
class PointSearchItem(scrapy.Item):
    """Placeholder Scrapy item; concrete fields live in the site-specific items.

    Declare fields on subclasses / site items like::

        name = scrapy.Field()
    """
    pass
"""
具体的 items,请看 sites 文件夹中的网站 items:
域名 + 内容 + Item
"""
class BaseItem(metaclass=ABCMeta):
    """Abstract interface every item in this project is expected to implement."""

    # Names of the fields exposed by the concrete item.
    field_list = []

    @abstractmethod
    def clean_data(self):
        """Clean up the raw extracted field values.

        :return: None
        """
        pass

    @staticmethod
    @abstractmethod
    def help_fields(fields: list):
        """Help generate the field-definition boilerplate code.

        :param fields: field names to generate definitions for
        :return: None
        """
        pass
class MysqlItem(BaseItem):
    """Interface for items that are persisted into a MySQL database."""

    # Target database table name.
    table_name = ""
    # Fields to refresh when an INSERT collides with an existing key.
    duplicate_key_update = []

    @abstractmethod
    def save_to_mysql(self):
        """Build the SQL statement that inserts this item into the database.

        :return: None
        """
        pass
class ElasticSearchItem(BaseItem):
    """Interface for items that are stored into ElasticSearch."""

    @abstractmethod
    def save_to_es(self):
        """Persist this item's data into ElasticSearch.

        :return: None
        """
        pass