同步重庆时时彩的数据到本地数据库

清华大佬耗费三个月吐血整理的几百G的资源,免费分享!....>>>

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/usr/bin/env python
#-*-coding:utf-8-*-
"""
    Sync Shishicai (SSC lottery) results from cp.360.cn into the `haoma` table.
"""
import datetime
from datetime import timedelta
import time
import torndb
import sys
import requests
import re
from mylogger import get_logger

# Python 2 only: `reload` is a builtin and `sys.setdefaultencoding` exists only
# there. Forces implicit str/unicode conversions to use UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')

# MySQL connection settings passed to torndb.Connection below.
# NOTE(review): credentials are hard-coded placeholders; presumably replaced
# per deployment -- confirm before running.
DBHOST = "localhost:3306"
SCHEMA = "CAIPIAO"
DBUSER = "user"
DBPASSWD = "passwd"

# Module-level connection and logger; both are created at import time.
db = torndb.Connection(host=DBHOST, database=SCHEMA, user=DBUSER, password=DBPASSWD)
cplog = get_logger("caipiao")
 
class Data_Sync(object):
    """Keep the `haoma` table in sync with the results published on cp.360.cn.

    The synchroniser reads the newest row stored in the database, downloads
    every day from that point up to "today" (UTC+8) and inserts the periods
    that are not stored yet.  It relies on the module-level ``db`` connection
    and ``cplog`` logger.
    """

    # Captures (period, opening <td> tag of the number cell, lottery number).
    ssc_re = re.compile(r'<td class=\'gray\'>(.*?)</td>(<td class=\'red big\'>|<td style=\'width:65px\'>)(.*?)</td>.*?<tr>')
    # A usable lottery number is exactly five ASCII digits.
    _number_re = re.compile(r'[0-9]{5}\Z')

    PERIODS_PER_DAY = 120   # a day counts as complete once this period is stored
    MAX_RETRIES = 3         # failed downloads tolerated per day before skipping it
    RETRY_DELAY_SECS = 2    # pause between two download attempts for the same day

    def __init__(self, start_date="20150101", sleep_secs = 10, run_ever=True):
        """Configure the synchroniser.

        start_date -- first day ("YYYYMMDD") to fetch when the table is empty;
                      values not greater than "20130101" fall back to "20150101".
        sleep_secs -- seconds to wait once no new data is available.
        run_ever   -- True: poll forever; False: perform one sync pass and return.
        """
        self.start_date = start_date if start_date > "20130101" else "20150101"
        self.run_ever = run_ever
        self.base_url = "http://chart.cp.360.cn/kaijiang/kaijiang?lotId=255401&spanType=2&span="
        self.latest_date = ''
        self.latest_period = ''
        self.need_sleep = False
        self.sleep_secs = sleep_secs

    def run(self):
        """Run the sync loop (forever, or a single pass when run_ever is False)."""
        if not self.run_ever:
            self.sync_data_from_360()
            return

        while True:
            if self.need_sleep:
                time.sleep(self.sleep_secs)
                self.need_sleep = False
            else:
                self.sync_data_from_360()

    def _day_url(self, day):
        """Return the download URL covering exactly the given day."""
        dl_date = day.strftime("%Y-%m-%d")
        return self.base_url + dl_date + "_" + dl_date

    def sync_data_from_360(self):
        """Bring the database up to date, starting from its newest stored row.

        Update rules:
        1. While the stored date is more than a day behind the current time,
           download that day, insert the periods that are missing and move on
           to the next day.
        2. Finally download the current day and insert every period that comes
           after the newest stored one.
        """
        self.get_latest_haoma_from_mysql()

        if not self.latest_date:
            cplog.info("db has no data, so start at {0}".format(self.start_date))
            self.latest_date = self.start_date
            self.latest_period = "000"

        cplog.info("in db, item_date={0}, period={1}".format(self.latest_date, self.latest_period))

        # Current wall-clock time shifted to UTC+8.
        cur_date = datetime.datetime.utcnow() + timedelta(hours=8)
        latest_date = datetime.datetime.strptime(self.latest_date, "%Y%m%d")

        dl_times = 0
        while (cur_date - latest_date).days > 0:
            if int(self.latest_period) < self.PERIODS_PER_DAY:
                data = self.download_with_requests(self._day_url(latest_date))
                if not data and dl_times < self.MAX_RETRIES:
                    dl_times += 1
                    time.sleep(self.RETRY_DELAY_SECS)
                    continue
                if data:
                    self.latest_date = latest_date.strftime('%Y%m%d')
                    self.insert_into_mysql(self.latest_date, data[int(self.latest_period):])

            # The day is done (already complete, just stored, or given up on):
            # the next day starts again at period 0 with a fresh retry budget.
            latest_date += timedelta(1)
            self.latest_period = "000"
            dl_times = 0

        # Update the current day.
        data = self.download_with_requests(self._day_url(latest_date))
        if data:
            self.latest_date = latest_date.strftime('%Y%m%d')
            self.insert_into_mysql(self.latest_date, data[int(self.latest_period):])
        else:
            # Back off instead of hammering the server in a tight loop.
            self.need_sleep = True

    @staticmethod
    def _build_rows(item_date, datas):
        """Turn scraped (period, tag, number) tuples into `haoma` rows.

        Entries whose number is not exactly five digits (for example periods
        that have not been drawn yet) are skipped.
        """
        rows = []
        for data in datas:
            period = data[0]
            lottery_number = data[2]
            if not Data_Sync._number_re.match(lottery_number):
                continue
            a, b, c, d, e = lottery_number
            rows.append((item_date, period, item_date + period, lottery_number, a, b, c, d, e))
        return rows

    def insert_into_mysql(self, item_date, datas):
        """Insert the valid entries of `datas` for `item_date` into `haoma`.

        When nothing is insertable, `need_sleep` is set so that `run` pauses
        before the next pass.  A database error is logged, echoed to stdout
        and terminates the process with exit status 1.
        """
        insert_datas = self._build_rows(item_date, datas)

        if not insert_datas:
            cplog.info("no more new data to sync, wait for {0} seconds".format(self.sleep_secs))
            self.need_sleep = True
            return

        cplog.info("current insert into haoma:{0}, {1}".format(item_date, datas))
        sql = "insert into haoma(item_date, period, date_period, lottery_number, a, b, c, d, e) values(%s, %s, %s, %s, %s,  %s, %s, %s, %s)"
        try:
            db.executemany(sql, insert_datas)
        except Exception as e:
            cplog.info("insert into haoma raise Exception: {0}".format(e))
            sys.stdout.write("{0}\n".format(e))
            sys.exit(1)

    def get_latest_haoma_from_mysql(self):
        """Load date and period of the newest stored row (highest date_period).

        Leaves `latest_date` / `latest_period` untouched when the query
        returns no row.
        """
        sql = "select * from haoma order by date_period desc limit 1"
        ret = db.get(sql)
        if ret:
            self.latest_date = ret.item_date
            self.latest_period = ret.period

    def download_with_requests(self, url):
        """Download `url` and return the list of regex matches found in it.

        Returns an empty list on a non-200 response or when the request
        raises an Exception; the failure is logged in both cases.
        """
        cplog.info("download: {0}".format(url))
        data = []
        try:
            r = requests.get(url, timeout=10)
            if r.status_code == 200:
                data = self.ssc_re.findall(r.content)
            else:
                cplog.info("download err, http status_code:{0}".format(r.status_code))
        except Exception as e:
            cplog.info("call requests raise Exception: {0}".format(e))
        # Returning here rather than from a `finally` clause lets
        # KeyboardInterrupt / SystemExit propagate instead of being swallowed.
        return data
 
def run():
    """Script entry point: sync from 2014-01-01 onwards, polling every 30 seconds."""
    Data_Sync(start_date="20140101", sleep_secs=30).run()
 
# Start syncing only when executed as a script, not when imported.
if __name__ == "__main__":
    run()