forked from irusist/quant
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathupdate_index_csindex.py
189 lines (156 loc) · 9.55 KB
/
update_index_csindex.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# -*- coding: utf-8 -*-
import pandas as pd
import pymysql
import urllib2
conn = pymysql.connect(host='localhost', port=3306, user='quant', passwd='123456', db='quant', charset='utf8')
# 中证指数
# 去掉债券, 多资产, 基金
# sql = "select id, index_code from index_basic_info where index_series = 1 and assert_type not in (2, 3, 5) order by index_code"
# TODO 新三板指数 899001 899002 没有跟踪标的, 暂时不做处理
# TODO 中华交易系列指数, 8个, 没有跟踪标的, 暂不处理
# TODO AMAC系列指数 没有跟踪标的, 暂不处理
# sql = "select id, index_code from index_basic_info where index_series = 5 order by index_code "
# 上证指数
sql = "select id, index_code from index_basic_info where index_series in (1, 2) and assert_type not in (2, 3, 5) order by index_code "
df = pd.read_sql(sql, conn, index_col="id")
index_code_list = list(df['index_code'])
print(len(index_code_list))
# TODO 固定债券
# index_code_list = ['000832', '000833', '000845','000923','930780','930786','930787','930788','930823',
# '930824','930825','930826','930827','930828','930829','930830','930831','930832','930833','930834',
# '930849','930865','930866','930870','930871','930872','930873','930874','930909','930916','930951',
# '930954','930996','931010','H11001', 'H11002','H11003','H11004','H11005','H11006','H11007','H11008',
# 'H11009','H11010','H11014','H11015','H11016','H11017','H11019','H11070','H11071','H11072','H11073',
# 'H11074','H11075','H11076','H11078','H11079','H11087','H11088','H11089','H11090','H11091','H11092',
# 'H11093','H11094','H11096','H11097','H11099','H11185','H30396', 'H30521']
# TODO 去掉不一致的,暂时不能获取最新的成分股进出记录,先去掉
code_not_equals_wind_csindex = ['000891', '930667', '930764', '930794', '930798', '930802', '930899', '930912', '930914', '930917',
'930919', '930921', '930930', '930932', '930945', '930957', '930959', '930960', '930961', '930962', '930963', '930964',
'930965', '930966', '930967', '930968', '930969', 'H11100', 'H11102', 'H11104',
'H11105', 'H11106', 'H11108', 'H11113', 'H11123', 'H11132', 'H11134', 'H11136',
'H11140', 'H11152', 'H11156', 'H11160', 'H11162', 'H11167', 'H11181', 'H11183',
'H30103', 'H30107', 'H30131', 'H30133', 'H30135', 'H30232', 'H30233', 'H30236',
'H30238', 'H30251', 'H30252', 'H30255', 'H30257', 'H30369', 'H30374', 'H30375',
'H30376', 'H30377', 'H30378', 'H30379', 'H30380', 'H30381', 'H30382', 'H30383',
'H30384', 'H30418', 'H30422', 'H30457', 'H30464', 'H30484', 'H30533', 'H30547',
'H30551', 'H30564']
# joinquant有数据,由于没有跟踪标的,以下内容暂时不补齐,等有时间再补齐
# 000902 中证流通
# 000926 中证央企
# 000938 中证名企
# 000953 中证地企
# 000955 中证国企
code_not_equals_wind_csindex += ['000902', '000926', '000938', '000953', '000955']
# 950090 50AH优选 wind不正确
# H50069 上证港股通指数, wind不正确
code_not_equals_wind_csindex += ['950090', 'H50069']
# 000052 50基本面, 没有历史记录, 没有标的, 暂时去掉
# 000053 180基本面, 没有历史记录, 没有标的, 暂时去掉
# TODO 399983 地产等权, 没有历史记录, 有标的!!!!! 暂时去掉
code_not_equals_wind_csindex += ['000052', '000053', '399983']
# TODO 000852 中证1000, 有标的, 官网remove了好多,确认?
# TODO 930796 全球中国互联网 没有标的, 官网有变动, wind没有
# TODO 930931 港股通50 没有标的, 官网有变动, wind没变动,少 1997
# TODO 930992 港股深红利地波 没有标的, wind, 官网不一致
# TODO 930998 中证港股通粤港澳大湾区发展主题指数 没有标的, 官网,wind不一致, wind无变动
# TODO 930999 中证沪港深粤港澳大湾区发展主题指数 没有标的, 官网,wind不一致, wind无变动
# TODO H11110 中证锐联香港基本没50指数 没有标的, 官网,wind不一致, wind无变动, 官网增加 1997
# TODO H11164 中证香港300港币 没有标的, 官网,wind不一致, wind无变动
# TODO H11165 中证香港200港币 没有标的, 官网,wind不一致, wind无变动
# TODO H11172 中证香港300价值指数(港币) 没有标的, 官网,wind不一致, wind无变动
# TODO H11174 中证香港300相对价值指数(港币) 没有标的, 官网,wind不一致, wind无变动
# TODO H30031 wind找不到!! 有标的
# TODO H30086 wind找不到!! 有标的
# TODO H30104 中证香港300非周期指数(港币) 没有标的, 官网,wind不一致, wind无变动
# TODO H30105 中证香港300地产指数(港币) 没有标的, 官网,wind不一致, wind无变动
# TODO H30234 中证香港300动态指数(港币) 没有标的, 官网,wind不一致, wind无变动
# TODO H30332 中证香港50指数(HKD) 没有标的, 官网,wind不一致, wind无变动
# TODO H30419 中证香港200动量指数 没有标的, 官网,wind不一致, wind无变动
# TODO H30455 中证沪深港400指数 没有标的, 官网,wind不一致, wind无变动
# TODO H30456 中证沪深港700指数 没有标的, 官网,wind不一致, wind无变动
# TODO H30469 中证沪深港700原材料指数 没有标的, 官网,wind不一致, wind无变动
# TODO H30479 中证沪深港1100原材料指数 没有标的, 官网,wind不一致, wind无变动
# TODO H30548 中证沪深港1100非周期指数 没有标的, 官网,wind不一致, wind无变动
# TODO H30549 中证沪深港1100地产指数 没有标的, 官网,wind不一致, wind无变动
index_code_list = list(set(index_code_list).difference(set(code_not_equals_wind_csindex)))
index_code_list.sort()
print("found %s index from db" % str(len(index_code_list)))
def convert_code(code, exchange):
code_len = len(str(code))
if exchange == 'HKG' and code_len < 4:
zero_len = 4 - code_len
return '0' * zero_len + code
return code
def remove_suffix(code):
index = code.find('.')
if index > 0:
return code[0:index]
return code
def get_stock_code_from_db(code):
# sql = "select id, code from stock_hs where biz_date='2017-09-29' and trade_status not in ('终止上市', '暂停上市') and code not like '90%' and code not like '20%' order by code"
# get index constituent from database
sql = "select id, stock_code from index_constituent_current where index_code = '" + code + "'"
df = pd.read_sql(sql, conn, index_col="id")
stock_code_list_db = list(df['stock_code'])
stock_code_list_db = map(remove_suffix, stock_code_list_db)
stock_code_list_db = set(stock_code_list_db)
return stock_code_list_db
def append_suffix(code, exchange):
if exchange == 'SHH':
return code + '.SH'
elif exchange == 'SHZ':
return code + '.SZ'
elif exchange == 'CPT':
return code + '.IB'
def update(index_code):
# print("index_code: " + index_code)
# get stock code from db
stock_code_list_db = get_stock_code_from_db(index_code)
# get index constituent from csindex website
try:
df = pd.read_excel('http://www.csindex.com.cn/uploads/file/autofile/cons/' + index_code + 'cons.xls', converters={4: str})
df = df.set_index(df.columns[4])
except urllib2.HTTPError as error:
db_len = len(stock_code_list_db)
if db_len == 0:
# maybe 404 error
print('index %s occurs exception: %s, db size is 0' % (index_code, str(error)))
else:
print('index %s occurs exception: %s, db size is %s' % (index_code, str(error), str(db_len)))
return
except StandardError as error:
print('index %s occurs exception: %s' % (index_code, str(error)))
return
exchange_list = df.iloc[:, 6]
stock_name_list = df.iloc[:, 4]
stock_code_list_new = df.index
stock_code_list_new = map(convert_code, stock_code_list_new, exchange_list)
stock_code_list_new = set(stock_code_list_new)
# to be added
stock_code_add = stock_code_list_new.difference(stock_code_list_db)
# to be removed
stock_code_remove = stock_code_list_db.difference(stock_code_list_new)
if len(stock_code_remove) > 0:
print("index %s to be removed %s" % (index_code, stock_code_remove))
if len(stock_code_add) > 0:
print("index %s to be added %s" % (index_code, stock_code_add))
# for code in stock_code_add:
# cursor = conn.cursor()
# sql = "insert into index_constituent_current (index_code, stock_code, stock_name) values (%s, %s, %s) "
# # 入库
# cursor.execute(sql, [index_code, append_suffix(code, exchange_list[code]), stock_name_list[code]])
# # 提交
# conn.commit()
# sql = "insert into index_constituent_history (index_code, biz_date, stock_code, stock_name, status) values (%s, %s, %s, %s, %s) "
# # 入库
# cursor.execute(sql, [index_code, '2017-10-30', append_suffix(code, exchange_list[code]), stock_name_list[code], '1'])
# # 提交
# conn.commit()
# cursor.close()
# start = time.time()
# pool = threadpool.ThreadPool(10)
# requests = threadpool.makeRequests(update, index_code_list)
# [pool.putRequest(req) for req in requests]
# pool.wait()
for index_code in index_code_list:
update(index_code)