仿聚宽模拟交易系统之三

博主: Simon Lin 创建于: Jun 15, 2020 更新于: Jun 15, 2020
分类: stock
标签: finance stock

仿聚宽模拟交易系统之三–数据源问题

前文说过,数据源的问题很多。这里来具体说说是怎么解的?

我获取数据采用了免费的baostock,用了一段时间后的感受,老实说,最大好处就是免费。数据完整性,数据质量等真的都是一般,只是要找几年前的数据,从网上直接爬取的方式效率太低,而且数据不好找。

别的废话不多说,先上代码,前面的是数据获取的代码,另一个是存取数据库的代码,数据库是用本地建的mysql,表结构先不另贴了。之后找个时间把全套搞到github上去。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
import baostock as bs
import functools
import pandas as pd
import os
from datetime import datetime,date,timedelta
import numpy as np
import decimal

try:
from data import database,file_store
except:
import database,file_store

def get_all_stock_basic(type=None,status=None):
data_list = None
data_list = file_store.get_stock_basic_from_file()
if data_list is None:
data_list = fetch_all_stock_basic()
if not type is None:
data_list=data_list.loc[data_list["type"] == type]
if not status is None:
data_list=data_list.loc[data_list["status"] == status]
data_list['code']=data_list['code'].map(lambda x: "{:0>6d}".format(x))
return data_list

def fetch_all_stock_basic():
bs.login()
rs = bs.query_stock_basic()
bs.logout()
data_list = rs.get_data()
data_list['code']=data_list['code'].map(lambda x:x[-6:])
data_list["type"]=data_list['type'].astype(int)
data_list["status"]=data_list['status'].astype(int)
#格式化
database.insert_stock_basic(data_list)
data_list = format_stock_basic(data_list)
file_store.save_stock_basic_to_file(data_list)
data_list = file_store.get_stock_basic_from_file()
return data_list

#code:可以是股票代码或者名称
def query_stock_basic(code=None,name=None):
data_list = None
if code is None and name is None:
data_list = get_all_stock_basic() #取全量
if data_list is None:
db = database.get_stock_basic(code=code_translate(baostock_code=code),code_name=name)
#格式化
data_list = format_stock_basic(db)
return data_list

def format_stock_basic(df):
#格式化
df['code']=df['code'].map(lambda x:x[-6:])
df["ipoDate"]=pd.to_datetime(df['ipoDate'])
df["outDate"]=pd.to_datetime(df['outDate'])
df["type"]=df['type'].astype(int)
df["status"]=df['status'].astype(int)
return df

def format_stock_price(df):
#格式化
#df['code']=df['code'].map(lambda x:x[-6:])
df["open"]=df['open'].astype(float)
df["close"]=df['close'].astype(float)
df["preclose"]=df['preclose'].astype(float)
return df

def fetch_all_stock_industry():
bs.login()
rs = bs.query_stock_industry()
bs.logout()
data_list = rs.get_data()
data_list['code']=data_list['code'].map(lambda x:x[-6:])
database.insert_stock_industry(data_list)
return data_list

def get_all_stock_industry():
return database.get_stock_industry()

def fetch_history_price(code,field='date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,isST',start_date=None,end_date=None,frequency='d',adjustflag='3'):
'''获取价格相关数据\n
code:股票代码,输入直接的股票代码,如招商银行600036\n
start_date=None,查询开始时间\n
end_date=None,查询终止时间\n
frequency:频率,默认,天(d)\n
'''
data_list=None
#以下会强制更新
bs.login()
rs = bs.query_history_k_data_plus(code_translate(code=code,type='baostock'),field,start_date=start_date, end_date=end_date,frequency=frequency, adjustflag=adjustflag)
bs.logout()
data = []
while (rs.error_code == '0') & rs.next():
row=rs.get_row_data()
data.append(row)
data_list = pd.DataFrame(data, columns=rs.fields)
if not data_list.empty:
#规范化
data_list['code']= data_list['code'].map(lambda x:x[-6:])
data_list = data_list.loc[data_list["tradestatus"] == "1"] #排除停牌日期
database.insert_stock_daily_price(data_list,code=code,start_date=start_date,end_date=end_date)
#用日期做索引,并格式化
data_list = data_list.set_index(keys='date')

else:
print(code+'未获取到数据')
data_list = None
return data_list

def get_history_price(code,field='date,open,high,low,close,volume,amount',start_date=None,end_date=None,frequency='d',adjustflag='3'):
'''获取价格相关数据\n
code:股票代码,输入直接的股票代码,如招商银行600036\n
start_date=None,查询开始时间\n
end_date=None,查询终止时间\n
frequency:频率,默认,天(d)\n
'''
code = code_translate(baostock_code= code)
data_list=None
#检查是否已在数据库,取到任意一部分数据,也返回
data_list = database.get_stock_daily_price(code,start_date,end_date)
if (not data_list is None) and (not data_list.empty):
data_list = data_list.set_index(keys='date')
if adjustflag !='3':
data_list = restore_right(df = data_list,code=code)
return data_list
return data_list

def restore_right(df,code,field='open,high,low,close,volume,preclose',adjustflag='2'):
'''获取复权相关数据\n
df: 不复权的数据集
code:股票代码,输入直接的股票代码,如招商银行600036\n
field:需要做复权的字段
adjustflag=2 复权类型,2为前复权,1为后复权\n
'''
factors = get_stock_adjust_factor(code=code)
if factors is None:
df['close']=df['close'].astype('float64')
df['high']=df['high'].astype('float64')
df['low']=df['low'].astype('float64')
df['open']=df['open'].astype('float64')
df['volume']=df['volume'].astype('float64')
df['preclose']=df['preclose'].astype('float64')
return df
data_list = df[['open','high','low','close','volume','preclose']]
data_list = pd.merge(data_list, factors, how='outer', left_index=True,right_on='dividOperateDate')
data_list.set_index(inplace=True,keys='dividOperateDate')
data_list.sort_index(inplace=True)
data_list['foreAdjustFactor'] = data_list['foreAdjustFactor'].fillna(method='ffill')
data_list['backAdjustFactor'] = data_list['backAdjustFactor'].fillna(method='ffill')
data_list['foreAdjustFactor'] = data_list['foreAdjustFactor'].astype('float64')
data_list['backAdjustFactor'] = data_list['backAdjustFactor'].astype('float64')
#复权变换
if adjustflag=='2':
data_list['close']=data_list['close'].astype('float64')*data_list['foreAdjustFactor']
data_list["high"]=data_list['high'].astype('float64')*data_list['foreAdjustFactor']
data_list["low"]=data_list['low'].astype('float64')*data_list['foreAdjustFactor']
data_list['open']=data_list['open'].astype('float64')*data_list['foreAdjustFactor']
data_list['volume']=data_list['volume'].astype('float64')/data_list['foreAdjustFactor']
data_list['preclose']=data_list['preclose'].astype('float64')*data_list['foreAdjustFactor']
elif adjustflag =='1':
data_list['close']=data_list['close'].astype('float64')*data_list['backAdjustFactor']
data_list["high"]=data_list['high'].astype('float64')*data_list['backAdjustFactor']
data_list["low"]=data_list['low'].astype('float64')*data_list['backAdjustFactor']
data_list['open']=data_list['open'].astype('float64')*data_list['backAdjustFactor']
data_list['volume']=data_list['volume'].astype('float64')/data_list['backAdjustFactor']
data_list['preclose']=data_list['preclose'].astype('float64')*data_list['backAdjustFactor']
data_list = data_list[~pd.isna(data_list['open'])]
df['open'] = data_list['open']
df['close'] = data_list['close']
df['low'] = data_list['low']
df['high'] = data_list['high']
df['volume'] = data_list['volume']
df['preclose'] = data_list['preclose']
return df

get_daily_price = functools.partial(get_history_price,field='date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,isST',frequency='d',adjustflag='2')

def get_daily_price_limit_ext(code,start_date=None,end_date=None):
'''处理涨停跌停价格的逻辑,首先处理ST股,再处理新股涨幅限制'''
data_list = get_daily_price(code=code,start_date=start_date,end_date=end_date)
if data_list is None or data_list.empty:
return data_list
#区别是否是ST股
data_list['high_limit'] = data_list.apply(lambda x:round(1.05*x['preclose'],2) if x['isST']==1 else round(1.1*x['preclose'],2),axis=1)
data_list['low_limit'] = data_list.apply(lambda x:round(0.95*x['preclose'],2) if x['isST']==1 else round(0.9*x['preclose'],2),axis=1)
if code is not None:
#处理新股首日44%
basic = query_stock_basic(code=code)
if not basic.empty:
start = basic.iloc[0]['ipoDate']
if start in data_list.index:
data_list.loc[start]['high_limit']=round(1.44*data_list.loc[start]['preclose'],2)
data_list.loc[start]['low_limit']=round(0.56*data_list.loc[start]['preclose'],2)
return data_list

def code_translate(code=None,baostock_code=None,easymoney_code=None,type='baostock'):
'''将证券编号转换成baostock的编号或者反向转换'''
if baostock_code is not None:
if len(str(baostock_code))>6:
last = baostock_code[-6:]
else:
last = baostock_code
elif code is not None:
if type=='baostock':
if code[0]=='6':
last = 'sh.'+code
elif code[0]=='3':
last = 'sz.'+code
elif code[0]=='0':
last = 'sz.'+code
else:
last = code
elif type=='easymoney':
if code[0]=='6':
last = '1.'+code
elif code[0]=='3':
last = '0.'+code
elif code[0]=='0':
last = '0.'+code
else:
last = code
elif easymoney_code is not None:
if len(str(easymoney_code))>6:
last = easymoney_code[-6:]
else:
last = easymoney_code
return last


def fetch_stock_adjust_factor(code,start_date='2010-01-01',end_date=None):
code = code_translate(code=code,type='baostock')
bs.login()
rs = bs.query_adjust_factor(code,start_date=start_date,end_date=end_date)
bs.logout()
data_list = rs.get_data()
if not data_list.empty:
data_list['code']=data_list['code'].map(lambda x:x[-6:])
print(code+'因子获取中')
database.insert_adjust_factor(data_list)
return data_list

def get_stock_adjust_factor(code):
df = database.get_adjust_factor(code=code)
return df

if __name__ == '__main__':
a = fetch_stock_adjust_factor('600320',start_date='2000-01-01', end_date='2020-04-30')
#a = query_stock_basic()
#a = get_all_stock_industry()
print(a)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
# 使用with简化连接过程,每次都连接关闭很麻烦,使用上下文管理,简化连接过程
import pymysql
import contextlib
import pandas as pd
import traceback
from datetime import datetime,date,timedelta
from sqlalchemy import create_engine

# 定义上下文管理器,连接后自动关闭连接
@contextlib.contextmanager
def mysql(host='127.0.0.1', port=3306, user='root', passwd='123456', db='quant', charset='utf8'):
conn = pymysql.connect(host=host, port=port, user=user, passwd=passwd, db=db, charset=charset)
cursor = conn.cursor(cursor=pymysql.cursors.DictCursor)
try:
yield cursor
conn.commit()
except Exception as e:
print('database error:'+str(e.args))
print(traceback.format_exc())
conn.rollback()
finally:
cursor.close()
conn.close()


def get_stock_basic(code=None, code_name=None):
'''从数据库获取股票基本信息'''
with mysql() as cursor:
sql = "select code,code_name,ipo_date as ipoDate,out_date as outDate,type,status from `quant`.`stock_basic` where 1=1 "
if code is not None:
sql = sql + " and code='"+code+"'"
if code_name is not None:
sql = sql + " and code_name='"+code_name+"'"
cursor.execute(sql)
data = cursor.fetchall()
df = pd.DataFrame.from_dict(data,orient='columns')
#df.columns=['code', 'code_name', 'ipoDate', 'outDate', 'type', 'status']
return df


def insert_stock_basic(df):
# 执行sql
with mysql() as cursor:
sql = "select code,code_name,ipo_date as ipoDate,out_date as outDate,type,status from `quant`.`stock_basic` where 1=1 "
cursor.execute(sql)
old = cursor.fetchall()
old_df = pd.DataFrame.from_dict(old,orient='columns')

sql = "INSERT INTO `quant`.`stock_basic`(`code`,`code_name`,`ipo_date`,`out_date`,`type`,`status`)VALUES(%s,%s,%s,%s,%s,%s);"

#df2 = df.astype(object).where(pd.notnull(df), None)
df2=df
df2['outDate'] = df2['outDate'].map(lambda x:None if x=='' else x)
df2 = df2.append(old_df)
df2.drop_duplicates(subset=['code'],keep=False,inplace=True)
val = df2.apply(tuple, axis=1).tolist()
cursor.executemany(sql,val)


def get_stock_daily_price(code=None,start_date=None,end_date=None):
'''从数据库获取股票价格'''
with mysql() as cursor:
sql = "select date,code,open,high,low,close,preclose,volume,amount,adjust_flag as adjustFlag,turn,trade_status as tradeStatus,is_st as isST from `quant`.`stock_daily_price` where 1=1 "

if code is not None:
sql = sql + " and code='"+code+"'"

if start_date is not None:
sql = sql + " and date>='"+start_date+"'"

if end_date is not None:
sql = sql + " and date<='"+end_date+"'"

sql = sql + " order by `date` asc "

cursor.execute(sql)
data = cursor.fetchall()
if len(data)>0:
df = pd.DataFrame.from_dict(data,orient='columns')
#df.columns=['date','code','open','high','low','close','preclose','volume','amount','adjustFlag','turn','tradeStatus','isST']
else:#空数据集
df = pd.DataFrame(columns=['date','code','open','high','low','close','preclose','volume','amount','adjustFlag','turn','tradeStatus','isST'])
# df.set_index(keys='date')
return df

def insert_stock_daily_price(df,code=None,start_date=None,end_date=None):
if code is not None:#start_date,end_date为none也没事
old_df = get_stock_daily_price(code,start_date,end_date)
# 执行sql
with mysql() as cursor:
df2=pd.DataFrame(columns=['date','code','open','high','low','close','preclose','volume','amount','adjustFlag','turn','tradeStatus','isST'])
df2['code']= df['code'].map(lambda x:x[-6:])
df2['date']=df['date']
df2['open']=df['open'].astype(float)
df2['high']=df['high'].astype(float)
df2['low']=df['low'].astype(float)
df2['close']=df['close'].astype(float)
df2['preclose']=df['preclose'].astype(float)
df2['volume']=df['volume'].astype(int)
df2['amount']=df['amount'].astype(float)
df2['turn']=df['turn'].astype(float)
df2['adjustFlag']=df['adjustflag'].astype(int)
df2['tradeStatus']=df['tradestatus'].astype(int)
df2['isST']=df['isST'].map(lambda x: 0 if x=='' else int(x))
sql = "INSERT INTO `quant`.`stock_daily_price`(date,code,open,high,low,close,preclose,volume,amount,adjust_flag,turn,trade_status,is_st)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);"
df2 = df2.append(old_df)
df2['date']=df2['date'].map(lambda x:str(x))#日期型过滤似乎有问题,直接转str
df2.drop_duplicates(subset=['code','date'],keep=False,inplace=True)
if df2 is not None and not df2.empty:
val = df2.apply(tuple, axis=1).tolist()
cursor.executemany(sql,val)

def save_winner_list(df):
if df is not None and not df.empty:
try:
engine = create_engine('mysql+pymysql://root:123456@127.0.0.1:3306/quant')
df_db = df[['SCode', 'SName', 'ClosePrice', 'Chgradio', 'Dchratio', 'JmMoney', 'Turnover', 'Ntransac', 'Ctypedes', 'Oldid', 'Smoney', 'BMoney', 'ZeMoney', 'Tdate', 'JmRate',
'ZeRate', 'Ltsz', 'Rchange1dc', 'Rchange1do', 'Rchange2dc', 'Rchange2do', 'Rchange3dc', 'Rchange3do', 'Rchange1m', 'Rchange3m', 'Rchange6m', 'Rchange1y', 'DP']]
df_db.to_sql('winner_list', engine, index=False,if_exists='append')
except Exception as e:
print('龙虎榜列表保存发生错误:'+str(e.args))
print(traceback.format_exc())

def get_winner_list(date=None):
engine = create_engine('mysql+pymysql://root:123456@127.0.0.1:3306/quant')
sql_cmd = 'SELECT * FROM `quant`.`winner_list` where 1=1 '
if date is not None:
sql_cmd = sql_cmd + "and Tdate='"+date+"'"
df_db = pd.read_sql(sql=sql_cmd, con=engine)
return df_db

def get_winner_list_detail(date=None,code=None,type=None):
df_db = None
engine = create_engine('mysql+pymysql://root:123456@127.0.0.1:3306/quant')
sql_cmd = 'SELECT `date`,`code`,`type`,`order`,stock_exchange,buy_amount,buy_ratio,sell_amount,sell_ratio,net_amount FROM `quant`.`winner_list_detail` where 1=1'
if date is not None:
sql_cmd = sql_cmd + " and `date`='"+date+"'"
if code is not None:
sql_cmd = sql_cmd + " and `code`='"+code+"'"
if type is not None:
sql_cmd = sql_cmd + " and `type`='"+type+"'"
df_db = pd.read_sql(sql=sql_cmd, con=engine)
return df_db

def insert_winner_list_detail(df,date=None,code=None,type=None):
with mysql() as cursor:
sql = "INSERT INTO `quant`.`winner_list_detail`(`date`,`code`,`type`,`order`,stock_exchange,buy_amount,buy_ratio,sell_amount,sell_ratio,net_amount)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);"
if df is not None and not df.empty:
val = df.apply(tuple, axis=1).tolist()
cursor.executemany(sql,val)

def insert_money_flow(df,date=None,code=None):
df2 = pd.DataFrame(columns=df.columns)
for index,row in df.iterrows():
row_old = get_money_flow(date=row['date'],code=row['code'])
if row_old is None:
df2 = df2.append(row)

with mysql() as cursor:
sql = "INSERT INTO `quant`.`stock_money_flow`(`date`,`code`,`main_amount`,`small_amount`,`med_amount`,`big_amount`,`huge_amount`,`main_rate`,`small_rate`,`med_rate`,`big_rate`,`huge_rate`,`close`,`rate`)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);"
if df2 is not None and not df2.empty:
val = df2.apply(tuple, axis=1).tolist()
cursor.executemany(sql,val)

def get_money_flow(date=None,code=None):
with mysql() as cursor:
sql = "select `date`,`code`,`main_amount`,`small_amount`,`med_amount`,`big_amount`,`huge_amount`,`main_rate`,`small_rate`,`med_rate`,`big_rate`,`huge_rate`,`close`,`rate` from `quant`.`stock_money_flow` where 1=1 "
if date is not None:
sql = sql + " and `date`='"+date+"'"
if code is not None:
sql = sql + " and `code`='"+code+"'"
if date is not None and code is not None:
sql = sql + " limit 1"
cursor.execute(sql)
data = cursor.fetchall()
if len(data)>0:
df = pd.DataFrame.from_dict(data,orient='columns')
else:#空数据集
df = None
return df


def insert_main_finance(df,date=None,code=None):
df2 = pd.DataFrame(columns=df.columns)
for index,row in df.iterrows():
row_old = get_main_finance(date=row['date'],code=row['code'])
if row_old is None:
df2 = df2.append(row)

with mysql() as cursor:
sql = "INSERT INTO `quant`.`stock_main_finance`(`date`,`code`,`jbmgsy`,`kfmgsy`,`xsmgsy`,`mgjzc`,`mggjj`,`mgwfply`,`mgjyxjl`,`yyzsr`,`mlr`,`gsjlr`,`kfjlr`,`yyzsrtbzz`,`gsjlrtbzz`,`kfjlrtbzz`,`yyzsrgdhbzz`,`gsjlrgdhbzz`,`kfjlrgdhbzz`,`jqjzcsyl`,`tbjzcsyl`,`tbzzcsyl`,`mll`,`jll`,`sjsl`,`yskyysr`,`xsxjlyysr`,`jyxjlyysr`,`zzczzy`,`yszkzzts`,`chzzts`,`zcfzl`,`ldzczfz`,`ldbl`,`sdbl`)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);"
if df2 is not None and not df2.empty:
val = df2.apply(tuple, axis=1).tolist()
cursor.executemany(sql,val)

def get_main_finance(date=None,code=None):
with mysql() as cursor:
sql = "select `date`,`code`,`jbmgsy`,`kfmgsy`,`xsmgsy`,`mgjzc`,`mggjj`,`mgwfply`,`mgjyxjl`,`yyzsr`,`mlr`,`gsjlr`,`kfjlr`,`yyzsrtbzz`,`gsjlrtbzz`,`kfjlrtbzz`,`yyzsrgdhbzz`,`gsjlrgdhbzz`,`kfjlrgdhbzz`,`jqjzcsyl`,`tbjzcsyl`,`tbzzcsyl`,`mll`,`jll`,`sjsl`,`yskyysr`,`xsxjlyysr`,`jyxjlyysr`,`zzczzy`,`yszkzzts`,`chzzts`,`zcfzl`,`ldzczfz`,`ldbl`,`sdbl` from `quant`.`stock_main_finance` where 1=1 "
if date is not None:
sql = sql + " and `date`='"+date+"'"
if code is not None:
sql = sql + " and `code`='"+code+"'"
if date is not None and code is not None:
sql = sql + " limit 1"
cursor.execute(sql)
data = cursor.fetchall()
if len(data)>0:
df = pd.DataFrame.from_dict(data,orient='columns')
else:#空数据集
df = None
return df

def insert_stock_industry(df):
df2 = pd.DataFrame(columns=df.columns)
for index,row in df.iterrows():
row_old = get_stock_industry(date=row['updateDate'],code=row['code'])
if row_old is not None:
old_update=row_old.iloc[0]['updateDate'].strftime('%Y-%m-%d')
if old_update==row['updateDate']:
continue
df2=df2.append(row)
with mysql() as cursor:
sql = "INSERT INTO `quant`.`stock_industry`(`updateDate`,`code`,`code_name`,`industry`,`industryClassification`)VALUES(%s,%s,%s,%s,%s);"
if df2 is not None and not df2.empty:
val = df2.apply(tuple, axis=1).tolist()
cursor.executemany(sql,val)

def get_stock_industry(date=None,code=None):
with mysql() as cursor:
sql = "select `updateDate`,`code`,`code_name`,`industry`,`industryClassification` from `quant`.`stock_industry` where 1=1 "
if date is not None:
sql = sql + " and `updateDate`<='"+date+"'"
if code is not None:
sql = sql + " and `code`='"+code+"'"
sql = sql + " order by updateDate desc"
cursor.execute(sql)
data = cursor.fetchall()
if len(data)>0:
df = pd.DataFrame.from_dict(data,orient='columns')
#去除code重复的记录,只取一条
df.drop_duplicates(subset=['code'],keep='first',inplace=True)
else:#空数据集
df = None
return df

def insert_adjust_factor(df):
df2 = pd.DataFrame(columns=df.columns)
for index,row in df.iterrows():
row_old = get_adjust_factor(date=row['dividOperateDate'],code=row['code'])
if row_old is not None:
old_update=row_old.iloc[0]['dividOperateDate'].strftime('%Y-%m-%d')
if old_update==row['dividOperateDate']:
continue
df2=df2.append(row)
with mysql() as cursor:
sql = "INSERT INTO `quant`.`stock_adjust_factor`(`code`,`dividOperateDate`,`foreAdjustFactor`,`backAdjustFactor`,`adjustFactor`)VALUES(%s,%s,%s,%s,%s);"
if df2 is not None and not df2.empty:
val = df2.apply(tuple, axis=1).tolist()
cursor.executemany(sql,val)

def get_adjust_factor(date=None,code=None):
with mysql() as cursor:
sql = "select `code`,`dividOperateDate`,`foreAdjustFactor`,`backAdjustFactor`,`adjustFactor` from `quant`.`stock_adjust_factor` where 1=1 "
if date is not None:
sql = sql + " and `dividOperateDate`='"+date+"'"
if code is not None:
sql = sql + " and `code`='"+code+"'"
sql = sql + " order by dividOperateDate desc"
cursor.execute(sql)
data = cursor.fetchall()
if len(data)>0:
df = pd.DataFrame.from_dict(data,orient='columns')
else:#空数据集
df = None
return df

if __name__ == '__main__':
a = get_stock_basic()
#a = query_stock_basic(type=1,status=1)
print(a)

哈哈哈,能看到这里的,我决定再给点干货。

获取股票数据有一个很关键的问题是复权。而在模拟交易时,最好的方法就是用不复权价格。可是一旦你的价格不复权,在碰到分红配股的时候,价格的突变就要对于持仓的数据进行调整。所以,必须要去获取所有股票的分红配股的全部数据,然后到那天的时候,像券商一样的给你的股票数量进行增加,账户余额进行变更。在这里,我先使用了前复权的方法,不处理这种券商操作,以降低复杂度。前复权也采用了简单的算法,并不精确。复权方法restore_right,调整系数从get_adjust_factor获取,简单的乘法。特别是时间越早,越不精确。小玩玩,对最近模拟一下就凑合用了。

另一个模拟碰到的数据问题是,最好有一个交易日数据。在模拟时钟运行时,很多非交易日是可以直接跳过的,所以有这样一个数据,可以大大的加快模拟的进程。有时候在算法里,也会有用到这个数据。当然,如果你的数据源不提供这个数据的时候,一个简单的替代方案是拉一下对应指数数据,指数有数据,就是交易日。


打赏 支付宝打赏 微信打赏

如果文章对你有帮助,欢迎点击上方按钮打赏作者