Przeglądaj źródła

stock data sync updated

xjconline 2 tygodnie temu
rodzic
commit
1c0c07f420
2 zmienione pliki z 82 dodaniami i 68 usunięciami
  1. 7 11
      src/tools/remove_duplicates.py
  2. 75 57
      src/tools/sync_stock_data.py

+ 7 - 11
src/tools/remove_duplicates.py

@@ -1,35 +1,31 @@
 # -*- encoding: utf-8 -*-
-import pymysql
+import MySQLdb
 
 
 def main():
+    """Remove duplicate rows from `stock` and from the newest `player_record_<n>` table.
+
+    For `stock`, rows sharing a `name` are reduced to the one with the lowest `id`.
+    For the `player_record_<n>` table with the highest numeric suffix, rows sharing
+    (`user_id`, `stock_date`) are reduced to the one with the lowest `id`.
+    Each clean-up step is committed separately.
+    """
-    con = pymysql.connect(host='172.29.110.52',
+    con = MySQLdb.connect(host='172.29.110.52',
                           user='wanzb',
                           password='wanzb890*()',
-                          database='wanzb')
+                          database='wanzb',
+                          charset='utf8mb4')
     with con:
         # remove duplicate stocks
         with con.cursor() as cur:
-            n_rows = cur.execute('delete from stock where name in (select name from (select name,count(name) as c from stock group by name having c>1) as t0) and code is NULL order by name')
-            print('{} stocks removed'.format(n_rows))
+            cur.execute('DELETE `s1` FROM `stock` `s1` INNER JOIN `stock` `s2` WHERE `s1`.`id` > `s2`.`id` AND `s1`.`name` = `s2`.`name`')
         con.commit()
 
         # remove duplicate player's records
         with con.cursor() as cur:
             # find target table
-            n_rows = cur.execute('show tables like "player_record_%"')
+            cur.execute('SHOW TABLES LIKE "player_record_%"')
             tables = [i[0] for i in cur.fetchall()]
+            if not tables:
+                # No matching table: `tables[-1]` below would raise IndexError,
+                # and the stock clean-up above is already committed.
+                print('no player_record tables found')
+                return
+            # sort by the number after the 14-character `player_record_` prefix
             tables.sort(key=lambda x: int(x[14:]))
             target_table = tables[-1]
 
-            # remove duplicate entries in `player_record` util not fund
+            # remove duplicate entries in one statement, keeping the lowest `id`
+            # for each (`user_id`, `stock_date`) pair
-            n_rows = -1
-            while n_rows != 0:
-                n_rows = cur.execute('delete from {} where id in (select mid from (select max(id) mid,user_id,stock_date,count(user_id) c from {} group by user_id,stock_date having c > 1) t0)'.format(target_table, target_table))
-                print('{} player_records removed'.format(n_rows))
+            cur.execute('DELETE `r1` FROM `{0}` `r1` INNER JOIN `{0}` `r2` WHERE `r1`.`id` > `r2`.`id` AND `r1`.`user_id` = `r2`.`user_id` AND `r1`.`stock_date` = `r2`.`stock_date`'.format(target_table))
         con.commit()
 
 
 if __name__ == "__main__":
     main()
-

+ 75 - 57
src/tools/sync_stock_data.py

@@ -1,66 +1,84 @@
-#coding:utf-8
-import os,json
-import time
+# -*- encoding: utf-8 -*-
+
+import os
 import datetime
-import sys
-import django
-from django.core.cache import cache
-from django.db import connection
-from django.db.models import Q,Sum,Count,F
 
-sys.path.append('/mnt/wzbapi/src')
-os.environ['DJANGO_SETTINGS_MODULE'] = 'settings'
-django.setup()
+import pandas
+import MySQLdb
+import requests
+import tushare as ts
 
-import common.models as cm
-import common.common_functions as ccf
-from loadcode import get_stock_list
+_workdir = os.path.dirname(os.path.abspath(__file__))
 
 
-def sync_stock_data():
-    """
-    同步股票数据
-    """
-    stock_list = get_stock_list()
-    new_codes = []
-    
-    # update name by code
-    for item in stock_list:
-        code, name = item
-        objs = cm.Stock.objects.filter(code=code).order_by('pk')
-        if objs:
-            objs[0].name = name
-            objs[0].save()
-            # delete duplicate codes
-            for obj in objs[1:]:
-                obj.delete()
-        else:
-            new_codes.append((code, name))
+def download_stocks():
+    """Download the Tushare stock list and save a copy as ``stocks.csv``.
+
+    Requests ``stock_basic`` with ``list_status='L'`` and writes the result,
+    without the index column, to ``stocks.csv`` inside ``_workdir``.
+
+    Returns:
+        The frame returned by ``pro.stock_basic`` (requested fields: ts_code,
+        symbol, name, area, industry, list_date).
+    """
+    # NOTE(review): the API token is hard-coded here; consider moving it out of
+    # source control.
+    ts.set_token('99800760dfbbdf7d0b4124f6d4be39ebac6a093628f0bd19a7432486')
+    client = ts.pro_api()
+    listed = client.stock_basic(
+        exchange='',
+        list_status='L',
+        fields='ts_code,symbol,name,area,industry,list_date',
+    )
+    csv_path = os.path.join(_workdir, 'stocks.csv')
+    listed.to_csv(csv_path, index=False)
+    return listed
 
-    # update code by new name
-    for item in new_codes:
-        code, name = item
-        objs = cm.Stock.objects.filter(name=name).order_by('pk')
-        if objs:
-            idx = 0
-            for obj in objs:
-                idx += 1
-                # find empty code, update, break
-                if obj.code is None:
-                    obj.code = code
-                    obj.save()
-                    break
-            # no empty code, create one
-            else:
-                cm.Stock.objects.create(code=code, name=name)
-            # delete duplicate names
-            for obj in objs[idx:]:
-                obj.delete()
-        # no name, create one
-        else:
-            cm.Stock.objects.create(code=code, name=name)
 
+def update_stock_names(stocks):
+    """Query the Sina quote endpoint for the given stocks, 100 codes per request.
+
+    Each ``ts_code`` is split on ``'.'`` and rebuilt as the lower-cased part after
+    the dot followed by the part before it (presumably ``000001.SZ`` becomes
+    ``sz000001`` -- confirm against real data). The URL, HTTP status and response
+    text of every request are printed; nothing is written back or returned.
+
+    Args:
+        stocks: Frame with a ``ts_code`` column, sliceable by row position.
+
+    The loop stops at the first response whose status is not 200 or whose body
+    is exactly ``'Forbidden'``.
+    """
+    s = requests.Session()
+    s.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36 Edg/126.0.0.0'
+    s.headers['Referer'] = 'https://finance.sina.com.cn'
+    for i in range(0, len(stocks), 100):
+        sub = [j.split('.') for j in stocks[i: i+100]['ts_code']]
+        query = ','.join('{}{}'.format(j[1].lower(), j[0]) for j in sub)
+        url = 'https://hq.sinajs.cn/list={}'.format(query)
+        print(url)
+        r = s.get(url)
+        print(r.status_code)
+        # Compare the status code, not the Response object: `r != 200` was always
+        # true, so the loop used to stop after the very first request.
+        if r.status_code != 200:
+            break
+        t = r.text
+        print(t)
+        if t == 'Forbidden':
+            break
+
+
+def download_bonds():
+    """Download the Tushare ``cb_basic`` bond list and save a copy as ``bonds.csv``.
+
+    The result is written, without the index column, to ``bonds.csv`` inside
+    ``_workdir``.
+
+    Returns:
+        The frame returned by ``pro.cb_basic`` (requested fields: ts_code,
+        bond_short_name, stk_code, stk_short_name, list_date, delist_date).
+    """
+    # NOTE(review): the API token is hard-coded here; consider moving it out of
+    # source control.
+    ts.set_token('74699ca03b99884912569393dbd401b983bbf4797dde89653ed9e184')
+    client = ts.pro_api()
+    bond_frame = client.cb_basic(
+        fields="ts_code,bond_short_name,stk_code,stk_short_name,list_date,delist_date",
+    )
+    csv_path = os.path.join(_workdir, 'bonds.csv')
+    bond_frame.to_csv(csv_path, index=False)
+    return bond_frame
 
-if __name__ == "__main__":
-    sync_stock_data()
 
+def main():
+    """Sync stock and bond codes/names from Tushare into the `stock` table.
+
+    Steps, all inside one transaction:
+      1. Build (code, name) pairs from the downloaded stocks and bonds, where
+         code is the part of ``ts_code`` before the dot.
+      2. Set `name` by `code`; remember the pairs that matched no row.
+      3. Delete rows sharing a `name`, keeping the one with the lowest `id`.
+      4. For the remembered pairs, set `code` by `name`; pairs that still match
+         no row are inserted with `ctime` set to the current time.
+
+    The transaction is committed only if every statement succeeds. On
+    ``MySQLdb.IntegrityError`` it is rolled back and the error is printed.
+    """
+    stocks = download_stocks()
+    # update_stock_names(stocks)
+    bonds = download_bonds()
+    data = [(i.split('.')[0], j) for i, j in stocks[['ts_code', 'name']].values]
+    data.extend([(i.split('.')[0], j) for i, j in bonds[['ts_code', 'bond_short_name']].values])
+
+    with MySQLdb.connect(host='172.29.110.52',
+                         user='wanzb',
+                         password='wanzb890*()',
+                         database='wanzb',
+                         charset='utf8mb4') as con:
+        try:
+            with con.cursor() as cur:
+                # Pairs whose code is not in the table yet.
+                unmatched_by_code = []
+                for code, name in data:
+                    cur.execute('UPDATE `stock` SET `name` = %s WHERE `code` = %s', (name, code))
+                    # info() reports "Rows matched: N ..." for the last UPDATE.
+                    if con.info().startswith('Rows matched: 0'):
+                        unmatched_by_code.append((code, name))
+                # The renames above may have produced duplicate names; keep the lowest id.
+                cur.execute('DELETE `s1` FROM `stock` `s1` INNER JOIN `stock` `s2` WHERE `s1`.`id` > `s2`.`id` AND `s1`.`name` = `s2`.`name`')
+                # Pairs matching neither an existing code nor an existing name.
+                rows_to_insert = []
+                now = datetime.datetime.now()
+                for code, name in unmatched_by_code:
+                    cur.execute('UPDATE `stock` SET `code` = %s WHERE `name` = %s', (code, name))
+                    if con.info().startswith('Rows matched: 0'):
+                        rows_to_insert.append((code, name, now))
+                cur.executemany('INSERT INTO `stock` (`code`, `name`, `ctime`) VALUES (%s, %s, %s)', rows_to_insert)
+        except MySQLdb.IntegrityError as exc:
+            # Undo the partial sync and report it instead of failing silently.
+            con.rollback()
+            print('stock sync rolled back: {}'.format(exc))
+        else:
+            # Commit only when every statement succeeded.
+            con.commit()
+
+
+if __name__ == "__main__":
+    main()