先从深交所开始,直接上传源码:

from bs4 import BeautifulSoup
from lxml import etree
import pandas as pd
import akshare as ak
import datetime
import requests
import csv
from contextlib import closing
import time
from urllib.request import urlopen
import requests
from urllib import request
from io import BytesIO
import gzip
import random

# Shenzhen Stock Exchange (SZSE) option risk indicators.
#
# Downloads the daily "option_hyfxzb" report for every trading day since
# 2019-01-01, tags each day's rows with its trade date and writes everything
# to szse.csv.

# Trading calendar from akshare.
date = ak.tool_trade_date_hist_sina()
# Normalise to timestamps so the filter and the formatting below work whether
# the column holds 'YYYY-MM-DD' strings or date objects.
date['trade_date'] = pd.to_datetime(date['trade_date'])
date = date.loc[date['trade_date'] >= pd.Timestamp('2019-01-01')]

# One DataFrame per trading day. They are concatenated once after the loop:
# DataFrame.append was removed in pandas 2.0 and copied the whole accumulated
# frame on every call.
daily_frames = []

for trade_day in date['trade_date']:
    j = trade_day.strftime('%Y-%m-%d')
    print(j)
    url = 'http://www.szse.cn/api/report/ShowReport?SHOWTYPE=xlsx&CATALOGID=option_hyfxzb&TABKEY=tab1&txtSearchDate=%(j)s&random=0.5379373345285146' % {'j': j}
    print(url)
    response = requests.get(url)

    # The request asks for SHOWTYPE=xlsx, so the body is a binary workbook and
    # printing or decoding it as text shows garbage. Wrapping the bytes in
    # BytesIO lets pandas parse the workbook straight from memory, so no
    # scratch file has to be written and read back.
    df = pd.read_excel(BytesIO(response.content))
    df['trade_date'] = j
    daily_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame.
df1 = pd.concat(daily_frames) if daily_frames else pd.DataFrame()
df1.to_csv('szse.csv')

爬取上交所

import csv
from contextlib import closing
import time
from urllib.request import urlopen
# Shanghai Stock Exchange (SSE) option risk indicators.
#
# Downloads the daily risk CSV for every trading day since 2019-01-01 and
# writes all rows to sse.csv.

# Trading calendar from akshare.
date = ak.tool_trade_date_hist_sina()
# Normalise to timestamps so the filter and the formatting below work whether
# the column holds 'YYYY-MM-DD' strings or date objects.
date['trade_date'] = pd.to_datetime(date['trade_date'])
date = date.loc[date['trade_date'] >= pd.Timestamp('2019-01-01')]

# Example endpoint:
# //query.sse.com.cn/derivative/downloadRisk.do?trade_date=20201207&productType=0

# Every parsed CSV row from every day; turned into a DataFrame once at the end
# instead of building and appending a one-row frame per line.
rows = []

for trade_day in date['trade_date']:
    url = 'http://query.sse.com.cn/derivative/downloadRisk.do?trade_date=%s&productType=0' % trade_day.strftime('%Y%m%d')

    # Stream the response and decode it line by line as GBK.
    with closing(requests.get(url, stream=True)) as r:
        f = (line.decode('gbk') for line in r.iter_lines())
        # Use the default quote character ('"'). The quote character must
        # differ from the delimiter; passing ',' for both makes the reader
        # treat every comma as the start of a quoted field.
        reader = csv.reader(f, delimiter=',')

        for row in reader:
            print(row)
            if not row:
                # Blank line: nothing to store.
                continue
            rows.append(row)

# NOTE(review): rows are stored exactly as received. If each daily file starts
# with a header row, that header is repeated once per day in the output --
# confirm against a real download and drop it if unwanted.
# The index is a running row number; columns are the positional field numbers.
df1 = pd.DataFrame(rows)
df1.to_csv('sse.csv')

爬取中金所

import datetime
import requests
from lxml import etree
import pandas as pd
import akshare as ak
import time
# China Financial Futures Exchange (CFFEX) daily statistics.
#
# Downloads the daily index.xml for every trading day since 2019-01-01,
# extracts one row per <dailydata> element and writes everything to cffex.csv.

# Child elements of each <dailydata> node that become CSV columns, in order.
CFFEX_FIELDS = [
    'instrumentid',
    'tradingday',
    'openprice',
    'highestprice',
    'lowestprice',
    'closeprice',
    'preopeninterest',
    'openinterest',
    'presettlementprice',
    'settlementpriceif',
    'settlementprice',
    'volume',
    'turnover',
    'productid',
    'delta',
    'expiredate',
]

# Trading calendar from akshare.
date = ak.tool_trade_date_hist_sina()
# Normalise to timestamps so the filter and the formatting below work whether
# the column holds 'YYYY-MM-DD' strings or date objects.
date['trade_date'] = pd.to_datetime(date['trade_date'])
date = date.loc[date['trade_date'] >= pd.Timestamp('2019-01-01')]

# One DataFrame per trading day. They are concatenated once after the loop:
# DataFrame.append was removed in pandas 2.0 and copied the whole accumulated
# frame on every call.
daily_frames = []

for trade_day in date['trade_date']:
    s = trade_day.strftime('%Y%m')
    s2 = trade_day.strftime('%d')
    url = 'http://www.cffex.com.cn/sj/hqsj/rtj/%(YM)s/%(D)s/index.xml?id=39' % {'YM': s, 'D': s2}
    response = requests.get(url)
    # The lenient HTML parser is kept on purpose: a missing or error page then
    # simply yields no <dailydata> nodes instead of raising a parse error.
    p = etree.HTML(response.content)

    # Walk the nodes directly rather than addressing them by XPath position.
    # The positional version was off by one (positions are 1-based, so the
    # last node was never read) and re-scanned the document for every cell.
    records = []
    for node in p.xpath('//dailydata'):
        # findtext gives None for a missing child and '' for an empty one;
        # both fall back to 0, the placeholder this script has always used.
        records.append(
            {field: node.findtext(field) or 0 for field in CFFEX_FIELDS}
        )

    if not records:
        # No data published for this day.
        continue

    # Keep the 1-based per-day row labels in the CSV index column.
    df = pd.DataFrame(
        records,
        columns=CFFEX_FIELDS,
        index=range(1, len(records) + 1),
    )
    daily_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame.
df1 = pd.concat(daily_frames) if daily_frames else pd.DataFrame()
df1.to_csv('cffex.csv')

以上是爬取三大交易所期权数据的源代码,可以直接使用,也可以修改保存至数据库。

Logo

魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。

更多推荐