日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 编程资源 > 编程问答 > 内容正文

编程问答

A股数据日级前复权数据补全

發布時間:2023/12/18 编程问答 28 豆豆
生活随笔 收集整理的這篇文章主要介紹了 A股数据日级前复权数据补全,小編覺得挺不錯的,現在分享給大家,幫大家做個參考。

根據上一篇下載的數據,現在每天更新 day 數據。

需要保證:股票當日新上市時下載、退市後保留、識別停牌與復牌,並判斷是否發生復權(決定前復權價格是否變化)。

數據更新

1、大盤指數數據(399001, 399005, 399006, 399300, 999999)

## Python
# Daily update of the stored index ("dapan") CSV files.
#
# For every tracked index the script loads the CSV saved in the most recent
# dated folder, downloads the rows from the last stored trading day up to
# ``new_date`` through tushare, appends them and writes the result into a new
# dated folder.
#
# NOTE(review): the non-ASCII path literals below are reproduced exactly as
# they appear in this file; they look like they picked up extraction artifacts
# ("(xx)" fragments). Verify them against the real directory names on disk.
import bisect
import os
import shutil

import numpy as np
import pandas as pd
import tushare as ts

os.chdir("C:/Users/~~~/Desktop/數(shù)據(jù)")  # switch to the data directory

INDEX_ROOT = "ts大盤數(shù)據(jù)"        # folder holding one sub-folder per data date
INDEX_DIR_PREFIX = "大盤數(shù)據(jù)"   # sub-folders are named <prefix><YYYYMMDD>
PRICE_COLUMNS = ["open", "high", "close", "low"]


def index_dir(date):
    """Return the relative path of the index-data folder for ``date``."""
    return INDEX_ROOT + "/" + INDEX_DIR_PREFIX + str(date)


def dashed(yyyymmdd):
    """Convert ``'YYYYMMDD'`` into ``'YYYY-MM-DD'``."""
    return yyyymmdd[0:4] + "-" + yyyymmdd[4:6] + "-" + yyyymmdd[6:8]


def func(x, lst):
    """Return the last entry of the ascending list ``lst`` that is <= ``x``.

    Raises ``ValueError`` when every entry is greater than ``x``. The original
    linear scan raised ``IndexError`` when ``x`` was at or beyond the final
    entry and silently returned ``lst[-1]`` when ``x`` preceded the first one.
    """
    pos = bisect.bisect_right(lst, x)
    if pos == 0:
        raise ValueError("no entry in lst is <= %r" % (x,))
    return lst[pos - 1]


# Date of the most recent stored data set. The date is whatever follows the
# folder prefix, so the slice offset is derived from the prefix itself rather
# than hard-coded; entries that do not follow the naming scheme are ignored.
last_data_date = max(
    int(name[len(INDEX_DIR_PREFIX):])
    for name in os.listdir(INDEX_ROOT)
    if name.startswith(INDEX_DIR_PREFIX)
    and name[len(INDEX_DIR_PREFIX):].isdigit()
)

# All trading dates as integers (YYYYMMDD); the calendar is fetched only once.
trade_cal = ts.trade_cal()
all_date = [
    int(day.replace("-", ""))
    for day in trade_cal["calendarDate"][trade_cal["isOpen"] == 1]
]

# Last trading day on or before the stored data date.
last_data_day = str(func(last_data_date, all_date))

# Create the folder for the new date and carry stock_info.csv over.
new_date = "20171221"
os.mkdir(index_dir(new_date))
shutil.copyfile(index_dir(last_data_date) + "/" + "stock_info.csv",
                index_dir(new_date) + "/" + "stock_info.csv")

# Download window, in the YYYY-MM-DD form passed to tushare.
start_date = dashed(last_data_day)
end_date = dashed(new_date)


def update_index(file_code, ts_code):
    """Append freshly downloaded rows to one index file.

    file_code: name (without ``.csv``) of the stored file.
    ts_code:   index code passed to ``ts.get_h_data``; it differs from
               ``file_code`` only for the Shanghai composite, stored as
               999999 but requested as 000001.
    """
    old_dapan = pd.read_csv(
        index_dir(last_data_date) + "/" + file_code + ".csv", engine='python')
    new_dapan = ts.get_h_data(ts_code, start=start_date, end=end_date,
                              index=True, pause=4)
    if new_dapan is None or len(new_dapan) == 0:
        # Fail with a clear message instead of an AttributeError/IndexError
        # further down.
        raise RuntimeError("no data returned for index " + ts_code)

    new_dapan.sort_index(inplace=True)
    # Prices are stored multiplied by 10000.
    new_dapan[PRICE_COLUMNS] = new_dapan[PRICE_COLUMNS] * 10000
    new_dapan["date"] = new_dapan.index
    new_dapan["date"] = new_dapan["date"].astype(str).apply(
        lambda x: x.replace('-', ''))
    # Column names follow the stored files (spelling included).
    new_dapan.columns = ['open', 'high', 'close', 'low',
                         'volumw', 'turover', 'date']

    # The window starts on the last stored day, so the first downloaded row
    # normally duplicates the last stored row; drop it when the two agree.
    # NOTE(review): this compares column 4 of the download with column 8 of
    # the stored file, presumably both the volume - confirm against the CSV.
    if new_dapan.iloc[0, 4] == old_dapan.iloc[-1, 8]:
        new_dapan.drop(new_dapan.index[0], axis=0, inplace=True)

    # DataFrame.append was removed in pandas 2.0; concat is the equivalent.
    dapan = pd.concat([old_dapan, new_dapan], ignore_index=True)
    dapan = dapan[old_dapan.columns]
    # Columns missing from the download are carried forward from stored rows.
    dapan.ffill(inplace=True)
    dapan.to_csv(index_dir(new_date) + "/" + file_code + ".csv", index=False)


# 399001, 399005, 399006, 399300
dapan_code = ["399001", "399005", "399006", "399300"]
for i in dapan_code:
    update_index(i, i)

# 000001 --> 999999: Shanghai composite (Sina Finance numbers it differently).
update_index("999999", "000001")

2、A股股票數據與概括文件

## R
## Automatic completion of the stored forward-adjusted daily stock data.
## new.date: date (YYYYMMDD) up to which the data is completed.
##
## For every stock listed on new.date the script loads the previously stored
## CSV, downloads the rows since the last traded day from Wind, rescales the
## stored prices when the forward-adjusted price of that day changed, validates
## the result and writes it to a new dated folder. Files of stocks that are no
## longer listed are copied over unchanged. A summary workbook is written when
## every stock was processed successfully.
##
## NOTE(review): the non-ASCII string literals are reproduced exactly as they
## appear in this file; they look like they picked up extraction artifacts
## ("(xx)" fragments). Verify them against the real directory names.
library(WindR)
library(xlsx)
library(data.table)
library(magrittr)
library(tcltk2)
# (.packages())
w.start()
setwd("C:/Users/~~~/Desktop/坴戔/華泰/數(shù)據(jù)")

## WindR data rule: data is NA while a stock is delisted; during a suspension
## the rows are filled with the previous trading day's close (?)
## Do not create arbitrary files inside the data folders: directory listings
## are parsed to find dates and stock codes.

## Helpers ----

## Column layout of the stored per-stock files (spelling follows the files).
wind.df.cols <- c("wind_code", "name", "date", "time", "open", "high", "low",
                  "close", "volumw", "turover", "free_turn",
                  "free_float_shares")

## Convert "YYYYMMDD" into "YYYY-MM-DD".
dash.date <- function(ymd) {
  paste(substr(ymd, 1, 4), substr(ymd, 5, 6), substr(ymd, 7, 8), sep = "-")
}

## Convert a w.wsd() result into the stored 12-column layout.
## drop.first = TRUE omits the first downloaded row.
wind.rows <- function(new.data, drop.first = FALSE) {
  src <- new.data$Data
  if (drop.first) {
    src <- src[-1, , drop = FALSE]
  }
  out <- data.frame(array(dim = c(nrow(src), length(wind.df.cols))))
  colnames(out) <- wind.df.cols
  if (nrow(src) == 0) {
    return(out)
  }
  out[, 1] <- new.data$Code
  out[, 2] <- src$SEC_NAME
  out[, 3] <- gsub("-", "", src$DATETIME)
  out[, 4] <- 151500000
  out[, 5:8] <- src[4:7] * 10000   # prices are stored multiplied by 10000
  out[, 9:12] <- src[8:11]
  out
}

## Append the downloaded rows (minus the first, which repeats the last traded
## stored day) to the stored history. Stored prices are multiplied by `ratio`.
## Rows without an open price and rows repeating an earlier date (suspension
## days already stored) are dropped.
extend.history <- function(old.df, new.data, ratio = 1) {
  hist <- old.df[, 1:12]
  colnames(hist) <- wind.df.cols
  hist$date <- as.character(hist$date)
  if (!identical(ratio, 1)) {
    hist[, 5:8] <- hist[, 5:8] * ratio
  }
  fresh <- wind.rows(new.data, drop.first = TRUE)
  # Building the frame by row binding avoids the descending index sequence
  # (n + 1):n that the index-based fill produced when nothing new came back.
  out <- if (nrow(fresh) > 0) rbind(hist, fresh) else hist
  out <- out[!is.na(out$open), ]
  out[!duplicated(out$date), ]
}

## Return the message describing the first failed sanity check on a completed
## frame, or NULL when all checks pass.
frame.problem <- function(wind.df, dapan.date) {
  if (nrow(wind.df) == 0) {
    return("數(shù)據(jù)出錯(在市股票不可能全為NA導(dǎo)致數(shù)據(jù)框行為0)")
  }
  if (any(is.na(wind.df))) {
    return("數(shù)據(jù)出錯(數(shù)據(jù)中仍有NA)")
  }
  if (any(wind.df[, 5:8] == 0)) {
    return("數(shù)據(jù)出錯(數(shù)據(jù)中開高低收存在0)")
  }
  if (anyDuplicated(wind.df$date) > 0) {
    return("數(shù)據(jù)出錯(數(shù)據(jù)中存在日期相同)")
  }
  if (any(wind.df$date != sort(wind.df$date))) {
    return("數(shù)據(jù)出錯(數(shù)據(jù)中日期順序不對)")
  }
  # Every date must be one of the index (market) dates.
  if (!all(wind.df$date %in% dapan.date)) {
    return("數(shù)據(jù)出錯(數(shù)據(jù)中日期與大盤日期不符)")
  }
  NULL
}

## 1. Cut-off date (when completing today's data, run after 5 pm) ----
new.date <- "20171221"
end.date <- dash.date(new.date)

## 2. Codes of the stocks listed on the cut-off date ----
stock.code.df <- w.wset('sectorconstituent',
                        paste0("date=", end.date,
                               ";sectorid=a001010100000000"))
if (stock.code.df$ErrorCode != 0) {
  # Nothing below can work without the constituent list, so stop here instead
  # of continuing into an "object not found" error.
  stop(paste0("獲取數(shù)據(jù)出錯,錯誤代碼", stock.code.df$ErrorCode),
       call. = FALSE)
}
stock.code.sh.sz <- stock.code.df$Data$wind_code
new.stock.code <- substr(stock.code.sh.sz, 1, 6)
# str(stock.code.sh.sz)

## 3. Date of the previous data set ----
old.file.name <- gsub("股票數(shù)據(jù)", "", dir("股票數(shù)據(jù)"))
# Compare the dates numerically, as the original comment suggested.
last.data.day <- old.file.name[which.max(as.numeric(old.file.name))]

## 4. Stock codes present in the previous data set ----
old.stock.code <- sub("\\.csv$", "",
                      dir(paste0("股票數(shù)據(jù)/股票數(shù)據(jù)", last.data.day)))

## 5. Create the new folder ----
dir.create(paste0("股票數(shù)據(jù)/股票數(shù)據(jù)", new.date))

## 6. Completion ----

## Summary table: one row per listed stock followed by one row per stock that
## is kept although it is no longer listed.
all.stock.code <- unique(c(new.stock.code, old.stock.code))
general.information <- data.frame(array(dim = c(length(all.stock.code), 5)))
# general.information <- data.frame(array(dim=c(length(stock.code.sh.sz), 5)))
colnames(general.information) <- c("stock.code", "stock.name", "type",
                                   "starttime", "endtime")

## Index (market) dates
dapan.date <- read.csv(paste0("ts大盤數(shù)據(jù)/大盤數(shù)據(jù)", new.date,
                              "/399300.csv"))$date

## Progress bar
pb <- tkProgressBar("進(jìn)度", "已完成 %", 0, 100)
n.listed <- length(stock.code.sh.sz)

for (i in seq_len(n.listed)) {
  code <- stock.code.sh.sz[i]

  if (new.stock.code[i] %in% old.stock.code) {
    old.df <- fread(paste0("股票數(shù)據(jù)/股票數(shù)據(jù)", last.data.day, "/",
                           new.stock.code[i], ".csv"),
                    header = TRUE, integer64 = "numeric", sep = ",",
                    data.table = FALSE)

    ## Last stored row with a non-zero volume, i.e. the last traded day.
    traded <- which(old.df$volumw != 0)
    if (length(traded) == 0) {
      stop("no traded row found in stored data for ", code, call. = FALSE)
    }
    j <- max(traded)
    last.trading.day <- old.df$date[j]

    start.date <- dash.date(last.trading.day)
    new.data <- w.wsd(code, "trade_code, sec_name, open, high, low, close,volume, amt, free_turn,free_float_shares", start.date, end.date, "unit=1;PriceAdj=F")
    if (new.data$ErrorCode != 0) {
      print(paste0(code, "獲取數(shù)據(jù)出錯,錯誤代碼:", new.data$ErrorCode))
      break
    }

    ## NA anywhere in the download marks a delisted-and-relisted stock.
    relisted <- any(is.na(new.data$Data))
    same.price <- all(round(unlist(old.df[j, 5:8])) ==
                        round(unlist(new.data$Data[1, 4:7] * 10000)))

    if (any(is.na(new.data$Data[1, ]))) {
      ## The anchor day itself came back NA: only the new rows can be kept.
      print(paste(code, "退市又復(fù)市,但上個交易日數(shù)據(jù)獲取為NA,僅輸出新數(shù)據(jù)"))
      wind.df <- wind.rows(new.data)
      wind.df <- wind.df[!is.na(wind.df$open), ]
    } else if (same.price) {
      ## Adjusted price of the anchor day unchanged: plain append.
      if (relisted) {
        print(paste(code, "退市又復(fù)市,股權(quán)未變動"))
      }
      wind.df <- extend.history(old.df, new.data)
    } else {
      ## Adjusted price changed: rescale the stored prices, taking the
      ## adjustment factor from the open price of the anchor day.
      print(paste(code, if (relisted) "退市又復(fù)市,股權(quán)變動" else "股權(quán)變動"))
      ratio <- new.data$Data[1, 4] * 10000 / old.df[j, 5]
      wind.df <- extend.history(old.df, new.data, ratio)
    }
  } else {
    ## Newly listed stock: download from the previous data date onwards.
    print(paste(code, "新上市股票"))
    start.date <- dash.date(last.data.day)
    new.data <- w.wsd(code, "trade_code, sec_name, open, high, low, close, volume, amt,free_turn, free_float_shares", start.date, end.date, "unit=1;PriceAdj=F")
    if (new.data$ErrorCode != 0) {
      print(paste0(code, "新上市股票獲取數(shù)據(jù)出錯,錯誤代碼:", new.data$ErrorCode))
      break
    }
    wind.df <- wind.rows(new.data)
    wind.df <- wind.df[!is.na(wind.df$open), ]
  }

  ## Validation and output
  problem <- frame.problem(wind.df, dapan.date)
  if (!is.null(problem)) {
    print(paste(code, problem))
  } else {
    ## Cross-check the first stored day against a fresh Wind download.
    first.date <- dash.date(wind.df$date[1])
    test <- w.wsd(code, "trade_code, sec_name, open, high, low, close, volume, amt,free_turn, free_float_shares", first.date, first.date, "unit=1;PriceAdj=F")
    price.ok <- all(round(test$Data[4:7] * 10000) == round(wind.df[1, 5:8]))
    volume.ok <- test$Data$VOLUME == wind.df[1, 9]
    if (isTRUE(price.ok) && isTRUE(volume.ok)) {
      general.information[i, 1] <- substr(code, 1, 6)
      general.information[i, 2] <- wind.df$name[1]
      general.information[i, 3] <- substr(code, 8, 9)
      general.information[i, 4] <- wind.df[1, 3]
      general.information[i, 5] <- wind.df[nrow(wind.df), 3]
      write.csv(wind.df,
                paste0("股票數(shù)據(jù)/股票數(shù)據(jù)", new.date, "/",
                       new.stock.code[i], ".csv"),
                row.names = FALSE)
    } else {
      print(paste(code, "數(shù)據(jù)出錯(數(shù)據(jù)補(bǔ)全與wind不同)"))
    }
  }

  info <- sprintf("已完成 %d%%", round(i * 100 / n.listed))
  setTkProgressBar(pb, i * 100 / n.listed, sprintf("進(jìn)度 (%s)", info), info)
}

## Close the progress bar
close(pb)

## Keep the data of stocks that are no longer listed ----
## Their summary rows follow directly after the n.listed rows of listed
## stocks, so the counter starts at 0 and the first one lands on n.listed + 1.
tuishi.row <- 0
for (i in seq_along(old.stock.code)) {
  if (!(old.stock.code[i] %in% new.stock.code)) {
    print(paste(old.stock.code[i], "仍然退市"))
    # Read and write the file of the delisted stock itself
    # (old.stock.code[i], not new.stock.code[i]).
    old.df <- fread(paste0("股票數(shù)據(jù)/股票數(shù)據(jù)", last.data.day, "/",
                           old.stock.code[i], ".csv"),
                    header = TRUE, integer64 = "numeric", sep = ",",
                    data.table = FALSE)
    df <- old.df[, 1:12]
    write.csv(df,
              paste0("股票數(shù)據(jù)/股票數(shù)據(jù)", new.date, "/",
                     old.stock.code[i], ".csv"),
              row.names = FALSE)
    tuishi.row <- tuishi.row + 1
    info.row <- n.listed + tuishi.row
    general.information[info.row, 1] <- old.stock.code[i]
    general.information[info.row, 2] <- df$name[1]
    general.information[info.row, 3] <- substr(df$wind_code[1], 8, 9)
    general.information[info.row, 4] <- df[1, 3]
    general.information[info.row, 5] <- df[nrow(df), 3]
  }
}

## Write the summary workbook only when it contains no NA ----
if (all(!(is.na(general.information)))) {
  write.xlsx(general.information,
             paste0("概括文件/概括文件", new.date, ".xlsx"),
             row.names = FALSE)
} else {
  print("總概況文件中存在NA,需查驗(yàn)")
}

?

轉載于:https://www.cnblogs.com/lj0019/p/8094002.html

總結

以上是生活随笔為你收集整理的A股数据日级前复权数据补全的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。