日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 编程资源 > 编程问答 > 内容正文

编程问答

金融风控实战——集成学习

發布時間:2025/4/5 编程问答 18 豆豆
生活随笔 收集整理的這篇文章主要介紹了 金融风控实战——集成学习,小編覺得挺不錯的,現在分享給大家,幫大家做個參考。

xgb依然要去除共線性、變量選擇

lr bivar要嚴格單調,xgb、lightGBM不需要

LightGBM評分卡

import math
import random
import time

import lightgbm as lgb
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, roc_curve, auc
from sklearn.model_selection import train_test_split

# Load the modelling sample. Later cells read the columns `obs_mth`,
# `bad_ind` and the feature columns listed in `lst` from this frame.
data = pd.read_csv('Bcard.txt')
data.head()

data.shape
# (95806, 13)

# Look at the month distribution; the last month is used as the
# out-of-time validation set.
data.obs_mth.unique()
# array(['2018-10-31', '2018-07-31', '2018-09-30', '2018-06-30',
#        '2018-11-30'], dtype=object)

# Month held out for out-of-time validation (named once instead of
# repeating the literal in both filters).
oot_month = '2018-11-30'
df_train = data[data.obs_mth != oot_month].reset_index().copy()
val = data[data.obs_mth == oot_month].reset_index().copy()

# All candidate variables: the ones ending in "info" are individual-behaviour
# outputs of an in-house unsupervised system; the ones ending in "score" are
# paid external credit-bureau data.
lst = [
    'person_info',
    'finance_info',
    'credit_info',
    'act_info',
    'td_score',
    'jxl_score',
    'mj_score',
    'rh_score',
]

# Most recent observation month first.
df_train = df_train.sort_values(by='obs_mth', ascending=False)
df_train.head()

# Split the training sample into five equally sized, time-ordered folds.
# Rows are sorted with the most recent observation month first, so fold 1
# holds the newest 20% of rows and fold 5 the oldest.
df_train = df_train.sort_values(by='obs_mth', ascending=False)

n_rows = len(df_train)
# Fractional position of every row in the sorted frame: 1/n, 2/n, ..., 1.
rank_frac = np.arange(1, n_rows + 1) / n_rows

# side='left' maps x <= 0.2 -> 1, 0.2 < x <= 0.4 -> 2, ..., x > 0.8 -> 5,
# which is exactly the original if/elif ladder without the Python-level loop.
fold_edges = [0.2, 0.4, 0.6, 0.8]
df_train['rank'] = np.searchsorted(fold_edges, rank_frac, side='left') + 1

df_train.head()

df_train['rank'].groupby(df_train['rank']).count()
# rank
# 1    15966
# 2    15966
# 3    15966
# 4    15966
# 5    15967
# Name: rank, dtype: int64


# Define the LightGBM training helper.
def LGB_test(train_x, train_y, test_x, test_y):
    """Fit a LightGBM binary classifier with early stopping on AUC.

    Args:
        train_x: Feature matrix used for fitting.
        train_y: Binary target aligned with ``train_x``.
        test_x: Feature matrix of the hold-out set monitored for early stopping.
        test_y: Binary target aligned with ``test_x``.

    Returns:
        A tuple ``(clf, auc)`` where ``clf`` is the fitted ``LGBMClassifier``
        and ``auc`` is ``clf.best_score_['valid_1']['auc']``, i.e. the best
        AUC recorded on the second evaluation set (``test_x``/``test_y``).
    """
    from multiprocessing import cpu_count

    clf = lgb.LGBMClassifier(
        boosting_type='gbdt',
        num_leaves=31,
        reg_alpha=0.0,
        reg_lambda=1,
        max_depth=2,
        # Number of boosting rounds. The original also passed the alias
        # `num_iterations=800`; passing both only produces a duplicate-
        # parameter warning, so the alias is dropped.
        n_estimators=800,
        objective='binary',
        subsample=0.7,
        colsample_bytree=0.7,
        subsample_freq=1,
        learning_rate=0.05,
        min_child_weight=50,
        random_state=None,
        n_jobs=cpu_count() - 1,
    )
    # NOTE(review): newer LightGBM releases replace the `early_stopping_rounds`
    # fit argument with the `lgb.early_stopping(...)` callback -- confirm
    # against the installed version.
    clf.fit(
        train_x,
        train_y,
        eval_set=[(train_x, train_y), (test_x, test_y)],
        eval_metric='auc',
        early_stopping_rounds=100,
    )
    print(clf.n_features_)
    return clf, clf.best_score_['valid_1']['auc']


# ---------------------------------------------------------------------------
# Fold-wise cross validation: each `rank` fold is held out once.
# ---------------------------------------------------------------------------
feature_lst = {}
ks_train_lst = []
ks_test_lst = []
# Sorted so the folds are visited in a deterministic order.
for rk in sorted(set(df_train['rank'])):
    # NOTE(review): inherited comment said "test set: data after 8.18 is used
    # as the out-of-time validation set"; the code below simply holds out the
    # current rank fold.
    # Define the model's training and test sets.
    ttest = df_train[df_train['rank'] == rk]
    ttrain = df_train[df_train['rank'] != rk]

    train = ttrain[lst]
    train_y = ttrain.bad_ind

    test = ttest[lst]
    test_y = ttest.bad_ind

    start = time.time()
    model, auc = LGB_test(train, train_y, test, test_y)
    end = time.time()

    # Store the model's feature importances in `feature`.
    feature = pd.DataFrame(
        {
            'name': model.booster_.feature_name(),
            'importance': model.feature_importances_,
        }
    ).sort_values(by=['importance'], ascending=False)

    # Compute KS and AUC on the training and test sets.
    y_pred_train_lgb = model.predict_proba(train)[:, 1]
    y_pred_test_lgb = model.predict_proba(test)[:, 1]

    train_fpr_lgb, train_tpr_lgb, _ = roc_curve(train_y, y_pred_train_lgb)
    test_fpr_lgb, test_tpr_lgb, _ = roc_curve(test_y, y_pred_test_lgb)

    train_ks = abs(train_fpr_lgb - train_tpr_lgb).max()
    test_ks = abs(test_fpr_lgb - test_tpr_lgb).max()

    train_auc = metrics.auc(train_fpr_lgb, train_tpr_lgb)
    test_auc = metrics.auc(test_fpr_lgb, test_tpr_lgb)

    ks_train_lst.append(train_ks)
    ks_test_lst.append(test_ks)

    # Keep only features whose importance reaches 20 in this fold.
    feature_lst[str(rk)] = feature[feature.importance >= 20].name

train_ks = np.mean(ks_train_lst)
test_ks = np.mean(ks_test_lst)

# Features that pass the importance threshold in every fold. The original
# hard-coded folds '1'..'5'; intersecting whatever folds exist gives the same
# result for five folds and keeps working if the fold count changes.
ft_lst = dict(feature_lst)
fn_lst = (
    list(set.intersection(*(set(names) for names in ft_lst.values())))
    if ft_lst
    else []
)

print('train_ks: ', train_ks)
print('test_ks: ', test_ks)
print('ft_lst: ', fn_lst)
# [LightGBM] [Warning] Unknown parameter: max_features
# [1] training's auc: 0.726731 training's binary_logloss: 0.0827979 valid_1's auc: 0.742666 valid_1's binary_logloss: 0.12066
# [2] training's auc: 0.769499 training's binary_logloss: 0.0822062 valid_1's auc: 0.753919 valid_1's binary_logloss: 0.119728
# [3] training's auc: 0.788952 training's binary_logloss: 0.0816227 valid_1's auc: 0.762911 valid_1's binary_logloss: 0.118777
# . . .
# [188] training's auc: 0.827082 training's binary_logloss: 0.0777181 valid_1's auc: 0.786679 valid_1's binary_logloss: 0.078782
# [189] training's auc: 0.827128 training's binary_logloss: 0.0777136 valid_1's auc: 0.786756 valid_1's binary_logloss: 0.0787781
# [190] training's auc: 0.827162 training's binary_logloss: 0.0777108 valid_1's auc: 0.786696 valid_1's binary_logloss: 0.0787811
# train_ks: 0.4907124806547195
# test_ks: 0.47382530047645305
# ft_lst: ['credit_info', 'person_info', 'finance_info']

# ---------------------------------------------------------------------------
# Final model: fit on all training months, evaluate on the held-out month.
# ---------------------------------------------------------------------------
lst = ['person_info', 'finance_info', 'credit_info', 'act_info']

train = data[data.obs_mth != '2018-11-30'].reset_index().copy()
evl = data[data.obs_mth == '2018-11-30'].reset_index().copy()

x = train[lst]
y = train['bad_ind']

evl_x = evl[lst]
evl_y = evl['bad_ind']

model, auc = LGB_test(x, y, evl_x, evl_y)

y_pred = model.predict_proba(x)[:, 1]
fpr_lgb_train, tpr_lgb_train, _ = roc_curve(y, y_pred)
train_ks = abs(fpr_lgb_train - tpr_lgb_train).max()
print('train_ks : ', train_ks)

y_pred = model.predict_proba(evl_x)[:, 1]
fpr_lgb, tpr_lgb, _ = roc_curve(evl_y, y_pred)
evl_ks = abs(fpr_lgb - tpr_lgb).max()
print('evl_ks : ', evl_ks)

from matplotlib import pyplot as plt

# The curves come from the LightGBM model above; the original legend labelled
# them "LR", which was misleading.
plt.plot(fpr_lgb_train, tpr_lgb_train, label='train LGB')
plt.plot(fpr_lgb, tpr_lgb, label='evl LGB')
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC Curve')
plt.legend(loc='best')
plt.show()
# [1] training's binary_logloss: 0.090317 training's auc: 0.712883 valid_1's binary_logloss: 0.0986629 valid_1's auc: 0.678619
# Training until validation scores don't improve for 100 rounds.
# [2] training's binary_logloss: 0.0896369 training's auc: 0.779216 valid_1's binary_logloss: 0.0978883 valid_1's auc: 0.755811
# [3] training's binary_logloss: 0.0885026 training's auc: 0.779149 valid_1's binary_logloss: 0.0966811 valid_1's auc: 0.749375
# [4] training's binary_logloss: 0.087998 training's auc: 0.780539 valid_1's binary_logloss: 0.0961527 valid_1's auc: 0.759009
# ...
# [179] training's binary_logloss: 0.0784288 training's auc: 0.812571 valid_1's binary_logloss: 0.0900886 valid_1's auc: 0.779962
# [180] training's binary_logloss: 0.0784267 training's auc: 0.812602 valid_1's binary_logloss: 0.0900914 valid_1's auc: 0.779887
# [181] training's binary_logloss: 0.078425 training's auc: 0.812601 valid_1's binary_logloss: 0.0900941 valid_1's auc: 0.779927
# [182] training's binary_logloss: 0.0784229 training's auc: 0.8126 valid_1's binary_logloss: 0.0900964 valid_1's auc: 0.779932
# Early stopping, best iteration is:
# [82] training's binary_logloss: 0.0788374 training's auc: 0.811646 valid_1's binary_logloss: 0.089958 valid_1's auc: 0.779946
# 4
# train_ks : 0.4801091876625077
# evl_ks : 0.4416674980164514


LightGBM其實效果確實是比LR要好的,但是我們LR也可以逼近這個效果,下節課我們會具體來做。

評分卡公式變形
$$600+50 \times \frac{\ln \frac{P_{0}}{P_{1}}}{\ln 2}$$

其中 $P_{0}$ 為好人,$P_{1}$ 為壞人。

$$600+50 \times \frac{\ln \frac{1-xbeta}{xbeta}}{\ln 2}$$

$$600+50 \times \log_{2} \frac{1-xbeta}{xbeta}$$

# ['person_info','finance_info','credit_info','act_info']
# One-key score calculation.
def score(xbeta, base_score=1000, pdo=500):
    """Convert a predicted bad probability into a scorecard score.

    Computes ``base_score + pdo * log2((1 - xbeta) / xbeta)``, i.e. the base-2
    log of the good-to-bad odds. The original expression was
    ``math.log2(1 - xbeta) / xbeta``, which divides the logarithm by ``xbeta``
    instead of taking the logarithm of the odds ratio.

    Args:
        xbeta: Predicted probability of the bad class.
        base_score: Score assigned when good and bad are equally likely.
            Defaults to the constant used by the original code; the
            accompanying article formula uses 600.
        pdo: Points added each time the good-to-bad odds double. Defaults to
            the constant used by the original code; the accompanying article
            formula uses 50.

    Returns:
        The score as a float; higher means lower predicted risk.
    """
    # Keep the probability strictly inside (0, 1) so neither the division nor
    # the logarithm can fail on a saturated prediction.
    eps = 1e-15
    p_bad = min(max(xbeta, eps), 1 - eps)
    # Probability of good / probability of bad.
    return base_score + pdo * math.log2((1 - p_bad) / p_bad)


evl['xbeta'] = model.predict_proba(evl_x)[:, 1]
evl['score'] = evl['xbeta'].apply(score)

fpr_lr, tpr_lr, _ = roc_curve(evl_y, evl['score'])
evl_ks = abs(fpr_lr - tpr_lr).max()
print('val_ks : ', evl_ks)
# val_ks : 0.4416674980164514

# ---------------------------------------------------------------------------
# Build the binned KS report.
# ---------------------------------------------------------------------------
row_num, col_num = 0, 0
bins = 20
Y_predict = evl['xbeta']
Y = evl_y
nrows = Y.shape[0]

# Pair every prediction with its label positionally, then order from the
# highest predicted bad probability to the lowest.
lis = list(zip(Y_predict, Y))
ks_lis = sorted(lis, key=lambda x: x[0], reverse=True)
bin_num = int(nrows / bins + 1)

bad = sum(1 for (p, y) in ks_lis if y > 0.5)
good = sum(1 for (p, y) in ks_lis if y <= 0.5)
# Denominator of the cumulative bad share; hoisted out of the loop.
bad_total = sum(evl_y)

bad_cnt, good_cnt = 0, 0
KS = []
BAD = []
GOOD = []
BAD_CNT = []
GOOD_CNT = []
BAD_PCTG = []
BADRATE = []
dct_report = {}
for j in range(bins):
    ds = ks_lis[j * bin_num: min((j + 1) * bin_num, nrows)]
    bad1 = sum(1 for (p, y) in ds if y > 0.5)
    good1 = sum(1 for (p, y) in ds if y <= 0.5)
    bad_cnt += bad1
    good_cnt += good1

    # Guard every ratio: trailing bins are empty when nrows is small, and a
    # sample may contain only one class.
    bad_pctg = round(bad_cnt / bad_total, 3) if bad_total else 0.0
    badrate = round(bad1 / (bad1 + good1), 3) if (bad1 + good1) else 0.0
    cum_bad_share = bad_cnt / bad if bad else 0.0
    cum_good_share = good_cnt / good if good else 0.0
    ks = round(math.fabs(cum_bad_share - cum_good_share), 3)

    KS.append(ks)
    BAD.append(bad1)
    GOOD.append(good1)
    BAD_CNT.append(bad_cnt)
    GOOD_CNT.append(good_cnt)
    BAD_PCTG.append(bad_pctg)
    BADRATE.append(badrate)

dct_report['KS'] = KS
dct_report['BAD'] = BAD
dct_report['GOOD'] = GOOD
dct_report['BAD_CNT'] = BAD_CNT
dct_report['GOOD_CNT'] = GOOD_CNT
dct_report['BAD_PCTG'] = BAD_PCTG
dct_report['BADRATE'] = BADRATE

val_repot = pd.DataFrame(dct_report)
val_repot

《新程序員》:云原生和全面數字化實踐,50位技術專家共同創作,文字、視頻、音頻交互閱讀

總結

以上是生活随笔為你收集整理的金融风控实战——集成学习的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。

主站蜘蛛池模板: 中文字幕免费观看 | 6—12呦国产精品 | 日韩免费视频一区二区视频在线观看 | 国产视频二| www日韩精品| 中国黄色大片 | 久久aaaa片一区二区 | 亚洲午夜无码av毛片久久 | 欧美精品视 | av一区二区三区在线 | 免费无毒av| 成人97| 深夜福利网 | 丁香四月婷婷 | 黄片毛片在线免费观看 | 国产寡妇亲子伦一区二区三区四区 | 亚洲高清久久 | 精品人伦一区二区三区蜜桃免费 | 我们好看的2018视频在线观看 | 国产在线观看黄色 | 日韩三级电影网址 | av波多野吉衣 | 开心激情综合网 | 男女黄网站 | 色吊丝av中文字幕 | 国产草草视频 | 亚洲第一欧美 | 久久精品激情 | 免费午夜网站 | 最近最新mv字幕观看 | 91免费视频黄 | 日韩在线播放中文字幕 | 一本大道av伊人久久综合 | 美女黄视频在线观看 | 国产大片av | 狠狠搞视频 | 欧美又大又硬又粗bbbbb | 日韩不卡一二三区 | 久久精品美乳 | 亚洲欧美福利 | 国产精品成人无码 | 91禁在线看| 亚洲精品在线电影 | 99成人免费视频 | 国产aⅴ精品一区二区果冻 台湾性生生活1 | 国模视频一区二区 | 成人午夜视频一区二区播放 | 免费观看污视频 | a级无遮挡超级高清-在线观看 | 狠狠操欧美 | 日韩av麻豆 | 一区二区在线 | 久久久久久久久久久影视 | 爱情岛论坛亚洲入口 | 国产操片| 校园春色中文字幕 | 亚洲精品伊人 | 亚洲一区自拍偷拍 | 欧美91| 国产乱叫456在线 | 国产视频在线一区二区 | 久久精品视频6 | 日本一级视频 | 久久女同互慰一区二区三区 | 国产精品福利在线 | 欧美用舌头去添高潮 | 久久久久久免费 | 欧美图片自拍偷拍 | 亚洲男人天堂2018 | 不卡av电影在线 | 国产黄色大片 | 成人国产精品久久久网站 | 99极品视频 | 中文字幕淫 | 一边摸上面一边摸下面 | 欧日韩在线 | 国产av无码专区亚洲av毛网站 | 插少妇视频 | 岛国av毛片 | 色综合久| 精品无码一区二区三区在线 | 极品超粉嫩尤物69xx | 国产国产精品 | 国产99视频在线观看 | 影音先锋激情在线 | 欧美日韩三级视频 | 深夜视频免费在线观看 | 婷婷久久五月 | 99久久99久久精品国产片桃花 | 色婷婷777 | 99热这里只有精品8 国产一卡二 | 小毛片在线观看 | 精品免费一区二区 | 欧美xxxxx高潮喷水麻豆 | 九九热在线视频观看 | 国产剧情自拍 | 男女做激情爱呻吟口述全过程 | 911亚洲精选 | 国产片91 |