pthread_cancel 退出线程引起死锁的问题和解决方法
Posix的線程終止有兩種情況:正常終止和非正常終止。線程主動調用pthread_exit()或者從線程函數中return都將使線程正常退出,這是可預見的退出方式;非正常終止是線程在其他線程的干預下(比如被pthread_cancel取消),或者由于自身運行出錯(比如訪問非法地址)而退出,這種退出方式是不可預見的。不論是可預見的線程終止還是異常終止,都會存在資源釋放的問題,在不考慮因運行出錯而退出的前提下,如何保證線程終止時能順利地釋放掉自己所占用的資源,特別是鎖資源,就是一個必須考慮解決的問題。
最經常出現的情形是資源獨占鎖的使用:線程為了訪問臨界資源而為其加上鎖,但在訪問過程中被外界取消,如果線程處于響應取消狀態,且采用異步方式響應,或者在釋放獨占鎖以前的運行路徑上存在取消點,則該臨界資源將永遠處于鎖定狀態得不到釋放。外界取消操作是不可預見的,因此的確需要一個機制來簡化用于資源釋放的編程。
POSIX線程庫中屬于cancellation點(取消點)的函數有:
            pthread_join
            pthread_cond_wait
            pthread_cond_timedwait
            pthread_testcancel
            sem_wait
            sigwait
            以上函數都是cancellation點。
            下面的這些系統函數也是cancellation點:
            accept
            fcntl
            open
            read
            write
            lseek
            close
            send
            sendmsg
            sendto
            connect
            recv
            recvfrom
            recvmsg
            system
            tcdrain
            fsync
            msync
            pause
            wait
            waitpid
            nanosleep
當其他線程對本線程調用pthread_cancel后,本線程會在執行到上述任一函數(取消點)時退出。
測試代碼如下:
/*
 * mylock.c - demonstrates how pthread_cancel() can leave a mutex locked
 * forever, and three ways to avoid it.
 *
 * Build: gcc -g mylock.c -lpthread -o mylock
 * Usage: ./mylock [mode]
 *   0 (default) thread 2 is cancelled while it holds mutexA; nothing unlocks
 *               it, so thread 1 blocks in pthread_mutex_lock() forever.
 *   1           same, but the cleanup handler registered by thread 2 unlocks
 *               mutexA when the thread is cancelled.
 *   2           main never cancels; it raises is_safe_exit and thread 2
 *               unlocks and exits on its own.
 *   3           thread 2 disables cancellation while it holds mutexA and only
 *               re-enables it (and calls pthread_testcancel) after unlocking.
 *
 * The program reads glibc-internal mutex fields (__data.__owner etc.) for
 * diagnostics, so it is glibc/Linux specific.
 */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE /* syscall(), usleep() */
#endif

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/syscall.h>

pthread_mutex_t mutexA;

/*
 * Mode/control flags. main() writes them and thread_function2() reads them.
 * NOTE(review): they are plain ints shared between threads without
 * synchronization; consider atomic_int if this is reused outside a demo.
 */
int thStop = 0;       /* set by main to make thread 2 park while holding mutexA */
int is_safemode = 0;  /* mode 2 */
int is_safe_exit = 0; /* mode 2: main asks thread 2 to leave */
int is_setcancle = 0; /* mode 3 */
int is_notify = 0;    /* mode 1 */

/*
 * Thread 1: repeatedly takes mutexA for one second, then releases it for one
 * second, printing the mutex state. Never returns.
 */
void *thread_function1(void *arg)
{
    (void)arg;
    long int pid = getpid();
    long int lwpId = syscall(SYS_gettid);
    pthread_t threadId = pthread_self();

    printf("thread[0x%lx][%ld][%ld] in function1\n",
           (unsigned long)threadId, lwpId, pid);

    while (1) {
        printf("function1 owner:%ld waiting lock owner:%d ...\n",
               lwpId, mutexA.__data.__owner);
        pthread_mutex_lock(&mutexA);
        printf("function1 mutex:owner::%d;count::%d;lock:%d\n",
               mutexA.__data.__owner, mutexA.__data.__count,
               mutexA.__data.__lock);
        printf("I an thread[0x%lx][%ld] function1\n",
               (unsigned long)threadId, lwpId);
        sleep(1);
        pthread_mutex_unlock(&mutexA);
        sleep(1);
    }
}

/*
 * Cleanup handler for thread 2. arg points to the thread's kernel tid (int).
 * Only acts in mode 1 (is_notify): if mutexA is currently owned by that tid,
 * unlock it so other threads can proceed.
 */
void clean_function2_res(void *arg)
{
    int lwpid = *(int *)arg;

    if (!is_notify) {
        return;
    }

    printf("clean function2 res lwpid:%d\n", lwpid);
    if (mutexA.__data.__owner == lwpid) {
        pthread_mutex_unlock(&mutexA);
        printf("clean function2 res lock\n");
    }
}

/*
 * Thread 2: behaves like thread 1 until main sets thStop. After that it keeps
 * mutexA locked and waits, so that main's pthread_cancel() (modes 0/1/3) or
 * is_safe_exit (mode 2) hits it while it owns the lock.
 */
void *thread_function2(void *arg)
{
    (void)arg;
    int oldstate = 0;
    int waitCount = 0;
    long int pid = getpid();
    int lwpId = (int)syscall(SYS_gettid);
    pthread_t threadId = pthread_self();

    printf("thread[0x%lx][%d][%ld] in function2\n",
           (unsigned long)threadId, lwpId, pid);

    pthread_cleanup_push(clean_function2_res, (void *)&lwpId);

    while (1) {
        printf("function2 owner:%d waiting lock owner:%d ...\n",
               lwpId, mutexA.__data.__owner);
        pthread_mutex_lock(&mutexA);
        printf("function2 mutex:owner::%d;count::%d;lock:%d\n",
               mutexA.__data.__owner, mutexA.__data.__count,
               mutexA.__data.__lock);

        if (thStop) {
            /* Park here with mutexA held. */
            while (1) {
                if (is_safemode && is_safe_exit) {
                    break;
                }
                /* Fixed: was "0x%ld", which printed decimal after "0x". */
                printf("waiting thread[0x%lx] cancel...\n",
                       (unsigned long)threadId);
                usleep(500000); /* cancellation point */

                if (is_setcancle) {
                    waitCount++;
                    /* Ignore cancel requests while the lock is held. */
                    pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate);
                    printf("pthread cancel oldstatue:%d;[%d]:[%d]\n",
                           oldstate, PTHREAD_CANCEL_DISABLE,
                           PTHREAD_CANCEL_ENABLE);

                    if (waitCount > 10) {
                        printf("it will into cancel pthread point\n");
                        /* Release the lock first, then allow cancellation. */
                        pthread_mutex_unlock(&mutexA);
                        sleep(1);
                        pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
                        printf("waiting cancel testcancel point\n");
                        pthread_testcancel();
                        printf("test cancel point\n");
                        while (1) {
                            printf("waiting cancel pthread...\n");
                            usleep(500000);
                        }
                    }
                }
            }
        } else {
            printf("I an thread[0x%lx][%d] function2\n",
                   (unsigned long)threadId, lwpId);
            sleep(1);
        }

        pthread_mutex_unlock(&mutexA);
        sleep(1);
        if (is_safemode && is_safe_exit) {
            break;
        }
    }

    if (is_safemode) {
        printf("exit pthread by safe mode\n");
        pthread_exit(NULL);
    }

    pthread_cleanup_pop(0);
    return NULL;
}

/*
 * Parses the mode from argv[1], starts both threads, lets them run for ten
 * seconds, then stops thread 2 according to the selected mode and keeps
 * printing from the main thread so the state of thread 1 can be observed.
 */
int main(int avgc, char **pp_argv)
{
    pthread_t worker1;
    pthread_t worker2;
    unsigned int count = 0;
    void *ret = NULL; /* pthread_join stores a void*; an int here overflows */
    int mode = 0;
    int rc;

    if (avgc >= 2) {
        mode = atoi(pp_argv[1]);
    }

    switch (mode) {
    case 1:
        is_notify = 1;
        break;
    case 2:
        is_safemode = 1;
        break;
    case 3:
        is_setcancle = 1;
        break;
    case 0:
    default:
        break;
    }

    printf("notify clean mode:%d\n", is_notify);
    printf("safe mode:%d\n", is_safemode);
    printf("set cancle mode:%d\n", is_setcancle);

    is_safe_exit = 0;
    thStop = 0;
    pthread_mutex_init(&mutexA, NULL);

    rc = pthread_create(&worker1, NULL, thread_function1, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_create failed: %s\n", strerror(rc));
        return 1;
    }
    printf("create thread:0x%lx\n", (unsigned long)worker1);

    rc = pthread_create(&worker2, NULL, thread_function2, NULL);
    if (rc != 0) {
        fprintf(stderr, "pthread_create failed: %s\n", strerror(rc));
        return 1;
    }
    printf("create thread:0x%lx\n", (unsigned long)worker2);

    do {
        sleep(1);
        count++;
        printf("main thread count:%u...\n", count);
    } while (count < 10);

    thStop = 1;
    sleep(3); /* give thread 2 time to park while holding mutexA */

    if (is_safemode) {
        is_safe_exit = 1;
    } else {
        pthread_cancel(worker2);
    }
    pthread_join(worker2, &ret);

    /* Keep the process alive so thread 1's progress (or deadlock) is visible. */
    while (1) {
        printf("main thread function...\n");
        sleep(1);
    }

    /* Not reached. */
    pthread_mutex_destroy(&mutexA);
    return 0;
}
復現問題:./mylock 0  強制進入死鎖環境;
主線程設置thStop = 1;讓thread_function2進入持鎖等待狀態,然后調用pthread_cancel(mthid);終止線程thread_function2。thread_function2退出時沒有釋放互斥鎖,導致thread_function1無法獲取互斥鎖,發生死鎖而停止運行;
解決方案1,注冊線程清理回調
void pthread_cleanup_push(void (*routine) (void *), void *arg)
void pthread_cleanup_pop(int execute)
pthread_cleanup_push()/pthread_cleanup_pop()采用先入后出的棧結構管理,void routine(void *arg)函數在調用pthread_cleanup_push()時壓入清理函數棧,多次對pthread_cleanup_push() 的調用將在清理函數棧中形成一個函數鏈;從pthread_cleanup_push的調用點到pthread_cleanup_pop之間的程序段中的終止動作(包括調用pthread_exit()、pthread_cancel和異常終止,不包括return)都將執行pthread_cleanup_push()所指定的清理函數。
運行結果參考 ./mylock 1
解決方案2,線程安全退出,外部線程不要采用pthread_cancel結束線程,而是采用通知方法,由本線程接受到消息或參數后釋放資源安全退出,
運行結果參考 ./mylock 2
解決方案3,在訪問公共資源(持有鎖)期間屏蔽線程對pthread_cancel的響應。
pthread_setcancelstate(int state, int *old_state)用于設置本線程對Cancel信號的反應,state有兩種值:PTHREAD_CANCEL_ENABLE(缺省)和 PTHREAD_CANCEL_DISABLE,分別表示收到信號后設為CANCELED狀態和忽略CANCEL信號繼續運行;old_state如果不為 NULL則存入原來的Cancel狀態以便恢復。
pthread_setcancelstate(PTHREAD_CANCEL_DISABLE,&oldstate);
/***free resource安全執行完代碼***/
pthread_setcancelstate(PTHREAD_CANCEL_ENABLE,NULL);
設置取消點 pthread_testcancel,
運行結果參考 ./mylock 3
?
gdb 調試mutexA 數據:
#gdb ./mylock
(gdb) b thread_function1
(gdb) n
(gdb) ptype pthread_mutex_t
(gdb) p &mutexA
$4 = (pthread_mutex_t *) 0x602100 <mutexA>
(gdb) p {pthread_mutex_t} 0x602100
$5 = {__data = {__lock = 1, __count = 0, __owner = 10237, __nusers = 1, __kind = 0, __spins = 0, __elision = 0, __list = {__prev = 0x0,
      __next = 0x0}}, __size = "\001\000\000\000\000\000\000\000\375'\000\000\001", '\000' <repeats 26 times>, __align = 1}
(gdb) p mutexA
(gdb) b thread_function2
?
?
?
?
總結
以上是生活随笔為你收集整理的pthread_cancel 退出线程引起死锁的问题和解决方法的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: flink如何设置以每天零点到第二天零点
- 下一篇: linux用户(user)和用户组(gr