BUG_ON()、panic()、dump_stack()几种内核调试手段
Linux內核有一些方法可以用來方便標記bug,提供斷言并輸出信息。最常用的兩個是BUG()和BUG_ON()。
當被調用的時候,它們會引發oops,導致棧的回溯和錯誤信息的打印。這些聲明引發oops的具體方式跟硬件的體系結構是相關的。大部分體系結構把BUG()和BUG_ON()定義成某種非法操作,這樣自然會產生需要的oops。你可以把這些調用當作斷言使用,想要斷言某種情況不該發生:
if (bad_thing) BUG(); //需要linux 內核開啟General setup->Configure standard kernel features->BUG() support
或者使用更好的形式:
BUG_ON(bad_thing);
可以用panic()引發更嚴重的錯誤。調用panic()不但會打印錯誤消息(Oops)而且還會掛起整個系統。顯然,你只應該在極端惡劣的情況下使用它:
if (terrible_thing)panic("foo is %ld\n", foo);
有些時候,你只是需要在終端上打印一下棧的回溯信息來幫助你測試。此時可以使用dump_stack()。它只在終端上打印寄存器上下文和函數的跟蹤線索:
if (!debug_check) {printk(KERN_DEBUG "provide some information...\n");dump_stack(); }
舉個例子程序
這個例子是參考了別人的代碼,我按照這個代碼執行下給大伙看看。通過觸發proc下的文件來觸發不同的執行函數。
/*************************************************************************> File Name: pro.c> Author:> Mail:> Created Time: 2020年03月07日 星期六 11時19分38秒************************************************************************/#include <linux/module.h> #include <linux/kernel.h> #include <linux/proc_fs.h> #include <linux/sched.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <asm/uaccess.h> #include <asm/types.h>// Module to make a read entry in the proc file system. // Module to write a command line calculatorMODULE_LICENSE("GPL"); MODULE_AUTHOR("329410527@qq.com");#define MY_PROC_ENTRY "bugon-test"struct proc_dir_entry *proc; int len; char *msg = NULL; #define DATA_SIZE 1024 // We can keep 1024 bytes of data with us./** Function to write to the proc. Here we free the old data, and allocate new space and copy the data to* that newly allocated area.*/#define MY_BUG_ON 1 #define MY_BUG 2 #define MY_DUMPSTACK 3 #define MY_PANIC 4 static int param = 100;/*文件的寫函數(shù)*/ static ssize_t my_proc_write(struct file *filp, const char __user * buffer, size_t count, loff_t *pos) {char *str;str = kmalloc((size_t) count, GFP_KERNEL);if (copy_from_user(str, buffer, count)) {kfree(str);return -EFAULT;}sscanf(str, "%d", ¶m);printk("param has been set to %d\n", param);kfree(str);switch (param) {case MY_BUG_ON:BUG_ON(param);break;case MY_BUG:BUG();break;case MY_DUMPSTACK:dump_stack();break;case MY_PANIC:panic("I am panicking, Why? 
-- you told so");break;default:printk("unknow param...\n");}return count; } /*讀proc文件*/ ssize_t my_proc_read(struct file *filp,char *buf,size_t count, loff_t *offp ) {int err;char *data = PDE_DATA(file_inode(filp));if ((int) (*offp) > len) {return 0;}printk(KERN_INFO "Reading the proc entry, len of the file is %d", len);if(!(data)) {printk(KERN_INFO "NULL DATA");return 0;}if (count == 0) {printk(KERN_INFO "Read of size zero, doing nothing.");return count;} else {printk(KERN_INFO "Read of size %d", (int) count);}count = len + 1; // +1 to read the \0err = copy_to_user(buf, data, count); // +1 for \0printk(KERN_INFO "Read data : %s", buf);*offp = count;if (err) {printk(KERN_INFO "Error in copying data.");} else {printk(KERN_INFO "Successfully copied data.");}return count; }/*proc文件系統(tǒng)的fops*/ struct file_operations proc_fops = {.read = my_proc_read,.write = my_proc_write, };int create_new_proc_entry(void) {int i;char *DATA = "Hello People";len = strlen(DATA);/*申請內(nèi)存空間*/msg = kmalloc((size_t) DATA_SIZE, GFP_KERNEL); // +1 for \0if (msg != NULL) {printk(KERN_INFO "Allocated memory for msg");} else {return -1;}/*把字符串拷貝到msg*/strncpy(msg, DATA, len+1);for (i=0; i < len +1 ; i++) {printk(KERN_INFO "%c", msg[i]);}/*建立proc文件系統(tǒng)*/proc = proc_create_data(MY_PROC_ENTRY, 0666, NULL, &proc_fops, msg);if (proc) {return 0;}return -1; }int __init proc_bug_on_init (void) {if (create_new_proc_entry()){return -1;}return 0; }void __exit proc_bug_on_cleanup(void) {remove_proc_entry(MY_PROC_ENTRY, NULL); }module_init(proc_bug_on_init); module_exit(proc_bug_on_cleanup);Makefile文件:
# Build file for the out-of-tree module pro.ko.
#
# This file is parsed twice: once by the plain `make` invocation in the module
# directory, and once by the kernel build system (kbuild), which sets
# KERNELRELEASE and only needs the obj-m line.
PWD := $(shell pwd)
KVER := $(shell uname -r)
# Kernel build tree to compile against; override with `make KDIR=...`.
KDIR ?= /lib/modules/$(KVER)/build

# Print the current directory and kernel release each time the file is parsed.
$(info $(PWD))
$(info $(KVER))

ifneq ($(KERNELRELEASE),)
obj-m := pro.o
else
.PHONY: all install clean

# Use $(MAKE) rather than a literal `make` so that flags such as -j and -n
# are passed on to the kernel build.
all:
	$(MAKE) -C $(KDIR) M=$(PWD) modules

install:
	$(MAKE) -C $(KDIR) M=$(PWD) modules_install

clean:
	$(MAKE) -C $(KDIR) M=$(PWD) clean
endif
執行make命令后生成文件
weiqifa0@weiqifa-System-Product-Name:/ssd/weiqifa0/linux-c/pro-module$ make /ssd/weiqifa0/linux-c/pro-module 5.0.0-23-generic make -C /lib/modules/5.0.0-23-generic/build M=/ssd/weiqifa0/linux-c/pro-module modules make[1]: Entering directory '/usr/src/linux-headers-5.0.0-23-generic' /usr/src/linux-headers-5.0.0-23-generic 5.0.0-23-genericCC [M] /ssd/weiqifa0/linux-c/pro-module/pro.oBuilding modules, stage 2. /usr/src/linux-headers-5.0.0-23-generic 5.0.0-23-genericMODPOST 1 modulesCC /ssd/weiqifa0/linux-c/pro-module/pro.mod.oLD [M] /ssd/weiqifa0/linux-c/pro-module/pro.ko make[1]: Leaving directory '/usr/src/linux-headers-5.0.0-23-generic' weiqifa0@weiqifa-System-Product-Name:/ssd/weiqifa0/linux-c/pro-module$執(zhí)行加載模塊
sudo insmod pro.ko
執行觸發bugon操作
sudo echo 1 > /proc/bugon-test
[930845.292938] ------------[ cut here ]------------ [930845.292939] kernel BUG at /ssd/weiqifa0/linux-c/pro-module/pro.c:57! [930845.292942] invalid opcode: 0000 [#2] SMP NOPTI [930845.292944] CPU: 2 PID: 12116 Comm: echo Tainted: G D OE 5.0.0-23-generic #24~18.04.1-Ubuntu [930845.292944] Hardware name: System manufacturer System Product Name/PRIME Z370-P II, BIOS 0602 03/14/2019 [930845.292946] RIP: 0010:my_proc_write.cold.3+0x75/0x77 [pro] [930845.292947] Code: 36 01 d6 eb 1d 0f 0b 83 f8 03 74 11 83 f8 04 75 e6 48 c7 c7 c0 f0 6e c0 e8 2c 8b fa d5 e8 66 2b 92 d6 48 89 d8 e9 8a fe ff ff <0f> 0b 48 c7 c7 76 f1 6e c0 31 db 49 c7 c4 9e f1 6e c0 e8 bf 36 01 [930845.292948] RSP: 0018:ffffb3e60a44fe50 EFLAGS: 00010246 [930845.292949] RAX: 0000000000000000 RBX: 0000000000000002 RCX: 0000000000eec799 [930845.292949] RDX: 0000000000eec798 RSI: ffff907726aa7040 RDI: ffff907726403c80 [930845.292950] RBP: ffffb3e60a44fe68 R08: 0000000000027040 R09: ffffffffc06ee1c8 [930845.292950] R10: ffffd547606a6f80 R11: ffffb3e60a44fcc0 R12: ffff90771a9be310 [930845.292951] R13: 000055852def8410 R14: 000055852def8410 R15: ffff907683c1f300 [930845.292952] FS: 00007f6b9f9de580(0000) GS:ffff907726a80000(0000) knlGS:0000000000000000 [930845.292952] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [930845.292953] CR2: 00007f6b9f87f6f0 CR3: 000000014850a004 CR4: 00000000003606e0 [930845.292953] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [930845.292954] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [930845.292954] Call Trace: [930845.292957] proc_reg_write+0x3e/0x60 [930845.292959] __vfs_write+0x1b/0x40 [930845.292960] vfs_write+0xb1/0x1a0 [930845.292961] ksys_write+0x5c/0xe0 [930845.292962] __x64_sys_write+0x1a/0x20 [930845.292964] do_syscall_64+0x5a/0x120 [930845.292966] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [930845.292967] RIP: 0033:0x7f6b9f8ff024 [930845.292968] Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b3 0f 1f 80 00 00 00 00 48 8d 05 b9 
d3 0d 00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53 [930845.292968] RSP: 002b:00007fff9b739518 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 [930845.292969] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f6b9f8ff024 [930845.292970] RDX: 0000000000000002 RSI: 000055852def8410 RDI: 0000000000000001 [930845.292970] RBP: 000055852def8410 R08: 00007f6b9f9d9580 R09: 00007f6b9f9de580 [930845.292971] R10: 00007f6b9f9d6ca0 R11: 0000000000000246 R12: 00007f6b9f9d7760 [930845.292971] R13: 0000000000000002 R14: 00007f6b9f9d8560 R15: 00007f6b9f9d7960 [930845.292972] Modules linked in: pro(OE) tcp_diag inet_diag snd_hda_codec_realtek snd_hda_codec_generic amdgpu ledtrig_audio chash amd_iommu_v2 gpu_sched intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec_hdmi aesni_intel snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi i915 snd_seq aes_x86_64 crypto_simd snd_seq_device cryptd glue_helper snd_timer kvmgt eeepc_wmi intel_cstate nls_iso8859_1 vfio_mdev asus_wmi intel_rapl_perf input_leds radeon wmi_bmof snd joydev sparse_keymap mxm_wmi mdev vfio_iommu_type1 ttm vfio soundcore kvm irqbypass drm_kms_helper drm mei_me i2c_algo_bit mei fb_sys_fops syscopyarea sysfillrect sysimgblt mac_hid acpi_pad sch_fq_codel parport_pc ppdev lp parport ip_tables x_tables autofs4 hid_generic usbhid hid nvme r8169 ahci realtek nvme_core libahci wmi video [last unloaded: pro] [930845.292992] ---[ end trace 622fbd2856be7806 ]--- [930845.292993] RIP: 0010:my_proc_write.cold.3+0x75/0x77 [pro] [930845.292994] Code: 36 01 d6 eb 1d 0f 0b 83 f8 03 74 11 83 f8 04 75 e6 48 c7 c7 c0 f0 6e c0 e8 2c 8b fa d5 e8 66 2b 92 d6 48 89 d8 e9 8a fe ff ff <0f> 0b 48 c7 c7 76 f1 6e c0 31 db 49 c7 c4 9e f1 6e c0 e8 bf 36 01 [930845.292994] RSP: 0018:ffffb3e60893fe50 EFLAGS: 00010246 [930845.292995] RAX: 0000000000000000 RBX: 
0000000000000002 RCX: 0000000000e7affd [930845.292996] RDX: 0000000000e7affc RSI: ffff907726ba7040 RDI: ffff907726403c80 [930845.292996] RBP: ffffb3e60893fe68 R08: 0000000000027040 R09: ffffffffc06ee1c8 [930845.292997] R10: ffffd547607934c0 R11: 0000000000000001 R12: ffff90771e4d37e8 [930845.292997] R13: 00005585c2683050 R14: 00005585c2683050 R15: ffff907721602200 [930845.292998] FS: 00007f6b9f9de580(0000) GS:ffff907726a80000(0000) knlGS:0000000000000000 [930845.292998] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [930845.292999] CR2: 00007f6b9f87f6f0 CR3: 000000014850a004 CR4: 00000000003606e0 [930845.292999] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [930845.293000] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400其他的操作也是一樣。
大家在調試過程中,可以試試這個方法。在自己的異常代碼處加上之后,如果異常是由其他調用導致的,就可以看到是誰的調用導致的。
我們看看BUG_ON()定義的位置
kernel/include/asm-generic/bug.h
/*
 * Don't use BUG() or BUG_ON() unless there's really no way out; one
 * example might be detecting data structure corruption in the middle
 * of an operation that can't be backed out of. If the (sub)system
 * can somehow continue operating, perhaps with reduced functionality,
 * it's probably not BUG-worthy.
 *
 * If you're tempted to BUG(), think again: is completely giving up
 * really the *only* solution? There are usually better options, where
 * users don't need to reboot ASAP and can mostly shut down cleanly.
 */
/*
 * Fallback BUG(), used only when HAVE_ARCH_BUG is not defined: prints the
 * file, line and function of the failure, then calls panic().
 */
#ifndef HAVE_ARCH_BUG
#define BUG() do { \
	printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
	barrier_before_unreachable(); \
	panic("BUG!"); \
} while (0)
#endif

/*
 * Fallback BUG_ON(), used only when HAVE_ARCH_BUG_ON is not defined: invokes
 * BUG() when condition is true. The condition is wrapped in unlikely().
 */
#ifndef HAVE_ARCH_BUG_ON
#define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0)
#endif
里面的注釋寫的很明白,如果你有其他的辦法,建議不要使用BUG_ON()。
回復「籃球的大肚子」進入技術群聊
回復「1024」獲取1000G學習資料
總結
以上是生活随笔為你收集整理的BUG_ON()、panic()、dump_stack()几种内核调试手段的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 台式计算机可以发射无线网络,台式电脑无线
- 下一篇: 再见了,唠嗑