BUG_ON()、panic()、dump_stack()几种内核调试手段
发布日期:2021-06-30 18:44:26 浏览次数:2 分类:技术文章

本文共 10791 字,大约阅读时间需要 35 分钟。

Linux内核有一些方法可以用来方便标记bug,提供断言并输出信息。最常用的两个是BUG()和BUG_ON()。

当被调用的时候,它们会引发oops,导致栈的回溯和错误信息的打印。这些语句引发oops的具体方式与硬件体系结构相关:大部分体系结构把BUG()和BUG_ON()定义成某种非法操作,这样自然会产生需要的oops。你可以把这些调用当作断言使用,用来断言某种情况不该发生:

if (bad_thing)BUG(); //需要linux 内核开启General setup->Configure standard kernel features->BUG() support

或者使用更好的形式:

BUG_ON(bad_thing); 

可以用panic()引发更严重的错误。调用panic()不但会打印错误消息(Oops)而且还会挂起整个系统。显然,你只应该在极端恶劣的情况下使用它:

if (terrible_thing)       panic("foo is %ld\n", foo);  

有些时候,你只是需要在终端上打印一下栈的回溯信息来帮助你测试。此时可以使用dump_stack()。它只在终端上打印寄存器上下文和函数的跟踪线索:

if (!debug_check) {       printk(KERN_DEBUG "provide some information...\n");       dump_stack();}

举个例子程序

这个例子是参考了别人的代码,我按照这个代码执行下给大伙看看。通过触发proc下的文件来触发不同的执行函数。

/*************************************************************************
 *      > File Name: pro.c
 *      > Author:
 *      > Mail:
 *      > Created Time: Sat 2020-03-07 11:19:38
 ************************************************************************/
/*
 * NOTE: the header names of the original #include lines were lost when the
 * page was extracted; the list below is reconstructed from the kernel APIs
 * this module actually uses.
 */
#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/uaccess.h>
// Module to make a read entry in the proc file system.// Module to write a command line calculatorMODULE_LICENSE("GPL");MODULE_AUTHOR("329410527@qq.com");#define MY_PROC_ENTRY "bugon-test"struct proc_dir_entry *proc;int len;char *msg = NULL;#define DATA_SIZE 1024 // We can keep 1024 bytes of data with us./* * Function to write to the proc. Here we free the old data, and allocate new space and copy the data to * that newly allocated area. */#define MY_BUG_ON 1#define MY_BUG 2#define MY_DUMPSTACK 3#define MY_PANIC 4static int param = 100;/*文件的写函数*/static ssize_t my_proc_write(struct file *filp, const char __user * buffer, size_t count, loff_t *pos){ char *str; str = kmalloc((size_t) count, GFP_KERNEL); if (copy_from_user(str, buffer, count)) { kfree(str); return -EFAULT; } sscanf(str, "%d", &param); printk("param has been set to %d\n", param); kfree(str); switch (param) { case MY_BUG_ON: BUG_ON(param); break; case MY_BUG: BUG(); break; case MY_DUMPSTACK: dump_stack(); break; case MY_PANIC: panic("I am panicking, Why? 
-- you told so"); break; default: printk("unknow param...\n"); } return count;}/*读proc文件*/ssize_t my_proc_read(struct file *filp,char *buf,size_t count, loff_t *offp ){ int err; char *data = PDE_DATA(file_inode(filp)); if ((int) (*offp) > len) { return 0; } printk(KERN_INFO "Reading the proc entry, len of the file is %d", len); if(!(data)) { printk(KERN_INFO "NULL DATA"); return 0; } if (count == 0) { printk(KERN_INFO "Read of size zero, doing nothing."); return count; } else { printk(KERN_INFO "Read of size %d", (int) count); } count = len + 1; // +1 to read the \0 err = copy_to_user(buf, data, count); // +1 for \0 printk(KERN_INFO "Read data : %s", buf); *offp = count; if (err) { printk(KERN_INFO "Error in copying data."); } else { printk(KERN_INFO "Successfully copied data."); } return count;}/*proc文件系统的fops*/struct file_operations proc_fops = { .read = my_proc_read, .write = my_proc_write,};int create_new_proc_entry(void) { int i; char *DATA = "Hello People"; len = strlen(DATA); /*申请内存空间*/ msg = kmalloc((size_t) DATA_SIZE, GFP_KERNEL); // +1 for \0 if (msg != NULL) { printk(KERN_INFO "Allocated memory for msg"); } else { return -1; } /*把字符串拷贝到msg*/ strncpy(msg, DATA, len+1); for (i=0; i < len +1 ; i++) { printk(KERN_INFO "%c", msg[i]); } /*建立proc文件系统*/ proc = proc_create_data(MY_PROC_ENTRY, 0666, NULL, &proc_fops, msg); if (proc) { return 0; } return -1;}int __init proc_bug_on_init (void){ if (create_new_proc_entry()) { return -1; } return 0;}void __exit proc_bug_on_cleanup(void) { remove_proc_entry(MY_PROC_ENTRY, NULL);}module_init(proc_bug_on_init);module_exit(proc_bug_on_cleanup);

Makefile文件:

# Makefile for the out-of-tree module pro.ko.
#
# The file is read twice: once by the user's `make` (KERNELRELEASE empty),
# which re-invokes make inside the kernel build tree, and once by Kbuild
# itself (KERNELRELEASE set), which only needs the obj-m list.

PWD := $(shell pwd)
VER := $(shell uname -r)

# Show the directory and kernel version on every pass.
$(info $(PWD))
$(info $(VER))

ifneq ($(KERNELRELEASE),)

obj-m := pro.o

else

# Kernel build tree; override with `make KDIR=/path/to/build` if needed.
KDIR ?= /lib/modules/$(VER)/build

.PHONY: all install clean

all:
	$(MAKE) -C $(KDIR) M=$(PWD) modules

install:
	$(MAKE) -C $(KDIR) M=$(PWD) modules_install

clean:
	$(MAKE) -C $(KDIR) M=$(PWD) clean

endif

执行make命令后生成文件

weiqifa0@weiqifa-System-Product-Name:/ssd/weiqifa0/linux-c/pro-module$ make/ssd/weiqifa0/linux-c/pro-module5.0.0-23-genericmake -C /lib/modules/5.0.0-23-generic/build M=/ssd/weiqifa0/linux-c/pro-module modulesmake[1]: Entering directory '/usr/src/linux-headers-5.0.0-23-generic'/usr/src/linux-headers-5.0.0-23-generic5.0.0-23-generic  CC [M]  /ssd/weiqifa0/linux-c/pro-module/pro.o  Building modules, stage 2./usr/src/linux-headers-5.0.0-23-generic5.0.0-23-generic  MODPOST 1 modules  CC      /ssd/weiqifa0/linux-c/pro-module/pro.mod.o  LD [M]  /ssd/weiqifa0/linux-c/pro-module/pro.komake[1]: Leaving directory '/usr/src/linux-headers-5.0.0-23-generic'weiqifa0@weiqifa-System-Product-Name:/ssd/weiqifa0/linux-c/pro-module$

执行加载模块

sudo insmod pro.ko

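执行触发bugon操作(注意:在 sudo echo 1 > /proc/bugon-test 这种写法中,重定向是由当前shell完成的,并不会以root权限执行,它在这里能成功只是因为该proc节点的权限是0666;更稳妥的写法如下): echo 1 | sudo tee /proc/bugon-test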

[930845.292938] ------------[ cut here ]------------[930845.292939] kernel BUG at /ssd/weiqifa0/linux-c/pro-module/pro.c:57![930845.292942] invalid opcode: 0000 [#2] SMP NOPTI[930845.292944] CPU: 2 PID: 12116 Comm: echo Tainted: G      D    OE     5.0.0-23-generic #24~18.04.1-Ubuntu[930845.292944] Hardware name: System manufacturer System Product Name/PRIME Z370-P II, BIOS 0602 03/14/2019[930845.292946] RIP: 0010:my_proc_write.cold.3+0x75/0x77 [pro][930845.292947] Code: 36 01 d6 eb 1d 0f 0b 83 f8 03 74 11 83 f8 04 75 e6 48 c7 c7 c0 f0 6e c0 e8 2c 8b fa d5 e8 66 2b 92 d6 48 89 d8 e9 8a fe ff ff <0f> 0b 48 c7 c7 76 f1 6e c0 31 db 49 c7 c4 9e f1 6e c0 e8 bf 36 01[930845.292948] RSP: 0018:ffffb3e60a44fe50 EFLAGS: 00010246[930845.292949] RAX: 0000000000000000 RBX: 0000000000000002 RCX: 0000000000eec799[930845.292949] RDX: 0000000000eec798 RSI: ffff907726aa7040 RDI: ffff907726403c80[930845.292950] RBP: ffffb3e60a44fe68 R08: 0000000000027040 R09: ffffffffc06ee1c8[930845.292950] R10: ffffd547606a6f80 R11: ffffb3e60a44fcc0 R12: ffff90771a9be310[930845.292951] R13: 000055852def8410 R14: 000055852def8410 R15: ffff907683c1f300[930845.292952] FS:  00007f6b9f9de580(0000) GS:ffff907726a80000(0000) knlGS:0000000000000000[930845.292952] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033[930845.292953] CR2: 00007f6b9f87f6f0 CR3: 000000014850a004 CR4: 00000000003606e0[930845.292953] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000[930845.292954] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400[930845.292954] Call Trace:[930845.292957]  proc_reg_write+0x3e/0x60[930845.292959]  __vfs_write+0x1b/0x40[930845.292960]  vfs_write+0xb1/0x1a0[930845.292961]  ksys_write+0x5c/0xe0[930845.292962]  __x64_sys_write+0x1a/0x20[930845.292964]  do_syscall_64+0x5a/0x120[930845.292966]  entry_SYSCALL_64_after_hwframe+0x44/0xa9[930845.292967] RIP: 0033:0x7f6b9f8ff024[930845.292968] Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b3 0f 1f 80 00 00 00 00 48 8d 05 b9 d3 0d 
00 8b 00 85 c0 75 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 41 54 49 89 d4 55 48 89 f5 53[930845.292968] RSP: 002b:00007fff9b739518 EFLAGS: 00000246 ORIG_RAX: 0000000000000001[930845.292969] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f6b9f8ff024[930845.292970] RDX: 0000000000000002 RSI: 000055852def8410 RDI: 0000000000000001[930845.292970] RBP: 000055852def8410 R08: 00007f6b9f9d9580 R09: 00007f6b9f9de580[930845.292971] R10: 00007f6b9f9d6ca0 R11: 0000000000000246 R12: 00007f6b9f9d7760[930845.292971] R13: 0000000000000002 R14: 00007f6b9f9d8560 R15: 00007f6b9f9d7960[930845.292972] Modules linked in: pro(OE) tcp_diag inet_diag snd_hda_codec_realtek snd_hda_codec_generic amdgpu ledtrig_audio chash amd_iommu_v2 gpu_sched intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec_hdmi aesni_intel snd_hda_intel snd_hda_codec snd_hda_core snd_hwdep snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi i915 snd_seq aes_x86_64 crypto_simd snd_seq_device cryptd glue_helper snd_timer kvmgt eeepc_wmi intel_cstate nls_iso8859_1 vfio_mdev asus_wmi intel_rapl_perf input_leds radeon wmi_bmof snd joydev sparse_keymap mxm_wmi mdev vfio_iommu_type1 ttm vfio soundcore kvm irqbypass drm_kms_helper drm mei_me i2c_algo_bit mei fb_sys_fops syscopyarea sysfillrect sysimgblt mac_hid acpi_pad sch_fq_codel parport_pc ppdev lp parport ip_tables x_tables autofs4 hid_generic usbhid hid nvme r8169 ahci realtek nvme_core libahci wmi video [last unloaded: pro][930845.292992] ---[ end trace 622fbd2856be7806 ]---[930845.292993] RIP: 0010:my_proc_write.cold.3+0x75/0x77 [pro][930845.292994] Code: 36 01 d6 eb 1d 0f 0b 83 f8 03 74 11 83 f8 04 75 e6 48 c7 c7 c0 f0 6e c0 e8 2c 8b fa d5 e8 66 2b 92 d6 48 89 d8 e9 8a fe ff ff <0f> 0b 48 c7 c7 76 f1 6e c0 31 db 49 c7 c4 9e f1 6e c0 e8 bf 36 01[930845.292994] RSP: 0018:ffffb3e60893fe50 EFLAGS: 00010246[930845.292995] RAX: 0000000000000000 RBX: 0000000000000002 RCX: 
0000000000e7affd[930845.292996] RDX: 0000000000e7affc RSI: ffff907726ba7040 RDI: ffff907726403c80[930845.292996] RBP: ffffb3e60893fe68 R08: 0000000000027040 R09: ffffffffc06ee1c8[930845.292997] R10: ffffd547607934c0 R11: 0000000000000001 R12: ffff90771e4d37e8[930845.292997] R13: 00005585c2683050 R14: 00005585c2683050 R15: ffff907721602200[930845.292998] FS:  00007f6b9f9de580(0000) GS:ffff907726a80000(0000) knlGS:0000000000000000[930845.292998] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033[930845.292999] CR2: 00007f6b9f87f6f0 CR3: 000000014850a004 CR4: 00000000003606e0[930845.292999] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000[930845.293000] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400

其他的操作也是一样。

大家在调试过程中,可以试试这个方法:在自己的异常处理代码中加上这些调用之后,如果异常是由其他地方的调用引起的,就可以从打印出来的栈回溯中看到是谁的调用导致的。

我们看看BUG_ON()定义的位置

kernel/include/asm-generic/bug.h
/*
 * Don't use BUG() or BUG_ON() unless there's really no way out; one
 * example might be detecting data structure corruption in the middle
 * of an operation that can't be backed out of.  If the (sub)system
 * can somehow continue operating, perhaps with reduced functionality,
 * it's probably not BUG-worthy.
 *
 * If you're tempted to BUG(), think again:  is completely giving up
 * really the *only* solution?  There are usually better options, where
 * users don't need to reboot ASAP and can mostly shut down cleanly.
 */
/*
 * Fallback BUG(), used only when HAVE_ARCH_BUG is not defined: it logs the
 * file, line and function of the call site and then calls panic().
 */
#ifndef HAVE_ARCH_BUG
#define BUG() do { \
    printk("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \
    barrier_before_unreachable(); \
    panic("BUG!"); \
} while (0)
#endif

/*
 * Fallback BUG_ON(), used only when HAVE_ARCH_BUG_ON is not defined: it
 * calls BUG() when the condition is true; unlikely() marks that case as
 * the unexpected one.
 */
#ifndef HAVE_ARCH_BUG_ON
#define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0)
#endif

里面的注释写的很明白,如果你有其他的办法,建议不要使用BUG_ON()。

  回复「 篮球的大肚子」进入技术群聊

回复「1024」获取1000G学习资料

转载地址:https://linus.blog.csdn.net/article/details/104765287 如侵犯您的版权,请留言回复原文章的地址,我们会给您删除此文章,给您带来不便请您谅解!

上一篇:声明为数组定义为指针,声明为指针定义为数组
下一篇:你试试用心呼吸

发表评论

最新留言

第一次来,支持一个
[***.219.124.196]2024年05月04日 15时40分16秒