`
dato0123
  • 浏览: 913785 次
文章分类
社区版块
存档分类
最新评论

kdb代码分析(七)

 
阅读更多

在大家都进入了kdb之后,in control的那个cpu就开始执行1566行的kdb_local()函数了.依然定义于kdb/kdbmain.c:

1134 /*

1135 * kdb_local

1136 *

1137 * The main code for kdb. This routine is invoked on a specific

1138 * processor, it is not global. The main kdb() routine ensures

1139 * that only one processor at a time is in this routine. This

1140 * code is called with the real reason code on the first entry

1141 * to a kdb session, thereafter it is called with reason SWITCH,

1142 * even if the user goes back to the original cpu.

1143 *

1144 * Inputs:

1145 * reason The reason KDB was invoked

1146 * error The hardware-defined error code

1147 * regs The exception frame at time of fault/breakpoint. NULL

1148 * for reason SILENT or CPU_UP, otherwise valid.

1149 * db_result Result code from the break or debug point.

1150 * Returns:

1151 * 0 KDB was invoked for an event which it wasn't responsible

1152 * 1 KDB handled the event for which it was invoked.

1153 * KDB_CMD_GO User typed 'go'.

1154 * KDB_CMD_CPU User switched to another cpu.

1155 * KDB_CMD_SS Single step.

1156 * KDB_CMD_SSB Single step until branch.

1157 * Locking:

1158 * none

1159 * Remarks:

1160 * none

1161 */

1162

1163 static int

1164 kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs, kdb_dbtrap_t db_result)

1165 {

1166 char *cmdbuf;

1167 int diag;

1168 struct task_struct *kdb_current = kdb_curr_task(smp_processor_id());

1169

1170 /* If kdb has been entered for an event which has been/will be

1171 * recovered then silently return. We have to get this far into kdb in

1172 * order to synchronize all the cpus, typically only one cpu (monarch)

1173 * knows that the event is recoverable but the other cpus (slaves) may

1174 * also be driven into kdb before that decision is made by the monarch.

1175 *

1176 * To pause in kdb even for recoverable events, 'set RECOVERY_PAUSE 1'

1177 */

1178 KDB_DEBUG_STATE("kdb_local 1", reason);

1179 if (reason == KDB_REASON_ENTER

1180 && KDB_FLAG(RECOVERY)

1181 && !KDB_FLAG(CATASTROPHIC)) {

1182 int recovery_pause = 0;

1183 kdbgetintenv("RECOVERY_PAUSE", &recovery_pause);

1184 if (recovery_pause == 0)

1185 reason = KDB_REASON_SILENT;

1186 else

1187 kdb_printf("%s: Recoverable error detected but"

1188 " RECOVERY_PAUSE is set, staying in KDB\n",

1189 __FUNCTION__);

1190 }

1191

1192 KDB_DEBUG_STATE("kdb_local 2", reason);

1193 kdb_go_count = 0;

1194 if (kdb_quiet(reason)) {

1195 /* no message */

1196 } else if (reason == KDB_REASON_DEBUG) {

1197 /* special case below */

1198 } else {

1199 kdb_printf("\nEntering kdb (current=0x%p, pid %d) ", kdb_current, kdb_current->pid);

1200 #if defined(CONFIG_SMP)

1201 kdb_printf("on processor %d ", smp_processor_id());

1202 #endif

1203 }

1204

1205 switch (reason) {

1206 case KDB_REASON_DEBUG:

1207 {

1208 /*

1209 * If re-entering kdb after a single step

1210 * command, don't print the message.

1211 */

1212 switch(db_result) {

1213 case KDB_DB_BPT:

1214 kdb_printf("\nEntering kdb (0x%p, pid %d) ", kdb_current, kdb_current->pid);

1215 #if defined(CONFIG_SMP)

1216 kdb_printf("on processor %d ", smp_processor_id());

1217 #endif

1218 kdb_printf("due to Debug @ " kdb_machreg_fmt "\n", kdba_getpc(regs));

1219 break;

1220 case KDB_DB_SSB:

1221 /*

1222 * In the midst of ssb command. Just return.

1223 */

1224 KDB_DEBUG_STATE("kdb_local 3", reason);

1225 return KDB_CMD_SSB; /* Continue with SSB command */

1226

1227 break;

1228 case KDB_DB_SS:

1229 break;

1230 case KDB_DB_SSBPT:

1231 KDB_DEBUG_STATE("kdb_local 4", reason);

1232 return 1; /* kdba_db_trap did the work */

1233 default:

1234 kdb_printf("kdb: Bad result from kdba_db_trap: %d\n",

1235 db_result);

1236 break;

1237 }

1238

1239 }

1240 break;

1241 case KDB_REASON_ENTER:

1242 if (KDB_STATE(KEYBOARD))

1243 kdb_printf("due to Keyboard Entry\n");

1244 else

1245 kdb_printf("due to KDB_ENTER()\n");

1246 break;

1247 case KDB_REASON_KEYBOARD:

1248 KDB_STATE_SET(KEYBOARD);

1249 kdb_printf("due to Keyboard Entry\n");

1250 break;

1251 case KDB_REASON_ENTER_SLAVE: /* drop through, slaves only get released via cpu switch */

1252 case KDB_REASON_SWITCH:

1253 kdb_printf("due to cpu switch\n");

1254 if (KDB_STATE(GO_SWITCH)) {

1255 KDB_STATE_CLEAR(GO_SWITCH);

1256 KDB_DEBUG_STATE("kdb_local 5", reason);

1257 return KDB_CMD_GO;

1258 }

1259 break;

1260 case KDB_REASON_OOPS:

1261 kdb_printf("Oops: %s\n", kdb_diemsg);

1262 kdb_printf("due to oops @ " kdb_machreg_fmt "\n", kdba_getpc(regs));

1263 kdba_dumpregs(regs, NULL, NULL);

1264 break;

1265 case KDB_REASON_NMI:

1266 kdb_printf("due to NonMaskable Interrupt @ " kdb_machreg_fmt "\n",

1267 kdba_getpc(regs));

1268 kdba_dumpregs(regs, NULL, NULL);

1269 break;

1270 case KDB_REASON_BREAK:

1271 kdb_printf("due to Breakpoint @ " kdb_machreg_fmt "\n", kdba_getpc(regs));

1272 /*

1273 * Determine if this breakpoint is one that we

1274 * are interested in.

1275 */

1276 if (db_result != KDB_DB_BPT) {

1277 kdb_printf("kdb: error return from kdba_bp_trap: %d\n", db_result);

1278 KDB_DEBUG_STATE("kdb_local 6", reason);

1279 return 0; /* Not for us, dismiss it */

1280 }

1281 break;

1282 case KDB_REASON_RECURSE:

1283 kdb_printf("due to Recursion @ " kdb_machreg_fmt "\n", kdba_getpc(regs));

1284 break;

1285 case KDB_REASON_CPU_UP:

1286 case KDB_REASON_SILENT:

1287 KDB_DEBUG_STATE("kdb_local 7", reason);

1288 if (reason == KDB_REASON_CPU_UP)

1289 kdba_cpu_up();

1290 return KDB_CMD_GO; /* Silent entry, silent exit */

1291 break;

1292 default:

1293 kdb_printf("kdb: unexpected reason code: %d\n", reason);

1294 KDB_DEBUG_STATE("kdb_local 8", reason);

1295 return 0; /* Not for us, dismiss it */

1296 }

1297

1298 kdba_local_arch_setup();

1299

1300 kdba_set_current_task(kdb_current);

1301

1302 while (1) {

1303 /*

1304 * Initialize pager context.

1305 */

1306 kdb_nextline = 1;

1307 KDB_STATE_CLEAR(SUPPRESS);

1308 #ifdef kdba_setjmp

1309 /*

1310 * Use kdba_setjmp/kdba_longjmp to break out of

1311 * the pager early and to attempt to recover from kdb errors.

1312 */

1313 KDB_STATE_CLEAR(LONGJMP);

1314 if (kdbjmpbuf) {

1315 if (kdba_setjmp(&kdbjmpbuf[smp_processor_id()])) {

1316 /* Command aborted (usually in pager) */

1317 continue;

1318 }

1319 else

1320 KDB_STATE_SET(LONGJMP);

1321 }

1322 #endif /* kdba_setjmp */

1323

1324 cmdbuf = cmd_cur;

1325 *cmdbuf = '\0';

1326 *(cmd_hist[cmd_head])='\0';

1327

1328 if (KDB_FLAG(ONLY_DO_DUMP)) {

1329 /* kdb is off but a catastrophic error requires a dump.

1330 * Take the dump and reboot.

1331 * Turn on logging so the kdb output appears in the log

1332 * buffer in the dump.

1333 */

1334 const char *setargs[] = { "set", "LOGGING", "1" };

1335 kdb_set(2, setargs);

1336 kdb_do_dump();

1337 kdb_reboot(0, NULL);

1338 /*NOTREACHED*/

1339 }

1340

1341 do_full_getstr:

1342 #if defined(CONFIG_SMP)

1343 snprintf(kdb_prompt_str, CMD_BUFLEN, kdbgetenv("PROMPT"), smp_processor_id());

1344 #else

1345 snprintf(kdb_prompt_str, CMD_BUFLEN, kdbgetenv("PROMPT"));

1346 #endif

1347 if (defcmd_in_progress)

1348 strncat(kdb_prompt_str, "[defcmd]", CMD_BUFLEN);

1349

1350 /*

1351 * Fetch command from keyboard

1352 */

1353 cmdbuf = kdb_getstr(cmdbuf, CMD_BUFLEN, kdb_prompt_str);

1354 if (*cmdbuf != '\n') {

1355 if (*cmdbuf < 32) {

1356 if(cmdptr == cmd_head) {

1357 strncpy(cmd_hist[cmd_head], cmd_cur, CMD_BUFLEN);

1358 *(cmd_hist[cmd_head]+strlen(cmd_hist[cmd_head])-1) = '\0';

1359 }

1360 if(!handle_ctrl_cmd(cmdbuf))

1361 *(cmd_cur+strlen(cmd_cur)-1) = '\0';

1362 cmdbuf = cmd_cur;

1363 goto do_full_getstr;

1364 }

1365 else

1366 strncpy(cmd_hist[cmd_head], cmd_cur, CMD_BUFLEN);

1367

1368 cmd_head = (cmd_head+1) % KDB_CMD_HISTORY_COUNT;

1369 if (cmd_head == cmd_tail) cmd_tail = (cmd_tail+1) % KDB_CMD_HISTORY_COUNT;

1370

1371 }

1372

1373 cmdptr = cmd_head;

1374 diag = kdb_parse(cmdbuf);

1375 if (diag == KDB_NOTFOUND) {

1376 kdb_printf("Unknown kdb command: '%s'\n", cmdbuf);

1377 diag = 0;

1378 }

1379 if (diag == KDB_CMD_GO

1380 || diag == KDB_CMD_CPU

1381 || diag == KDB_CMD_SS

1382 || diag == KDB_CMD_SSB)

1383 break;

1384

1385 if (diag)

1386 kdb_cmderror(diag);

1387 }

1388

1389 kdba_local_arch_cleanup();

1390

1391 KDB_DEBUG_STATE("kdb_local 9", diag);

1392 return diag;

1393 }

我承认一开始我看到这些几百行的函数是有一些害怕,但是慢慢的我发现,其实Kernel中出现这种几百行的函数,就好比华为死一个人一样,再正常不过了.倒是假如哪天写代码的不写这种暴长的函数了,就好似不了,又如同嫖客不了,人们反而会说她们不正常,人们反而会说他们不务正业.

我们能做的只是让一切继续,继续往下看,由于我们的reason是KDB_REASON_KEYBOARD,所以我们会执行1199行,会执行1249行,从而在屏幕上我们总能看到类似下面这样的信息被打印出来:

Entering kdb (current=0xffff81022fcab7e0, pid 0) on processor 7 due to Keyboard Entry

[7]kdb>

而如果我们是从KDB_ENTER()进来的,那么1245行的信息就会打印出来,因为我们的reason是KDB_REASON_ENTER.

下一个需要关注的函数是kdba_set_current_task().来自arch/i386/kdb/kdbasupport.c

944 void

945 kdba_set_current_task(const struct task_struct *p)

946 {

947 kdb_current_task = p;

948 if (kdb_task_has_cpu(p)) {

949 struct kdb_running_process *krp = kdb_running_process + kdb_process_cpu(p);

950 kdb_current_regs = krp->regs;

951 return;

952 }

953 kdb_current_regs = NULL;

954 }

如果你看不明白这个函数那我也没话说,其实我相信湖北天门市的城管都能看懂的.无非就是为了保存一个作案现场,以便日后使用.kdb_current_task保存了当前的这个进程,kdb_current_regs保存了当前的寄存器.天门城管就是看明白了魏文华把他们的作案现场(粗暴执法)给保存(拍摄)了下来所以才会恼羞成怒,若是他们没进监狱,我倒是希望他们给个面子,来给大家讲解Linux Kernel代码.

从1302行开始,我们发现我们又将进入一个死循环了.先不用看具体的代码,想想也能知道,kdb提示符已经可以打印出来,接下来,kdb进入一个死循环的目的就是为了时时刻刻等待着我们输入kdb那些命令,一个命令执行完了之后,继续循环,继续等待.这种意境很美好,相当于kdb用玫瑰铺满道路,期待你输入的命令,如果你什么也不输入,那么她只会等到心碎的花瓣在寒风中哭泣.

而具体的代码也确实如我们所说的那样,1343行的snprintf就是准备打印kdb提示符,默认的就是“[n]kdb> ”,n表示处理器的编号.然后1353行的kdb_getstr()就是如注释说的那样,从键盘获取命令.(当然你会看到,实际上kdb提示符也是在kdb_getstr()中打印出来的.)假如你什么也没输入,那么cmdbuf就是空.

376 /*

377 * kdb_getstr

378 *

379 * Print the prompt string and read a command from the

380 * input device.

381 *

382 * Parameters:

383 * buffer Address of buffer to receive command

384 * bufsize Size of buffer in bytes

385 * prompt Pointer to string to use as prompt string

386 * Returns:

387 * Pointer to command buffer.

388 * Locking:

389 * None.

390 * Remarks:

391 * For SMP kernels, the processor number will be

392 * substituted for %d, %x or %o in the prompt.

393 */

394

395 char *

396 kdb_getstr(char *buffer, size_t bufsize, char *prompt)

397 {

398 if(prompt && kdb_prompt_str!=prompt)

399 strncpy(kdb_prompt_str, prompt, CMD_BUFLEN);

400 kdb_printf(kdb_prompt_str);

401 kdb_nextline = 1; /* Prompt and input resets line number */

402 return kdb_read(buffer, bufsize);

403 }

kdb_read()中具体是怎么读的呢?其它不用说,还是凭男人的直觉就知道,一定是轮询.事实也的确是如此,不过我们暂且不去深究,先把这个函数跳过去,以尽快结束我们kdb()这个大函数.

甭管读到什么,总之当且仅当我们读到一些东西的时候,kdb_getstr()才会返回,从而cmdbuf里是一定有东西了,也就是说你一定是输入了一些东西了,而在1374行,kdb_parse()就会执行命令.执行命令的返回值赋给了diag.当然有很多种情况,最后在1392行,也就把diag给返回了.也就是说我们回到了kdb_main_loop().

在这里kdb_local的返回值赋给了result,下面的代码就是拿着result去分析来分析去,设置各种flag,然后最终又把result作为返回值给返回了.如果你想退出kdb,你可以输入“go”,这种情况diag以及之后的result就都是KDB_CMD_GO.

我们继续回溯,kdb_main_loop的结束引发了kdba_main_loop()也返回.返回值都是一样的.直到此刻,我们终于再一次回到了kdb().

这之后,kdba_adjust_ip()被调用,不过对x86来说,这是个空函数,飘过.

再之后,我们看到WAIT_IPI,HOLD_CPU给清掉了.与此同时,设置了LEAVING flag.

然后又出现了一个疑似死循环,while中调用了一个比较重要的函数,kdb_previous_event(),来自kdb/kdbmain.c:

1418 /*

1419 * kdb_previous_event

1420 *

1421 * Return a count of cpus that are leaving kdb, i.e. the number

1422 * of processors that are still handling the previous kdb event.

1423 *

1424 * Inputs:

1425 * None.

1426 * Returns:

1427 * Count of cpus in previous event.

1428 * Locking:

1429 * none

1430 * Remarks:

1431 * none

1432 */

1433

1434 static int

1435 kdb_previous_event(void)

1436 {

1437 int i, leaving = 0;

1438 for (i = 0; i < NR_CPUS; ++i) {

1439 if (KDB_STATE_CPU(LEAVING, i))

1440 ++leaving;

1441 }

1442 return leaving;

1443 }

这个函数统计的就是有几个cpu对应的LEAVING flag被设置了,实际情况是我们刚刚为每个CPU设置了LEAVING,因此对于多处理器的机器来说,这里的返回值肯定不是1.while循环中说,如果kdb_previous_event返回值不为1,while循环就像永不消逝的电波一样,它反反复复,它生生不息,它试图摆脱轮回的束缚,但是,它能摆脱吗?

为了看看这个循环是否真的是死循环,我们得先回去看一下那些spin in kdb_main_loop()中的cpu,要知道它们循环的条件是HOLD_CPU被设置了,而我们现在看到这个flag终于被清除掉了,这就意味着各个cpu都将结束该循环.

而回过去仔细看kdb_main_loop(),你会发现由于LEAVING flag的设置,使得各个cpu将结束kdb_main_loop(),返回值result为1,然后kdba_main_loop()也返回了,然后由于返回值为1,使得kdb()也将最终返回.但是在返回之前,在2091行,LEAVING也将被清除.也就是说,对于那些spin的cpu,它们所对应的LEAVING flag是已经被清除掉了,只剩下这个in control的cpu还设置了这个flag,因此,这时候,kdb_previous_event()将等于1,于是,这个疑似死循环终于可以结束了.

最后作一些恢复工作之后,永垂不朽的kdb()函数也终于垂了朽了.到这一刻,所有的cpu都正式离开了kdb(),从此世界清静了,人民群众的生活再次回归宁静,各个进程又按着往日的习惯运转着,执行着,睡眠着.

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics