六: wait4 ()系統調用
在父進程中,用wait4()可以獲得子進程的退出狀態,並且防止在父進程退出前,子進程退出造成僵死狀態。這是我們這節分析的最後一個小節了。
關於wait4()在用戶空間的調用方式可以自行參考相關資料,在這裡只是討論內核對這個系統調用的實現過程。
wait4()的系統調用入口為sys_wait4()。代碼如下所示:
/*
 * sys_wait4 - kernel entry point of the wait4() system call.
 * @pid:       selector for the children to wait for (interpreted by
 *             eligible_child())
 * @stat_addr: user pointer that receives the status word, may be NULL
 * @options:   combination of WNOHANG, WUNTRACED, WCONTINUED, __WNOTHREAD,
 *             __WCLONE and __WALL
 * @ru:        user pointer that receives resource usage, may be NULL
 *
 * Returns -EINVAL when @options contains any other bit; otherwise returns
 * whatever do_wait() returns.
 */
asmlinkage long sys_wait4(pid_t pid, int __user *stat_addr,
			  int options, struct rusage __user *ru)
{
	long ret;

	/*
	 * Only the flags listed below may be combined in options; any other
	 * bit is an error. What each flag does is decided in do_wait().
	 */
	if (options & ~(WNOHANG|WUNTRACED|WCONTINUED|
			__WNOTHREAD|__WCLONE|__WALL))
		return -EINVAL;

	/* wait4() always reports exited children, so WEXITED is forced on. */
	ret = do_wait(pid, options | WEXITED, NULL, stat_addr, ru);

	/* avoid REGPARM breakage on x86: */
	prevent_tail_call(ret);
	return ret;
}
do_wait()是其中的核心處理函數。代碼如下:
/*
 * do_wait - core of the wait family of system calls.
 * @pid:       child selector, interpreted by eligible_child()
 * @options:   W* option bits
 * @infop:     user siginfo to fill in, or NULL
 * @stat_addr: user pointer that receives the status word, or NULL
 * @ru:        user pointer that receives resource usage, or NULL
 *
 * Scans the children (and the ptrace_children) of every thread in the
 * caller's thread group, looking for one whose state can be reported.
 * When a helper reports a child it returns non-zero and has already
 * dropped tasklist_lock, so those paths jump straight to "end".
 * When nothing is reportable yet but a candidate exists, the caller
 * either returns 0 (WNOHANG), returns -ERESTARTSYS (signal pending),
 * or calls schedule() and rescans.
 *
 * Returns the helper's result, 0 for WNOHANG with nothing ready,
 * -ERESTARTSYS, -ECHILD when there is no candidate child, or the error
 * returned by eligible_child() when every candidate was denied.
 * With @infop set, a positive result is turned into 0.
 */
static long do_wait(pid_t pid, int options, struct siginfo __user *infop,
		    int __user *stat_addr, struct rusage __user *ru)
{
	/* Wait-queue entry for the current task. */
	DECLARE_WAITQUEUE(wait, current);
	struct task_struct *tsk;
	int flag, retval;
	int allowed, denied;

	/*
	 * Queue ourselves on signal->wait_chldexit; the notification sent
	 * by an exiting child wakes up this queue.
	 */
	add_wait_queue(&current->signal->wait_chldexit,&wait);
repeat:
	/*
	 * flag is set when some child may become reportable later, so it
	 * is worth sleeping. allowed/denied record the results of
	 * eligible_child().
	 */
	flag = 0;
	allowed = denied = 0;
	/*
	 * Set the state before scanning; schedule() is called further down
	 * if nothing is ready yet.
	 */
	current->state = TASK_INTERRUPTIBLE;
	read_lock(&tasklist_lock);
	tsk = current;
	do {
		struct task_struct *p;
		struct list_head *_p;
		int ret;

		/* Walk the children of this thread. */
		list_for_each(_p,&tsk->children) {
			p = list_entry(_p, struct task_struct, sibling);

			/* Is this a child the caller asked to wait for? */
			ret = eligible_child(pid, options, p);
			if (!ret)
				continue;

			if (unlikely(ret < 0)) {
				denied = ret;
				continue;
			}
			allowed = 1;

			switch (p->state) {
			case TASK_TRACED:
				/*
				 * A traced child is only reported when
				 * my_ptrace_child() says it is ours.
				 */
				flag = 1;
				if (!my_ptrace_child(p))
					continue;
				/*FALLTHROUGH*/
			case TASK_STOPPED:
				flag = 1;
				/*
				 * A stopped child is skipped unless the
				 * caller passed WUNTRACED or
				 * my_ptrace_child(p) is non-zero.
				 */
				if (!(options & WUNTRACED) &&
				    !my_ptrace_child(p))
					continue;
				/*
				 * WNOWAIT is forwarded as "noreap": the
				 * status is reported without being consumed,
				 * so a later wait can fetch it again.
				 */
				retval = wait_task_stopped(p, ret == 2,
							   (options & WNOWAIT),
							   infop,
							   stat_addr, ru);
				if (retval == -EAGAIN)
					goto repeat;
				if (retval != 0) /* He released the lock.  */
					goto end;
				break;
			default:
			// case EXIT_DEAD:
				/* Nothing to do for an EXIT_DEAD child. */
				if (p->exit_state == EXIT_DEAD)
					continue;
			// case EXIT_ZOMBIE:
				/* The child is a zombie. */
				if (p->exit_state == EXIT_ZOMBIE) {
					/*
					 * ret == 2 is eligible_child()'s
					 * delay_group_leader() result.
					 */
					if (ret == 2)
						goto check_continued;
					if (!likely(options & WEXITED))
						continue;
					retval = wait_task_zombie(
						p, (options & WNOWAIT),
						infop, stat_addr, ru);
					/* He released the lock.  */
					if (retval != 0)
						goto end;
					break;
				}
check_continued:
				/*
				 * It's running now, so it might later
				 * exit, stop, or stop and then continue.
				 */
				flag = 1;
				/* Continued children are reported only with WCONTINUED. */
				if (!unlikely(options & WCONTINUED))
					continue;
				retval = wait_task_continued(
					p, (options & WNOWAIT),
					infop, stat_addr, ru);
				if (retval != 0) /* He released the lock.  */
					goto end;
				break;
			}
		}

		/*
		 * Nothing found among the regular children: check the
		 * ptrace_children list too. A match here only sets flag,
		 * so the caller sleeps; the child is not reported from
		 * this list.
		 */
		if (!flag) {
			list_for_each(_p, &tsk->ptrace_children) {
				p = list_entry(_p, struct task_struct,
						ptrace_list);
				if (!eligible_child(pid, options, p))
					continue;
				flag = 1;
				break;
			}
		}
		if (options & __WNOTHREAD)
			break;
		/*
		 * Children forked by other threads of this thread group
		 * count as well, so move on to the next thread.
		 */
		tsk = next_thread(tsk);
		BUG_ON(tsk->signal != current->signal);
	} while (tsk != current);

	/* Falling out of the loop means the read lock is still held. */
	read_unlock(&tasklist_lock);
	if (flag) {
		retval = 0;
		/* WNOHANG: return immediately instead of sleeping. */
		if (options & WNOHANG)
			goto end;
		retval = -ERESTARTSYS;
		if (signal_pending(current))
			goto end;
		schedule();
		goto repeat;
	}
	retval = -ECHILD;
	if (unlikely(denied) && !allowed)
		retval = denied;
end:
	/* Back to running state and off the wait queue. */
	current->state = TASK_RUNNING;
	remove_wait_queue(&current->signal->wait_chldexit,&wait);
	if (infop) {
		if (retval > 0)
			retval = 0;
		else {
			/*
			 * For a WNOHANG return, clear out all the fields
			 * we would set so the user can easily tell the
			 * difference.
			 */
			if (!retval)
				retval = put_user(0, &infop->si_signo);
			if (!retval)
				retval = put_user(0, &infop->si_errno);
			if (!retval)
				retval = put_user(0, &infop->si_code);
			if (!retval)
				retval = put_user(0, &infop->si_pid);
			if (!retval)
				retval = put_user(0, &infop->si_uid);
			if (!retval)
				retval = put_user(0, &infop->si_status);
		}
	}
	return retval;
}
這段代碼還是比較簡單。先遍歷進程的子進程,再遍歷被跟蹤出去的進程,再遍歷線程組中的其他線程。我們分析一下裡面用到的幾個重要的子函數。
eligible_child()用來判斷子進程是否是我們想要wait的子進程.代碼如下:
/*
 * eligible_child - decide whether @p is a child the caller wants to wait for.
 * @pid:     selector passed to the wait call
 * @options: W* option bits (__WCLONE and __WALL are examined here)
 * @p:       candidate child
 *
 * Returns 0 if @p does not match, 1 if it matches, 2 if it matches but
 * delay_group_leader(p) is true, or the non-zero value returned by
 * security_task_wait().
 */
static int eligible_child(pid_t pid, int options, struct task_struct *p)
{
	int err;

	/*
	 * Match on the pid argument:
	 *   pid >  0  : only the child whose pid equals pid
	 *   pid == 0  : children in the caller's process group
	 *   pid < -1  : children in process group -pid
	 *   pid == -1 : any child
	 */
	if (pid > 0) {
		if (p->pid != pid)
			return 0;
	} else if (!pid) {
		if (process_group(p) != process_group(current))
			return 0;
	} else if (pid != -1) {
		if (process_group(p) != -pid)
			return 0;
	}

	/* A child with exit_signal == -1 that is not ptraced is never waited for. */
	if (p->exit_signal == -1 && !p->ptrace)
		return 0;

	/*
	 * Without __WALL, __WCLONE selects exactly the children whose
	 * exit_signal is not SIGCHLD, and its absence selects the others.
	 */
	if (((p->exit_signal != SIGCHLD) ^ ((options & __WCLONE) != 0))
	    && !(options & __WALL))
		return 0;
	/*
	 * Do not consider thread group leaders that are
	 * in a non-empty thread group:
	 */
	if (delay_group_leader(p))
		return 2;

	err = security_task_wait(p);
	if (err)
		return err;

	return 1;
}
對TASK_TRACED和TASK_STOPPED狀態的子進程操作是在wait_task_stopped()中完成的。它的代碼如下:
/*
 * wait_task_stopped - report a child found in TASK_STOPPED or TASK_TRACED.
 * @p:                    the child
 * @delayed_group_leader: non-zero when eligible_child() returned 2 for @p
 * @noreap:               non-zero for WNOWAIT: report without clearing
 *                        p->exit_code
 * @infop:                user siginfo to fill in, or NULL
 * @stat_addr:            user pointer that receives the status word, or NULL
 * @ru:                   user pointer that receives resource usage, or NULL
 *
 * Called with tasklist_lock read-held. Returns 0 with the lock still held
 * when there is nothing to report. Every other return path has dropped
 * the lock: -EAGAIN when the stop status disappeared in the meantime,
 * a put_user()/getrusage() error, or p->pid on success.
 */
static int wait_task_stopped(struct task_struct *p, int delayed_group_leader,
			     int noreap, struct siginfo __user *infop,
			     int __user *stat_addr, struct rusage __user *ru)
{
	int retval, exit_code;

	/* A zero exit_code means there is no stop status to report. */
	if (!p->exit_code)
		return 0;

	/*
	 * A delayed group leader that is not ptraced is not reported while
	 * signal->group_stop_count is still positive.
	 */
	if (delayed_group_leader && !(p->ptrace & PT_PTRACED) &&
	    p->signal && p->signal->group_stop_count > 0)
		return 0;

	/*
	 * Pin the task before dropping the lock so it cannot be freed
	 * while we read from it.
	 */
	get_task_struct(p);
	read_unlock(&tasklist_lock);

	/* WNOWAIT: copy the status out without consuming it. */
	if (unlikely(noreap)) {
		pid_t pid = p->pid;
		uid_t uid = p->uid;
		int why = (p->ptrace & PT_PTRACED) ? CLD_TRAPPED : CLD_STOPPED;

		exit_code = p->exit_code;
		/*
		 * The status vanished, or the child has started exiting
		 * (exit_state non-zero): have the caller rescan.
		 */
		if (unlikely(!exit_code) ||
		    unlikely(p->exit_state))
			goto bail_ref;
		/* Copies the data to user space; see wait_noreap_copyout(). */
		return wait_noreap_copyout(p, pid, uid,
					   why, exit_code,
					   infop, ru);
	}

	write_lock_irq(&tasklist_lock);

	/*
	 * Take the stop status and clear it in one step. If the child has
	 * begun to exit, put the value back and treat it as nothing to
	 * report.
	 */
	exit_code = xchg(&p->exit_code, 0);
	if (unlikely(p->exit_state)) {
		p->exit_code = exit_code;
		exit_code = 0;
	}
	if (unlikely(exit_code == 0)) {
		write_unlock_irq(&tasklist_lock);
bail_ref:
		put_task_struct(p);
		return -EAGAIN;
	}

	/*
	 * Unlink and relink p on its parent's child list (presumably this
	 * moves it to the tail of the list; confirm against add_parent()).
	 */
	remove_parent(p);
	add_parent(p);

	write_unlock_irq(&tasklist_lock);

	/* Copy the results to user space; stop at the first failure. */
	retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
	if (!retval && stat_addr)
		retval = put_user((exit_code << 8) | 0x7f, stat_addr);
	if (!retval && infop)
		retval = put_user(SIGCHLD, &infop->si_signo);
	if (!retval && infop)
		retval = put_user(0, &infop->si_errno);
	if (!retval && infop)
		retval = put_user((short)((p->ptrace & PT_PTRACED)
					  ? CLD_TRAPPED : CLD_STOPPED),
				  &infop->si_code);
	if (!retval && infop)
		retval = put_user(exit_code, &infop->si_status);
	if (!retval && infop)
		retval = put_user(p->pid, &infop->si_pid);
	if (!retval && infop)
		retval = put_user(p->uid, &infop->si_uid);
	if (!retval)
		retval = p->pid;
	/* Drop the reference taken above. */
	put_task_struct(p);

	BUG_ON(!retval);
	return retval;
}
對僵屍進程的操作是由wait_task_zombie()完成的。代碼如下:
/*
 * wait_task_zombie - report (and normally reap) a child in EXIT_ZOMBIE.
 * @p:         the zombie child
 * @noreap:    non-zero for WNOWAIT: only report, leave the zombie in place
 * @infop:     user siginfo to fill in, or NULL
 * @stat_addr: user pointer that receives the status word, or NULL
 * @ru:        user pointer that receives resource usage, or NULL
 *
 * Called with tasklist_lock read-held. Returns 0 with the lock still held
 * when the child turns out not to be reportable. Otherwise the lock has
 * been dropped and the result is p->pid on success or the error from
 * getrusage()/put_user().
 */
static int wait_task_zombie(struct task_struct *p, int noreap,
			    struct siginfo __user *infop,
			    int __user *stat_addr, struct rusage __user *ru)
{
	unsigned long state;
	int retval;
	int status;

	/* WNOWAIT: just read the exit information, release nothing. */
	if (unlikely(noreap)) {
		pid_t pid = p->pid;
		uid_t uid = p->uid;
		int exit_code = p->exit_code;
		int why, status;

		/* No longer a zombie: nothing to report. */
		if (unlikely(p->exit_state != EXIT_ZOMBIE))
			return 0;
		/* No exit signal and not ptraced: nothing to report. */
		if (unlikely(p->exit_signal == -1 && p->ptrace == 0))
			return 0;
		/* Pin the task before dropping the lock. */
		get_task_struct(p);
		read_unlock(&tasklist_lock);
		/*
		 * Low 7 bits zero: the exit status is in bits 8 and up.
		 * Otherwise the low 7 bits are reported as the status and
		 * bit 0x80 selects CLD_DUMPED over CLD_KILLED.
		 */
		if ((exit_code & 0x7f) == 0) {
			why = CLD_EXITED;
			status = exit_code >> 8;
		} else {
			why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED;
			status = exit_code & 0x7f;
		}
		return wait_noreap_copyout(p, pid, uid, why,
					   status, infop, ru);
	}

	/*
	 * Try to move the task's state to DEAD
	 * only one thread is allowed to do this:
	 */
	state = xchg(&p->exit_state, EXIT_DEAD);
	/* Somebody else got there first: not ours to report. */
	if (state != EXIT_ZOMBIE) {
		BUG_ON(state != EXIT_DEAD);
		return 0;
	}
	/* No exit signal and not ptraced: nothing to report. */
	if (unlikely(p->exit_signal == -1 && p->ptrace == 0)) {
		return 0;
	}

	/*
	 * real_parent == parent: the child's current parent is its real
	 * parent, so fold the child's accounting into the parent's
	 * cumulative "c*" counters.
	 */
	if (likely(p->real_parent == p->parent) && likely(p->signal)) {
		struct signal_struct *psig;
		struct signal_struct *sig;

		spin_lock_irq(&p->parent->sighand->siglock);
		psig = p->parent->signal;
		sig = p->signal;
		psig->cutime =
			cputime_add(psig->cutime,
			cputime_add(p->utime,
			cputime_add(sig->utime,
				    sig->cutime)));
		psig->cstime =
			cputime_add(psig->cstime,
			cputime_add(p->stime,
			cputime_add(sig->stime,
				    sig->cstime)));
		psig->cmin_flt +=
			p->min_flt + sig->min_flt + sig->cmin_flt;
		psig->cmaj_flt +=
			p->maj_flt + sig->maj_flt + sig->cmaj_flt;
		psig->cnvcsw +=
			p->nvcsw + sig->nvcsw + sig->cnvcsw;
		psig->cnivcsw +=
			p->nivcsw + sig->nivcsw + sig->cnivcsw;
		psig->cinblock +=
			task_io_get_inblock(p) +
			sig->inblock + sig->cinblock;
		psig->coublock +=
			task_io_get_oublock(p) +
			sig->oublock + sig->coublock;
		spin_unlock_irq(&p->parent->sighand->siglock);
	}

	/*
	 * Now we are sure this task is interesting, and no other
	 * thread can reap it because we set its state to EXIT_DEAD.
	 */
	read_unlock(&tasklist_lock);

	/* Copy the exit information to user space; stop at the first failure. */
	retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
	status = (p->signal->flags & SIGNAL_GROUP_EXIT)
		? p->signal->group_exit_code : p->exit_code;
	if (!retval && stat_addr)
		retval = put_user(status, stat_addr);
	if (!retval && infop)
		retval = put_user(SIGCHLD, &infop->si_signo);
	if (!retval && infop)
		retval = put_user(0, &infop->si_errno);
	if (!retval && infop) {
		int why;

		if ((status & 0x7f) == 0) {
			why = CLD_EXITED;
			status >>= 8;
		} else {
			why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED;
			status &= 0x7f;
		}
		retval = put_user((short)why, &infop->si_code);
		if (!retval)
			retval = put_user(status, &infop->si_status);
	}
	if (!retval && infop)
		retval = put_user(p->pid, &infop->si_pid);
	if (!retval && infop)
		retval = put_user(p->uid, &infop->si_uid);
	if (retval) {
		/* Copy-out failed: make the child a zombie again. */
		// TODO: is this safe?
		p->exit_state = EXIT_ZOMBIE;
		return retval;
	}
	retval = p->pid;
	/*
	 * parent differs from real_parent: the child is attached to
	 * another (tracing) parent.
	 * NOTE(review): the original author remarks that an exiting child
	 * only signals its current parent - confirm.
	 */
	if (p->real_parent != p->parent) {
		write_lock_irq(&tasklist_lock);
		/* Double-check with lock held.  */
		if (p->real_parent != p->parent) {
			/*
			 * Detach the child from the tracer; presumably this
			 * restores real_parent as the parent.
			 */
			__ptrace_unlink(p);
			/* Back to EXIT_ZOMBIE. */
			// TODO: is this safe?
			p->exit_state = EXIT_ZOMBIE;
			/*
			 * If this is not a detached task, notify the parent.
			 * If it's still not detached after that, don't release
			 * it now.
			 */
			if (p->exit_signal != -1) {
				do_notify_parent(p, p->exit_signal);
				if (p->exit_signal != -1)
					p = NULL;
			}
		}
		write_unlock_irq(&tasklist_lock);
	}
	/* Release what is left of the child, unless that was deferred above. */
	if (p != NULL)
		release_task(p);
	BUG_ON(!retval);
	return retval;
}
至此,我們看到了繼子進程退出之後的完整處理。在此,值得注意的是:子進程在退出的時候會給父進程發送相應的信號(例如SIGCHLD),默認的信號處理函數也會進行相應的處理。
七:等待隊列的操作
在這裡,我們第一次接觸到了等待隊列,我們就以上面的代碼做為例子來分析一下。
1:申請一個等待隊列:
/*
 * DECLARE_WAITQUEUE(name, tsk) - define a wait_queue_t variable.
 * name: identifier of the wait-queue entry being defined
 * tsk:  the task stored in the entry's .private field
 *
 * The entry's wake-up callback is default_wake_function and its
 * task_list links start out as { NULL, NULL }.
 */
#define DECLARE_WAITQUEUE(name, tsk)					\
	wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk)

#define __WAITQUEUE_INITIALIZER(name, tsk) {				\
	.private	= tsk,						\
	.func		= default_wake_function,			\
	.task_list	= { NULL, NULL } }
default_wake_function()為默認的喚醒處理函數。
2:添加等待隊列。
在上面的代碼中,有:
add_wait_queue(¤t->signal->wait_chldexit,&wait);
它的意思是將wait添加至&current->signal->wait_chldexit中。代碼如下:
/*
 * add_wait_queue - add a non-exclusive entry to a wait queue.
 * @q:    wait-queue head to add to
 * @wait: entry to add; its WQ_FLAG_EXCLUSIVE bit is cleared first
 */
void fastcall add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait)
{
	unsigned long flags;

	wait->flags &= ~WQ_FLAG_EXCLUSIVE;
	/* Take the queue lock to avoid racing with other users of the queue. */
	spin_lock_irqsave(&q->lock, flags);
	/* Link the entry into the queue. */
	__add_wait_queue(q, wait);
	spin_unlock_irqrestore(&q->lock, flags);
}
3:喚醒操作:
在do_notify_parent()中有這樣的代碼片段:
…… __wake_up_parent(tsk, tsk->parent); …… __wake_up_parent()的代碼如下: static inline void __wake_up_parent(struct task_struct *p, struct task_struct *parent) { wake_up_interruptible_sync(&parent->signal->wait_chldexit); }
parent->signal->wait_chldexit這個隊列很熟吧?我們在父進程中添加的等待隊列就是添加在這裡哦。^_^
喚醒隊列的操作是由wake_up_interruptible_sync()完成的,代碼如下:
wake_up_interruptible_sync() à __wake_up_sync()à__wake_up_common(): static void __wake_up_common(wait_queue_head_t *q, unsigned int mode, int nr_exclusive, int sync, void *key) { struct list_head *tmp, *next; list_for_each_safe(tmp, next, &q->task_list) { wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list); unsigned flags = curr->flags; if (curr->func(curr, mode, sync, key) && (flags & WQ_FLAG_EXCLUSIVE) && !--nr_exclusive) break; } }
上述操作會遍歷整個等待隊列,然後運行對應的函數。我們在前面申請等待隊列的時候,默認的函數為:default_wake_function()。它會將操作的task放入運行隊列,並將狀態設為TASK_RUNNING。這個函數等之後我們分析進程切換與調度的時候再來分析。
八:小結
通過分析進程的創建、執行與消亡等過程,可以對進程管理子系統有一個大概的了解。該子系統與其它子系統關系十分密切。對進程資源的管理和釋放是理解這個子系統的難點。在下一個小節,我們接著分析進程的切換與調度。