等待隊列
sleep類函數將進程的狀態設置為非運行態並把進程加入等待隊列,然後調用schedule函數;在schedule函數中,本進程被從運行隊列中移除,調度程序選擇另一個進程開始執行。當調用wake_up類函數將進程喚醒時,wake_up類函數將進程重新加入運行隊列中;該進程再次被調度後,從sleep函數中下一條尚未執行的指令繼續執行。
sleep類函數都調用sleep_on_common函數實現,只是傳入的參數有別。
- /*
-  * sleep_on_common - queue the current task on @q and sleep.
-  * @q:       wait queue head the task is added to
-  * @state:   task state stored via __set_current_state() before sleeping
-  * @timeout: value handed to schedule_timeout()
-  *
-  * Returns whatever schedule_timeout() returns.
-  *
-  * The interrupt flags are saved by the first lock acquisition and are only
-  * restored by the final unlock; the spin_unlock()/spin_lock_irq() pair
-  * around schedule_timeout() does not use the saved flags.
-  */
- static long __sched
- sleep_on_common(wait_queue_head_t *q, int state, long timeout)
- {
-     wait_queue_t entry;
-     unsigned long irq_state;
-
-     /* Bind the current task to an on-stack wait queue entry. */
-     init_waitqueue_entry(&entry, current);
-     /* Mark the current task with the requested (sleeping) state. */
-     __set_current_state(state);
-
-     /* Link the entry into the wait queue under the queue lock. */
-     spin_lock_irqsave(&q->lock, irq_state);
-     __add_wait_queue(q, &entry);
-     spin_unlock(&q->lock);
-
-     /*
-      * Sleep until woken or until the timeout expires; other tasks get
-      * to run while this one sleeps.
-      */
-     timeout = schedule_timeout(timeout);
-
-     /* Execution resumes here after wakeup: unlink the entry again. */
-     spin_lock_irq(&q->lock);
-     __remove_wait_queue(q, &entry);
-     spin_unlock_irqrestore(&q->lock, irq_state);
-
-     return timeout;
- }
- /*
-  * init_waitqueue_entry - set up wait queue entry @q for task @p.
-  * @q: entry to initialise
-  * @p: task stored in the entry's private field
-  *
-  * Clears the entry flags and installs default_wake_function as the
-  * callback used to wake the entry.
-  */
- static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
- {
-     /* Remember the owning task as the entry's private data. */
-     q->private = p;
-     /* Use the default wakeup callback. */
-     q->func = default_wake_function;
-     /* No flags set. */
-     q->flags = 0;
- }
我們看喚醒函數,default_wake_function最終調用函數try_to_wake_up
- /***
-  * try_to_wake_up - wake up a thread
-  * @p: the to-be-woken-up thread
-  * @state: the mask of task states that can be woken
-  * @wake_flags: wakeup flags; WF_SYNC marks a synchronous wakeup
-  *
-  * Put it on the run-queue if it's not already there. The "current"
-  * thread is always on the run-queue (except when the actual
-  * re-schedule is in progress), and as such you're allowed to do
-  * the simpler "current->state = TASK_RUNNING" to mark yourself
-  * runnable without the overhead of this.
-  *
-  * Returns 1 when the task was activated by this call, 0 when its state
-  * is not in @state or it is already on a run-queue.
-  */
- static int try_to_wake_up(struct task_struct *p, unsigned int state,
-                           int wake_flags)
- {
-     int cpu, orig_cpu, this_cpu, success = 0;
-     unsigned long flags;
-     struct rq *rq, *orig_rq;
-
-     /* SYNC_WAKEUPS feature disabled: drop the WF_SYNC hint. */
-     if (!sched_feat(SYNC_WAKEUPS))
-         wake_flags &= ~WF_SYNC;
-
-     /* Id of the CPU performing the wakeup; paired with put_cpu() below. */
-     this_cpu = get_cpu();
-
-     smp_wmb();
-     /* Lock the run queue the task currently belongs to. */
-     rq = orig_rq = task_rq_lock(p, &flags);
-     /* Refresh that run queue's clock. */
-     update_rq_clock(rq);
-     /* Nothing to do when the task is not in one of the wakeable states. */
-     if (!(p->state & state))
-         goto out;
-
-     /* The task is already on a run queue. */
-     if (p->se.on_rq)
-         goto out_running;
-
-     /* CPU the task is currently assigned to. */
-     cpu = task_cpu(p);
-     orig_cpu = cpu;
-
- #ifdef CONFIG_SMP
-     /* p is reported as running on rq: activate it without migrating. */
-     if (unlikely(task_running(rq, p)))
-         goto out_activate;
-
-     /*
-      * In order to handle concurrent wakeups and release the rq->lock
-      * we put the task in TASK_WAKING state.
-      *
-      * First fix up the nr_uninterruptible count:
-      */
-     if (task_contributes_to_load(p))
-         rq->nr_uninterruptible--;
-     p->state = TASK_WAKING;
-     task_rq_unlock(rq, &flags);
-     /*
-      * select_task_rq is typically used when a new program is executed
-      * or a task is woken: based on the load of each CPU under SMP it
-      * decides whether the task should move to another CPU's run queue
-      * and returns the chosen target CPU.
-      */
-     cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
-     /* Assign the task to the selected CPU when it differs. */
-     if (cpu != orig_cpu)
-         set_task_cpu(p, cpu);
-
-     /* Lock the run queue the task belongs to now. */
-     rq = task_rq_lock(p, &flags);
-
-     /* A different run queue has not had its clock refreshed yet. */
-     if (rq != orig_rq)
-         update_rq_clock(rq);
-
-     WARN_ON(p->state != TASK_WAKING);
-     cpu = task_cpu(p);
-
- #ifdef CONFIG_SCHEDSTATS
-     /* Count one more task wakeup on this run queue. */
-     schedstat_inc(rq, ttwu_count);
-     if (cpu == this_cpu)
-         /* Count one more wakeup of a task on the waking CPU itself. */
-         schedstat_inc(rq, ttwu_local);
-     else {
-         struct sched_domain *sd;
-         /* Charge the first domain of this_cpu whose span contains cpu. */
-         for_each_domain(this_cpu, sd) {
-             if (cpumask_test_cpu(cpu, sched_domain_span(sd))) {
-                 schedstat_inc(sd, ttwu_wake_remote);
-                 break;
-             }
-         }
-     }
- #endif /* CONFIG_SCHEDSTATS */
-
- out_activate:
- #endif /* CONFIG_SMP */
-     /* Update the per-task wakeup statistics counters. */
-     schedstat_inc(p, se.nr_wakeups);
-     if (wake_flags & WF_SYNC)
-         schedstat_inc(p, se.nr_wakeups_sync);
-     if (orig_cpu != cpu)
-         schedstat_inc(p, se.nr_wakeups_migrate);
-     if (cpu == this_cpu)
-         schedstat_inc(p, se.nr_wakeups_local);
-     else
-         schedstat_inc(p, se.nr_wakeups_remote);
-     /* Move the task onto the run queue of its scheduling class. */
-     activate_task(rq, p, 1);
-     success = 1;
-
-     /*
-      * Only attribute actual wakeups done by this task.
-      */
-     if (!in_interrupt()) {
-         /* Update last_wakeup and avg_wakeup of the waker (current). */
-         struct sched_entity *se = &current->se;
-         u64 sample = se->sum_exec_runtime;
-
-         if (se->last_wakeup)
-             sample -= se->last_wakeup;
-         else
-             sample -= se->start_runtime;
-         update_avg(&se->avg_wakeup, sample);
-
-         se->last_wakeup = se->sum_exec_runtime;
-     }
-
- out_running:
-     trace_sched_wakeup(rq, p, success);
-
-     /*
-      * Decide whether the woken task may preempt the task that is
-      * currently running and take over the CPU.
-      */
-     check_preempt_curr(rq, p, wake_flags);
-
-     p->state = TASK_RUNNING;
- #ifdef CONFIG_SMP
-     if (p->sched_class->task_wake_up)
-         p->sched_class->task_wake_up(rq, p);
-
-     if (unlikely(rq->idle_stamp)) {
-         /*
-          * idle_stamp can indicate when this CPU entered the idle
-          * state; it is consumed and reset here.
-          */
-         u64 delta = rq->clock - rq->idle_stamp;
-         u64 max = 2*sysctl_sched_migration_cost;
-
-         if (delta > max)
-             rq->avg_idle = max;
-         else
-             /* avg_idle reflects how long the CPU tends to stay idle. */
-             update_avg(&rq->avg_idle, delta);
-         rq->idle_stamp = 0;
-     }
- #endif
- out:
-     task_rq_unlock(rq, &flags);
-     put_cpu();
-
-     return success;
- }
所有的wake_up類函數都最終調用__wake_up_common函數實現
- /*
-  * __wake_up_common - invoke the wake callback of entries queued on @q.
-  * @q:            wait queue head whose task_list is walked
-  * @mode:         passed through to each entry's callback
-  * @nr_exclusive: number of exclusive entries to wake before stopping
-  * @wake_flags:   passed through to each entry's callback
-  * @key:          passed through to each entry's callback
-  *
-  * The walk stops once the callback has succeeded for @nr_exclusive
-  * entries that carry WQ_FLAG_EXCLUSIVE.
-  */
- static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
-                              int nr_exclusive, int wake_flags, void *key)
- {
-     wait_queue_t *pos, *tmp;
-
-     list_for_each_entry_safe(pos, tmp, &q->task_list, task_list) {
-         /*
-          * Read the flags before running the callback.
-          * NOTE(review): presumably the entry may no longer be valid
-          * once its callback has run - confirm.
-          */
-         unsigned entry_flags = pos->flags;
-
-         /*
-          * Run the registered wake callback; for entries set up by
-          * init_waitqueue_entry() this is default_wake_function.
-          */
-         if (!pos->func(pos, mode, wake_flags, key))
-             continue;
-         /* Non-exclusive entries never end the walk. */
-         if (!(entry_flags & WQ_FLAG_EXCLUSIVE))
-             continue;
-         /* Stop after the requested number of exclusive wakeups. */
-         if (--nr_exclusive == 0)
-             break;
-     }
- }
wait_event方式
sleep_on類函數在以下情況中不能使用:必須先測試某個條件,並且在條件尚未滿足時緊接著讓進程去睡眠。為實現這樣的功能,內核採用wait_event的方式。
- /*
-  * __wait_event - sleep on wait queue @wq until @condition becomes true.
-  *
-  * Each pass of the loop calls prepare_to_wait() to queue the task and set
-  * its state to TASK_UNINTERRUPTIBLE, then tests @condition. If the
-  * condition does not hold yet, schedule() lets other tasks run; after a
-  * wakeup the loop starts over. Once the condition holds, finish_wait()
-  * is called on the entry.
-  */
- #define __wait_event(wq, condition) \
- do { \
-     DEFINE_WAIT(__wait); \
-     \
-     while (1) { \
-         prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); \
-         if (condition) \
-             break; \
-         schedule(); \
-     } \
-     finish_wait(&wq, &__wait); \
- } while (0)
當下一次調度到來時,調度程序把狀態已設置為非運行態的當前進程從運行隊列裡面刪除;而進程被wake_up類函數喚醒時,wake_up類函數將其重新加入運行隊列。進程恢復運行後繼續執行上面尚未執行完的wait_event循環:回到循環開頭再次調用prepare_to_wait並重新檢查條件,條件成立時跳出循環,執行finish_wait函數,finish_wait函數將其從等待隊列中刪除。