进程创建
在进程创建时,调用do_fork函数来创建新进程,那么和调度相关的操作主要有两个,一个是sched_fork,这是对一个进程进行调度的初始化,另外一个就是wake_up_new_task,这个是把刚刚创建的子进程唤醒加入到调度器中管理。
首先来看sched_fork函数,调用流为do_fork–>copy_process–>sched_fork。
/*
* fork()/clone()-time setup:
*
* Scheduler-side initialisation of the new task p. Everything below runs
* between get_cpu() and put_cpu(); both return paths call put_cpu().
*
* Returns 0 on success, or -EAGAIN when the child's priority is a
* deadline priority (dl_prio(p->prio)).
*/
int sched_fork(unsigned long clone_flags, struct task_struct *p)
{
unsigned long flags;
int cpu = get_cpu();
__sched_fork(clone_flags, p); // initialise the scheduling-related structures of p
/*
* We mark the process as running here. This guarantees that
* nobody will actually run it, and a signal or other external
* event cannot wake it up and insert it on the runqueue either.
*/
p->state = TASK_RUNNING; // set the task state to TASK_RUNNING
/*
* Make sure we do not leak PI boosting priority to the child.
*/
p->prio = current->normal_prio; // start from the parent's normal_prio, not its current prio
/*
* Revert to default priority/policy on fork if requested.
*/
if (unlikely(p->sched_reset_on_fork)) {
if (task_has_dl_policy(p) || task_has_rt_policy(p)) {
p->policy = SCHED_NORMAL;
p->static_prio = NICE_TO_PRIO(0);
p->rt_priority = 0;
} else if (PRIO_TO_NICE(p->static_prio) < 0)
p->static_prio = NICE_TO_PRIO(0);
/*
* The else-if above has no braces: the statements from here on run
* for every reset-on-fork task, whichever branch was taken.
*/
p->prio = p->normal_prio = __normal_prio(p);
set_load_weight(p);
/*
* We don't need the reset flag anymore after the fork. It has
* fulfilled its duty:
*/
p->sched_reset_on_fork = 0; // end of the reset-on-fork policy/priority handling
}
/*
* Choose the scheduling class from the resulting priority. A deadline
* priority is rejected here; put_cpu() balances the get_cpu() above
* before the early return.
*/
if (dl_prio(p->prio)) {
put_cpu();
return -EAGAIN;
} else if (rt_prio(p->prio)) {
p->sched_class = &rt_sched_class;
} else {
p->sched_class = &fair_sched_class; // ordinary tasks use the fair (CFS) scheduling class
}
if (p->sched_class->task_fork)
p->sched_class->task_fork(p); // run the class's task_fork callback when it provides one
/*
* The child is not yet in the pid-hash so no cgroup attach races,
* and the cgroup is pinned to this child due to cgroup_fork()
* is ran before sched_fork().
*
* Silence PROVE_RCU.
*/
raw_spin_lock_irqsave(&p->pi_lock, flags);
set_task_cpu(p, cpu); // place the child on the CPU obtained from get_cpu() above
raw_spin_unlock_irqrestore(&p->pi_lock, flags);
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
/* Clear the child's sched_info so it does not start with copied statistics. */
if (likely(sched_info_on()))
memset(&p->sched_info, 0, sizeof(p->sched_info));
#endif
#if defined(CONFIG_SMP)
p->on_cpu = 0;
#endif
init_task_preempt_count(p); // initialise the child's preempt count
#ifdef CONFIG_SMP
plist_node_init(&p->pushable_tasks, MAX_PRIO);
RB_CLEAR_NODE(&p->pushable_dl_tasks);
#endif
put_cpu();
return 0;
}
下面来看CFS调度器中task_fork的实现,调用流do_fork–>copy_process–>sched_fork–>task_fork_fair:
/*
* task_fork_fair() - fair-class task_fork callback for the new task p.
*
* With this CPU's rq->lock held (interrupts saved/restored via flags):
*  - refreshes the runqueue clock;
*  - re-binds p to this CPU through __set_task_cpu();
*  - brings the cfs_rq's current entity up to date with update_curr();
*  - seeds the child's vruntime from the current entity (when there is
*    one) and lets place_entity() adjust it with its third argument 1;
*  - optionally swaps vruntimes with the current entity and requests a
*    reschedule when sysctl_sched_child_runs_first is set;
*  - finally stores the child's vruntime relative to cfs_rq->min_vruntime.
*/
static void task_fork_fair(struct task_struct *p)
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se, *curr;
int this_cpu = smp_processor_id();
struct rq *rq = this_rq();
unsigned long flags;
raw_spin_lock_irqsave(&rq->lock, flags);
update_rq_clock(rq);
/* cfs_rq of the forking task (current); curr may be NULL, see the checks below. */
cfs_rq = task_cfs_rq(current);
curr = cfs_rq->curr;
/*
* Not only the cpu but also the task_group of the parent might have
* been changed after parent->se.parent,cfs_rq were copied to
* child->se.parent,cfs_rq. So call __set_task_cpu() to make those
* of child point to valid ones.
*/
rcu_read_lock();
__set_task_cpu(p, this_cpu);
rcu_read_unlock();
update_curr(cfs_rq);
/* Start the child from the current entity's vruntime, then let place_entity() adjust it. */
if (curr)
se->vruntime = curr->vruntime;
place_entity(cfs_rq, se, 1);
/* Only swap when curr would otherwise be ordered before the child. */
if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
/*
* Upon rescheduling, sched_class::put_prev_task() will place
* 'current' within the tree based on its new key value.
*/
swap(curr->vruntime, se->vruntime);
resched_curr(rq);
}
/*
* Keep the child's vruntime relative to min_vruntime for now;
* enqueue_entity() adds cfs_rq->min_vruntime back when it is called
* without ENQUEUE_WAKEUP.
*/
se->vruntime -= cfs_rq->min_vruntime;
raw_spin_unlock_irqrestore(&rq->lock, flags);
}
这个函数主要实现的是如下几个步骤:
(1)更新runqueue clock
(2)设置子进程的cpu为父进程当前所在的CPU
(3)update_curr是CFS调度器中核心函数,更新父进程的sum_exec_runtime,vruntime和runqueue的min_vruntime
(4)place_entity对于新创建的进程进行惩罚,vruntime会加上一个值,防止新创建进程恶意占有CPU
加入运行队列(enqueue操作)
上面介绍的就是进程创建关于调度的初始化过程,那么初始化完成后,下面就要把新的子进程加入到调度器中,涉及的函数如下do_fork–>wake_up_new_task:
/*
* wake_up_new_task() - put a freshly forked task on a runqueue for the
* first time.
*
* Takes p->pi_lock (saving interrupt state in flags), on SMP re-selects
* the CPU for p, locks p's runqueue with __task_rq_lock(), enqueues p via
* activate_task() with flags 0, marks it TASK_ON_RQ_QUEUED, and lets
* check_preempt_curr() decide about preempting the running task.
* task_rq_unlock() is the single release call at the end.
*/
void wake_up_new_task(struct task_struct *p)
{
unsigned long flags;
struct rq *rq;
raw_spin_lock_irqsave(&p->pi_lock, flags);
#ifdef CONFIG_SMP
/*
* Fork balancing, do it here and not earlier because:
* - cpus_allowed can change in the fork path
* - any previously selected cpu might disappear through hotplug
*/
set_task_cpu(p, select_task_rq(p, task_cpu(p), SD_BALANCE_FORK, 0)); // re-select the child's CPU/runqueue; effectively a fork-time load-balancing step
#endif
/* Initialize new task's runnable average */
init_task_runnable_average(p); // NOTE(review): original note says this seeds the child's time slice and load contribution from its weight; helper body not shown here, confirm
rq = __task_rq_lock(p);
activate_task(rq, p, 0); // enqueue the child on the runqueue; this is the core step of the function
p->on_rq = TASK_ON_RQ_QUEUED;
trace_sched_wakeup_new(p, true);
check_preempt_curr(rq, p, WF_FORK);
#ifdef CONFIG_SMP
/* Optional per-class hook, called only when the class defines task_woken. */
if (p->sched_class->task_woken)
p->sched_class->task_woken(rq, p);
#endif
task_rq_unlock(rq, p, &flags);
}
/*
 * activate_task() - account for and enqueue task p on runqueue rq.
 *
 * When task_contributes_to_load(p) holds, rq->nr_uninterruptible is
 * decremented first; p is then handed to enqueue_task() with the
 * caller's flags passed through unchanged.
 */
void activate_task(struct rq *rq, struct task_struct *p, int flags)
{
	if (task_contributes_to_load(p)) {
		rq->nr_uninterruptible -= 1;
	}

	enqueue_task(rq, p, flags);
}
/*
 * enqueue_task() - class-independent enqueue path.
 *
 * Refreshes the runqueue clock, records the queueing event through
 * sched_info_queued(), and then dispatches to the enqueue_task hook of
 * the task's scheduling class with the same rq, p and flags.
 */
static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
{
	/* Bookkeeping first, in this order, before the class hook runs. */
	update_rq_clock(rq);
	sched_info_queued(rq, p);

	/* Hand over to the scheduling class that owns p. */
	(p->sched_class->enqueue_task)(rq, p, flags);
}
/*
* enqueue_task_fair() - fair-class enqueue of task p on rq.
*
* Two for_each_sched_entity() passes share one cursor (se is not reset
* between them), starting at p's own sched_entity:
*  1. Entities not yet on_rq are enqueued on their cfs_rq through
*     enqueue_entity(). The pass stops at the first entity that is
*     already queued or at the first throttled cfs_rq. After the first
*     enqueue, flags is forced to ENQUEUE_WAKEUP for the remaining ones.
*  2. For whatever is left, h_nr_running is incremented and shares/load
*     are refreshed, again stopping at a throttled cfs_rq.
* When se is NULL afterwards, the rq-level runnable average and
* nr_running are updated.
* NOTE(review): this reading assumes for_each_sched_entity() walks se
* towards the root and ends with se == NULL; the macro is not shown here.
*/
static void
enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
struct cfs_rq *cfs_rq;
struct sched_entity *se = &p->se;
for_each_sched_entity(se) {
if (se->on_rq)
break;
cfs_rq = cfs_rq_of(se);
enqueue_entity(cfs_rq, se, flags); // add this scheduling entity to its cfs_rq
/*
* end evaluation on encountering a throttled cfs_rq
*
* note: in the case of encountering a throttled cfs_rq we will
* post the final h_nr_running increment below.
*/
if (cfs_rq_throttled(cfs_rq))
break;
cfs_rq->h_nr_running++;
flags = ENQUEUE_WAKEUP;
}
/* Continues from wherever the first loop left se. */
for_each_sched_entity(se) {
cfs_rq = cfs_rq_of(se);
cfs_rq->h_nr_running++;
if (cfs_rq_throttled(cfs_rq))
break;
update_cfs_shares(cfs_rq); // refresh the cfs shares
update_entity_load_avg(se, 1); // refresh entity load; enqueue_entity() above also calls update_cfs_shares() and a load-average update of its own
}
/* se is only NULL when neither loop stopped at a throttled cfs_rq. */
if (!se) {
update_rq_runnable_avg(rq, rq->nr_running);
add_nr_running(rq, 1);
}
hrtick_update(rq);
}
/*
* enqueue_entity() - queue one sched_entity on its cfs_rq.
*
* Order matters here: the entity's vruntime is made absolute again
* before update_curr() runs, as the comment inside explains. The entity
* is then accounted in load and shares, placed and treated as a sleeper
* only when ENQUEUE_WAKEUP is set, inserted into the tree unless it is
* the cfs_rq's current entity, and marked on_rq.
*/
static void
enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
{
/*
* Update the normalized vruntime before updating min_vruntime
* through calling update_curr().
*/
if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING))
se->vruntime += cfs_rq->min_vruntime; // e.g. a newly forked task (flags 0): add min_vruntime back
/*
* Update run-time statistics of the 'current'.
*/
update_curr(cfs_rq); // update the timing and vruntime data of this cfs_rq's current entity
enqueue_entity_load_avg(cfs_rq, se, flags & ENQUEUE_WAKEUP); // include the entity in this cfs_rq's load tracking
account_entity_enqueue(cfs_rq, se);
update_cfs_shares(cfs_rq);
if (flags & ENQUEUE_WAKEUP) {
place_entity(cfs_rq, se, 0); // wakeup placement (third argument 0); per the original note this compensates a just-woken task by lowering its vruntime so it runs sooner
enqueue_sleeper(cfs_rq, se);
}
update_stats_enqueue(cfs_rq, se);
check_spread(cfs_rq, se);
/* The cfs_rq's current entity is not inserted into the tree here. */
if (se != cfs_rq->curr)
__enqueue_entity(cfs_rq, se); // insert into the cfs_rq's rb-tree
se->on_rq = 1;
/* First runnable entity on this cfs_rq: add it to the leaf list and check throttling. */
if (cfs_rq->nr_running == 1) {
list_add_leaf_cfs_rq(cfs_rq);
check_enqueue_throttle(cfs_rq);
}
}
经历了以上这些操作以后,一个进程的创建到加入runqueue的过程就完成了,后续该进程就开始接受CFS调度器的调度了。