Linux wait返回及timer_create问题解决

libowenhit 2018-08-11

前言
前段时间查一个问题,发现应用层在使用wait函数时,在没有等到信号的情况下,wait函数返回了,并且返回值为0,没有超时及异常提示,不符合常理,跟进后发现,虽然c库代码编写不够严谨,但根源是应用层代码对timer_create的不当使用,引入了隐患。在这做一个分析,作为以后分析同类问题的参考。

一、 wait函数不合理返回问题
如下面代码,在postAndWait函数中,先把task queue进处理队列,然后调用wait等待task处理完成发送信号,接着在run函数中运行task及发送信号,当wait函数收到信号后,正常返回,这为正常的运行流程。但发现有时出现了在run中,task还没运行,也没有发送信号,wait函数就已经返回,并且返回值为0(success)。

frameworks\base\libs\hwui\renderthread\ RenderProxy.cpp

// Wraps `task` in a SignalingRenderTask, queues it on the render thread,
// blocks on mSyncCondition, and returns the pointer stored through the
// address handed to task->setReturnPtr().
void* RenderProxy::postAndWait(MethodInvokeRenderTask* task) {
    void* retval;
    task->setReturnPtr(&retval);
    SignalingRenderTask syncTask(task, &mSyncMutex, &mSyncCondition);
    AutoMutex _lock(mSyncMutex);
    mRenderThread.queue(&syncTask);    // queue task
    mSyncCondition.wait(mSyncMutex);  // wait for the task to finish running and send the signal
    return retval;   
// If wait() returns before the task has run, the task is released (syncTask
// and retval are locals of this frame) while the thread that runs the task
// is unaware of it, so things break as soon as the task runs.
// NOTE(review): wait() is called exactly once with no predicate re-check, so
// any early wakeup is treated as task completion.
}

frameworks\base\libs\hwui\renderthread\ RenderTask.cpp

// Runs the wrapped task, then signals mSignal while holding mLock.
void SignalingRenderTask::run() {
    mTask->run();        // run the wrapped task
    mLock->lock();
    mSignal->signal();      // send the signal to the thread blocked in wait()
    mLock->unlock();
}

二、wait不合理返回分析
跟进内核代码发现,当wait函数在等待时,wait所在的线程被挂起,正常情况下,当task的运行线程给wait所在的线程发送信号后,wait所在的线程被设置为可运行状态,等待系统调度运行并正常返回,wait函数调用路径及返回如下,调用路径如绿色标示的,返回点如紫色标示。(发送信号流程的代码位置与wait流程代码处于相同文件中,可自行跟踪)
system\core\include\utils\ Condition.h

// Waits on mCond with the given mutex and returns the negated result of
// pthread_cond_wait().
inline status_t Condition::wait(Mutex& mutex) {
    return -pthread_cond_wait(&mCond, &mutex.mMutex);
}

bionic\libc\bionic\ Pthread_cond.cpp

// Untimed wait: forwards to __pthread_cond_timedwait() with a NULL abstime
// and the clock taken from cond->value.
int pthread_cond_wait(pthread_cond_t* cond, pthread_mutex_t* mutex) {
  return __pthread_cond_timedwait(cond, mutex, NULL,  COND_GET_CLOCK(cond->value));
}

bionic\libc\bionic\ Pthread_cond.cpp

// Converts an optional absolute deadline into a relative timeout and calls
// __pthread_cond_timedwait_relative(). Returns ETIMEDOUT right away when
// __timespec_from_absolute() reports a negative result.
__LIBC_HIDDEN__
int __pthread_cond_timedwait(pthread_cond_t* cond, pthread_mutex_t* mutex, const timespec* abstime, clockid_t clock) {
  timespec ts;
  timespec* tsp;

  if (abstime != NULL) {        // not taken here: no timeout was set (abstime is NULL)
    if (__timespec_from_absolute(&ts, abstime, clock) < 0) {
      return ETIMEDOUT;
    }
    tsp = &ts;
  } else {
    tsp = NULL;
  }

  return __pthread_cond_timedwait_relative(cond, mutex, tsp);
}

bionic\libc\bionic\ Pthread_cond.cpp

// Releases the mutex, waits on the futex word cond->value (expecting the
// value sampled before unlocking), then re-acquires the mutex.
// Returns ETIMEDOUT when the futex wait reported -ETIMEDOUT, 0 for every
// other status.
__LIBC_HIDDEN__
int __pthread_cond_timedwait_relative(pthread_cond_t* cond, pthread_mutex_t* mutex, const timespec* reltime) {
  int old_value = cond->value;

  pthread_mutex_unlock(mutex);
  int status = __futex_wait_ex(&cond->value, COND_IS_SHARED(cond->value), old_value, reltime);
  pthread_mutex_lock(mutex);

  if (status == -ETIMEDOUT) {
    return ETIMEDOUT;
  }

  return 0;
}
bionic\libc\private\ Bionic_futex.h
// Issues a futex wait on ftx, using FUTEX_WAIT when `shared` is true and
// FUTEX_WAIT_PRIVATE otherwise.
static inline int __futex_wait_ex(volatile void* ftx, bool shared, int value, const struct timespec* timeout) {
  return __futex(ftx, shared ? FUTEX_WAIT : FUTEX_WAIT_PRIVATE, value, timeout);
}

bionic\libc\private\ Bionic_futex.h

// Thin wrapper around the futex syscall. On failure (syscall returned -1) it
// returns -errno and restores the caller's errno; otherwise it returns the
// syscall result unchanged.
static inline __always_inline int __futex(volatile void* ftx, int op, int value, const struct timespec* timeout) {
  // Our generated syscall assembler sets errno, but our callers (pthread functions) don't want to.
  int saved_errno = errno;
  int result = syscall(__NR_futex, ftx, op, value, timeout);
  if (__predict_false(result == -1)) {
    result = -errno;
    errno = saved_errno;
  }
  return result;
}

kernel\kernel\ Futex.c

/*
 * futex syscall entry point. For the wait/lock commands with a non-NULL
 * utime it copies and validates the user timespec and converts it to a
 * ktime_t (made absolute for FUTEX_WAIT); for the requeue/wake-op commands
 * utime is reinterpreted as the integer val2. Then it calls do_futex().
 * Returns -EFAULT if the timespec cannot be copied, -EINVAL if it is invalid.
 */
SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
        struct timespec __user *, utime, u32 __user *, uaddr2,
        u32, val3)
{
    struct timespec ts;
    ktime_t t, *tp = NULL;
    u32 val2 = 0;
    int cmd = op & FUTEX_CMD_MASK;

    if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI ||
              cmd == FUTEX_WAIT_BITSET ||
              cmd == FUTEX_WAIT_REQUEUE_PI)) {
        if (copy_from_user(&ts, utime, sizeof(ts)) != 0)
            return -EFAULT;
        if (!timespec_valid(&ts))
            return -EINVAL;

        t = timespec_to_ktime(ts);
        /* FUTEX_WAIT passes a relative timeout: add the current time. */
        if (cmd == FUTEX_WAIT)
            t = ktime_add_safe(ktime_get(), t);
        tp = &t;
    }
    /*
    * requeue parameter in 'utime' if cmd == FUTEX_*_REQUEUE_*.
    * number of waiters to wake in 'utime' if cmd == FUTEX_WAKE_OP.
    */
    if (cmd == FUTEX_REQUEUE || cmd == FUTEX_CMP_REQUEUE ||
        cmd == FUTEX_CMP_REQUEUE_PI || cmd == FUTEX_WAKE_OP)
        val2 = (u32) (unsigned long) utime;

    return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
}

kernel\kernel\ Futex.c

/*
 * Decodes the command and flags from op and dispatches to the matching
 * futex_* handler. Returns -ENOSYS for an unknown command, for
 * FUTEX_CLOCK_REALTIME combined with a command other than
 * FUTEX_WAIT_BITSET / FUTEX_WAIT_REQUEUE_PI, and for PI commands when
 * futex_cmpxchg_enabled is false.
 */
long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
        u32 __user *uaddr2, u32 val2, u32 val3)
{
    int cmd = op & FUTEX_CMD_MASK;
    unsigned int flags = 0;

    if (!(op & FUTEX_PRIVATE_FLAG))
        flags |= FLAGS_SHARED;

    if (op & FUTEX_CLOCK_REALTIME) {
        flags |= FLAGS_CLOCKRT;
        if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
            return -ENOSYS;
    }

    /* The PI commands require futex_cmpxchg_enabled. */
    switch (cmd) {
    case FUTEX_LOCK_PI:
    case FUTEX_UNLOCK_PI:
    case FUTEX_TRYLOCK_PI:
    case FUTEX_WAIT_REQUEUE_PI:
    case FUTEX_CMP_REQUEUE_PI:
        if (!futex_cmpxchg_enabled)
            return -ENOSYS;
    }

    switch (cmd) {
    case FUTEX_WAIT:
        val3 = FUTEX_BITSET_MATCH_ANY;
        /* fall through: plain wait is a bitset wait matching any bit */
    case FUTEX_WAIT_BITSET:
        return futex_wait(uaddr, flags, val, timeout, val3);
    case FUTEX_WAKE:
        val3 = FUTEX_BITSET_MATCH_ANY;
        /* fall through: plain wake is a bitset wake matching any bit */
    case FUTEX_WAKE_BITSET:
        return futex_wake(uaddr, flags, val, val3);
    case FUTEX_REQUEUE:
        return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
    case FUTEX_CMP_REQUEUE:
        return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
    case FUTEX_WAKE_OP:
        return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
    case FUTEX_LOCK_PI:
        return futex_lock_pi(uaddr, flags, val, timeout, 0);
    case FUTEX_UNLOCK_PI:
        return futex_unlock_pi(uaddr, flags);
    case FUTEX_TRYLOCK_PI:
        return futex_lock_pi(uaddr, flags, 0, timeout, 1);
    case FUTEX_WAIT_REQUEUE_PI:
        val3 = FUTEX_BITSET_MATCH_ANY;
        return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
                        uaddr2);
    case FUTEX_CMP_REQUEUE_PI:
        return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
    }
    return -ENOSYS;
}

kernel\kernel\ Futex.c

/*
 * Waits on the futex at uaddr. Return values visible in this function:
 *   0                       woken and already unqueued by a waker
 *   -EINVAL                 bitset is 0
 *   -ETIMEDOUT              a timeout was given and to->task is NULL
 *   -ERESTARTSYS            a signal is pending and no timeout was given
 *   -ERESTART_RESTARTBLOCK  a signal is pending and a timeout was given
 *                           (restart_block is filled in first)
 * A non-zero result of futex_wait_setup() is returned as-is.
 */
static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
              ktime_t *abs_time, u32 bitset)
{
    struct hrtimer_sleeper timeout, *to = NULL;
    struct restart_block *restart;
    struct futex_hash_bucket *hb;
    struct futex_q q = futex_q_init;
    int ret;

    if (!bitset)
        return -EINVAL;
    q.bitset = bitset;

    if (abs_time) {
        to = &timeout;

        hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
                      CLOCK_REALTIME : CLOCK_MONOTONIC,
                      HRTIMER_MODE_ABS);
        hrtimer_init_sleeper(to, current);
        hrtimer_set_expires_range_ns(&to->timer, *abs_time,
                        current->timer_slack_ns);
    }


retry:
    /*
    * Prepare to wait on uaddr. On success, holds hb lock and increments
    * q.key refs.
    */

    ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
    if (ret)
        goto out;

    /* queue_me and wait for wakeup, timeout, or a signal. */
    futex_wait_queue_me(hb, &q, to);

    /* If we were woken (and unqueued), we succeeded, whatever. */
    ret = 0;
    /* unqueue_me() drops q.key ref */
    if (!unqueue_me(&q)) {                 
        /* unqueue_me() return values: */
        /* 1 - if the futex_q was still queued (and we removed unqueued it); */
        /* 0 - if the futex_q was already removed by the waking thread (the case of being woken by the condition signal) */ 

        goto out;                        // normal return path once the awaited signal has arrived
    }
    ret = -ETIMEDOUT;
    if (to && !to->task) {
        goto out;
    }

    /*
    * We expect signal_pending(current), but we might be the
    * victim of a spurious wakeup as well.
    */
    if (!signal_pending(current)) {
        trace_printk("retry\n");
        goto retry;
    }

    ret = -ERESTARTSYS;
    if (!abs_time) {
        goto out;
    }

    restart = &current_thread_info()->restart_block;
    restart->fn = futex_wait_restart;
    restart->futex.uaddr = uaddr;
    restart->futex.val = val;
    restart->futex.time = abs_time->tv64;
    restart->futex.bitset = bitset;
    restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;

    ret = -ERESTART_RESTARTBLOCK;

out:
    if (to) {
        hrtimer_cancel(&to->timer);
        destroy_hrtimer_on_stack(&to->timer);
    }

    return ret;            // 0 on the normal path
}

kernel\kernel\ Futex.c

/*
 * Marks the current task TASK_INTERRUPTIBLE, queues q on hb, arms the
 * optional timeout timer, and calls freezable_schedule() unless q has
 * already been removed from the list or the timeout has already been
 * marked expired (timeout->task == NULL). Sets the task back to
 * TASK_RUNNING before returning.
 */
static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
                struct hrtimer_sleeper *timeout)
{
    /*
    * The task state is guaranteed to be set before another task can
    * wake it. set_current_state() is implemented using set_mb() and
    * queue_me() calls spin_unlock() upon completion, both serializing
    * access to the hash list and forcing another memory barrier.
    */
    set_current_state(TASK_INTERRUPTIBLE);
    queue_me(q, hb);

    /* Arm the timer */
    if (timeout) {
        hrtimer_start_expires(&timeout->timer, HRTIMER_MODE_ABS);
        /* Timer not active after arming: flag the timeout as expired. */
        if (!hrtimer_active(&timeout->timer))
            timeout->task = NULL;
    }

    /*
    * If we have been removed from the hash list, then another task
    * has tried to wake us, and we can skip the call to schedule().
    */
    if (likely(!plist_node_empty(&q->list))) {
        /*
        * If the timer has already expired, current will already be
        * flagged for rescheduling. Only call schedule if there
        * is no timeout, or if it has yet to expire.
        */
        if (!timeout || timeout->task) {
            freezable_schedule();       
        }
    }
    __set_current_state(TASK_RUNNING);
}

如果该等待线程使用timer_create创建了定时器,并且创建的定时器超时是给当前线程发送信号(timer_create的创建在第4节分析),当定时器超时后,就会把当前线程设置为可运行的状态,等待系统调度运行。若这时该线程刚好调用了wait在等待信号,由于该线程已经被设置为可运行状态,当调度到该线程运行时,futex_wait_queue_me函数的freezable_schedule()就会返回,这时futex_wait返回流程与返回值都与正常接收到信号时返回的不一样,如下面代码标示:
kernel\kernel\ Futex.c

/*
 * Same futex_wait() as quoted earlier, annotated for the case where the
 * waiting thread is woken by its own timer signal rather than by a futex
 * wake: with no timeout (abs_time == NULL) and a pending signal, the
 * function ends up returning -ERESTARTSYS.
 */
static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
              ktime_t *abs_time, u32 bitset)
{
    struct hrtimer_sleeper timeout, *to = NULL;
    struct restart_block *restart;
    struct futex_hash_bucket *hb;
    struct futex_q q = futex_q_init;
    int ret;

    if (!bitset)
        return -EINVAL;
    q.bitset = bitset;

    if (abs_time) {
        to = &timeout;

        hrtimer_init_on_stack(&to->timer, (flags & FLAGS_CLOCKRT) ?
                      CLOCK_REALTIME : CLOCK_MONOTONIC,
                      HRTIMER_MODE_ABS);
        hrtimer_init_sleeper(to, current);
        hrtimer_set_expires_range_ns(&to->timer, *abs_time,
                        current->timer_slack_ns);
    }


retry:
    /*
    * Prepare to wait on uaddr. On success, holds hb lock and increments
    * q.key refs.
    */

    ret = futex_wait_setup(uaddr, val, flags, &q, &hb);
    if (ret)
        goto out;

    /* queue_me and wait for wakeup, timeout, or a signal. */
    futex_wait_queue_me(hb, &q, to);

    /* If we were woken (and unqueued), we succeeded, whatever. */
    ret = 0;
    /* unqueue_me() drops q.key ref */
    if (!unqueue_me(&q)) {                 
        /* unqueue_me() return values: */
        /* 1 - if the futex_q was still queued (and we removed unqueued it); */
        /* 0 - if the futex_q was already removed by the waking thread (the case of being woken by the condition signal) */ 

        goto out;  // not woken by the awaited signal: futex_q was still queued, unqueue_me() returned 1, so this path is not taken
    }
    ret = -ETIMEDOUT;
    if (to && !to->task) {
        goto out;          // no timeout was set, so this path is not taken
    }

    /*
    * We expect signal_pending(current), but we might be the
    * victim of a spurious wakeup as well.
    */
    if (!signal_pending(current)) {
        goto retry;        // woken by this thread's timer signal (a signal is pending), so this path is not taken
    }

    ret = -ERESTARTSYS;
    if (!abs_time) {
        goto out;        // the flow finally ends up here, with ret = -ERESTARTSYS
    }

    restart = &current_thread_info()->restart_block;
    restart->fn = futex_wait_restart;
    restart->futex.uaddr = uaddr;
    restart->futex.val = val;
    restart->futex.time = abs_time->tv64;
    restart->futex.bitset = bitset;
    restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;

    ret = -ERESTART_RESTARTBLOCK;

out:
    if (to) {
        hrtimer_cancel(&to->timer);
        destroy_hrtimer_on_stack(&to->timer);
    }

    return ret;            // the return value is -ERESTARTSYS (-512)
}

从上面代码可以看出,futex_wait的返回值并不为0,但到了应用层得到的返回值就变成0了,分析后发现是c库的代码不严谨导致的,如下面代码:

bionic\libc\bionic\ Pthread_cond.cpp

// Releases the mutex, waits on the futex word cond->value, then re-acquires
// the mutex. Only -ETIMEDOUT from the futex wait is propagated (as
// ETIMEDOUT); every other status, including other negative error codes, is
// reported to the caller as 0.
__LIBC_HIDDEN__
int __pthread_cond_timedwait_relative(pthread_cond_t* cond, pthread_mutex_t* mutex, const timespec* reltime) {
  int old_value = cond->value;

  pthread_mutex_unlock(mutex);
  int status = __futex_wait_ex(&cond->value, COND_IS_SHARED(cond->value), old_value, reltime);
  pthread_mutex_lock(mutex);

  if (status == -ETIMEDOUT) {
    return ETIMEDOUT;        // only a timeout yields a non-zero return; every other case returns 0
  }

  return 0;
}

这样就导致了上层无法识别出除了超时之外的其它情况返回。

三、定时器超时唤醒线程的流程
定时器超时唤醒线程与发送信号唤醒线程流程不同,下面代码分析定时器唤醒时走的流程,调用路径如绿色标示(有关linux定时器的知识,可以搜索“linux定时器的实现”,可以找到很多介绍)。
kernel\kernel\Posix-timers.c

/*
 * Sends the timer's preallocated signal to the task identified by
 * timr->it_pid. The signal is sent as a shared (process-wide) signal unless
 * SIGEV_THREAD_ID is set in it_sigev_notify. Returns 1 only when
 * send_sigqueue() returned a positive value, 0 otherwise (including when
 * the target task was not found).
 */
int posix_timer_event(struct k_itimer *timr, int si_private)  // reached when a POSIX timer expires
{
    struct task_struct *task;
    int shared, ret = -1;
    /*
    * FIXME: if ->sigq is queued we can race with
    * dequeue_signal()->do_schedule_next_timer().
    *
    * If dequeue_signal() sees the "right" value of
    * si_sys_private it calls do_schedule_next_timer().
    * We re-queue ->sigq and drop ->it_lock().
    * do_schedule_next_timer() locks the timer
    * and re-schedules it while ->sigq is pending.
    * Not really bad, but not that we want.
    */
    timr->sigq->info.si_sys_private = si_private;

    rcu_read_lock();
    task = pid_task(timr->it_pid, PIDTYPE_PID);
    if (task) {
        shared = !(timr->it_sigev_notify & SIGEV_THREAD_ID);
        ret = send_sigqueue(timr->sigq, task, shared);
    }
    rcu_read_unlock();
    /* If we failed to send the signal the timer stops. */
    return ret > 0;
}

kernel\kernel\Signal.c

/*
 * Queues the preallocated sigqueue entry q for task t and calls
 * complete_signal(). The entry goes on the shared pending list when group
 * is non-zero, otherwise on t's private pending list.
 * Returns -1 if the sighand lock could not be taken, 1 if
 * prepare_signal() rejected the signal (ignored), and 0 otherwise (newly
 * queued, or already queued and only si_overrun incremented).
 */
int send_sigqueue(struct sigqueue *q, struct task_struct *t, int group)
{
    int sig = q->info.si_signo;
    int sival = q->info.si_value.sival_int;
    struct sigpending *pending;
    unsigned long flags;
    int ret, result;

    BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));

    ret = -1;
    if (!likely(lock_task_sighand(t, &flags)))
        goto ret;

    ret = 1; /* the signal is ignored */
    result = TRACE_SIGNAL_IGNORED;
    if (!prepare_signal(sig, t, false))
        goto out;

    ret = 0;
    if (unlikely(!list_empty(&q->list))) {
        /*
        * If an SI_TIMER entry is already queue just increment
        * the overrun count.
        */
        BUG_ON(q->info.si_code != SI_TIMER);
        q->info.si_overrun++;
        result = TRACE_SIGNAL_ALREADY_PENDING;
        goto out;
    }
    q->info.si_overrun = 0;

    signalfd_notify(t, sig);
    pending = group ? &t->signal->shared_pending : &t->pending;
    list_add_tail(&q->list, &pending->list);
    sigaddset(&pending->signal, sig);
    /* Pick a thread to handle the signal and wake it. */
    complete_signal(sig, t, group);
    result = TRACE_SIGNAL_DELIVERED;
out:
    trace_signal_generate(sig, &q->info, t, group, result);
    unlock_task_sighand(t, &flags);
ret:
    return ret;
}

kernel\kernel\Signal.c

/*
 * Chooses a thread to handle signal sig and wakes it through
 * signal_wake_up(). p is preferred when wants_signal() accepts it; for a
 * group signal the other threads are scanned starting at
 * signal->curr_target. Returns without waking anyone when no thread wants
 * the signal. For a fatal, non-coredump signal it starts a group exit and
 * wakes every thread with SIGKILL pending.
 */
static void complete_signal(int sig, struct task_struct *p, int group)
{
    struct signal_struct *signal = p->signal;
    struct task_struct *t;

    /*
    * Now find a thread we can wake up to take the signal off the queue.
    *
    * If the main thread wants the signal, it gets first crack.
    * Probably the least surprising to the average bear.
    */
    if (wants_signal(sig, p))
        t = p;
    else if (!group || thread_group_empty(p))
        /*
        * There is just one thread and it does not need to be woken.
        * It will dequeue unblocked signals before it runs again.
        */
        return;
    else {
        /*
        * Otherwise try to find a suitable thread.
        */
        t = signal->curr_target;
        while (!wants_signal(sig, t)) {
            t = next_thread(t);
            if (t == signal->curr_target)
                /*
                * No thread needs to be woken.
                * Any eligible threads will see
                * the signal in the queue soon.
                */
                return;
        }
        signal->curr_target = t;
    }

    /*
    * Found a killable thread.  If the signal will be fatal,
    * then start taking the whole group down immediately.
    */
    if (sig_fatal(p, sig) &&
        !(signal->flags & (SIGNAL_UNKILLABLE | SIGNAL_GROUP_EXIT)) &&
        !sigismember(&t->real_blocked, sig) &&
        (sig == SIGKILL || !t->ptrace)) {
        /*
        * This signal will be fatal to the whole group.
        */
        if (!sig_kernel_coredump(sig)) {
            /*
            * Start a group exit and wake everybody up.
            * This way we don't have other threads
            * running and doing things after a slower
            * thread has the fatal signal pending.
            */
            signal->flags = SIGNAL_GROUP_EXIT;
            signal->group_exit_code = sig;
            signal->group_stop_count = 0;
            t = p;
            do {
                task_clear_jobctl_pending(t, JOBCTL_PENDING_MASK);
                sigaddset(&t->pending.signal, SIGKILL);
                signal_wake_up(t, 1);
            } while_each_thread(p, t);
            return;
        }
    }

    /*
    * The signal is already in the shared-pending queue.
    * Tell the chosen thread to wake up and dequeue it.
    */
    signal_wake_up(t, sig == SIGKILL);
    return;
}

kernel\include\linux\Sched.h

/*
 * Wakes t for signal handling; when resume is true, TASK_WAKEKILL is added
 * to the set of states that may be woken.
 */
static inline void signal_wake_up(struct task_struct *t, bool resume)
{
    signal_wake_up_state(t, resume ? TASK_WAKEKILL : 0);
}
kernel\kernel\Signal.c
/*
 * Sets TIF_SIGPENDING on t and tries to wake it if it is in
 * TASK_INTERRUPTIBLE or in one of the states given in `state`. If no
 * wakeup took place, kick_process(t) is called instead.
 */
void signal_wake_up_state(struct task_struct *t, unsigned int state)
{
    set_tsk_thread_flag(t, TIF_SIGPENDING);
    /*
    * TASK_WAKEKILL also means wake it up in the stopped/traced/killable
    * case. We don't check t->state here because there is a race with it
    * executing another processor and just now entering stopped state.
    * By using wake_up_state, we ensure the process will wake up and
    * handle its death signal.
    */
    if (!wake_up_state(t, state | TASK_INTERRUPTIBLE))
        kick_process(t);
}

kernel\kernel\sched\Core.c

/* Wakes p if it is in one of the given states; forwards to try_to_wake_up() with wake_flags 0. */
int wake_up_state(struct task_struct *p, unsigned int state)
{
    return try_to_wake_up(p, state, 0);
}
kernel\kernel\sched\Core.c
/*
 * Wakes task p if its current state matches the `state` mask.
 * Returns 1 when p->state matched and the wakeup was carried out, 0 when
 * p was not in any of the requested states. Runs with p->pi_lock held and
 * interrupts disabled.
 */
static int
try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
{
    unsigned long flags;
    int cpu, success = 0;

    /*
    * If we are going to wake up a thread waiting for CONDITION we
    * need to ensure that CONDITION=1 done by the caller can not be
    * reordered with p->state check below. This pairs with mb() in
    * set_current_state() the waiting thread does.
    */
    smp_mb__before_spinlock();
    raw_spin_lock_irqsave(&p->pi_lock, flags);
    /* Nothing to do when p is not in one of the requested states. */
    if (!(p->state & state))
        goto out;

    success = 1; /* we're going to change ->state */
    cpu = task_cpu(p);

    /* p is still on a runqueue and ttwu_remote() handled it: only update stats. */
    if (p->on_rq && ttwu_remote(p, wake_flags))
        goto stat;

#ifdef CONFIG_SMP
    /*
    * If the owning (remote) cpu is still in the middle of schedule() with
    * this task as prev, wait until its done referencing the task.
    */
    while (p->on_cpu)
        cpu_relax();
    /*
    * Pairs with the smp_wmb() in finish_lock_switch().
    */
    smp_rmb();

    p->sched_contributes_to_load = !!task_contributes_to_load(p);
    p->state = TASK_WAKING;

    if (p->sched_class->task_waking)
        p->sched_class->task_waking(p);

    cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
    if (task_cpu(p) != cpu) {
        wake_flags |= WF_MIGRATED;
        set_task_cpu(p, cpu);
    }
#endif /* CONFIG_SMP */


    ttwu_queue(p, cpu);    /* run ttwu_do_activate->ttwu_do_wakeup */
stat:
    ttwu_stat(p, cpu, wake_flags);
out:
    raw_spin_unlock_irqrestore(&p->pi_lock, flags);

    return success;
}
kernel\kernel\sched\Core.c
/*
 * Puts p on the runqueue of `cpu`. Under SMP, when the TTWU_QUEUE feature
 * is on and the current CPU does not share cache with `cpu`, the work is
 * handed to ttwu_queue_remote(); otherwise p is activated directly while
 * holding rq->lock.
 */
static void ttwu_queue(struct task_struct *p, int cpu)
{
    struct rq *rq = cpu_rq(cpu);

#if defined(CONFIG_SMP)
    if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) {
        sched_clock_cpu(cpu); /* sync clocks x-cpu */
        ttwu_queue_remote(p, cpu);
        return;
    }
#endif

    raw_spin_lock(&rq->lock);
    ttwu_do_activate(rq, p, 0);
    raw_spin_unlock(&rq->lock);
}
kernel\kernel\sched\Core.c
/*
 * Activates p on rq via ttwu_activate() and then finishes the wakeup with
 * ttwu_do_wakeup(). Under SMP, rq->nr_uninterruptible is decremented first
 * when p->sched_contributes_to_load is set.
 */
static void
ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags)
{
#ifdef CONFIG_SMP
    if (p->sched_contributes_to_load)
        rq->nr_uninterruptible--;
#endif

    ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_WAKING);
    ttwu_do_wakeup(rq, p, wake_flags);
}

kernel\kernel\sched\Core.c

/*
 * Final step of the wakeup: runs the preemption check, emits the wakeup
 * trace event, and marks p TASK_RUNNING. Under SMP it also calls the
 * scheduling class's task_woken hook (if any) and, when rq->idle_stamp is
 * set, updates rq->avg_idle (capped at 2 * sysctl_sched_migration_cost)
 * and clears the stamp.
 */
static void
ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
{
    check_preempt_curr(rq, p, wake_flags);
    trace_sched_wakeup(p, true);

    p->state = TASK_RUNNING;    // mark the task runnable; it can run once the scheduler picks it

#ifdef CONFIG_SMP
    if (p->sched_class->task_woken)
        p->sched_class->task_woken(rq, p);

    if (rq->idle_stamp) {
        u64 delta = rq->clock - rq->idle_stamp;
        u64 max = 2*sysctl_sched_migration_cost;

        if (delta > max)
            rq->avg_idle = max;
        else
            update_avg(&rq->avg_idle, delta);
        rq->idle_stamp = 0;
    }
#endif
}

四、timer_create创建及参数说明
从前面的分析看,似乎只要在一个线程内创建了定时器,并且使用wait等待,就会存在问题,其实并不是这样。结合timer_create创建时的参数及实际测试情况发现,只有在timer_create使用不当时,才会存在该问题。timer_create函数原型如下(timer_create的实现在bionic\libc\bionic\ Posix_timers.cpp):

/* Prototype of timer_create(); evp (a struct sigevent) selects how timer expiry is notified. */
int timer_create(clockid_t clock_id, sigevent* evp, timer_t* timer_id);

这里我们只关心第二个参数,第二个参数 struct sigevent 用来设置定时器到时时的通知方式。该数据结构如下:

/* Describes how a timer's expiry is notified (second argument of timer_create()). */
struct sigevent { 
int sigev_notify; /* Notification method */ 
int sigev_signo; /* Notification signal */ 
union sigval sigev_value; /* Data passed with notification */ 
void (*sigev_notify_function) (union sigval);  /* Function used for thread notification (SIGEV_THREAD) */ 
void *sigev_notify_attributes;  /* Attributes for notification thread (SIGEV_THREAD) */ 
pid_t sigev_notify_thread_id;  /* ID of thread to signal (SIGEV_THREAD_ID) */ 
};

其中sigev_notify 表示通知方式,有如下几种:
通知方式 描述
SIGEV_NONE 定时器到期时不产生通知。
SIGEV_SIGNAL 定时器到期时将给进程投递一个信号,sigev_signo 可以用来指定使用什么信号。
SIGEV_THREAD 定时器到期时将启动新的线程进行需要的处理
SIGEV_THREAD_ID(仅针对 Linux) 定时器到期时将向指定线程发送信号。
■如果采用 SIGEV_NONE 方式,使用者必须调用timer_gettime 函数主动读取定时器已经走过的时间。类似轮询。
■如果采用 SIGEV_SIGNAL 方式,使用者可以选择使用什么信号,用 sigev_signo 表示信号值,比如 SIGALRM。
■如果使用 SIGEV_THREAD 方式,timer_create时会专门创建一个线程用于调用超时处理函数。需要设置 sigev_notify_function为超时调用函数入口;sigev_value 保存了传入 sigev_notify_function 的参数。sigev_notify_attributes 如果非空,则应该是一个指向 pthread_attr_t 的指针,用来设置线程的属性(比如 stack 大小,detach 状态等)。
■SIGEV_THREAD_ID 通常和 SIGEV_SIGNAL 联合使用,这样当 Timer 到期时,系统会向由 sigev_notify_thread_id 指定的线程发送信号,否则进程中的任意线程都可能收到该信号。这个选项是 Linux 对 POSIX 标准的扩展,目前主要是 GLibc 在实现 SIGEV_THREAD 的时候使用到,应用程序很少会需要用到这种模式。

从实际的情况看,当sigev_notify设置为SIGEV_SIGNAL时,当定时器超时就会唤醒调用timer_create创建定时器的线程,若该线程刚好在wait,就会出现前面分析的wait返回的情况。如果sigev_notify设置为SIGEV_THREAD,则在定时器超时后,只会唤醒专门创建的定时器处理函数线程,而不会唤醒调用timer_create创建定时器的线程,就不会存在前面分析的wait返回的情况。

五、总结
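从分析看,虽然c库代码处理不够严谨,但问题的根源还是timer_create使用不当。在使用timer_create时,不建议把sigev_notify设置为SIGEV_SIGNAL,除非能明确该线程只是进行定时器超时的处理。另外需要注意,POSIX标准本身允许pthread_cond_wait出现虚假唤醒(spurious wakeup),因此更稳妥的做法是在调用wait的地方用循环检查条件(例如用一个标志位表示task是否已经运行完成),条件不满足时继续wait,而不是只依赖wait的返回。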

相关推荐