C++11中的mutex, lock，condition variable實現分析

本文分析的是llvm libc++的實現：https://libcxx.llvm.org/

C++11中的各種mutex, lock對象，實際上都是對posix的mutex，condition的封裝。不過裡面也有很多細節值得學習。

std::mutex

先來看下std::mutex：

包裝了一個pthread_mutex_t __m_，很簡單，每個函數該幹嘛就幹嘛。

// Thin, non-copyable wrapper around a POSIX pthread_mutex_t.
class mutex
{
    pthread_mutex_t __m_;  // the wrapped POSIX mutex

public:
     // Uses the static initializer, so no pthread_mutex_init() call is made here.
     mutex() _NOEXCEPT {__m_ = (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER;}
     // Calls pthread_mutex_destroy() on the wrapped mutex.
     ~mutex();
private:
    // Copying is disabled: both operations are private (the trailing comments
    // mark them as intended to be "= delete").
    mutex(const mutex&);// = delete;
    mutex& operator=(const mutex&);// = delete;
public:
    // Calls pthread_mutex_lock(); a non-zero result goes to __throw_system_error().
    void lock();
    // Returns true when pthread_mutex_trylock() reports success (0).
    bool try_lock() _NOEXCEPT;
    // Calls pthread_mutex_unlock(); a failure is only checked with assert().
    void unlock() _NOEXCEPT;

    // Direct access to the wrapped pthread_mutex_t.
    typedef pthread_mutex_t* native_handle_type;
    _LIBCPP_INLINE_VISIBILITY native_handle_type native_handle() {return &__m_;}
};

// Destroys the wrapped POSIX mutex. The return code of
// pthread_mutex_destroy() is not checked.
mutex::~mutex()
{
    pthread_mutex_destroy(&__m_);
}

// Locks the wrapped POSIX mutex. Any non-zero return code from
// pthread_mutex_lock() is handed to __throw_system_error() together with a
// fixed message.
void mutex::lock()
{
    if (int __rc = pthread_mutex_lock(&__m_))
        __throw_system_error(__rc, "mutex lock failed");
}

// Non-throwing lock attempt: reports true exactly when
// pthread_mutex_trylock() returns 0.
bool mutex::try_lock() _NOEXCEPT
{
    const int __rc = pthread_mutex_trylock(&__m_);
    return __rc == 0;
}

// Unlocks the wrapped POSIX mutex. A failing pthread_mutex_unlock() is only
// caught by assert(); nothing is thrown or returned.
void mutex::unlock() _NOEXCEPT
{
    const int __rc = pthread_mutex_unlock(&__m_);
    assert(__rc == 0);
    (void)__rc;  // keeps __rc referenced when assert() expands to nothing
}

三種鎖狀態：std::defer_lock, std::try_to_lock, std::adopt_lock

這三個是用於標識鎖在傳遞到一些包裝類時，鎖的狀態：

std::defer_lock，還沒有獲取到鎖

std::try_to_lock，在包裝類構造時，嘗試去獲取鎖

std::adopt_lock，調用者已經獲得了鎖

這三個東西實際上是用於函數重載選擇（tag dispatch）的標籤類型，而不是模板偏特化；它們是三個空的struct：

// Empty tag types. They carry no data; their only job is to give each
// lock-wrapper constructor overload a distinct parameter type.
struct defer_lock_t {};
struct try_to_lock_t {};
struct adopt_lock_t {};

// One ready-made constant per tag, for callers to pass as the extra argument.
constexpr defer_lock_t  defer_lock{};
constexpr try_to_lock_t try_to_lock{};
constexpr adopt_lock_t  adopt_lock{};

在下面的代碼裡，就可以看到這三個標籤是怎麼用的了。

std::lock_guard

這個類比較重要，因為我們真正使用lock的時候，大部分都是要用這個。

這個類其實很簡單：

在構造函數裡調用mutex.lock()，
在析構函數裡調用mutex.unlock()。

因為C++會在函數拋出異常時，自動調用作用域內的變量的析構函數，所以使用std::lock_guard可以在異常時自動釋放鎖，這就是為什麼要避免直接使用mutex的函數，而是要用std::lock_guard的原因了。

// Scoped mutex owner: whatever constructor was used, the destructor calls
// unlock() on the referenced mutex.
template <class _Mutex>
class lock_guard
{
public:
    using mutex_type = _Mutex;

    // Locks __mtx as part of construction.
    explicit lock_guard(mutex_type& __mtx)
        : __m_(__mtx)
    {
        __m_.lock();
    }

    // adopt_lock_t overload: stores the reference without calling lock(),
    // for a mutex the caller has already locked.
    lock_guard(mutex_type& __mtx, adopt_lock_t)
        : __m_(__mtx)
    {
    }

    ~lock_guard()
    {
        __m_.unlock();
    }

private:
    // Copying is disabled by keeping both operations private.
    lock_guard(lock_guard const&);            // = delete;
    lock_guard& operator=(lock_guard const&); // = delete;

    mutex_type& __m_;  // the guarded mutex
};

注意std::lock_guard的兩個構造函數：當只傳遞mutex時，會在構造函數裡調用mutex.lock()來獲得鎖。

當傳遞了adopt_lock_t時，說明調用者已經拿到了鎖，所以不再嘗試去獲得鎖。

std::unique_lock

unique_lock實際上也是一個包裝類，起名為unique可能是和std::lock函數區分用的。
注意，多了一個owns_lock函數和release()函數，這兩個在std::lock函數會用到。

owns_lock函數用於判斷是否擁有鎖；

release()函數則放棄了對鎖的關聯，當析構時，不會去unlock鎖。
再看下unique_lock的實現，可以發現，上面的三種標籤類型就是用來選擇不同的構造函數重載的：

// Movable, non-copyable lock wrapper. It tracks two things: the mutex it is
// associated with (__m_, may be nullptr) and whether it currently owns the
// lock (__owns_). The destructor unlocks only when __owns_ is true.
template <class _Mutex>
class unique_lock
{
public:
    typedef _Mutex mutex_type;

private:
    mutex_type* __m_;     // associated mutex, nullptr when there is none
    bool        __owns_;  // true while this object must unlock *__m_

public:
    // No mutex, no ownership.
    unique_lock() _NOEXCEPT
        : __m_(nullptr), __owns_(false)
    {
    }

    // Associates with __mtx and locks it.
    explicit unique_lock(mutex_type& __mtx)
        : __m_(&__mtx), __owns_(true)
    {
        __mtx.lock();
    }

    // defer_lock_t overload: associate only, do not lock.
    unique_lock(mutex_type& __mtx, defer_lock_t) _NOEXCEPT
        : __m_(&__mtx), __owns_(false)
    {
    }

    // try_to_lock_t overload: ownership is whatever try_lock() returns.
    unique_lock(mutex_type& __mtx, try_to_lock_t)
        : __m_(&__mtx), __owns_(__mtx.try_lock())
    {
    }

    // adopt_lock_t overload: record ownership without calling lock().
    unique_lock(mutex_type& __mtx, adopt_lock_t)
        : __m_(&__mtx), __owns_(true)
    {
    }

    // Deadline overload: ownership is the result of try_lock_until().
    template <class _Clock, class _Duration>
    unique_lock(mutex_type& __mtx,
                const chrono::time_point<_Clock, _Duration>& __deadline)
        : __m_(&__mtx), __owns_(__mtx.try_lock_until(__deadline))
    {
    }

    // Timeout overload: ownership is the result of try_lock_for().
    template <class _Rep, class _Period>
    unique_lock(mutex_type& __mtx,
                const chrono::duration<_Rep, _Period>& __timeout)
        : __m_(&__mtx), __owns_(__mtx.try_lock_for(__timeout))
    {
    }

    ~unique_lock()
    {
        if (!__owns_)
            return;
        __m_->unlock();
    }

private:
    // Copying is disabled by keeping both operations private.
    unique_lock(unique_lock const&);            // = delete;
    unique_lock& operator=(unique_lock const&); // = delete;

public:
    // Takes over the source's mutex and ownership flag; the source ends up
    // with no mutex and no ownership.
    unique_lock(unique_lock&& __other) _NOEXCEPT
        : __m_(__other.__m_), __owns_(__other.__owns_)
    {
        __other.__m_ = nullptr;
        __other.__owns_ = false;
    }

    // Unlocks the currently owned mutex (if any) first, then takes over the
    // source's state exactly as the move constructor does.
    unique_lock& operator=(unique_lock&& __other) _NOEXCEPT
    {
        if (__owns_)
            __m_->unlock();
        __m_ = __other.__m_;
        __owns_ = __other.__owns_;
        __other.__m_ = nullptr;
        __other.__owns_ = false;
        return *this;
    }

    void lock();
    bool try_lock();

    template <class _Rep, class _Period>
    bool try_lock_for(const chrono::duration<_Rep, _Period>& __d);

    template <class _Clock, class _Duration>
    bool try_lock_until(const chrono::time_point<_Clock, _Duration>& __t);

    void unlock();

    // Exchanges both the mutex pointer and the ownership flag.
    void swap(unique_lock& __other) _NOEXCEPT
    {
        _VSTD::swap(__m_, __other.__m_);
        _VSTD::swap(__owns_, __other.__owns_);
    }

    // Detaches from the mutex WITHOUT unlocking it and returns the pointer;
    // afterwards the destructor will not touch the mutex.
    mutex_type* release() _NOEXCEPT
    {
        mutex_type* const __detached = __m_;
        __m_ = nullptr;
        __owns_ = false;
        return __detached;
    }

    bool owns_lock() const _NOEXCEPT
    {
        return __owns_;
    }

    operator bool () const _NOEXCEPT
    {
        return __owns_;
    }

    mutex_type* mutex() const _NOEXCEPT
    {
        return __m_;
    }
};

std::lock和std::try_lock函數

上面的都是類對象，這兩個是函數。

std::lock和std::try_lock函數用於在同時使用多個鎖時，防止死鎖。這個實際上很重要的，因為手寫代碼來處理多個鎖的同步問題，很容易出錯。

要注意的是std::try_lock函數的返回值：

當成功時，返回-1；

當失敗時，返回第幾個鎖沒有獲取成功，以0開始計數；

首先來看下只有兩個鎖的情況，代碼雖然看起來比較簡單，但裡面卻有大文章：

// Locks both __l0 and __l1, looping until both are held at the same time.
// Each attempt blocks on one lock and only try_lock()s the other, so the
// function never blocks on one lock while holding the other. On exit both
// locks are held and neither unique_lock will release them.
template <class _L0, class _L1>
void
lock(_L0& __l0, _L1& __l1)
{
    while (true)
    {
        {
            unique_lock<_L0> __u0(__l0);
            if (__l1.try_lock())  // l0 is held; now try to take l1 as well
            {
                __u0.release();   // both are held: detach __u0 so that its destructor does not unlock l0
                break;
            }
        }// taking l1 failed: __u0 goes out of scope here and releases l0
        sched_yield();  // yield: the thread moves to the end of the queue for its priority so another thread can run
        {
            unique_lock<_L1> __u1(__l1); // the try_lock on l1 just failed, so another thread holds it; this time block on l1 first
            if (__l0.try_lock())
            {
                __u1.release();
                break;
            }
        }
        sched_yield();
    }
}
template <class _L0, class _L1>
int
try_lock(_L0& __l0, _L1& __l1)
{
    unique_lock<_L0> __u0(__l0, try_to_lock);
    if (__u0.owns_lock())
    {
        if (__l1.try_lock()) //注意try_lock返回值的定義，否則這裏無法理解
        {
            __u0.release();
            return -1;
        }
        else
            return 1;
    }
    return 0;
}

上面的lock函數用「先阻塞獲取一個鎖，再對另一個鎖try_lock，失敗就釋放已持有的鎖並換個順序重試」的辦法防止了死鎖。

上面是兩個鎖的情況，那麼在多個參數的情況下呢？

先來看下std::try_lock函數的實現：

裡面遞歸地調用了try_lock函數自身，如果全部鎖都獲取成功，則依次把所有的unique_lock都release掉，並返回-1。

如果有失敗，則返回第一個獲取失敗的鎖的下標（從0開始計數）：遞歸調用返回的下標是相對於__l1的，所以每返回一層要加1；已經獲取到的鎖則由各層unique_lock的析構函數釋放。

template <class _L0, class _L1, class _L2, class... _L3>
int
try_lock(_L0& __l0, _L1& __l1, _L2& __l2, _L3&... __l3)
{
    int __r = 0;
    unique_lock<_L0> __u0(__l0, try_to_lock);
    if (__u0.owns_lock())
    {
        __r = try_lock(__l1, __l2, __l3...);
        if (__r == -1)
            __u0.release();
        else
            ++__r;
    }
    return __r;
}

再來看多參數的std::lock的實現：

// Helper for the variadic lock(): blocks on the lock selected by __i, then
// try_lock()s all the others. If that fails, everything is released and the
// next round starts with the lock that could not be taken.
// __i is the zero-based position (in this call's argument list) of the lock
// to block on first.
template <class _L0, class _L1, class _L2, class ..._L3>
void
__lock_first(int __i, _L0& __l0, _L1& __l1, _L2& __l2, _L3& ...__l3)
{
    while (true)
    {
        switch (__i)  // __i records which lock (zero-based) failed in the previous round
        {
        case 0:   // on the first pass __i is 0
            {
                unique_lock<_L0> __u0(__l0);
                __i = try_lock(__l1, __l2, __l3...);
                if (__i == -1)  // l0 is held and all remaining locks were taken too: detach __u0 and return
                {
                    __u0.release();
                    return;
                }
            }
            ++__i;  // try_lock(__l1, __l2, __l3...) counts from l1, so add 1 to get the position of the failed lock; the next round starts with it
            sched_yield();
            break;
        case 1:   // l1 failed last time, so block on l1 first this time
            {
                unique_lock<_L1> __u1(__l1);    
                __i = try_lock(__l2, __l3..., __l0);   // l0 is moved to the end; l1 is held, now try the rest
                if (__i == -1)
                {
                    __u1.release();
                    return;
                }
            }
            if (__i == sizeof...(_L3) + 1)   // the failure was on l0 (last in the try_lock call), so start with l0 next time
                __i = 0;
            else
                __i += 2; // try_lock(__l2, __l3..., __l0) counts from l2, so add 2 to get the position in this argument list
            sched_yield();
            break;
        default:
            __lock_first(__i - 2, __l2, __l3..., __l0, __l1);    // rotate the argument list so it starts at l2; __i shifts down by 2 to match
            return;
        }
    }
}

// Entry point for three or more lockables: delegates to __lock_first() with
// index 0, i.e. the first round blocks on __l0.
template <class _L0, class _L1, class _L2, class ..._L3>
inline _LIBCPP_INLINE_VISIBILITY
void
lock(_L0& __l0, _L1& __l1, _L2& __l2, _L3& ...__l3)
{
    __lock_first(0, __l0, __l1, __l2, __l3...);
}

可以看到多參數的std::lock的實現是：

先獲取一個鎖，然後再調用std::try_lock去獲取剩下的鎖，如果失敗了，則下次先獲取上次失敗的鎖。

重複上麵的過程，直到成功獲取到所有的鎖。

上面的算法用比較巧妙的方式實現了參數的輪轉。

std::timed_mutex

std::timed_mutex裡面封裝了mutex和condition_variable，這樣就多了兩個函數可以用：
try_lock_for
try_lock_until

實際上是posix的mutex和condition的包裝。

// Mutex with timed acquisition, composed of a plain mutex, a condition
// variable and a boolean "locked" flag.
class timed_mutex
{
    mutex              __m_;       // taken by try_lock_until() before it reads or writes __locked_
    condition_variable __cv_;      // try_lock_until() waits on this while __locked_ is true
    bool               __locked_;  // the logical lock state seen by callers
public:
     timed_mutex();
     ~timed_mutex();
private:
    // Copying is disabled by keeping both operations private.
    timed_mutex(const timed_mutex&); // = delete;
    timed_mutex& operator=(const timed_mutex&); // = delete;
public:
    void lock();
    bool try_lock() _NOEXCEPT;
    // Relative timeout: turned into a steady_clock deadline and forwarded to try_lock_until().
    template <class _Rep, class _Period>
        _LIBCPP_INLINE_VISIBILITY
        bool try_lock_for(const chrono::duration<_Rep, _Period>& __d)
            {return try_lock_until(chrono::steady_clock::now() + __d);}
    // Absolute deadline, expressed on the caller's clock type.
    template <class _Clock, class _Duration>
        bool try_lock_until(const chrono::time_point<_Clock, _Duration>& __t);
    void unlock() _NOEXCEPT;
};

// Tries to take the logical lock until the deadline __t.
// Waits on __cv_ while the flag is set and the deadline has not passed, then
// claims the flag if it is free. Returns true when the lock was obtained.
template <class _Clock, class _Duration>
bool
timed_mutex::try_lock_until(const chrono::time_point<_Clock, _Duration>& __t)
{
    using namespace chrono;
    unique_lock<mutex> __guard(__m_);
    for (bool __in_time = _Clock::now() < __t; __in_time && __locked_; )
        __in_time = __cv_.wait_until(__guard, __t) == cv_status::no_timeout;
    // The flag is re-checked after the loop, so a lock released right at the
    // deadline is still claimed.
    if (__locked_)
        return false;
    __locked_ = true;
    return true;
}

std::recursive_mutex和std::recursive_timed_mutex

這兩個實際上是std::mutex和std::timed_mutex 的recursive模式的實現，即鎖的持有者可以重複多次調用lock()函數。

和posix mutex裏的recursive mutex是一樣的。

看下std::recursive_mutex的構造函數就知道了。

// Initialises __m_ as a PTHREAD_MUTEX_RECURSIVE mutex.
// Every failing pthread call ends in __throw_system_error() with that call's
// error code; whatever was already initialised is destroyed first.
recursive_mutex::recursive_mutex()
{
    pthread_mutexattr_t __attr;
    int __ec = pthread_mutexattr_init(&__attr);
    if (__ec == 0)
    {
        __ec = pthread_mutexattr_settype(&__attr, PTHREAD_MUTEX_RECURSIVE);
        if (__ec == 0)
            __ec = pthread_mutex_init(&__m_, &__attr);

        if (__ec != 0)
        {
            // settype or mutex_init failed: only the attribute object exists.
            pthread_mutexattr_destroy(&__attr);
        }
        else
        {
            __ec = pthread_mutexattr_destroy(&__attr);
            if (__ec == 0)
                return;
            // The mutex was created but attribute cleanup failed: undo the mutex.
            pthread_mutex_destroy(&__m_);
        }
    }
    __throw_system_error(__ec, "recursive_mutex constructor failed");
}

std::cv_status

這個用來表示condition等待返回的狀態，和上面三個表示lock狀態的標籤用途差不多。

// Result of a timed wait on a condition variable.
enum cv_status
{
    no_timeout = 0,  // the wait ended before the deadline
    timeout    = 1   // the deadline was reached
};

std::condition_variable

包裝了posix condition variable。

// Non-copyable wrapper around a POSIX pthread_cond_t. Every wait overload
// takes a unique_lock<mutex>.
class condition_variable
{
    pthread_cond_t __cv_;  // the wrapped POSIX condition variable
public:
    // Uses the static initializer, so no pthread_cond_init() call is made here.
    condition_variable() {__cv_ = (pthread_cond_t)PTHREAD_COND_INITIALIZER;}
    ~condition_variable();
private:
    // Copying is disabled by keeping both operations private.
    condition_variable(const condition_variable&); // = delete;
    condition_variable& operator=(const condition_variable&); // = delete;
public:
    void notify_one() _NOEXCEPT;
    void notify_all() _NOEXCEPT;

    // Untimed waits: plain, and with a caller-supplied predicate.
    void wait(unique_lock<mutex>& __lk) _NOEXCEPT;
    template <class _Predicate>
        void wait(unique_lock<mutex>& __lk, _Predicate __pred);

    // Wait until an absolute deadline; reports whether the deadline passed.
    template <class _Clock, class _Duration>
        cv_status
        wait_until(unique_lock<mutex>& __lk,
                   const chrono::time_point<_Clock, _Duration>& __t);

    // Deadline wait with a predicate; returns the predicate's final value.
    template <class _Clock, class _Duration, class _Predicate>
        bool
        wait_until(unique_lock<mutex>& __lk,
                   const chrono::time_point<_Clock, _Duration>& __t,
                   _Predicate __pred);

    // Wait for a relative duration.
    template <class _Rep, class _Period>
        cv_status
        wait_for(unique_lock<mutex>& __lk,
                 const chrono::duration<_Rep, _Period>& __d);

    // Relative-duration wait with a predicate.
    template <class _Rep, class _Period, class _Predicate>
        bool
        wait_for(unique_lock<mutex>& __lk,
                 const chrono::duration<_Rep, _Period>& __d,
                 _Predicate __pred);

    // Direct access to the wrapped pthread_cond_t.
    typedef pthread_cond_t* native_handle_type;
    _LIBCPP_INLINE_VISIBILITY native_handle_type native_handle() {return &__cv_;}

private:
    // Private timed-wait helper taking a system_clock time point in nanoseconds.
    void __do_timed_wait(unique_lock<mutex>& __lk,
       chrono::time_point<chrono::system_clock, chrono::nanoseconds>) _NOEXCEPT;
};

裡面的函數都是符合直覺的實現，值得注意的是：

cv_status是通過判斷時間而確定的，如果超時的則返回cv_status::timeout，如果沒有超時，則返回cv_status::no_timeout。

condition_variable::wait_until函數可以傳入一個predicate，即一個用戶自定義的判斷是否符合條件的函數。這個也是很常見的模板編程的方法了。

// Deadline wait built on wait_for(): waits for the time remaining until __t,
// then decides the result purely by re-reading the clock.
template <class _Clock, class _Duration>
cv_status
condition_variable::wait_until(unique_lock<mutex>& __lk,
                               const chrono::time_point<_Clock, _Duration>& __t)
{
    using namespace chrono;
    wait_for(__lk, __t - _Clock::now());
    if (_Clock::now() < __t)
        return cv_status::no_timeout;
    return cv_status::timeout;
}

// Deadline wait with a predicate. Returns true as soon as __pred() holds;
// once the deadline is reached, returns one final evaluation of __pred().
template <class _Clock, class _Duration, class _Predicate>
bool
condition_variable::wait_until(unique_lock<mutex>& __lk,
                   const chrono::time_point<_Clock, _Duration>& __t,
                   _Predicate __pred)
{
    for (;;)
    {
        if (__pred())
            return true;
        if (wait_until(__lk, __t) == cv_status::timeout)
            return __pred();
    }
}

std::condition_variable_any

std::condition_variable_any的接口和std::condition_variable一樣，不同的是std::condition_variable只能使用std::unique_lock<std::mutex>，而std::condition_variable_any可以使用任何的鎖對象。

下面來看下為什麼std::condition_variable_any可以使用任意的鎖對象。

// Condition variable whose wait functions are templated on the lock type
// (_Lock) instead of being fixed to unique_lock<mutex>.
class _LIBCPP_TYPE_VIS condition_variable_any
{
    condition_variable __cv_;   // the condition variable that is actually waited on
    shared_ptr<mutex>  __mut_;  // a mutex owned by this object; NOTE(review): presumably paired with __cv_ inside wait() -- the definitions are not shown here
public:
    condition_variable_any();

    void notify_one() _NOEXCEPT;
    void notify_all() _NOEXCEPT;

    // Untimed waits: plain, and with a caller-supplied predicate.
    template <class _Lock>
        void wait(_Lock& __lock);
    template <class _Lock, class _Predicate>
        void wait(_Lock& __lock, _Predicate __pred);

    // Wait until an absolute deadline.
    template <class _Lock, class _Clock, class _Duration>
        cv_status
        wait_until(_Lock& __lock,
                   const chrono::time_point<_Clock, _Duration>& __t);

    // Deadline wait with a predicate.
    template <class _Lock, class _Clock, class _Duration, class _Predicate>
        bool
        wait_until(_Lock& __lock,
                   const chrono::time_point<_Clock, _Duration>& __t,
                   _Predicate __pred);

    // Wait for a relative duration.
    template <class _Lock, class _Rep, class _Period>
        cv_status
        wait_for(_Lock& __lock,
                 const chrono::duration<_Rep, _Period>& __d);

    // Relative-duration wait with a predicate.
    template <class _Lock, class _Rep, class _Period, class _Predicate>
        bool
        wait_for(_Lock& __lock,
                 const chrono::duration<_Rep, _Period>& __d,
                 _Predicate __pred);
};

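可以看到，在std::condition_variable_any裡，除了一個普通的condition_variable __cv_之外，還用shared_ptr<mutex> __mut_持有了一個內部的mutex。所以一切都明白了：回顧std::condition_variable，它只能配合std::unique_lock<std::mutex>使用；而在std::condition_variable_any裡，真正和__cv_配合的是這個內部的mutex，用戶傳入的鎖只需要提供lock()/unlock()，在等待前後被解鎖、再重新加鎖即可，因此可以是任意的鎖類型。用shared_ptr來持有內部mutex，則是為了讓這個mutex的生命周期可以被正在等待的線程共享。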

因此，也可以很輕鬆得出std::condition_variable_any會比std::condition_variable稍慢的結論了。

其它的東東：

sched_yield()函數的man手冊：
sched_yield() causes the calling thread to relinquish the CPU. The thread is moved to the end of the queue for its
static priority and a new thread gets to run.

在C++14裏還有std::shared_lock和std::shared_timed_mutex，但是libc++裏還沒有對應的實現，因此不做分析。

總結

llvm libc++中的各種mutex, lock, condition variable實際上是封裝了posix裡的對應實現。封裝的技巧和一些細節值得細細推敲學習。

看完了實現源碼之後，對於如何使用就更加清晰了。

參考：

https://en.cppreference.com/w/cpp

https://libcxx.llvm.org/

最後更新：2017-04-03 07:57:12