src/corelib/thread/qatomicwait.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178

// Copyright (C) 2025 Intel Corporation.
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only

#include "qatomicwait_p.h"

#include <qendian.h>
#include "qfutex_p.h"
#include "qwaitcondition_p.h"

#include <array>
#include <condition_variable>
#include <mutex>

QT_BEGIN_NAMESPACE

/*
 * Implementation details:
 *
 * Normally, a call from atomic_notify_one() or atomic_notify_all() corresponds
 * to a call to cond.notify_one() or cond.notify_all(). This simple
 * implementation would require that we keep a distinct wait condition variable
 * per atomic variable, of which there could be an arbitrary number.
 *
 * Instead, we have a limited set of std::condition_variable, which are
 * selected based on the address of the variable being waited/notified on.
 * Because multiple, distinct variables could be sharing the same condition
 * variable, we need to perform a cond.notify_all() call in case the lock is
 * contended by waiters to more than one address, as we can't be sure which one
 * would be woken up by the cond.notify_one() call.
 *
 * We recover some of the performance of notifying a single waker by also
 * storing the address of the variable that was being waited on. If it matches
 * the address of the variable being notified, we can perform a notify_one()
 * call. This also allows us to avoid any system call in case no waiter has yet
 * joined the queue. In case of contention, we store a sentinel address
 * instead, indicating there are waiters to multiple variables. The last waiter
 * to leave a wait queue is responsible for resetting the watched address back
 * to nullptr.
 *
 * Performance details:
 *
 * This implementation is designed for systems where neither the Standard
 * Library's own std::atomic_wait nor operating system futexes are available
 * (read: usually, not the systems Qt is used most frequently on). Therefore,
 * it's designed for simplicity, not performance. Simplifications applied
 * include:
 * - the total number of possible condition variables
 * - the hashing algorithm to select a condition variable (simple XOR)
 * - no attempt to wait for a variable change before calling cond.wait()
 *   (no spinning, no HW-assisted wait)
 *
 * Other limitations:
 *
 * We only support 8-, 16-, 32- and 64-bit variables and we require bit-exact
 * comparisons for all bits. This means we do not support objects with padding
 * bits or without unique representations.
 */

namespace {
struct QAtomicWaitLocks
{
    // Sentinel address used to indicate this Lock is being used by waiters to
    // multiple addresses, implying we must make a notify_all() call.
    static void *contendedWatchAddress() { return reinterpret_cast<void *>(-1); }

    struct Lock {
        alignas(QtPrivate::IdealMutexAlignment) std::mutex mutex;
        alignas(QtPrivate::IdealMutexAlignment) std::condition_variable cond;

        // Can assume values:
        // - nullptr: no waiter is waiting
        // - contendedWatchAddress(): waiters to distinct addresses
        // - any other value: all waiters are waiting on the same address
        const void *watchedAddress = nullptr;
        qsizetype watcherCount = 0;
    };

    static constexpr int LockCount = 16;
    std::array<Lock, LockCount> locks;

    int indexFor(const void *ptr)
    {
        static_assert((LockCount & (LockCount - 1)) == 0, "LockCount is not a power of two");

        quintptr value = quintptr(ptr) / sizeof(int);
        quintptr idx = value % LockCount;
#ifndef QATOMICWAIT_USE_FALLBACK
        // XOR some higher bits too to avoid hashing collisions
        // (we don't do it in the unit test because we *want* those collisions)
        idx ^= (value / LockCount) % LockCount;
        idx ^= (value / LockCount / LockCount) % LockCount;
#endif
        return int(idx);
    }

    Lock &lockFor(const void *ptr) { return locks[indexFor(ptr)]; }
};
} // unnamed namespace

static inline void checkFutexUse()
{
#if !defined(QATOMICWAIT_USE_FALLBACK)
    if (QtFutex::futexAvailable()) {
        // This will disable the code and data on systems where futexes are
        // always available (currently: FreeBSD, Linux, Windows).
        qFatal("Implementation should have used futex!");
    }
#endif
}

static QAtomicWaitLocks &atomicLocks() noexcept
{
    checkFutexUse();

    static QAtomicWaitLocks global {};
    return global;
}

template <typename T> static bool isEqual(const void *address, const void *old)
{
    auto atomic = static_cast<const std::atomic<T> *>(address);
    auto expected = static_cast<const T *>(old);
    return atomic->load(std::memory_order_relaxed) == *expected;
}

static bool isEqual(const void *address, const void *old, qsizetype size) noexcept
{
    switch (size) {
    case 1: return isEqual<quint8 >(address, old);
    case 2: return isEqual<quint16>(address, old);
    case 4: return isEqual<quint32>(address, old);
    case 8: return isEqual<quint64>(address, old);
    }
    Q_UNREACHABLE_RETURN(false);
}

void QtPrivate::_q_atomicWait(const void *address, const void *old, qsizetype size) noexcept
{
    auto &locker = atomicLocks().lockFor(address);

    // NOT noexcept; we'll terminate if locking the mutex fails
    std::unique_lock lock(locker.mutex);

    // is the value still current?
    if (!isEqual(address, old, size))
        return;                     // no, failed to wait

    if (locker.watchedAddress && locker.watchedAddress != address)
        locker.watchedAddress = QAtomicWaitLocks::contendedWatchAddress();
    else
        locker.watchedAddress = address;
    ++locker.watcherCount;

    do {
        locker.cond.wait(lock);
    } while (isEqual(address, old, size));

    if (--locker.watcherCount == 0)
        locker.watchedAddress = nullptr;
}

void QtPrivate::_q_atomicWake(void *address, WakeMode mode) noexcept
{
    auto &locker = atomicLocks().lockFor(address);

    // NOT noexcept; we'll terminate if locking the mutex fails
    std::unique_lock lock(locker.mutex);

    // can we wake just one?
    if (mode == WakeMode::One && locker.watchedAddress == address)
        locker.cond.notify_one();
    else if (locker.watchedAddress != nullptr)
        locker.cond.notify_all();
    else
        qt_noop();          // no one was waiting
}

QT_END_NAMESPACE