diff options
author | Thiago Macieira <[email protected]> | 2023-05-22 19:07:53 -0700 |
---|---|---|
committer | Thiago Macieira <[email protected]> | 2025-06-27 07:50:31 -0700 |
commit | 4409a7c21399e3c602edeea62c409a18ef78148f (patch) | |
tree | 24ce3d3234a983245836359438e040006b7e96d6 /src | |
parent | b540732975db81fa327663069af3fc1c4311d8b9 (diff) |
Long live QLatch
Like std::latch[1][2]. Originally proposed by N3666, it only became a
reality after the atomic wait functionality in C++20. We can't depend on
that yet in Qt because a) we don't depend on C++20 yet, and b) the
implementations of atomic waiting and std::latch are too recent in the
Standard Library implementations (GCC 12, LLVM 12, MSVC 19.28). This
implementation therefore builds the functionality from scratch, like the
original proposal did[3].
We'll probably keep our implementation for the long run, because it's
more efficient than the Standard Libraries' implementations. The MS STL
implementation is the closest to ours and to bare OS functionality: it uses
WaitOnAddress / WakeByAddress as expected, but it generates a bit more
code than is necessary. And it's Windows-specific, of course.
Both cross-platform implementations (libstdc++ and libc++) do far more
work than necessary for platforms that offer OS futex-like
support. Both of them busy-loop waiting for the atomic value to change
and then exponentially back off using sched_yield(). Those aren't useful
to us, as the majority of our uses are with threads that have just been
created and have therefore likely made little progress. They can be
actively harmful in some cases. The libc++ implementation is even worse
by using std::high_resolution_clock to time this looping up to 64 µs in
inline code before making a system call to sleep and wait (and it can't
/ won't use the latch's address for the futex itself).
Both implementations also use an extra atomic out of a global pool (16
in libstdc++, 256 in libc++) to indicate whether there is any waiter on
this address and therefore avoid the system call to wake them. See the
next commit for an efficient implementation for QLatch.
This implementation uses the limited atomic-wait functionality added by
the previous commit for platforms that don't support futexes.
[1] https://wg21.link/p1135
[2] https://en.cppreference.com/w/cpp/thread/latch/latch
[3] https://github.com/ogiroux/atomic_wait/tree/master/include
Change-Id: Ib5ce7a497e034ebabb2cfffd1761a3a6ff2598d3
Reviewed-by: Mårten Nordheim <[email protected]>
Reviewed-by: Fabian Kosmale <[email protected]>
Diffstat (limited to 'src')
-rw-r--r-- | src/corelib/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/corelib/thread/qlatch.cpp | 197 | ||||
-rw-r--r-- | src/corelib/thread/qlatch_p.h | 93 | ||||
-rw-r--r-- | src/corelib/thread/qtsan_impl.h | 12 |
4 files changed, 303 insertions, 0 deletions
diff --git a/src/corelib/CMakeLists.txt b/src/corelib/CMakeLists.txt index da4b00c403d..af7173b72d5 100644 --- a/src/corelib/CMakeLists.txt +++ b/src/corelib/CMakeLists.txt @@ -758,6 +758,7 @@ qt_internal_extend_target(Core CONDITION QT_FEATURE_thread SOURCES thread/qatomic.cpp thread/qfutex_p.h + thread/qlatch.cpp thread/qlatch_p.h thread/qmutex.cpp thread/qmutex_p.h thread/qreadwritelock.cpp thread/qreadwritelock_p.h thread/qthreadstorage.cpp thread/qthreadstorage_p.h diff --git a/src/corelib/thread/qlatch.cpp b/src/corelib/thread/qlatch.cpp new file mode 100644 index 00000000000..f91dfb1400a --- /dev/null +++ b/src/corelib/thread/qlatch.cpp @@ -0,0 +1,197 @@ +// Copyright (C) 2025 Intel Corporation. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only + +#include "qlatch_p.h" + +#include "qatomicwait_p.h" +#include "qfutex_p.h" + +QT_BEGIN_NAMESPACE + +using namespace QtFutex; + +// Backend selection for waiting/waking: when QATOMICWAIT_USE_FALLBACK is defined, +// ForcedFallbackAtomicWait is true (which disables the futex path in waitInternal() +// and wakeUp()) and atomicwait names QtFallbackAtomicWait; otherwise it names q20. +#if defined(QATOMICWAIT_USE_FALLBACK) +static constexpr bool ForcedFallbackAtomicWait = true; +namespace atomicwait = QtFallbackAtomicWait; +#else +static constexpr bool ForcedFallbackAtomicWait = false; +namespace atomicwait = q20; +#endif + +/*! + \class QLatch + \internal + + Implements the same API as \c std::latch (C++20), allowing a single + synchronization between threads. + + \section2 Typical uses + \section3 Waiting for threaded work to finish + + For this use-case, one or more threads perform some work, which needs to + finish before the caller thread can proceed. For this, each worker thread + calls countDown() once they have finished their work, while the caller + thread suspends execution by calling wait(). 
+ + The operation is best seen in: + \code + QLatch latch(segments); + int y = 0; + for (int i = 0; i < segments; ++i) { + int yn = (data->height - y) / (segments - i); + threadPool->start([&, y, yn]() { + convertSegment(y, y + yn); + latch.countDown(); + }); + y += yn; + } + latch.wait(); + \endcode + + Or, for a single thread: + \code + QLatch latch(1); + QMetaObject::invokeMethod(object, [&]() { + doSomething(); + latch.countDown(); + }, Qt::QueuedConnection); + latch.wait(); + \endcode + + In fact, the above is exactly what Qt::BlockingQueuedConnection does. + \section3 Synchronizing execution + + For this use-case, multiple threads must reach a particular state before + any of them may proceed. In this case, all of them call arriveAndWait(), + causing all but the last one of them to suspend execution until that last + one also arrives. + + \code + QLatch latch(n); + for (int i = 0; i < n; ++i) { + threadPool->start([&] { + latch.arriveAndWait(); + doStressfulWork(); + }); + } + \endcode + + \section2 Differences from \c std::latch + + \list + \li Uses \c{int} in the API instead of \c{ptrdiff_t} (note that the max() + is the same as libstdc++'s on Linux). + \li count_down() is not \c{const} (libstdc++ implementation is). + \endlist + +*/ + +/*! + \fn QLatch::QLatch(int expected) noexcept + + Initializes the QLatch to indicate that countDown() will be called \a + expected times. You probably want to pass a value greater than zero. +*/ + +/*! + \fn int QLatch::pending() const noexcept + \internal + + Returns the counter. + + Don't use; for the unit test only. +*/ + +/*! + \fn void QLatch::countDown(int n) noexcept + \fn void QLatch::count_down(int n) noexcept + + Decrements the internal counter by \a n. If the internal counter drops to + zero after this operation, any threads currently waiting will be woken up. + If \a n is greater than the value of the internal counter or is negative, + the behavior is undefined. 
+ + This function does not block and may be used to notify waiters that this + thread has reached a particular point and they may proceed. To synchronize + all threads so they all resume work at the same time, use arriveAndWait(). + + This function implements release memory ordering. + + \sa arriveAndWait(), wait() +*/ + +/*! + \fn bool QLatch::tryWait() const noexcept + \fn bool QLatch::try_wait() const noexcept + + Returns true if the internal counter in this latch has dropped to zero, + false otherwise. This function does not block. + + This function implements acquire memory ordering. + + \sa wait(), countDown() +*/ + +/*! + \fn void QLatch::wait() noexcept + + Waits for the internal counter in this latch to drop to zero. + + This function implements acquire memory ordering. + + \sa tryWait(), arriveAndWait(), countDown() +*/ + +/*! + \fn void QLatch::arriveAndWait(int n) noexcept + \fn void QLatch::arrive_and_wait(int n) noexcept + + This function decrements the internal counter by \a n. If the counter + remains non-zero after this operation, it suspends the current thread until + it does become zero. Otherwise it wakes all other current waiters. + + This function is useful to synchronize multiple threads so they may start + some execution at (nearly) exactly the same time. + + This function is exactly equivalent to: + \code + countDown(n); + wait(); + \endcode + + This function implements acquire-and-release memory ordering. + + \sa countDown(), wait() +*/ + +/*! + \fn int QLatch::max() noexcept + + Returns the maximum number that can be passed to the constructor. 
+*/ + +/*! + \internal + + Blocks until the counter is observed to be zero. \a current is the non-zero + value the caller last read from the counter; it is handed to the wait + primitive (presumably as the value to wait for a change from), and the + counter is re-read with acquire ordering after every return from that + primitive, so the loop only exits once the value read is zero. + + Uses futexWait() when futexAvailable() returns true and the fallback is not + forced; otherwise uses atomicwait::atomic_wait_explicit(). +*/ +void QLatch::waitInternal(int current) noexcept +{ + auto waitLoop = [&](auto waiter) { + do { + waiter(current); + } while ((current = counter.loadAcquire()) != 0); + }; + + if (futexAvailable() && !ForcedFallbackAtomicWait) + waitLoop([&](int current) { futexWait(counter, current); }); + else + waitLoop([&](int current) { + atomicwait::atomic_wait_explicit(&counter._q_value, current, std::memory_order_relaxed); + }); +} + +/*! + \internal + + Notifies all waiters on the counter, using the same backend selection as + waitInternal(): futexWakeAll() or atomicwait::atomic_notify_all(). +*/ +void QLatch::wakeUp() noexcept +{ + if (futexAvailable() && !ForcedFallbackAtomicWait) + futexWakeAll(counter); + else + atomicwait::atomic_notify_all(&counter._q_value); +} + +QT_END_NAMESPACE diff --git a/src/corelib/thread/qlatch_p.h b/src/corelib/thread/qlatch_p.h new file mode 100644 index 00000000000..b407e3b6c7d --- /dev/null +++ b/src/corelib/thread/qlatch_p.h @@ -0,0 +1,93 @@ +// Copyright (C) 2024 Intel Corporation. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only + +#ifndef QLATCH_P_H +#define QLATCH_P_H + +// +// W A R N I N G +// ------------- +// +// This file is not part of the Qt API. It exists purely as an implementation +// detail. This header file may change from version to version without notice, +// or even be removed. +// +// We mean it. 
+// + +#include <QtCore/qbasicatomic.h> +#include <QtCore/qtsan_impl.h> + +#include <private/qglobal_p.h> + +#include <limits> + +QT_BEGIN_NAMESPACE + +// Countdown latch: countDown() decrements the counter, wait() blocks until it reads zero. +class QLatch +{ +public: + constexpr explicit QLatch(int expected) noexcept + : counter(expected) + {} + + int pending() const noexcept + { + return counter.loadAcquire(); + } + + void countDown(int n = 1) noexcept + { + QtTsan::latchCountDown(&counter); + if (counter.fetchAndSubRelease(n) == n) // previous value was n, so the counter is now zero + wakeUp(); + } + + bool tryWait() const noexcept + { + if (pending() != 0) + return false; + QtTsan::latchWait(&counter); + return true; + } + + void wait() noexcept // not const: calls the non-const waitInternal() + { + if (int current = counter.loadAcquire(); current != 0) { + waitInternal(current); + QtTsan::latchWait(&counter); + } + } + + void arriveAndWait(int n = 1) noexcept + { + countDown(n); + wait(); + } + + // API compatible with C++20: + static constexpr int max() noexcept { return std::numeric_limits<int>::max(); } + void count_down(int n = 1) noexcept { countDown(n); } + bool try_wait() const noexcept { return tryWait(); } + void arrive_and_wait(int n = 1) noexcept { arriveAndWait(n); } + +private: + QBasicAtomicInt counter; + + Q_DISABLE_COPY_MOVE(QLatch) + +#ifdef QATOMICWAIT_USE_FALLBACK +# define Q_LATCH_EXPORT /* being linked into the unit test */ +#else +# define Q_LATCH_EXPORT Q_CORE_EXPORT +#endif + + void Q_LATCH_EXPORT waitInternal(int current) noexcept; + void Q_LATCH_EXPORT wakeUp() noexcept; + +#undef Q_LATCH_EXPORT +}; + +QT_END_NAMESPACE + +#endif // QLATCH_P_H diff --git a/src/corelib/thread/qtsan_impl.h b/src/corelib/thread/qtsan_impl.h index b28d65e65f4..2d1cb2c1bdb 100644 --- a/src/corelib/thread/qtsan_impl.h +++ b/src/corelib/thread/qtsan_impl.h @@ -33,6 +33,16 @@ inline void futexRelease(void *addr, void *addr2 = nullptr) ::__tsan_release(addr); } +inline void latchWait(const void *addr) +{ + ::__tsan_acquire(const_cast<void *>(addr)); +} + +inline void latchCountDown(void *addr) +{ + 
::__tsan_release(addr); +} + inline void mutexPreLock(void *addr, unsigned flags) { ::__tsan_mutex_pre_lock(addr, flags); @@ -61,6 +71,8 @@ enum : unsigned { #else inline void futexAcquire(void *, void * = nullptr) {} inline void futexRelease(void *, void * = nullptr) {} +inline void latchCountDown(void *) {} +inline void latchWait(const void *) {} enum : unsigned { MutexWriteReentrant, |