std::hardware_destructive_interference_size, std::hardware_constructive_interference_size

来自cppreference.com
< cpp‎ | thread
 
 
并发支持库
线程
(C++11)
(C++20)
(C++20)
hardware_destructive_interference_sizehardware_constructive_interference_size
(C++17)(C++17)
this_thread 命名空间
(C++11)
(C++11)
(C++11)
原子类型
(C++11)
(C++20)
原子类型的初始化
(C++11)(C++20 中弃用)
(C++11)(C++20 中弃用)
原子操作的自由函数
原子标志的自由函数
内存序
互斥
(C++11)
通用锁管理
(C++11)
(C++11)
(C++11)
(C++11)(C++11)(C++11)
(C++11)
(C++11)
条件变量
(C++11)
信号量
闩与屏障
(C++20)
(C++20)
future
(C++11)
(C++11)
(C++11)
(C++11)
 
在标头 <new> 定义
inline constexpr std::size_t
    hardware_destructive_interference_size = /*implementation-defined*/;
(1) (C++17 起)
inline constexpr std::size_t
    hardware_constructive_interference_size = /*implementation-defined*/;
(2) (C++17 起)
1) 二个对象间避免假数据共享的最小偏移。保证至少为 alignof(std::max_align_t)
struct keep_apart {
  alignas(std::hardware_destructive_interference_size) std::atomic<int> cat;
  alignas(std::hardware_destructive_interference_size) std::atomic<int> dog;
};
2) 鼓励真共享的最大连续内存大小。保证至少为 alignof(std::max_align_t)
struct together {
  std::atomic<int> dog;
  int puppy;
};
struct kennel {
  // 其他数据成员……
  alignas(sizeof(together)) together pack;
  // 其他数据成员……
};
static_assert(sizeof(together) <= std::hardware_constructive_interference_size);

注解

这些常量提供一种可移植的访问 L1 数据缓存线大小的方式。

示例

程序使用二个线程(原子地)写入给定全局对象的数据成员。第一个对象适合一条缓存线,这导致“硬件干涉”。第二个对象保持其数据成员在分离的缓存线上,故避免了线程写入后可能的“缓存同步”。

#include <atomic>
#include <chrono>
#include <cstddef>
#include <iomanip>
#include <iostream>
#include <mutex>
#include <new>
#include <thread>
 
#ifdef __cpp_lib_hardware_interference_size
    using std::hardware_constructive_interference_size;
    using std::hardware_destructive_interference_size;
#else
    // 在 x86-64 │ L1_CACHE_BYTES │ L1_CACHE_SHIFT │ __cacheline_aligned │ ... 上为 64 字节
    constexpr std::size_t hardware_constructive_interference_size = 64;
    constexpr std::size_t hardware_destructive_interference_size = 64;
#endif
 
std::mutex cout_mutex;
 
constexpr int max_write_iterations{10'000'000}; // 性能评估时间调节
 
struct alignas(hardware_constructive_interference_size)
OneCacheLiner { // 占据一条缓存线
    std::atomic_uint64_t x{};
    std::atomic_uint64_t y{};
} oneCacheLiner;
 
struct TwoCacheLiner { // 占据二条缓存线
    alignas(hardware_destructive_interference_size) std::atomic_uint64_t x{};
    alignas(hardware_destructive_interference_size) std::atomic_uint64_t y{};
} twoCacheLiner;
 
inline auto now() noexcept { return std::chrono::high_resolution_clock::now(); }
 
template<bool xy>
void oneCacheLinerThread() {
    const auto start { now() };
 
    for (uint64_t count{}; count != max_write_iterations; ++count)
        if constexpr (xy)
             oneCacheLiner.x.fetch_add(1, std::memory_order_relaxed);
        else oneCacheLiner.y.fetch_add(1, std::memory_order_relaxed);
 
    const std::chrono::duration<double, std::milli> elapsed { now() - start };
    std::lock_guard lk{cout_mutex};
    std::cout << "oneCacheLinerThread() spent " << elapsed.count() << " ms\n";
    if constexpr (xy)
         oneCacheLiner.x = elapsed.count();
    else oneCacheLiner.y = elapsed.count();
}
 
template<bool xy>
void twoCacheLinerThread() {
    const auto start { now() };
 
    for (uint64_t count{}; count != max_write_iterations; ++count)
        if constexpr (xy)
             twoCacheLiner.x.fetch_add(1, std::memory_order_relaxed);
        else twoCacheLiner.y.fetch_add(1, std::memory_order_relaxed);
 
    const std::chrono::duration<double, std::milli> elapsed { now() - start };
    std::lock_guard lk{cout_mutex};
    std::cout << "twoCacheLinerThread() spent " << elapsed.count() << " ms\n";
    if constexpr (xy)
         twoCacheLiner.x = elapsed.count();
    else twoCacheLiner.y = elapsed.count();
}
 
int main() {
    std::cout << "__cpp_lib_hardware_interference_size "
#   ifdef __cpp_lib_hardware_interference_size
        " = " << __cpp_lib_hardware_interference_size << "\n";
#   else
        "is not defined, use 64 as fallback\n";
#   endif
 
    std::cout
        << "hardware_destructive_interference_size == "
        << hardware_destructive_interference_size << '\n'
        << "hardware_constructive_interference_size == "
        << hardware_constructive_interference_size << "\n\n";
 
    std::cout
        << std::fixed << std::setprecision(2)
        << "sizeof( OneCacheLiner ) == " << sizeof( OneCacheLiner ) << '\n'
        << "sizeof( TwoCacheLiner ) == " << sizeof( TwoCacheLiner ) << "\n\n";
 
    constexpr int max_runs{4};
 
    int oneCacheLiner_average{0};
    for (auto i{0}; i != max_runs; ++i) {
        std::thread th1{oneCacheLinerThread<0>};
        std::thread th2{oneCacheLinerThread<1>};
        th1.join(); th2.join();
        oneCacheLiner_average += oneCacheLiner.x + oneCacheLiner.y;
    }
    std::cout << "Average time: " << (oneCacheLiner_average / max_runs / 2) << " ms\n\n";
 
    int twoCacheLiner_average{0};
    for (auto i{0}; i != max_runs; ++i) {
        std::thread th1{twoCacheLinerThread<0>};
        std::thread th2{twoCacheLinerThread<1>};
        th1.join(); th2.join();
        twoCacheLiner_average += twoCacheLiner.x + twoCacheLiner.y;
    }
    std::cout << "Average time: " << (twoCacheLiner_average / max_runs / 2) << " ms\n\n";
}

可能的输出:

__cpp_lib_hardware_interference_size is not defined, use 64 as fallback
hardware_destructive_interference_size == 64
hardware_constructive_interference_size == 64
 
sizeof( OneCacheLiner ) == 64
sizeof( TwoCacheLiner ) == 128
 
oneCacheLinerThread() spent 634.25 ms
oneCacheLinerThread() spent 651.55 ms
oneCacheLinerThread() spent 990.23 ms
oneCacheLinerThread() spent 1033.94 ms
oneCacheLinerThread() spent 838.14 ms
oneCacheLinerThread() spent 883.25 ms
oneCacheLinerThread() spent 873.02 ms
oneCacheLinerThread() spent 914.26 ms
Average time: 852 ms
 
twoCacheLinerThread() spent 119.22 ms
twoCacheLinerThread() spent 127.91 ms
twoCacheLinerThread() spent 114.17 ms
twoCacheLinerThread() spent 126.41 ms
twoCacheLinerThread() spent 125.17 ms
twoCacheLinerThread() spent 126.06 ms
twoCacheLinerThread() spent 117.94 ms
twoCacheLinerThread() spent 129.03 ms
Average time: 122 ms

参阅

返回实现支持的并发线程数
(std::thread 的公开静态成员函数)
返回实现支持的并发线程数
(std::jthread 的公开静态成员函数)