std::hardware_destructive_interference_size, std::hardware_constructive_interference_size
来自cppreference.com
在标头 <new> 定义
|
||
inline constexpr std::size_t hardware_destructive_interference_size = /*implementation-defined*/; |
(1) | (C++17 起) |
inline constexpr std::size_t hardware_constructive_interference_size = /*implementation-defined*/; |
(2) | (C++17 起) |
1) 两个对象之间为避免伪共享(false sharing)所需的最小偏移量。保证至少为 alignof(std::max_align_t)。
// Two counters that are meant to be written independently. Each member is
// aligned to the destructive-interference size, so `cat` and `dog` start at
// offsets that are at least that many bytes apart.
struct keep_apart
{
    alignas(std::hardware_destructive_interference_size)
    std::atomic<int> cat;

    alignas(std::hardware_destructive_interference_size)
    std::atomic<int> dog;
};
2) 为促进真共享(true sharing)而应保持的连续内存的最大大小。保证至少为 alignof(std::max_align_t)。
struct together { std::atomic<int> dog; int puppy; }; struct kennel { // 其他数据成员…… alignas(sizeof(together)) together pack; // 其他数据成员…… }; static_assert(sizeof(together) <= std::hardware_constructive_interference_size);
注解
这些常量提供一种可移植的访问 L1 数据缓存线大小的方式。
示例
程序使用两个线程(原子地)写入给定全局对象的数据成员。第一个对象可放入一条缓存线内,这导致“硬件干涉”(两个线程争用同一条缓存线)。第二个对象将其数据成员保持在不同的缓存线上,故避免了线程写入后可能发生的“缓存同步”。
运行此代码
#include <atomic>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <iomanip>
#include <iostream>
#include <mutex>
#include <new>
#include <thread>

#ifdef __cpp_lib_hardware_interference_size
    using std::hardware_constructive_interference_size;
    using std::hardware_destructive_interference_size;
#else
    // 64 bytes on x86-64 │ L1_CACHE_BYTES │ L1_CACHE_SHIFT │ __cacheline_aligned │ ...
    constexpr std::size_t hardware_constructive_interference_size = 64;
    constexpr std::size_t hardware_destructive_interference_size = 64;
#endif

// Guards std::cout so the two worker threads do not interleave their output.
std::mutex cout_mutex;

constexpr int max_write_iterations{10'000'000}; // benchmark duration tuning

struct alignas(hardware_constructive_interference_size)
OneCacheLiner // occupies one cache line
{
    std::atomic_uint64_t x{};
    std::atomic_uint64_t y{};
}
oneCacheLiner;

struct TwoCacheLiner // occupies two cache lines
{
    alignas(hardware_destructive_interference_size) std::atomic_uint64_t x{};
    alignas(hardware_destructive_interference_size) std::atomic_uint64_t y{};
}
twoCacheLiner;

// Timestamp source for the measurements. steady_clock is monotonic, which is
// what interval timing needs; high_resolution_clock is allowed to be an alias
// of a non-steady clock.
inline auto now() noexcept { return std::chrono::steady_clock::now(); }

// Worker for the OneCacheLiner object.
// xy == true increments (and finally overwrites) member x, otherwise member y.
// After the write loop the elapsed time is printed and then stored, truncated
// to whole milliseconds, into the same member so that main() can read it back.
template<bool xy>
void oneCacheLinerThread()
{
    const auto start{now()};

    for (std::uint64_t count{}; count != max_write_iterations; ++count)
        if constexpr (xy)
            oneCacheLiner.x.fetch_add(1, std::memory_order_relaxed);
        else
            oneCacheLiner.y.fetch_add(1, std::memory_order_relaxed);

    const std::chrono::duration<double, std::milli> elapsed{now() - start};
    std::lock_guard lk{cout_mutex};
    std::cout << "oneCacheLinerThread() spent " << elapsed.count() << " ms\n";
    if constexpr (xy)
        oneCacheLiner.x = static_cast<std::uint64_t>(elapsed.count());
    else
        oneCacheLiner.y = static_cast<std::uint64_t>(elapsed.count());
}

// Worker for the TwoCacheLiner object; same protocol as oneCacheLinerThread().
template<bool xy>
void twoCacheLinerThread()
{
    const auto start{now()};

    for (std::uint64_t count{}; count != max_write_iterations; ++count)
        if constexpr (xy)
            twoCacheLiner.x.fetch_add(1, std::memory_order_relaxed);
        else
            twoCacheLiner.y.fetch_add(1, std::memory_order_relaxed);

    const std::chrono::duration<double, std::milli> elapsed{now() - start};
    std::lock_guard lk{cout_mutex};
    std::cout << "twoCacheLinerThread() spent " << elapsed.count() << " ms\n";
    if constexpr (xy)
        twoCacheLiner.x = static_cast<std::uint64_t>(elapsed.count());
    else
        twoCacheLiner.y = static_cast<std::uint64_t>(elapsed.count());
}

// Prints the interference-size constants and the sizes of both objects, then
// runs each pair of workers max_runs times and reports the average of the
// per-thread times the workers stored in x and y.
int main()
{
    std::cout << "__cpp_lib_hardware_interference_size "
#ifdef __cpp_lib_hardware_interference_size
                 " = " << __cpp_lib_hardware_interference_size << "\n";
#else
                 "is not defined, use 64 as fallback\n";
#endif

    std::cout << "hardware_destructive_interference_size == "
              << hardware_destructive_interference_size << '\n'
              << "hardware_constructive_interference_size == "
              << hardware_constructive_interference_size << "\n\n";

    std::cout << std::fixed << std::setprecision(2)
              << "sizeof( OneCacheLiner ) == " << sizeof( OneCacheLiner ) << '\n'
              << "sizeof( TwoCacheLiner ) == " << sizeof( TwoCacheLiner ) << "\n\n";

    constexpr int max_runs{4};

    int oneCacheLiner_average{0};
    for (auto i{0}; i != max_runs; ++i)
    {
        std::thread th1{oneCacheLinerThread<0>};
        std::thread th2{oneCacheLinerThread<1>};
        th1.join();
        th2.join();
        // Once both workers are joined, x and y hold their elapsed milliseconds.
        oneCacheLiner_average +=
            static_cast<int>(oneCacheLiner.x.load() + oneCacheLiner.y.load());
    }
    std::cout << "Average time: " << (oneCacheLiner_average / max_runs / 2) << " ms\n\n";

    int twoCacheLiner_average{0};
    for (auto i{0}; i != max_runs; ++i)
    {
        std::thread th1{twoCacheLinerThread<0>};
        std::thread th2{twoCacheLinerThread<1>};
        th1.join();
        th2.join();
        // Once both workers are joined, x and y hold their elapsed milliseconds.
        twoCacheLiner_average +=
            static_cast<int>(twoCacheLiner.x.load() + twoCacheLiner.y.load());
    }
    std::cout << "Average time: " << (twoCacheLiner_average / max_runs / 2) << " ms\n\n";
}
可能的输出:
__cpp_lib_hardware_interference_size is not defined, use 64 as fallback hardware_destructive_interference_size == 64 hardware_constructive_interference_size == 64 sizeof( OneCacheLiner ) == 64 sizeof( TwoCacheLiner ) == 128 oneCacheLinerThread() spent 634.25 ms oneCacheLinerThread() spent 651.55 ms oneCacheLinerThread() spent 990.23 ms oneCacheLinerThread() spent 1033.94 ms oneCacheLinerThread() spent 838.14 ms oneCacheLinerThread() spent 883.25 ms oneCacheLinerThread() spent 873.02 ms oneCacheLinerThread() spent 914.26 ms Average time: 852 ms twoCacheLinerThread() spent 119.22 ms twoCacheLinerThread() spent 127.91 ms twoCacheLinerThread() spent 114.17 ms twoCacheLinerThread() spent 126.41 ms twoCacheLinerThread() spent 125.17 ms twoCacheLinerThread() spent 126.06 ms twoCacheLinerThread() spent 117.94 ms twoCacheLinerThread() spent 129.03 ms Average time: 122 ms
参阅
hardware_concurrency [静态] | 返回实现支持的并发线程数 ( std::thread 的公开静态成员函数) |
hardware_concurrency [静态] | 返回实现支持的并发线程数 ( std::jthread 的公开静态成员函数) |