From 43da394304fba820830da2cef2c0214fe292c037 Mon Sep 17 00:00:00 2001 From: John Naylor Date: Mon, 23 Jun 2025 18:03:56 +0700 Subject: [PATCH] Properly fix AVX-512 CRC calculation bug The problem that led to the workaround in f83f14881c7 was not in fact a compiler bug, but a failure to zero the upper bits of the vector register containing the initial scalar CRC value. Fix that and revert the workaround. Diagnosed-by: Nathan Bossart Diagnosed-by: Raghuveer Devulapalli Tested-by: Andy Fan Tested-by: Soumyadeep Chakraborty Reviewed-by: Nathan Bossart Reviewed-by: Raghuveer Devulapalli Discussion: https://postgr.es/m/PH8PR11MB82866B07AA6758D12F699C00FB70A@PH8PR11MB8286.namprd11.prod.outlook.com --- src/port/pg_crc32c_sse42.c | 2 +- src/port/pg_crc32c_sse42_choose.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/port/pg_crc32c_sse42.c b/src/port/pg_crc32c_sse42.c index 9af3474a6ca..1a717255355 100644 --- a/src/port/pg_crc32c_sse42.c +++ b/src/port/pg_crc32c_sse42.c @@ -123,7 +123,7 @@ pg_comp_crc32c_avx512(pg_crc32c crc, const void *data, size_t len) __m512i k; k = _mm512_broadcast_i32x4(_mm_setr_epi32(0x740eef02, 0, 0x9e4addf8, 0)); - x0 = _mm512_xor_si512(_mm512_castsi128_si512(_mm_cvtsi32_si128(crc0)), x0); + x0 = _mm512_xor_si512(_mm512_zextsi128_si512(_mm_cvtsi32_si128(crc0)), x0); buf += 64; /* Main loop. 
*/ diff --git a/src/port/pg_crc32c_sse42_choose.c b/src/port/pg_crc32c_sse42_choose.c index 802e47788c1..74d2421ba2b 100644 --- a/src/port/pg_crc32c_sse42_choose.c +++ b/src/port/pg_crc32c_sse42_choose.c @@ -95,9 +95,7 @@ pg_comp_crc32c_choose(pg_crc32c crc, const void *data, size_t len) __cpuidex(exx, 7, 0); #endif -#if defined(__clang__) && !defined(__OPTIMIZE__) - /* Some versions of clang are broken at -O0 */ -#elif defined(USE_AVX512_CRC32C_WITH_RUNTIME_CHECK) +#ifdef USE_AVX512_CRC32C_WITH_RUNTIME_CHECK if (exx[2] & (1 << 10) && /* VPCLMULQDQ */ exx[1] & (1 << 31)) /* AVX512-VL */ pg_comp_crc32c = pg_comp_crc32c_avx512; -- 2.39.5