LCOV - differential code coverage report
Current view: top level - src/port - pg_popcount_x86.c (source / functions) Coverage Total Hit UNC GNC
Current: 0e5ff9b9b45a657aea12440478dc002e9b01f138 vs 0123ce131fca454009439dfa3b2266d1d40737d7 Lines: 40.7 % 91 37 54 37
Current Date: 2026-03-14 14:10:32 -0400 Functions: 77.8 % 9 7 2 7
Baseline: lcov-20260315-024220-baseline Branches: 41.7 % 24 10 14 10
Baseline Date: 2026-03-14 15:27:56 +0100 Line coverage date bins:
Legend: Lines:     hit not hit
Branches: + taken - not taken # not executed
(7,30] days: 66.7 % 3 2 1 2
(30,360] days: 87.2 % 39 34 5 34
(360..) days: 2.0 % 49 1 48 1
Function coverage date bins:
(30,360] days: 100.0 % 6 6 6
(360..) days: 33.3 % 3 1 2 1
Branch coverage date bins:
(7,30] days: 33.3 % 6 2 4 2
(30,360] days: 80.0 % 10 8 2 8
(360..) days: 0.0 % 8 0 8

 Age         Owner                    Branch data    TLA  Line data    Source code
                                  1                 :                : /*-------------------------------------------------------------------------
                                  2                 :                :  *
                                  3                 :                :  * pg_popcount_x86.c
                                  4                 :                :  *    Holds the x86-64 pg_popcount() implementations.
                                  5                 :                :  *
                                  6                 :                :  * Copyright (c) 2024-2026, PostgreSQL Global Development Group
                                  7                 :                :  *
                                  8                 :                :  * IDENTIFICATION
                                  9                 :                :  *    src/port/pg_popcount_x86.c
                                 10                 :                :  *
                                 11                 :                :  *-------------------------------------------------------------------------
                                 12                 :                :  */
                                 13                 :                : #include "c.h"
                                 14                 :                : 
                                 15                 :                : #ifdef HAVE_X86_64_POPCNTQ
                                 16                 :                : 
                                 17                 :                : #ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
                                 18                 :                : #include <immintrin.h>
                                 19                 :                : #endif
                                 20                 :                : 
                                 21                 :                : #include "port/pg_bitutils.h"
                                 22                 :                : #include "port/pg_cpu.h"
                                 23                 :                : 
                                 24                 :                : /*
                                 25                 :                :  * The SSE4.2 versions are built regardless of whether we are building the
                                 26                 :                :  * AVX-512 versions.
                                 27                 :                :  *
                                 28                 :                :  * Technically, POPCNT is not part of SSE4.2, and isn't even a vector
                                 29                 :                :  * operation, but in practice this is close enough, and "sse42" seems easier to
                                 30                 :                :  * follow than "popcnt" for these names.
                                 31                 :                :  */
                                 32                 :                : static uint64 pg_popcount_sse42(const char *buf, int bytes);
                                 33                 :                : static uint64 pg_popcount_masked_sse42(const char *buf, int bytes, bits8 mask);
                                 34                 :                : 
                                 35                 :                : /*
                                 36                 :                :  * These are the AVX-512 implementations of the popcount functions.
                                 37                 :                :  */
                                 38                 :                : #ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
                                 39                 :                : static uint64 pg_popcount_avx512(const char *buf, int bytes);
                                 40                 :                : static uint64 pg_popcount_masked_avx512(const char *buf, int bytes, bits8 mask);
                                 41                 :                : #endif                          /* USE_AVX512_POPCNT_WITH_RUNTIME_CHECK */
                                 42                 :                : 
                                 43                 :                : /*
                                 44                 :                :  * The function pointers are initially set to "choose" functions.  These
                                 45                 :                :  * functions will first set the pointers to the right implementations (base on
                                 46                 :                :  * what the current CPU supports) and then will call the pointer to fulfill the
                                 47                 :                :  * caller's request.
                                 48                 :                :  */
                                 49                 :                : static uint64 pg_popcount_choose(const char *buf, int bytes);
                                 50                 :                : static uint64 pg_popcount_masked_choose(const char *buf, int bytes, bits8 mask);
                                 51                 :                : uint64      (*pg_popcount_optimized) (const char *buf, int bytes) = pg_popcount_choose;
                                 52                 :                : uint64      (*pg_popcount_masked_optimized) (const char *buf, int bytes, bits8 mask) = pg_popcount_masked_choose;
                                 53                 :                : 
                                 54                 :                : 
                                 55                 :                : #ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
                                 56                 :                : 
                                 57                 :                : /*
                                 58                 :                :  * Returns true if the CPU supports the instructions required for the AVX-512
                                 59                 :                :  * pg_popcount() implementation.
                                 60                 :                :  */
                                 61                 :                : static bool
  493 nathan@postgresql.or       62                 :GNC         359 : pg_popcount_avx512_available(void)
                                 63                 :                : {
   16 john.naylor@postgres       64   [ -  +  -  - ]:            359 :     return x86_feature_available(PG_AVX512_BW) &&
   16 john.naylor@postgres       65                 :UNC           0 :         x86_feature_available(PG_AVX512_VPOPCNTDQ);
                                 66                 :                : }
                                 67                 :                : 
                                 68                 :                : #endif                          /* USE_AVX512_POPCNT_WITH_RUNTIME_CHECK */
                                 69                 :                : 
                                 70                 :                : /*
                                 71                 :                :  * These functions get called on the first call to pg_popcount(), etc.
                                 72                 :                :  * They detect whether we can use the asm implementations, and replace
                                 73                 :                :  * the function pointers so that subsequent calls are routed directly to
                                 74                 :                :  * the chosen implementation.
                                 75                 :                :  */
                                 76                 :                : static inline void
   53 nathan@postgresql.or       77                 :GNC         359 : choose_popcount_functions(void)
                                 78                 :                : {
   16 john.naylor@postgres       79         [ +  - ]:            359 :     if (x86_feature_available(PG_POPCNT))
                                 80                 :                :     {
   53 nathan@postgresql.or       81                 :            359 :         pg_popcount_optimized = pg_popcount_sse42;
                                 82                 :            359 :         pg_popcount_masked_optimized = pg_popcount_masked_sse42;
                                 83                 :                :     }
                                 84                 :                :     else
                                 85                 :                :     {
   53 nathan@postgresql.or       86                 :UNC           0 :         pg_popcount_optimized = pg_popcount_portable;
                                 87                 :              0 :         pg_popcount_masked_optimized = pg_popcount_masked_portable;
                                 88                 :                :     }
                                 89                 :                : 
                                 90                 :                : #ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
   53 nathan@postgresql.or       91         [ -  + ]:GNC         359 :     if (pg_popcount_avx512_available())
                                 92                 :                :     {
   53 nathan@postgresql.or       93                 :UNC           0 :         pg_popcount_optimized = pg_popcount_avx512;
                                 94                 :              0 :         pg_popcount_masked_optimized = pg_popcount_masked_avx512;
                                 95                 :                :     }
                                 96                 :                : #endif
   53 nathan@postgresql.or       97                 :GNC         359 : }
                                 98                 :                : 
                                 99                 :                : static uint64
                                100                 :             27 : pg_popcount_choose(const char *buf, int bytes)
                                101                 :                : {
                                102                 :             27 :     choose_popcount_functions();
                                103                 :             27 :     return pg_popcount_optimized(buf, bytes);
                                104                 :                : }
                                105                 :                : 
                                106                 :                : static uint64
                                107                 :            332 : pg_popcount_masked_choose(const char *buf, int bytes, bits8 mask)
                                108                 :                : {
                                109                 :            332 :     choose_popcount_functions();
                                110                 :            332 :     return pg_popcount_masked(buf, bytes, mask);
                                111                 :                : }
                                112                 :                : 
                                113                 :                : #ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
                                114                 :                : 
                                115                 :                : /*
                                116                 :                :  * pg_popcount_avx512
                                117                 :                :  *      Returns the number of 1-bits in buf
                                118                 :                :  */
                                119                 :                : pg_attribute_target("avx512vpopcntdq,avx512bw")
                                120                 :                : static uint64
  708 nathan@postgresql.or      121                 :UNC           0 : pg_popcount_avx512(const char *buf, int bytes)
                                122                 :                : {
                                123                 :                :     __m512i     val,
                                124                 :                :                 cnt;
                                125                 :              0 :     __m512i     accum = _mm512_setzero_si512();
                                126                 :                :     const char *final;
                                127                 :                :     int         tail_idx;
                                128                 :              0 :     __mmask64   mask = ~UINT64CONST(0);
                                129                 :                : 
                                130                 :                :     /*
                                131                 :                :      * Align buffer down to avoid double load overhead from unaligned access.
                                132                 :                :      * Calculate a mask to ignore preceding bytes.  Find start offset of final
                                133                 :                :      * iteration and ensure it is not empty.
                                134                 :                :      */
                                135                 :              0 :     mask <<= ((uintptr_t) buf) % sizeof(__m512i);
                                136                 :              0 :     tail_idx = (((uintptr_t) buf + bytes - 1) % sizeof(__m512i)) + 1;
                                137                 :              0 :     final = (const char *) TYPEALIGN_DOWN(sizeof(__m512i), buf + bytes - 1);
                                138                 :              0 :     buf = (const char *) TYPEALIGN_DOWN(sizeof(__m512i), buf);
                                139                 :                : 
                                140                 :                :     /*
                                141                 :                :      * Iterate through all but the final iteration.  Starting from the second
                                142                 :                :      * iteration, the mask is ignored.
                                143                 :                :      */
                                144         [ #  # ]:              0 :     if (buf < final)
                                145                 :                :     {
                                146                 :              0 :         val = _mm512_maskz_loadu_epi8(mask, (const __m512i *) buf);
                                147                 :              0 :         cnt = _mm512_popcnt_epi64(val);
                                148                 :              0 :         accum = _mm512_add_epi64(accum, cnt);
                                149                 :                : 
                                150                 :              0 :         buf += sizeof(__m512i);
                                151                 :              0 :         mask = ~UINT64CONST(0);
                                152                 :                : 
                                153         [ #  # ]:              0 :         for (; buf < final; buf += sizeof(__m512i))
                                154                 :                :         {
                                155                 :              0 :             val = _mm512_load_si512((const __m512i *) buf);
                                156                 :              0 :             cnt = _mm512_popcnt_epi64(val);
                                157                 :              0 :             accum = _mm512_add_epi64(accum, cnt);
                                158                 :                :         }
                                159                 :                :     }
                                160                 :                : 
                                161                 :                :     /* Final iteration needs to ignore bytes that are not within the length */
                                162                 :              0 :     mask &= (~UINT64CONST(0) >> (sizeof(__m512i) - tail_idx));
                                163                 :                : 
                                164                 :              0 :     val = _mm512_maskz_loadu_epi8(mask, (const __m512i *) buf);
                                165                 :              0 :     cnt = _mm512_popcnt_epi64(val);
                                166                 :              0 :     accum = _mm512_add_epi64(accum, cnt);
                                167                 :                : 
                                168                 :              0 :     return _mm512_reduce_add_epi64(accum);
                                169                 :                : }
                                170                 :                : 
                                171                 :                : /*
                                172                 :                :  * pg_popcount_masked_avx512
                                173                 :                :  *      Returns the number of 1-bits in buf after applying the mask to each byte
                                174                 :                :  */
                                175                 :                : pg_attribute_target("avx512vpopcntdq,avx512bw")
                                176                 :                : static uint64
                                177                 :              0 : pg_popcount_masked_avx512(const char *buf, int bytes, bits8 mask)
                                178                 :                : {
                                179                 :                :     __m512i     val,
                                180                 :                :                 vmasked,
                                181                 :                :                 cnt;
                                182                 :              0 :     __m512i     accum = _mm512_setzero_si512();
                                183                 :                :     const char *final;
                                184                 :                :     int         tail_idx;
                                185                 :              0 :     __mmask64   bmask = ~UINT64CONST(0);
  670 tgl@sss.pgh.pa.us         186                 :              0 :     const __m512i maskv = _mm512_set1_epi8(mask);
                                187                 :                : 
                                188                 :                :     /*
                                189                 :                :      * Align buffer down to avoid double load overhead from unaligned access.
                                190                 :                :      * Calculate a mask to ignore preceding bytes.  Find start offset of final
                                191                 :                :      * iteration and ensure it is not empty.
                                192                 :                :      */
  708 nathan@postgresql.or      193                 :              0 :     bmask <<= ((uintptr_t) buf) % sizeof(__m512i);
                                194                 :              0 :     tail_idx = (((uintptr_t) buf + bytes - 1) % sizeof(__m512i)) + 1;
                                195                 :              0 :     final = (const char *) TYPEALIGN_DOWN(sizeof(__m512i), buf + bytes - 1);
                                196                 :              0 :     buf = (const char *) TYPEALIGN_DOWN(sizeof(__m512i), buf);
                                197                 :                : 
                                198                 :                :     /*
                                199                 :                :      * Iterate through all but the final iteration.  Starting from the second
                                200                 :                :      * iteration, the mask is ignored.
                                201                 :                :      */
                                202         [ #  # ]:              0 :     if (buf < final)
                                203                 :                :     {
                                204                 :              0 :         val = _mm512_maskz_loadu_epi8(bmask, (const __m512i *) buf);
                                205                 :              0 :         vmasked = _mm512_and_si512(val, maskv);
                                206                 :              0 :         cnt = _mm512_popcnt_epi64(vmasked);
                                207                 :              0 :         accum = _mm512_add_epi64(accum, cnt);
                                208                 :                : 
                                209                 :              0 :         buf += sizeof(__m512i);
                                210                 :              0 :         bmask = ~UINT64CONST(0);
                                211                 :                : 
                                212         [ #  # ]:              0 :         for (; buf < final; buf += sizeof(__m512i))
                                213                 :                :         {
                                214                 :              0 :             val = _mm512_load_si512((const __m512i *) buf);
                                215                 :              0 :             vmasked = _mm512_and_si512(val, maskv);
                                216                 :              0 :             cnt = _mm512_popcnt_epi64(vmasked);
                                217                 :              0 :             accum = _mm512_add_epi64(accum, cnt);
                                218                 :                :         }
                                219                 :                :     }
                                220                 :                : 
                                221                 :                :     /* Final iteration needs to ignore bytes that are not within the length */
                                222                 :              0 :     bmask &= (~UINT64CONST(0) >> (sizeof(__m512i) - tail_idx));
                                223                 :                : 
                                224                 :              0 :     val = _mm512_maskz_loadu_epi8(bmask, (const __m512i *) buf);
                                225                 :              0 :     vmasked = _mm512_and_si512(val, maskv);
                                226                 :              0 :     cnt = _mm512_popcnt_epi64(vmasked);
                                227                 :              0 :     accum = _mm512_add_epi64(accum, cnt);
                                228                 :                : 
                                229                 :              0 :     return _mm512_reduce_add_epi64(accum);
                                230                 :                : }
                                231                 :                : 
                                232                 :                : #endif                          /* USE_AVX512_POPCNT_WITH_RUNTIME_CHECK */
                                233                 :                : 
                                234                 :                : /*
                                235                 :                :  * pg_popcount64_sse42
                                236                 :                :  *      Return the number of 1 bits set in word
                                237                 :                :  */
                                238                 :                : static inline int
   53 nathan@postgresql.or      239                 :GNC    17911361 : pg_popcount64_sse42(uint64 word)
                                240                 :                : {
                                241                 :                : #ifdef _MSC_VER
                                242                 :                :     return __popcnt64(word);
                                243                 :                : #else
                                244                 :                :     uint64      res;
                                245                 :                : 
                                246                 :       17911361 : __asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc");
                                247                 :       17911361 :     return (int) res;
                                248                 :                : #endif
                                249                 :                : }
                                250                 :                : 
                                251                 :                : /*
                                252                 :                :  * pg_popcount_sse42
                                253                 :                :  *      Returns the number of 1-bits in buf
                                254                 :                :  */
                                255                 :                : pg_attribute_no_sanitize_alignment()
                                256                 :                : static uint64
                                257                 :          11152 : pg_popcount_sse42(const char *buf, int bytes)
                                258                 :                : {
                                259                 :          11152 :     uint64      popcnt = 0;
   31                           260                 :          11152 :     const uint64 *words = (const uint64 *) buf;
                                261                 :                : 
                                262         [ +  + ]:         318431 :     while (bytes >= 8)
                                263                 :                :     {
                                264                 :         307279 :         popcnt += pg_popcount64_sse42(*words++);
                                265                 :         307279 :         bytes -= 8;
                                266                 :                :     }
                                267                 :                : 
                                268                 :          11152 :     buf = (const char *) words;
                                269                 :                : 
                                270                 :                :     /* Process any remaining bytes */
   53                           271         [ +  + ]:          11288 :     while (bytes--)
                                272                 :            136 :         popcnt += pg_number_of_ones[(unsigned char) *buf++];
                                273                 :                : 
                                274                 :          11152 :     return popcnt;
                                275                 :                : }
                                276                 :                : 
                                277                 :                : /*
                                278                 :                :  * pg_popcount_masked_sse42
                                279                 :                :  *      Returns the number of 1-bits in buf after applying the mask to each byte
                                280                 :                :  */
                                281                 :                : pg_attribute_no_sanitize_alignment()
                                282                 :                : static uint64
                                283                 :          17242 : pg_popcount_masked_sse42(const char *buf, int bytes, bits8 mask)
                                284                 :                : {
                                285                 :          17242 :     uint64      popcnt = 0;
                                286                 :          17242 :     uint64      maskv = ~UINT64CONST(0) / 0xFF * mask;
   31                           287                 :          17242 :     const uint64 *words = (const uint64 *) buf;
                                288                 :                : 
                                289         [ +  + ]:       17621324 :     while (bytes >= 8)
                                290                 :                :     {
                                291                 :       17604082 :         popcnt += pg_popcount64_sse42(*words++ & maskv);
                                292                 :       17604082 :         bytes -= 8;
                                293                 :                :     }
                                294                 :                : 
                                295                 :          17242 :     buf = (const char *) words;
                                296                 :                : 
                                297                 :                :     /* Process any remaining bytes */
   53                           298         [ -  + ]:          17242 :     while (bytes--)
   53 nathan@postgresql.or      299                 :UNC           0 :         popcnt += pg_number_of_ones[(unsigned char) *buf++ & mask];
                                300                 :                : 
   53 nathan@postgresql.or      301                 :GNC       17242 :     return popcnt;
                                302                 :                : }
                                303                 :                : 
                                304                 :                : #endif                          /* HAVE_X86_64_POPCNTQ */
        

Generated by: LCOV version 2.4-beta