LCOV - 0e5ff9b9b45a657aea12440478dc002e9b01f138 vs 0123ce131fca454009439dfa3b2266d1d40737d7

LCOV - differential code coverage report

Current view:	top level - src/backend/utils/adt - varlena.c (source / functions)		Coverage	Total	Hit	UNC	LBC	UBC	GBC	GIC	GNC	CBC	EUB	ECB	DUB	DCB
Current:	0e5ff9b9b45a657aea12440478dc002e9b01f138 vs 0123ce131fca454009439dfa3b2266d1d40737d7	Lines:	90.4 %	1937	1752	2	4	179	8		46	1698		1	21	416
Current Date:	2026-03-14 14:10:32 -0400	Functions:	92.4 %	144	133			11	1		20	112				41
Baseline:	lcov-20260315-024220-baseline	Branches:	71.7 %	1195	857	7	8	323	5	2	15	835	470	252	357	339
Baseline Date:	2026-03-14 15:27:56 +0100	Line coverage date bins:
Legend:	Lines: hit not hit Branches: + taken - not taken # not executed	(7,30] days:	100.0 %	20	20							20
		(30,360] days:	91.8 %	73	67	2		4			26	41
		(360..) days:	90.3 %	1844	1665		4	175	8		20	1637		1
		Function coverage date bins:
		(7,30] days:	100.0 %	1	1							1
		(30,360] days:	100.0 %	4	4						1	3
		(360..) days:	92.1 %	139	128			11	1		19	108
		Branch coverage date bins:
		(7,30] days:	68.2 %	22	15			3				15		4
		(30,360] days:	43.8 %	64	28	3		9			11	17	15	9
		(360..) days:	44.5 %	1831	814	4	8	311	5	2	4	803	455	239

 Age         Owner                    Branch data    TLA  Line data    Source code

                                  1                 :                : /*-------------------------------------------------------------------------
                                  2                 :                :  *
                                  3                 :                :  * varlena.c
                                  4                 :                :  *    Functions for the variable-length built-in types.
                                  5                 :                :  *
                                  6                 :                :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
                                  7                 :                :  * Portions Copyright (c) 1994, Regents of the University of California
                                  8                 :                :  *
                                  9                 :                :  *
                                 10                 :                :  * IDENTIFICATION
                                 11                 :                :  *    src/backend/utils/adt/varlena.c
                                 12                 :                :  *
                                 13                 :                :  *-------------------------------------------------------------------------
                                 14                 :                :  */
                                 15                 :                : #include "postgres.h"
                                 16                 :                : 
                                 17                 :                : #include <ctype.h>
                                 18                 :                : #include <limits.h>
                                 19                 :                : 
                                 20                 :                : #include "access/detoast.h"
                                 21                 :                : #include "access/toast_compression.h"
                                 22                 :                : #include "catalog/pg_collation.h"
                                 23                 :                : #include "catalog/pg_type.h"
                                 24                 :                : #include "common/hashfn.h"
                                 25                 :                : #include "common/int.h"
                                 26                 :                : #include "common/unicode_category.h"
                                 27                 :                : #include "common/unicode_norm.h"
                                 28                 :                : #include "common/unicode_version.h"
                                 29                 :                : #include "funcapi.h"
                                 30                 :                : #include "lib/hyperloglog.h"
                                 31                 :                : #include "libpq/pqformat.h"
                                 32                 :                : #include "miscadmin.h"
                                 33                 :                : #include "nodes/execnodes.h"
                                 34                 :                : #include "parser/scansup.h"
                                 35                 :                : #include "port/pg_bswap.h"
                                 36                 :                : #include "regex/regex.h"
                                 37                 :                : #include "utils/builtins.h"
                                 38                 :                : #include "utils/guc.h"
                                 39                 :                : #include "utils/lsyscache.h"
                                 40                 :                : #include "utils/memutils.h"
                                 41                 :                : #include "utils/pg_locale.h"
                                 42                 :                : #include "utils/sortsupport.h"
                                 43                 :                : #include "utils/varlena.h"
                                 44                 :                : 
                                 45                 :                : typedef varlena VarString;
                                 46                 :                : 
                                 47                 :                : /*
                                 48                 :                :  * State for text_position_* functions.
                                 49                 :                :  */
                                 50                 :                : typedef struct
                                 51                 :                : {
                                 52                 :                :     pg_locale_t locale;         /* collation used for substring matching */
                                 53                 :                :     bool        is_multibyte_char_in_char;  /* need to check char boundaries? */
                                 54                 :                :     bool        greedy;         /* find longest possible substring? */
                                 55                 :                : 
                                 56                 :                :     char       *str1;           /* haystack string */
                                 57                 :                :     char       *str2;           /* needle string */
                                 58                 :                :     int         len1;           /* string lengths in bytes */
                                 59                 :                :     int         len2;
                                 60                 :                : 
                                 61                 :                :     /* Skip table for Boyer-Moore-Horspool search algorithm: */
                                 62                 :                :     int         skiptablemask;  /* mask for ANDing with skiptable subscripts */
                                 63                 :                :     int         skiptable[256]; /* skip distance for given mismatched char */
                                 64                 :                : 
                                 65                 :                :     /*
                                 66                 :                :      * Note that with nondeterministic collations, the length of the last
                                 67                 :                :      * match is not necessarily equal to the length of the "needle" passed in.
                                 68                 :                :      */
                                 69                 :                :     char       *last_match;     /* pointer to last match in 'str1' */
                                 70                 :                :     int         last_match_len; /* length of last match */
                                 71                 :                :     int         last_match_len_tmp; /* same but for internal use */
                                 72                 :                : 
                                 73                 :                :     /*
                                 74                 :                :      * Sometimes we need to convert the byte position of a match to a
                                 75                 :                :      * character position.  These store the last position that was converted,
                                 76                 :                :      * so that on the next call, we can continue from that point, rather than
                                 77                 :                :      * count characters from the very beginning.
                                 78                 :                :      */
                                 79                 :                :     char       *refpoint;       /* pointer within original haystack string */
                                 80                 :                :     int         refpos;         /* 0-based character offset of the same point */
                                 81                 :                : } TextPositionState;
                                 82                 :                : 
                                 83                 :                : typedef struct
                                 84                 :                : {
                                 85                 :                :     char       *buf1;           /* 1st string, or abbreviation original string
                                 86                 :                :                                  * buf */
                                 87                 :                :     char       *buf2;           /* 2nd string, or abbreviation strxfrm() buf */
                                 88                 :                :     int         buflen1;        /* Allocated length of buf1 */
                                 89                 :                :     int         buflen2;        /* Allocated length of buf2 */
                                 90                 :                :     int         last_len1;      /* Length of last buf1 string/strxfrm() input */
                                 91                 :                :     int         last_len2;      /* Length of last buf2 string/strxfrm() blob */
                                 92                 :                :     int         last_returned;  /* Last comparison result (cache) */
                                 93                 :                :     bool        cache_blob;     /* Does buf2 contain strxfrm() blob, etc? */
                                 94                 :                :     bool        collate_c;
                                 95                 :                :     Oid         typid;          /* Actual datatype (text/bpchar/name) */
                                 96                 :                :     hyperLogLogState abbr_card; /* Abbreviated key cardinality state */
                                 97                 :                :     hyperLogLogState full_card; /* Full key cardinality state */
                                 98                 :                :     double      prop_card;      /* Required cardinality proportion */
                                 99                 :                :     pg_locale_t locale;
                                100                 :                : } VarStringSortSupport;
                                101                 :                : 
                                102                 :                : /*
                                103                 :                :  * Output data for split_text(): we output either to an array or a table.
                                104                 :                :  * tupstore and tupdesc must be set up in advance to output to a table.
                                105                 :                :  */
                                106                 :                : typedef struct
                                107                 :                : {
                                108                 :                :     ArrayBuildState *astate;
                                109                 :                :     Tuplestorestate *tupstore;
                                110                 :                :     TupleDesc   tupdesc;
                                111                 :                : } SplitTextOutputData;
                                112                 :                : 
                                113                 :                : /*
                                114                 :                :  * This should be large enough that most strings will fit, but small enough
                                115                 :                :  * that we feel comfortable putting it on the stack
                                116                 :                :  */
                                117                 :                : #define TEXTBUFLEN      1024
                                118                 :                : 
                                119                 :                : #define DatumGetVarStringP(X)       ((VarString *) PG_DETOAST_DATUM(X))
                                120                 :                : #define DatumGetVarStringPP(X)      ((VarString *) PG_DETOAST_DATUM_PACKED(X))
                                121                 :                : 
                                122                 :                : static int  varstrfastcmp_c(Datum x, Datum y, SortSupport ssup);
                                123                 :                : static int  bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup);
                                124                 :                : static int  namefastcmp_c(Datum x, Datum y, SortSupport ssup);
                                125                 :                : static int  varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup);
                                126                 :                : static int  namefastcmp_locale(Datum x, Datum y, SortSupport ssup);
                                127                 :                : static int  varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup);
                                128                 :                : static Datum varstr_abbrev_convert(Datum original, SortSupport ssup);
                                129                 :                : static bool varstr_abbrev_abort(int memtupcount, SortSupport ssup);
                                130                 :                : static int32 text_length(Datum str);
                                131                 :                : static text *text_catenate(text *t1, text *t2);
                                132                 :                : static text *text_substring(Datum str,
                                133                 :                :                             int32 start,
                                134                 :                :                             int32 length,
                                135                 :                :                             bool length_not_specified);
                                136                 :                : static int  pg_mbcharcliplen_chars(const char *mbstr, int len, int limit);
                                137                 :                : static text *text_overlay(text *t1, text *t2, int sp, int sl);
                                138                 :                : static int  text_position(text *t1, text *t2, Oid collid);
                                139                 :                : static void text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state);
                                140                 :                : static bool text_position_next(TextPositionState *state);
                                141                 :                : static char *text_position_next_internal(char *start_ptr, TextPositionState *state);
                                142                 :                : static char *text_position_get_match_ptr(TextPositionState *state);
                                143                 :                : static int  text_position_get_match_pos(TextPositionState *state);
                                144                 :                : static void text_position_cleanup(TextPositionState *state);
                                145                 :                : static void check_collation_set(Oid collid);
                                146                 :                : static int  text_cmp(text *arg1, text *arg2, Oid collid);
                                147                 :                : static void appendStringInfoText(StringInfo str, const text *t);
                                148                 :                : static bool split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate);
                                149                 :                : static void split_text_accum_result(SplitTextOutputData *tstate,
                                150                 :                :                                     text *field_value,
                                151                 :                :                                     text *null_string,
                                152                 :                :                                     Oid collation);
                                153                 :                : static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
                                154                 :                :                                     const char *fldsep, const char *null_string);
                                155                 :                : static StringInfo makeStringAggState(FunctionCallInfo fcinfo);
                                156                 :                : static bool text_format_parse_digits(const char **ptr, const char *end_ptr,
                                157                 :                :                                      int *value);
                                158                 :                : static const char *text_format_parse_format(const char *start_ptr,
                                159                 :                :                                             const char *end_ptr,
                                160                 :                :                                             int *argpos, int *widthpos,
                                161                 :                :                                             int *flags, int *width);
                                162                 :                : static void text_format_string_conversion(StringInfo buf, char conversion,
                                163                 :                :                                           FmgrInfo *typOutputInfo,
                                164                 :                :                                           Datum value, bool isNull,
                                165                 :                :                                           int flags, int width);
                                166                 :                : static void text_format_append_string(StringInfo buf, const char *str,
                                167                 :                :                                       int flags, int width);
                                168                 :                : 
                                169                 :                : 
                                170                 :                : /*****************************************************************************
                                171                 :                :  *   CONVERSION ROUTINES EXPORTED FOR USE BY C CODE                          *
                                172                 :                :  *****************************************************************************/
                                173                 :                : 
                                174                 :                : /*
                                175                 :                :  * cstring_to_text
                                176                 :                :  *
                                177                 :                :  * Create a text value from a null-terminated C string.
                                178                 :                :  *
                                179                 :                :  * The new text value is freshly palloc'd with a full-size VARHDR.
                                180                 :                :  */
                                181                 :                : text *
 6564 tgl@sss.pgh.pa.us         182                 :CBC    13144771 : cstring_to_text(const char *s)
                                183                 :                : {
                                184                 :       13144771 :     return cstring_to_text_with_len(s, strlen(s));
                                185                 :                : }
                                186                 :                : 
                                187                 :                : /*
                                188                 :                :  * cstring_to_text_with_len
                                189                 :                :  *
                                190                 :                :  * Same as cstring_to_text except the caller specifies the string length;
                                191                 :                :  * the string need not be null_terminated.
                                192                 :                :  */
                                193                 :                : text *
                                194                 :       14513841 : cstring_to_text_with_len(const char *s, int len)
                                195                 :                : {
                                196                 :       14513841 :     text       *result = (text *) palloc(len + VARHDRSZ);
                                197                 :                : 
                                198                 :       14513841 :     SET_VARSIZE(result, len + VARHDRSZ);
                                199                 :       14513841 :     memcpy(VARDATA(result), s, len);
                                200                 :                : 
                                201                 :       14513841 :     return result;
                                202                 :                : }
                                203                 :                : 
                                204                 :                : /*
                                205                 :                :  * text_to_cstring
                                206                 :                :  *
                                207                 :                :  * Create a palloc'd, null-terminated C string from a text value.
                                208                 :                :  *
                                209                 :                :  * We support being passed a compressed or toasted text value.
                                210                 :                :  * This is a bit bogus since such values shouldn't really be referred to as
                                211                 :                :  * "text *", but it seems useful for robustness.  If we didn't handle that
                                212                 :                :  * case here, we'd need another routine that did, anyway.
                                213                 :                :  */
                                214                 :                : char *
                                215                 :        9653246 : text_to_cstring(const text *t)
                                216                 :                : {
                                217                 :                :     /* must cast away the const, unfortunately */
 2698 peter_e@gmx.net           218                 :        9653246 :     text       *tunpacked = pg_detoast_datum_packed(unconstify(text *, t));
 6564 tgl@sss.pgh.pa.us         219   [ -  +  -  -  :        9653246 :     int         len = VARSIZE_ANY_EXHDR(tunpacked);
                                     -  -  -  -  +  
                                                 + ]
                                220                 :                :     char       *result;
                                221                 :                : 
                                222                 :        9653246 :     result = (char *) palloc(len + 1);
                                223         [ +  + ]:        9653246 :     memcpy(result, VARDATA_ANY(tunpacked), len);
                                224                 :        9653246 :     result[len] = '\0';
                                225                 :                : 
                                226         [ +  + ]:        9653246 :     if (tunpacked != t)
                                227                 :          24415 :         pfree(tunpacked);
                                228                 :                : 
                                229                 :        9653246 :     return result;
                                230                 :                : }
                                231                 :                : 
                                232                 :                : /*
                                233                 :                :  * text_to_cstring_buffer
                                234                 :                :  *
                                235                 :                :  * Copy a text value into a caller-supplied buffer of size dst_len.
                                236                 :                :  *
                                237                 :                :  * The text string is truncated if necessary to fit.  The result is
                                238                 :                :  * guaranteed null-terminated (unless dst_len == 0).
                                239                 :                :  *
                                240                 :                :  * We support being passed a compressed or toasted text value.
                                241                 :                :  * This is a bit bogus since such values shouldn't really be referred to as
                                242                 :                :  * "text *", but it seems useful for robustness.  If we didn't handle that
                                243                 :                :  * case here, we'd need another routine that did, anyway.
                                244                 :                :  */
                                245                 :                : void
                                246                 :            503 : text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
                                247                 :                : {
                                248                 :                :     /* must cast away the const, unfortunately */
 2698 peter_e@gmx.net           249                 :            503 :     text       *srcunpacked = pg_detoast_datum_packed(unconstify(text *, src));
 6564 tgl@sss.pgh.pa.us         250   [ -  +  -  -  :            503 :     size_t      src_len = VARSIZE_ANY_EXHDR(srcunpacked);
                                     -  -  -  -  -  
                                                 + ]
                                251                 :                : 
                                252         [ +  - ]:            503 :     if (dst_len > 0)
                                253                 :                :     {
                                254                 :            503 :         dst_len--;
                                255         [ +  - ]:            503 :         if (dst_len >= src_len)
                                256                 :            503 :             dst_len = src_len;
                                257                 :                :         else                    /* ensure truncation is encoding-safe */
 6564 tgl@sss.pgh.pa.us         258         [ #  # ]:UBC           0 :             dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
 6564 tgl@sss.pgh.pa.us         259         [ -  + ]:CBC         503 :         memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
                                260                 :            503 :         dst[dst_len] = '\0';
                                261                 :                :     }
                                262                 :                : 
                                263         [ -  + ]:            503 :     if (srcunpacked != src)
 6564 tgl@sss.pgh.pa.us         264                 :UBC           0 :         pfree(srcunpacked);
 6564 tgl@sss.pgh.pa.us         265                 :CBC         503 : }
                                266                 :                : 
                                267                 :                : 
                                268                 :                : /*****************************************************************************
                                269                 :                :  *   USER I/O ROUTINES                                                       *
                                270                 :                :  *****************************************************************************/
                                271                 :                : 
                                272                 :                : /*
                                273                 :                :  *      textin          - converts cstring to internal representation
                                274                 :                :  */
                                275                 :                : Datum
 9384                           276                 :       11372424 : textin(PG_FUNCTION_ARGS)
                                277                 :                : {
                                278                 :       11372424 :     char       *inputText = PG_GETARG_CSTRING(0);
                                279                 :                : 
 6564                           280                 :       11372424 :     PG_RETURN_TEXT_P(cstring_to_text(inputText));
                                281                 :                : }
                                282                 :                : 
                                283                 :                : /*
                                284                 :                :  *      textout         - converts internal representation to cstring
                                285                 :                :  */
                                286                 :                : Datum
 9384                           287                 :        4557788 : textout(PG_FUNCTION_ARGS)
                                288                 :                : {
 6564                           289                 :        4557788 :     Datum       txt = PG_GETARG_DATUM(0);
                                290                 :                : 
                                291                 :        4557788 :     PG_RETURN_CSTRING(TextDatumGetCString(txt));
                                292                 :                : }
                                293                 :                : 
                                294                 :                : /*
                                295                 :                :  *      textrecv            - converts external binary format to text
                                296                 :                :  */
                                297                 :                : Datum
 8346                           298                 :             24 : textrecv(PG_FUNCTION_ARGS)
                                299                 :                : {
                                300                 :             24 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
                                301                 :                :     text       *result;
                                302                 :                :     char       *str;
                                303                 :                :     int         nbytes;
                                304                 :                : 
                                305                 :             24 :     str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
                                306                 :                : 
 6564                           307                 :             24 :     result = cstring_to_text_with_len(str, nbytes);
 8346                           308                 :             24 :     pfree(str);
                                309                 :             24 :     PG_RETURN_TEXT_P(result);
                                310                 :                : }
                                311                 :                : 
                                312                 :                : /*
                                313                 :                :  *      textsend            - converts text to binary format
                                314                 :                :  */
                                315                 :                : Datum
                                316                 :           2353 : textsend(PG_FUNCTION_ARGS)
                                317                 :                : {
 6918                           318                 :           2353 :     text       *t = PG_GETARG_TEXT_PP(0);
                                319                 :                :     StringInfoData buf;
                                320                 :                : 
 8346                           321                 :           2353 :     pq_begintypsend(&buf);
 6918                           322   [ -  +  -  -  :           2353 :     pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
                                     -  -  -  -  +  
                                           +  +  + ]
 8346                           323                 :           2353 :     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
                                324                 :                : }
                                325                 :                : 
                                326                 :                : 
                                327                 :                : /*
                                328                 :                :  *      unknownin           - converts cstring to internal representation
                                329                 :                :  */
                                330                 :                : Datum
 8726 bruce@momjian.us          331                 :UBC           0 : unknownin(PG_FUNCTION_ARGS)
                                332                 :                : {
 7594 tgl@sss.pgh.pa.us         333                 :              0 :     char       *str = PG_GETARG_CSTRING(0);
                                334                 :                : 
                                335                 :                :     /* representation is same as cstring */
                                336                 :              0 :     PG_RETURN_CSTRING(pstrdup(str));
                                337                 :                : }
                                338                 :                : 
                                339                 :                : /*
                                340                 :                :  *      unknownout          - converts internal representation to cstring
                                341                 :                :  */
                                342                 :                : Datum
 8726 bruce@momjian.us          343                 :CBC         469 : unknownout(PG_FUNCTION_ARGS)
                                344                 :                : {
                                345                 :                :     /* representation is same as cstring */
 7594 tgl@sss.pgh.pa.us         346                 :            469 :     char       *str = PG_GETARG_CSTRING(0);
                                347                 :                : 
                                348                 :            469 :     PG_RETURN_CSTRING(pstrdup(str));
                                349                 :                : }
                                350                 :                : 
                                351                 :                : /*
                                352                 :                :  *      unknownrecv         - converts external binary format to unknown
                                353                 :                :  */
                                354                 :                : Datum
 8346 tgl@sss.pgh.pa.us         355                 :UBC           0 : unknownrecv(PG_FUNCTION_ARGS)
                                356                 :                : {
                                357                 :              0 :     StringInfo  buf = (StringInfo) PG_GETARG_POINTER(0);
                                358                 :                :     char       *str;
                                359                 :                :     int         nbytes;
                                360                 :                : 
 7594                           361                 :              0 :     str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
                                362                 :                :     /* representation is same as cstring */
                                363                 :              0 :     PG_RETURN_CSTRING(str);
                                364                 :                : }
                                365                 :                : 
                                366                 :                : /*
                                367                 :                :  *      unknownsend         - converts unknown to binary format
                                368                 :                :  */
                                369                 :                : Datum
 8346                           370                 :              0 : unknownsend(PG_FUNCTION_ARGS)
                                371                 :                : {
                                372                 :                :     /* representation is same as cstring */
 7594                           373                 :              0 :     char       *str = PG_GETARG_CSTRING(0);
                                374                 :                :     StringInfoData buf;
                                375                 :                : 
                                376                 :              0 :     pq_begintypsend(&buf);
                                377                 :              0 :     pq_sendtext(&buf, str, strlen(str));
                                378                 :              0 :     PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
                                379                 :                : }
                                380                 :                : 
                                381                 :                : 
                                382                 :                : /* ========== PUBLIC ROUTINES ========== */
                                383                 :                : 
                                384                 :                : /*
                                385                 :                :  * textlen -
                                386                 :                :  *    returns the logical length of a text*
                                387                 :                :  *     (which is less than the VARSIZE of the text*)
                                388                 :                :  */
                                389                 :                : Datum
 9383 tgl@sss.pgh.pa.us         390                 :CBC      215476 : textlen(PG_FUNCTION_ARGS)
                                391                 :                : {
 8079                           392                 :         215476 :     Datum       str = PG_GETARG_DATUM(0);
                                393                 :                : 
                                394                 :                :     /* try to avoid decompressing argument */
                                395                 :         215476 :     PG_RETURN_INT32(text_length(str));
                                396                 :                : }
                                397                 :                : 
                                398                 :                : /*
                                399                 :                :  * text_length -
                                400                 :                :  *  Does the real work for textlen()
                                401                 :                :  *
                                402                 :                :  *  This is broken out so it can be called directly by other string processing
                                403                 :                :  *  functions.  Note that the argument is passed as a Datum, to indicate that
                                404                 :                :  *  it may still be in compressed form.  We can avoid decompressing it at all
                                405                 :                :  *  in some cases.
                                406                 :                :  */
                                407                 :                : static int32
 8606 bruce@momjian.us          408                 :         215482 : text_length(Datum str)
                                409                 :                : {
                                410                 :                :     /* fastpath when max encoding length is one */
                                411         [ +  + ]:         215482 :     if (pg_database_encoding_max_length() == 1)
  219 peter@eisentraut.org      412                 :GNC          10 :         return (toast_raw_datum_size(str) - VARHDRSZ);
                                413                 :                :     else
                                414                 :                :     {
 6918 tgl@sss.pgh.pa.us         415                 :CBC      215472 :         text       *t = DatumGetTextPP(str);
                                416                 :                : 
  219 peter@eisentraut.org      417                 :GNC      215472 :         return (pg_mbstrlen_with_len(VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t)));
                                418                 :                :     }
                                419                 :                : }
                                420                 :                : 
                                421                 :                : /*
                                422                 :                :  * textoctetlen -
                                423                 :                :  *    returns the physical length of a text*
                                424                 :                :  *     (which is less than the VARSIZE of the text*)
                                425                 :                :  */
                                426                 :                : Datum
 9383 tgl@sss.pgh.pa.us         427                 :CBC          35 : textoctetlen(PG_FUNCTION_ARGS)
                                428                 :                : {
 8079                           429                 :             35 :     Datum       str = PG_GETARG_DATUM(0);
                                430                 :                : 
                                431                 :                :     /* We need not detoast the input at all */
                                432                 :             35 :     PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
                                433                 :                : }
                                434                 :                : 
                                435                 :                : /*
                                436                 :                :  * textcat -
                                437                 :                :  *    takes two text* and returns a text* that is the concatenation of
                                438                 :                :  *    the two.
                                439                 :                :  *
                                440                 :                :  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
                                441                 :                :  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
                                442                 :                :  * Allocate space for output in all cases.
                                443                 :                :  * XXX - thomas 1997-07-10
                                444                 :                :  */
                                445                 :                : Datum
 9383                           446                 :        1132993 : textcat(PG_FUNCTION_ARGS)
                                447                 :                : {
 6918                           448                 :        1132993 :     text       *t1 = PG_GETARG_TEXT_PP(0);
                                449                 :        1132993 :     text       *t2 = PG_GETARG_TEXT_PP(1);
                                450                 :                : 
 5893                           451                 :        1132993 :     PG_RETURN_TEXT_P(text_catenate(t1, t2));
                                452                 :                : }
                                453                 :                : 
                                454                 :                : /*
                                455                 :                :  * text_catenate
                                456                 :                :  *  Guts of textcat(), broken out so it can be used by other functions
                                457                 :                :  *
                                458                 :                :  * Arguments can be in short-header form, but not compressed or out-of-line
                                459                 :                :  */
                                460                 :                : static text *
                                461                 :        1133033 : text_catenate(text *t1, text *t2)
                                462                 :                : {
                                463                 :                :     text       *result;
                                464                 :                :     int         len1,
                                465                 :                :                 len2,
                                466                 :                :                 len;
                                467                 :                :     char       *ptr;
                                468                 :                : 
 6918                           469   [ -  +  -  -  :        1133033 :     len1 = VARSIZE_ANY_EXHDR(t1);
                                     -  -  -  -  +  
                                                 + ]
 5893                           470   [ -  +  -  -  :        1133033 :     len2 = VARSIZE_ANY_EXHDR(t2);
                                     -  -  -  -  +  
                                                 + ]
                                471                 :                : 
                                472                 :                :     /* paranoia ... probably should throw error instead? */
10416 bruce@momjian.us          473         [ -  + ]:        1133033 :     if (len1 < 0)
10416 bruce@momjian.us          474                 :UBC           0 :         len1 = 0;
10416 bruce@momjian.us          475         [ -  + ]:CBC     1133033 :     if (len2 < 0)
10416 bruce@momjian.us          476                 :UBC           0 :         len2 = 0;
                                477                 :                : 
10172 lockhart@fourpalms.o      478                 :CBC     1133033 :     len = len1 + len2 + VARHDRSZ;
 9383 tgl@sss.pgh.pa.us         479                 :        1133033 :     result = (text *) palloc(len);
                                480                 :                : 
                                481                 :                :     /* Set size of result string... */
 6956                           482                 :        1133033 :     SET_VARSIZE(result, len);
                                483                 :                : 
                                484                 :                :     /* Fill data field of result string... */
10416 bruce@momjian.us          485                 :        1133033 :     ptr = VARDATA(result);
10172 lockhart@fourpalms.o      486         [ +  + ]:        1133033 :     if (len1 > 0)
 6918 tgl@sss.pgh.pa.us         487         [ +  + ]:        1131434 :         memcpy(ptr, VARDATA_ANY(t1), len1);
10172 lockhart@fourpalms.o      488         [ +  + ]:        1133033 :     if (len2 > 0)
 6918 tgl@sss.pgh.pa.us         489         [ +  + ]:        1132928 :         memcpy(ptr + len1, VARDATA_ANY(t2), len2);
                                490                 :                : 
 5893                           491                 :        1133033 :     return result;
                                492                 :                : }
                                493                 :                : 
                                494                 :                : /*
                                495                 :                :  * charlen_to_bytelen()
                                496                 :                :  *  Compute the number of bytes occupied by n characters starting at *p
                                497                 :                :  *
                                498                 :                :  * The caller shall ensure there are n complete characters.  Callers achieve
                                499                 :                :  * this by deriving "n" from regmatch_t findings from searching a wchar array.
                                500                 :                :  * pg_mb2wchar_with_len() skips any trailing incomplete character, so regex
                                501                 :                :  * matches will end no later than the last complete character.  (The string
                                502                 :                :  * need not be null-terminated.)
                                503                 :                :  */
                                504                 :                : static int
 7067                           505                 :           8708 : charlen_to_bytelen(const char *p, int n)
                                506                 :                : {
                                507         [ +  + ]:           8708 :     if (pg_database_encoding_max_length() == 1)
                                508                 :                :     {
                                509                 :                :         /* Optimization for single-byte encodings */
                                510                 :             90 :         return n;
                                511                 :                :     }
                                512                 :                :     else
                                513                 :                :     {
                                514                 :                :         const char *s;
                                515                 :                : 
                                516         [ +  + ]:        3037611 :         for (s = p; n > 0; n--)
   67 tmunro@postgresql.or      517                 :        3028993 :             s += pg_mblen_unbounded(s); /* caller verified encoding */
                                518                 :                : 
 7067 tgl@sss.pgh.pa.us         519                 :           8618 :         return s - p;
                                520                 :                :     }
                                521                 :                : }
                                522                 :                : 
                                523                 :                : /*
                                524                 :                :  * text_substr()
                                525                 :                :  * Return a substring starting at the specified position.
                                526                 :                :  * - thomas 1997-12-31
                                527                 :                :  *
                                528                 :                :  * Input:
                                529                 :                :  *  - string
                                530                 :                :  *  - starting position (is one-based)
                                531                 :                :  *  - string length
                                532                 :                :  *
                                533                 :                :  * If the starting position is zero or less, then return from the start of the string
                                534                 :                :  *  adjusting the length to be consistent with the "negative start" per SQL.
                                535                 :                :  * If the length is less than zero, return the remaining string.
                                536                 :                :  *
                                537                 :                :  * Added multibyte support.
                                538                 :                :  * - Tatsuo Ishii 1998-4-21
                                539                 :                :  * Changed behavior if starting position is less than one to conform to SQL behavior.
                                540                 :                :  * Formerly returned the entire string; now returns a portion.
                                541                 :                :  * - Thomas Lockhart 1998-12-10
                                542                 :                :  * Now uses faster TOAST-slicing interface
                                543                 :                :  * - John Gray 2002-02-22
                                544                 :                :  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
                                545                 :                :  * behaviors conflicting with SQL to meet SQL (if E = S + L < S throw
                                546                 :                :  * error; if E < 1, return '', not entire string). Fixed MB related bug when
                                547                 :                :  * S > LC and < LC + 4 sometimes garbage characters are returned.
                                548                 :                :  * - Joe Conway 2002-08-10
                                549                 :                :  */
                                550                 :                : Datum
 9406                           551                 :         331456 : text_substr(PG_FUNCTION_ARGS)
                                552                 :                : {
 8606 bruce@momjian.us          553                 :         331456 :     PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
                                554                 :                :                                     PG_GETARG_INT32(1),
                                555                 :                :                                     PG_GETARG_INT32(2),
                                556                 :                :                                     false));
                                557                 :                : }
                                558                 :                : 
                                559                 :                : /*
                                560                 :                :  * text_substr_no_len -
                                561                 :                :  *    Wrapper to avoid opr_sanity failure due to
                                562                 :                :  *    one function accepting a different number of args.
                                563                 :                :  */
                                564                 :                : Datum
                                565                 :             18 : text_substr_no_len(PG_FUNCTION_ARGS)
                                566                 :                : {
                                567                 :             18 :     PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
                                568                 :                :                                     PG_GETARG_INT32(1),
                                569                 :                :                                     -1, true));
                                570                 :                : }
                                571                 :                : 
                                572                 :                : /*
                                573                 :                :  * text_substring -
                                574                 :                :  *  Does the real work for text_substr() and text_substr_no_len()
                                575                 :                :  *
                                576                 :                :  *  This is broken out so it can be called directly by other string processing
                                577                 :                :  *  functions.  Note that the argument is passed as a Datum, to indicate that
                                578                 :                :  *  it may still be in compressed/toasted form.  We can avoid detoasting all
                                579                 :                :  *  of it in some cases.
                                580                 :                :  *
                                581                 :                :  *  The result is always a freshly palloc'd datum.
                                582                 :                :  */
                                583                 :                : static text *
                                584                 :         351530 : text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
                                585                 :                : {
                                586                 :         351530 :     int32       eml = pg_database_encoding_max_length();
 8593                           587                 :         351530 :     int32       S = start;      /* start position */
                                588                 :                :     int32       S1;             /* adjusted start position */
                                589                 :                :     int32       L1;             /* adjusted substring length */
                                590                 :                :     int32       E;              /* end position, exclusive */
                                591                 :                : 
                                592                 :                :     /*
                                593                 :                :      * SQL99 says S can be zero or negative (which we don't document), but we
                                594                 :                :      * still must fetch from the start of the string.
                                595                 :                :      * https://www.postgresql.org/message-id/170905442373.643.11536838320909376197%40wrigleys.postgresql.org
                                596                 :                :      */
 1896 tgl@sss.pgh.pa.us         597                 :         351530 :     S1 = Max(S, 1);
                                598                 :                : 
                                599                 :                :     /* life is easy if the encoding max length is 1 */
 8606 bruce@momjian.us          600         [ +  + ]:         351530 :     if (eml == 1)
                                601                 :                :     {
 3189 tgl@sss.pgh.pa.us         602         [ -  + ]:             11 :         if (length_not_specified)   /* special case - get length to end of
                                603                 :                :                                      * string */
 8606 bruce@momjian.us          604                 :UBC           0 :             L1 = -1;
 1896 tgl@sss.pgh.pa.us         605         [ -  + ]:CBC          11 :         else if (length < 0)
                                606                 :                :         {
                                607                 :                :             /* SQL99 says to throw an error for E < S, i.e., negative length */
 1896 tgl@sss.pgh.pa.us         608         [ #  # ]:UBC           0 :             ereport(ERROR,
                                609                 :                :                     (errcode(ERRCODE_SUBSTRING_ERROR),
                                610                 :                :                      errmsg("negative substring length not allowed")));
                                611                 :                :             L1 = -1;            /* silence stupider compilers */
                                612                 :                :         }
 1896 tgl@sss.pgh.pa.us         613         [ -  + ]:CBC          11 :         else if (pg_add_s32_overflow(S, length, &E))
                                614                 :                :         {
                                615                 :                :             /*
                                616                 :                :              * L could be large enough for S + L to overflow, in which case
                                617                 :                :              * the substring must run to end of string.
                                618                 :                :              */
 1896 tgl@sss.pgh.pa.us         619                 :UBC           0 :             L1 = -1;
                                620                 :                :         }
                                621                 :                :         else
                                622                 :                :         {
                                623                 :                :             /*
                                624                 :                :              * A zero or negative value for the end position can happen if the
                                625                 :                :              * start was negative or one. SQL99 says to return a zero-length
                                626                 :                :              * string.
                                627                 :                :              */
 8606 bruce@momjian.us          628         [ -  + ]:CBC          11 :             if (E < 1)
 6564 tgl@sss.pgh.pa.us         629                 :UBC           0 :                 return cstring_to_text("");
                                630                 :                : 
 8606 bruce@momjian.us          631                 :CBC          11 :             L1 = E - S1;
                                632                 :                :         }
                                633                 :                : 
                                634                 :                :         /*
                                635                 :                :          * If the start position is past the end of the string, SQL99 says to
                                636                 :                :          * return a zero-length string -- DatumGetTextPSlice() will do that
                                637                 :                :          * for us.  We need only convert S1 to zero-based starting position.
                                638                 :                :          */
                                639                 :             11 :         return DatumGetTextPSlice(str, S1 - 1, L1);
                                640                 :                :     }
                                641         [ +  - ]:         351519 :     else if (eml > 1)
                                642                 :                :     {
                                643                 :                :         /*
                                644                 :                :          * When encoding max length is > 1, we can't get LC without
                                645                 :                :          * detoasting, so we'll grab a conservatively large slice now and go
                                646                 :                :          * back later to do the right thing
                                647                 :                :          */
                                648                 :                :         int32       slice_start;
                                649                 :                :         int32       slice_size;
                                650                 :                :         int32       slice_strlen;
                                651                 :                :         int32       slice_len;
                                652                 :                :         text       *slice;
                                653                 :                :         int32       E1;
                                654                 :                :         int32       i;
                                655                 :                :         char       *p;
                                656                 :                :         char       *s;
                                657                 :                :         text       *ret;
                                658                 :                : 
                                659                 :                :         /*
                                660                 :                :          * We need to start at position zero because there is no way to know
                                661                 :                :          * in advance which byte offset corresponds to the supplied start
                                662                 :                :          * position.
                                663                 :                :          */
                                664                 :         351519 :         slice_start = 0;
                                665                 :                : 
 3189 tgl@sss.pgh.pa.us         666         [ +  + ]:         351519 :         if (length_not_specified)   /* special case - get length to end of
                                667                 :                :                                      * string */
   27 noah@leadboat.com         668                 :             38 :             E = slice_size = L1 = -1;
 1896 tgl@sss.pgh.pa.us         669         [ +  + ]:         351481 :         else if (length < 0)
                                670                 :                :         {
                                671                 :                :             /* SQL99 says to throw an error for E < S, i.e., negative length */
                                672         [ +  - ]:              6 :             ereport(ERROR,
                                673                 :                :                     (errcode(ERRCODE_SUBSTRING_ERROR),
                                674                 :                :                      errmsg("negative substring length not allowed")));
                                675                 :                :             E = slice_size = L1 = -1;   /* silence stupider compilers */
                                676                 :                :         }
                                677         [ +  + ]:         351475 :         else if (pg_add_s32_overflow(S, length, &E))
                                678                 :                :         {
                                679                 :                :             /*
                                680                 :                :              * L could be large enough for S + L to overflow, in which case
                                681                 :                :              * the substring must run to end of string.
                                682                 :                :              */
                                683                 :              3 :             slice_size = L1 = -1;
                                684                 :                :         }
                                685                 :                :         else
                                686                 :                :         {
                                687                 :                :             /*
                                688                 :                :              * Ending at position 1, exclusive, obviously yields an empty
                                689                 :                :              * string.  A zero or negative value can happen if the start was
                                690                 :                :              * negative or one. SQL99 says to return a zero-length string.
                                691                 :                :              */
   29 noah@leadboat.com         692         [ +  + ]:         351472 :             if (E <= 1)
 6564 tgl@sss.pgh.pa.us         693                 :              6 :                 return cstring_to_text("");
                                694                 :                : 
                                695                 :                :             /*
                                696                 :                :              * if E is past the end of the string, the tuple toaster will
                                697                 :                :              * truncate the length for us
                                698                 :                :              */
 8606 bruce@momjian.us          699                 :         351466 :             L1 = E - S1;
                                700                 :                : 
                                701                 :                :             /*
                                702                 :                :              * Total slice size in bytes can't be any longer than the
                                703                 :                :              * inclusive end position times the encoding max length.  If that
                                704                 :                :              * overflows, we can just use -1.
                                705                 :                :              */
   29 noah@leadboat.com         706         [ +  + ]:         351466 :             if (pg_mul_s32_overflow(E - 1, eml, &slice_size))
 1896 tgl@sss.pgh.pa.us         707                 :              3 :                 slice_size = -1;
                                708                 :                :         }
                                709                 :                : 
                                710                 :                :         /*
                                711                 :                :          * If we're working with an untoasted source, no need to do an extra
                                712                 :                :          * copying step.
                                713                 :                :          */
 6398                           714   [ +  +  +  + ]:         702948 :         if (VARATT_IS_COMPRESSED(DatumGetPointer(str)) ||
 6546                           715         [ +  + ]:         351441 :             VARATT_IS_EXTERNAL(DatumGetPointer(str)))
 7067                           716                 :            204 :             slice = DatumGetTextPSlice(str, slice_start, slice_size);
                                717                 :                :         else
                                718                 :         351303 :             slice = (text *) DatumGetPointer(str);
                                719                 :                : 
                                720                 :                :         /* see if we got back an empty string */
   67 tmunro@postgresql.or      721   [ -  +  -  -  :         351507 :         slice_len = VARSIZE_ANY_EXHDR(slice);
                                     -  -  -  -  +  
                                                 + ]
                                722         [ -  + ]:         351507 :         if (slice_len == 0)
                                723                 :                :         {
 7067 tgl@sss.pgh.pa.us         724         [ #  # ]:UBC           0 :             if (slice != (text *) DatumGetPointer(str))
                                725                 :              0 :                 pfree(slice);
 6564                           726                 :              0 :             return cstring_to_text("");
                                727                 :                :         }
                                728                 :                : 
                                729                 :                :         /*
                                730                 :                :          * Now we can get the actual length of the slice in MB characters,
                                731                 :                :          * stopping at the end of the substring.  Continuing beyond the
                                732                 :                :          * substring end could find an incomplete character attributable
                                733                 :                :          * solely to DatumGetTextPSlice() chopping in the middle of a
                                734                 :                :          * character, and it would be superfluous work at best.
                                735                 :                :          */
   29 noah@leadboat.com         736                 :CBC      351501 :         slice_strlen =
                                737                 :         351507 :             (slice_size == -1 ?
                                738   [ +  +  +  + ]:         351507 :              pg_mbstrlen_with_len(VARDATA_ANY(slice), slice_len) :
                                739         [ +  + ]:         351463 :              pg_mbcharcliplen_chars(VARDATA_ANY(slice), slice_len, E - 1));
                                740                 :                : 
                                741                 :                :         /*
                                742                 :                :          * Check that the start position wasn't > slice_strlen. If so, SQL99
                                743                 :                :          * says to return a zero-length string.
                                744                 :                :          */
 8606 bruce@momjian.us          745         [ +  + ]:         351501 :         if (S1 > slice_strlen)
                                746                 :                :         {
 7067 tgl@sss.pgh.pa.us         747         [ +  + ]:             20 :             if (slice != (text *) DatumGetPointer(str))
                                748                 :              3 :                 pfree(slice);
 6564                           749                 :             20 :             return cstring_to_text("");
                                750                 :                :         }
                                751                 :                : 
                                752                 :                :         /*
                                753                 :                :          * Adjust L1 and E1 now that we know the slice string length. Again
                                754                 :                :          * remember that S1 is one based, and slice_start is zero based.
                                755                 :                :          */
 8606 bruce@momjian.us          756         [ +  + ]:         351481 :         if (L1 > -1)
 8593                           757                 :         351451 :             E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
                                758                 :                :         else
 8606                           759                 :             30 :             E1 = slice_start + 1 + slice_strlen;
                                760                 :                : 
                                761                 :                :         /*
                                762                 :                :          * Find the start position in the slice; remember S1 is not zero based
                                763                 :                :          */
 6749 tgl@sss.pgh.pa.us         764         [ +  + ]:         351481 :         p = VARDATA_ANY(slice);
 8606 bruce@momjian.us          765         [ +  + ]:        3380028 :         for (i = 0; i < S1 - 1; i++)
   67 tmunro@postgresql.or      766                 :        3028547 :             p += pg_mblen_unbounded(p);
                                767                 :                : 
                                768                 :                :         /* hang onto a pointer to our start position */
 8606 bruce@momjian.us          769                 :         351481 :         s = p;
                                770                 :                : 
                                771                 :                :         /*
                                772                 :                :          * Count the actual bytes used by the substring of the requested
                                773                 :                :          * length.
                                774                 :                :          */
                                775         [ +  + ]:        4985127 :         for (i = S1; i < E1; i++)
   67 tmunro@postgresql.or      776                 :        4633646 :             p += pg_mblen_unbounded(p);
                                777                 :                : 
 8606 bruce@momjian.us          778                 :         351481 :         ret = (text *) palloc(VARHDRSZ + (p - s));
 6956 tgl@sss.pgh.pa.us         779                 :         351481 :         SET_VARSIZE(ret, VARHDRSZ + (p - s));
 8606 bruce@momjian.us          780                 :         351481 :         memcpy(VARDATA(ret), s, (p - s));
                                781                 :                : 
 7067 tgl@sss.pgh.pa.us         782         [ +  + ]:         351481 :         if (slice != (text *) DatumGetPointer(str))
                                783                 :            198 :             pfree(slice);
                                784                 :                : 
 8606 bruce@momjian.us          785                 :         351481 :         return ret;
                                786                 :                :     }
                                787                 :                :     else
 8267 tgl@sss.pgh.pa.us         788         [ #  # ]:UBC           0 :         elog(ERROR, "invalid backend encoding: encoding max length < 1");
                                789                 :                : 
                                790                 :                :     /* not reached: suppress compiler warning */
                                791                 :                :     return NULL;
                                792                 :                : }
                                793                 :                : 
                                794                 :                : /*
                                795                 :                :  * pg_mbcharcliplen_chars -
                                796                 :                :  *  Mirror pg_mbcharcliplen(), except return value unit is chars, not bytes.
                                797                 :                :  *
                                798                 :                :  *  This mirrors all the dubious historical behavior, so it's static to
                                799                 :                :  *  discourage proliferation.  The assertions are specific to the one caller.
                                800                 :                :  */
                                801                 :                : static int
   29 noah@leadboat.com         802                 :CBC      351463 : pg_mbcharcliplen_chars(const char *mbstr, int len, int limit)
                                803                 :                : {
                                804                 :         351463 :     int         nch = 0;
                                805                 :                :     int         l;
                                806                 :                : 
                                807         [ -  + ]:         351463 :     Assert(len > 0);
                                808         [ -  + ]:         351463 :     Assert(limit > 0);
                                809         [ -  + ]:         351463 :     Assert(pg_database_encoding_max_length() > 1);
                                810                 :                : 
                                811   [ +  +  +  + ]:        6486402 :     while (len > 0 && *mbstr)
                                812                 :                :     {
                                813                 :        6486087 :         l = pg_mblen_with_len(mbstr, len);
                                814                 :        6486081 :         nch++;
                                815         [ +  + ]:        6486081 :         if (nch == limit)
                                816                 :         351142 :             break;
                                817                 :        6134939 :         len -= l;
                                818                 :        6134939 :         mbstr += l;
                                819                 :                :     }
                                820                 :         351457 :     return nch;
                                821                 :                : }
                                822                 :                : 
                                823                 :                : /*
                                824                 :                :  * textoverlay
                                825                 :                :  *  Replace specified substring of first string with second
                                826                 :                :  *
                                827                 :                :  * The SQL standard defines OVERLAY() in terms of substring and concatenation.
                                828                 :                :  * This code is a direct implementation of what the standard says.
                                829                 :                :  */
                                830                 :                : Datum
 5893 tgl@sss.pgh.pa.us         831                 :             14 : textoverlay(PG_FUNCTION_ARGS)
                                832                 :                : {
                                833                 :             14 :     text       *t1 = PG_GETARG_TEXT_PP(0);
                                834                 :             14 :     text       *t2 = PG_GETARG_TEXT_PP(1);
 3189                           835                 :             14 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
                                836                 :             14 :     int         sl = PG_GETARG_INT32(3);    /* substring length */
                                837                 :                : 
 5893                           838                 :             14 :     PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
                                839                 :                : }
                                840                 :                : 
                                841                 :                : Datum
                                842                 :              6 : textoverlay_no_len(PG_FUNCTION_ARGS)
                                843                 :                : {
                                844                 :              6 :     text       *t1 = PG_GETARG_TEXT_PP(0);
                                845                 :              6 :     text       *t2 = PG_GETARG_TEXT_PP(1);
 3189                           846                 :              6 :     int         sp = PG_GETARG_INT32(2);    /* substring start position */
                                847                 :                :     int         sl;
                                848                 :                : 
                                849                 :              6 :     sl = text_length(PointerGetDatum(t2));  /* defaults to length(t2) */
 5893                           850                 :              6 :     PG_RETURN_TEXT_P(text_overlay(t1, t2, sp, sl));
                                851                 :                : }
                                852                 :                : 
                                853                 :                : static text *
                                854                 :             20 : text_overlay(text *t1, text *t2, int sp, int sl)
                                855                 :                : {
                                856                 :                :     text       *result;
                                857                 :                :     text       *s1;
                                858                 :                :     text       *s2;
                                859                 :                :     int         sp_pl_sl;
                                860                 :                : 
                                861                 :                :     /*
                                862                 :                :      * Check for possible integer-overflow cases.  For negative sp, throw a
                                863                 :                :      * "substring length" error because that's what should be expected
                                864                 :                :      * according to the spec's definition of OVERLAY().
                                865                 :                :      */
                                866         [ -  + ]:             20 :     if (sp <= 0)
 5893 tgl@sss.pgh.pa.us         867         [ #  # ]:UBC           0 :         ereport(ERROR,
                                868                 :                :                 (errcode(ERRCODE_SUBSTRING_ERROR),
                                869                 :                :                  errmsg("negative substring length not allowed")));
 3015 andres@anarazel.de        870         [ -  + ]:CBC          20 :     if (pg_add_s32_overflow(sp, sl, &sp_pl_sl))
 5893 tgl@sss.pgh.pa.us         871         [ #  # ]:UBC           0 :         ereport(ERROR,
                                872                 :                :                 (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
                                873                 :                :                  errmsg("integer out of range")));
                                874                 :                : 
 5861 bruce@momjian.us          875                 :CBC          20 :     s1 = text_substring(PointerGetDatum(t1), 1, sp - 1, false);
 5893 tgl@sss.pgh.pa.us         876                 :             20 :     s2 = text_substring(PointerGetDatum(t1), sp_pl_sl, -1, true);
                                877                 :             20 :     result = text_catenate(s1, t2);
                                878                 :             20 :     result = text_catenate(result, s2);
                                879                 :                : 
                                880                 :             20 :     return result;
                                881                 :                : }
                                882                 :                : 
                                883                 :                : /*
                                884                 :                :  * textpos -
                                885                 :                :  *    Return the position of the specified substring.
                                886                 :                :  *    Implements the SQL POSITION() function.
                                887                 :                :  *    Ref: A Guide To The SQL Standard, Date & Darwen, 1997
                                888                 :                :  * - thomas 1997-07-27
                                889                 :                :  */
                                890                 :                : Datum
 9383                           891                 :             68 : textpos(PG_FUNCTION_ARGS)
                                892                 :                : {
 6749                           893                 :             68 :     text       *str = PG_GETARG_TEXT_PP(0);
                                894                 :             68 :     text       *search_str = PG_GETARG_TEXT_PP(1);
                                895                 :                : 
 2550 peter@eisentraut.org      896                 :             68 :     PG_RETURN_INT32((int32) text_position(str, search_str, PG_GET_COLLATION()));
                                897                 :                : }
                                898                 :                : 
                                899                 :                : /*
                                900                 :                :  * text_position -
                                901                 :                :  *  Does the real work for textpos()
                                902                 :                :  *
                                903                 :                :  * Inputs:
                                904                 :                :  *      t1 - string to be searched (the haystack)
                                905                 :                :  *      t2 - pattern to match within t1 (the needle)
                                906                 :                :  *      collid - collation to search under; checked by check_collation_set()
                                907                 :                :  * Result:
                                908                 :                :  *      1-based character index of the first match, 1 for an empty t2,
                                909                 :                :  *      or 0 if no match.
                                910                 :                :  *
                                911                 :                :  *  Broken out so other string processing functions can call it directly.
                                912                 :                :  */
                                913                 :                : static int
                                914                 :             68 : text_position(text *t1, text *t2, Oid collid)
                                915                 :                : {
                                916                 :                :     TextPositionState state;
                                917                 :                :     int         result;
                                918                 :                : 
  387                           919                 :             68 :     check_collation_set(collid);
                                920                 :                : 
                                921                 :                :     /* Empty needle always matches at position 1 */
 2330 tgl@sss.pgh.pa.us         922   [ +  +  -  -  :             68 :     if (VARSIZE_ANY_EXHDR(t2) < 1)
                                     -  -  -  -  -  
                                           +  +  + ]
                                923                 :              6 :         return 1;
                                924                 :                : 
                                925                 :                :     /* A shorter haystack can't match, provided the collation is deterministic */
  387 peter@eisentraut.org      926   [ +  +  -  -  :             62 :     if (VARSIZE_ANY_EXHDR(t1) < VARSIZE_ANY_EXHDR(t2) &&
                                     -  -  -  -  +  
                                     +  -  +  -  -  
                                     -  -  -  -  -  
                                           +  +  + ]
                                927         [ +  - ]:             11 :         pg_newlocale_from_collation(collid)->deterministic)
 2606 heikki.linnakangas@i      928                 :             11 :         return 0;
                                929                 :                : 
 2550 peter@eisentraut.org      930                 :             51 :     text_position_setup(t1, t2, collid, &state);
                                931                 :                :     /* only the match start is reported, so greedy (longest) matching is unneeded */
  387                           932                 :             51 :     state.greedy = false;
                                933                 :                : 
 2606 heikki.linnakangas@i      934         [ +  + ]:             51 :     if (!text_position_next(&state))
                                935                 :             12 :         result = 0;
                                936                 :                :     else
                                937                 :             39 :         result = text_position_get_match_pos(&state);
 7099 tgl@sss.pgh.pa.us         938                 :             51 :     text_position_cleanup(&state);
                                939                 :             51 :     return result;
                                940                 :                : }
                                941                 :                : 
                                942                 :                : 
                                943                 :                : /*
                                944                 :                :  * text_position_setup, text_position_next, text_position_cleanup -
                                945                 :                :  *  Component steps of text_position()
                                946                 :                :  *
                                947                 :                :  * These are broken out so that a string can be efficiently searched for
                                948                 :                :  * multiple occurrences of the same pattern.  text_position_setup asserts a
                                949                 :                :  * nonempty needle and turns greedy mode on.  Each text_position_next call
                                950                 :                :  * advances to the next match, whose pointer or 1-based character position
                                951                 :                :  * text_position_get_match_ptr() / text_position_get_match_pos() return.
                                952                 :                :  *
                                953                 :                :  * The "state" variable is normally just a local variable in the caller.
                                954                 :                :  *
                                955                 :                :  * NOTE: text_position_next skips over the matched portion.  For example,
                                956                 :                :  * searching for "xx" in "xxx" returns only one match, not two.
                                957                 :                :  */
                                958                 :                : 
                                959                 :                : static void
 2550 peter@eisentraut.org      960                 :            991 : text_position_setup(text *t1, text *t2, Oid collid, TextPositionState *state)
                                961                 :                : {
 6749 tgl@sss.pgh.pa.us         962   [ -  +  -  -  :            991 :     int         len1 = VARSIZE_ANY_EXHDR(t1);
                                     -  -  -  -  +  
                                                 + ]
                                963   [ -  +  -  -  :            991 :     int         len2 = VARSIZE_ANY_EXHDR(t2);
                                     -  -  -  -  -  
                                                 + ]
                                964                 :                : 
 2550 peter@eisentraut.org      965                 :            991 :     check_collation_set(collid);
                                966                 :                : 
  387                           967                 :            991 :     state->locale = pg_newlocale_from_collation(collid);
                                968                 :                : 
                                969                 :                :     /*
                                970                 :                :      * Most callers need greedy mode, but some might want to unset this to
                                971                 :                :      * optimize.
                                972                 :                :      */
                                973                 :            991 :     state->greedy = true;
                                974                 :                : 
 2606 heikki.linnakangas@i      975         [ -  + ]:            991 :     Assert(len2 > 0);
                                976                 :                : 
                                977                 :                :     /*
                                978                 :                :      * Even with a multi-byte encoding, we perform the search using the raw
                                979                 :                :      * byte sequence, ignoring multibyte issues.  For UTF-8, that works fine,
                                980                 :                :      * because in UTF-8 the byte sequence of one character cannot contain
                                981                 :                :      * another character.  For other multi-byte encodings, we do the search
                                982                 :                :      * initially as a simple byte search, ignoring multibyte issues, but
                                983                 :                :      * verify afterwards that the match we found is at a character boundary,
                                984                 :                :      * and continue the search if it was a false match.
                                985                 :                :      */
 8079 tgl@sss.pgh.pa.us         986         [ +  + ]:            991 :     if (pg_database_encoding_max_length() == 1)
 2606 heikki.linnakangas@i      987                 :             54 :         state->is_multibyte_char_in_char = false;
                                988         [ +  - ]:            937 :     else if (GetDatabaseEncoding() == PG_UTF8)
                                989                 :            937 :         state->is_multibyte_char_in_char = false;
                                990                 :                :     else
 2606 heikki.linnakangas@i      991                 :UBC           0 :         state->is_multibyte_char_in_char = true;
                                992                 :                : 
 2606 heikki.linnakangas@i      993         [ +  + ]:CBC         991 :     state->str1 = VARDATA_ANY(t1);
                                994         [ -  + ]:            991 :     state->str2 = VARDATA_ANY(t2);
                                995                 :            991 :     state->len1 = len1;
                                996                 :            991 :     state->len2 = len2;
                                997                 :            991 :     state->last_match = NULL;
                                998                 :            991 :     state->refpoint = state->str1;
                                999                 :            991 :     state->refpos = 0;
                               1000                 :                : 
                               1001                 :                :     /*
                               1002                 :                :      * Prepare the skip table for Boyer-Moore-Horspool searching.  In these
                               1003                 :                :      * notes we use the terminology that the "haystack" is the string to be
                               1004                 :                :      * searched (t1) and the "needle" is the pattern being sought (t2).
                               1005                 :                :      *
                               1006                 :                :      * If the needle is bigger than the haystack then there is no point in
                               1007                 :                :      * wasting cycles initializing the table.  (An empty needle is already
                               1008                 :                :      * excluded by the Assert above.)  We also choose not to use B-M-H for
                               1009                 :                :      * needles of length 1, since the skip table can't save anything then.
                               1010                 :                :      *
                               1011                 :                :      * (With nondeterministic collations, the search is already
                               1012                 :                :      * multibyte-aware, so we don't need this.)
                               1013                 :                :      */
  387 peter@eisentraut.org     1014   [ +  -  +  +  :            991 :     if (len1 >= len2 && len2 > 1 && state->locale->deterministic)
                                              +  + ]
                               1015                 :                :     {
 6121 bruce@momjian.us         1016                 :            824 :         int         searchlength = len1 - len2;
                               1017                 :                :         int         skiptablemask;
                               1018                 :                :         int         last;
                               1019                 :                :         int         i;
 2606 heikki.linnakangas@i     1020                 :            824 :         const char *str2 = state->str2;
                               1021                 :                : 
                               1022                 :                :         /*
                               1023                 :                :          * First we must determine how much of the skip table to use.  The
                               1024                 :                :          * declaration of TextPositionState allows up to 256 elements, but for
                               1025                 :                :          * short search problems we don't really want to have to initialize so
                               1026                 :                :          * many elements --- it would take too long in comparison to the
                               1027                 :                :          * actual search time.  So we choose a useful skip table size based on
                               1028                 :                :          * the haystack length minus the needle length.  The closer the needle
                               1029                 :                :          * length is to the haystack length the less useful skipping becomes.
                               1030                 :                :          *
                               1031                 :                :          * Note: since we use bit-masking to select table elements, the skip
                               1032                 :                :          * table size MUST be a power of 2, and so the mask must be 2^N-1.
                               1033                 :                :          */
 6398 tgl@sss.pgh.pa.us        1034         [ +  + ]:            824 :         if (searchlength < 16)
                               1035                 :             57 :             skiptablemask = 3;
                               1036         [ +  + ]:            767 :         else if (searchlength < 64)
                               1037                 :             17 :             skiptablemask = 7;
                               1038         [ +  + ]:            750 :         else if (searchlength < 128)
                               1039                 :             13 :             skiptablemask = 15;
                               1040         [ +  + ]:            737 :         else if (searchlength < 512)
                               1041                 :            171 :             skiptablemask = 31;
                               1042         [ +  + ]:            566 :         else if (searchlength < 2048)
                               1043                 :            417 :             skiptablemask = 63;
                               1044         [ +  + ]:            149 :         else if (searchlength < 4096)
                               1045                 :            106 :             skiptablemask = 127;
                               1046                 :                :         else
                               1047                 :             43 :             skiptablemask = 255;
                               1048                 :            824 :         state->skiptablemask = skiptablemask;
                               1049                 :                : 
                               1050                 :                :         /*
                               1051                 :                :          * Initialize the skip table.  We set all elements to the needle
                               1052                 :                :          * length, since this is the correct skip distance for any character
                               1053                 :                :          * not found in the needle.
                               1054                 :                :          */
                               1055         [ +  + ]:          58132 :         for (i = 0; i <= skiptablemask; i++)
                               1056                 :          57308 :             state->skiptable[i] = len2;
                               1057                 :                : 
                               1058                 :                :         /*
                               1059                 :                :          * Now examine the needle.  For each character except the last one,
                               1060                 :                :          * set the corresponding table element to the appropriate skip
                               1061                 :                :          * distance.  When two characters share the same skip table entry,
                               1062                 :                :          * the one later in the needle must determine the skip distance;
                               1063                 :                :          * scanning left to right makes the later write win.
                               1064                 :                :          */
                               1065                 :            824 :         last = len2 - 1;
                               1066                 :                : 
 2606 heikki.linnakangas@i     1067         [ +  + ]:          10563 :         for (i = 0; i < last; i++)
                               1068                 :           9739 :             state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
                               1069                 :                :     }
 7099 tgl@sss.pgh.pa.us        1070                 :            991 : }
                               1071                 :                : 
                               1072                 :                : /*
                               1073                 :                :  * Advance to the next match, starting from the end of the previous match
                               1074                 :                :  * (or the beginning of the string, on first call).  Returns true if a match
                               1075                 :                :  * is found, recording it in state->last_match and state->last_match_len.
                               1076                 :                :  *
                               1077                 :                :  * Note that this refuses to match an empty-string needle.  Most callers
                               1078                 :                :  * will have handled that case specially and we'll never see it here.
                               1079                 :                :  */
                               1080                 :                : static bool
 2606 heikki.linnakangas@i     1081                 :           5048 : text_position_next(TextPositionState *state)
                               1082                 :                : {
 6398 tgl@sss.pgh.pa.us        1083                 :           5048 :     int         needle_len = state->len2;
                               1084                 :                :     char       *start_ptr;
                               1085                 :                :     char       *matchptr;
                               1086                 :                : 
                               1087         [ -  + ]:           5048 :     if (needle_len <= 0)
 2606 heikki.linnakangas@i     1088                 :UBC           0 :         return false;           /* result for empty pattern */
                               1089                 :                : 
                               1090                 :                :     /* Start from the point right after the previous match. */
 2606 heikki.linnakangas@i     1091         [ +  + ]:CBC        5048 :     if (state->last_match)
  387 peter@eisentraut.org     1092                 :           4051 :         start_ptr = state->last_match + state->last_match_len;
                               1093                 :                :     else
 2606 heikki.linnakangas@i     1094                 :            997 :         start_ptr = state->str1;
                               1095                 :                : 
                               1096                 :           5048 : retry:
                               1097                 :           5048 :     matchptr = text_position_next_internal(start_ptr, state);
                               1098                 :                : 
                               1099         [ +  + ]:           5048 :     if (!matchptr)
                               1100                 :            946 :         return false;
                               1101                 :                : 
                               1102                 :                :     /*
                               1103                 :                :      * Found a match for the byte sequence.  If this is a multibyte encoding,
                               1104                 :                :      * where one character's byte sequence can appear inside a longer
                               1105                 :                :      * multi-byte character, we need to verify that the match was at a
                               1106                 :                :      * character boundary, not in the middle of a multi-byte character.
                               1107                 :                :      */
  387 peter@eisentraut.org     1108   [ -  +  -  - ]:           4102 :     if (state->is_multibyte_char_in_char && state->locale->deterministic)
                               1109                 :                :     {
   67 tmunro@postgresql.or     1110                 :UBC           0 :         const char *haystack_end = state->str1 + state->len1;
                               1111                 :                : 
                               1112                 :                :         /* Walk one character at a time, until we reach the match. */
                               1113                 :                : 
                               1114                 :                :         /* The search should never move backwards. */
 2606 heikki.linnakangas@i     1115         [ #  # ]:              0 :         Assert(state->refpoint <= matchptr);
                               1116                 :                : 
                               1117         [ #  # ]:              0 :         while (state->refpoint < matchptr)
                               1118                 :                :         {
                               1119                 :                :             /* Step to the next character, counting it in refpos. */
   67 tmunro@postgresql.or     1120                 :              0 :             state->refpoint += pg_mblen_range(state->refpoint, haystack_end);
 2606 heikki.linnakangas@i     1121                 :              0 :             state->refpos++;
                               1122                 :                : 
                               1123                 :                :             /*
                               1124                 :                :              * If we stepped over the match's start position, then it was a
                               1125                 :                :              * false positive, where the byte sequence appeared in the middle
                               1126                 :                :              * of a multi-byte character.  Skip it, and continue the search at
                               1127                 :                :              * the next character boundary.
                               1128                 :                :              */
                               1129         [ #  # ]:              0 :             if (state->refpoint > matchptr)
                               1130                 :                :             {
                               1131                 :              0 :                 start_ptr = state->refpoint;
                               1132                 :              0 :                 goto retry;
                               1133                 :                :             }
                               1134                 :                :         }
                               1135                 :                :     }
                               1136                 :                : 
 2606 heikki.linnakangas@i     1137                 :CBC        4102 :     state->last_match = matchptr;
  387 peter@eisentraut.org     1138                 :           4102 :     state->last_match_len = state->last_match_len_tmp;
 2606 heikki.linnakangas@i     1139                 :           4102 :     return true;
                               1140                 :                : }
                               1141                 :                : 
                               1142                 :                : /*
                               1143                 :                :  * Subroutine of text_position_next().  This searches for the raw byte
                               1144                 :                :  * sequence, ignoring any multi-byte encoding issues.  Returns the first
                               1145                 :                :  * match starting at 'start_ptr', or NULL if no match is found.
                               1146                 :                :  */
                               1147                 :                : static char *
                               1148                 :           5048 : text_position_next_internal(char *start_ptr, TextPositionState *state)
                               1149                 :                : {
                               1150                 :           5048 :     int         haystack_len = state->len1;
                               1151                 :           5048 :     int         needle_len = state->len2;
                               1152                 :           5048 :     int         skiptablemask = state->skiptablemask;
                               1153                 :           5048 :     const char *haystack = state->str1;
                               1154                 :           5048 :     const char *needle = state->str2;
                               1155                 :           5048 :     const char *haystack_end = &haystack[haystack_len];
                               1156                 :                :     const char *hptr;
                               1157                 :                : 
                               1158   [ +  -  -  + ]:           5048 :     Assert(start_ptr >= haystack && start_ptr <= haystack_end);
  100 tgl@sss.pgh.pa.us        1159         [ -  + ]:           5048 :     Assert(needle_len > 0);
                               1160                 :                : 
  387 peter@eisentraut.org     1161                 :           5048 :     state->last_match_len_tmp = needle_len;
                               1162                 :                : 
                               1163         [ +  + ]:           5048 :     if (!state->locale->deterministic)
                               1164                 :                :     {
                               1165                 :                :         /*
                               1166                 :                :          * With a nondeterministic collation, we have to use an unoptimized
                               1167                 :                :          * route.  We walk through the haystack and see if at each position
                               1168                 :                :          * there is a substring of the remaining string that is equal to the
                               1169                 :                :          * needle under the given collation.
                               1170                 :                :          *
                               1171                 :                :          * Note, the found substring could have a different length than the
                               1172                 :                :          * needle.  Callers that want to skip over the found string need to
                               1173                 :                :          * read the length of the found substring from last_match_len rather
                               1174                 :                :          * than just using the length of their needle.
                               1175                 :                :          *
                               1176                 :                :          * Most callers will require "greedy" semantics, meaning that we need
                               1177                 :                :          * to find the longest such substring, not the shortest.  For callers
                               1178                 :                :          * that don't need greedy semantics, we can finish on the first match.
                               1179                 :                :          *
                               1180                 :                :          * This loop depends on the assumption that the needle is nonempty and
                               1181                 :                :          * any matching substring must also be nonempty.  (Even if the
                               1182                 :                :          * collation would accept an empty match, returning one would send
                               1183                 :                :          * callers that search for successive matches into an infinite loop.)
                               1184                 :                :          */
                               1185                 :            126 :         const char *result_hptr = NULL;
                               1186                 :                : 
                               1187                 :            126 :         hptr = start_ptr;
                               1188         [ +  + ]:            339 :         while (hptr < haystack_end)
                               1189                 :                :         {
                               1190                 :                :             const char *test_end;
                               1191                 :                : 
                               1192                 :                :             /*
                               1193                 :                :              * First check the common case that there is a match in the
                               1194                 :                :              * haystack of exactly the length of the needle.
                               1195                 :                :              */
                               1196         [ +  + ]:            282 :             if (!state->greedy &&
                               1197   [ +  -  +  + ]:             54 :                 haystack_end - hptr >= needle_len &&
                               1198                 :             27 :                 pg_strncoll(hptr, needle_len, needle, needle_len, state->locale) == 0)
                               1199                 :              6 :                 return (char *) hptr;
                               1200                 :                : 
                               1201                 :                :             /*
                               1202                 :                :              * Else check if any of the non-empty substrings starting at hptr
                               1203                 :                :              * compare equal to the needle.
                               1204                 :                :              */
  100 tgl@sss.pgh.pa.us        1205                 :            276 :             test_end = hptr;
                               1206                 :                :             do
                               1207                 :                :             {
   67 tmunro@postgresql.or     1208                 :           1077 :                 test_end += pg_mblen_range(test_end, haystack_end);
  387 peter@eisentraut.org     1209         [ +  + ]:           1077 :                 if (pg_strncoll(hptr, (test_end - hptr), needle, needle_len, state->locale) == 0)
                               1210                 :                :                 {
                               1211                 :             69 :                     state->last_match_len_tmp = (test_end - hptr);
                               1212                 :             69 :                     result_hptr = hptr;
                               1213         [ -  + ]:             69 :                     if (!state->greedy)
  387 peter@eisentraut.org     1214                 :UBC           0 :                         break;
                               1215                 :                :                 }
  100 tgl@sss.pgh.pa.us        1216         [ +  + ]:CBC        1077 :             } while (test_end < haystack_end);
                               1217                 :                : 
  387 peter@eisentraut.org     1218         [ +  + ]:            276 :             if (result_hptr)
                               1219                 :             63 :                 break;
                               1220                 :                : 
   67 tmunro@postgresql.or     1221                 :            213 :             hptr += pg_mblen_range(hptr, haystack_end);
                               1222                 :                :         }
                               1223                 :                : 
  387 peter@eisentraut.org     1224                 :            120 :         return (char *) result_hptr;
                               1225                 :                :     }
                               1226         [ +  + ]:           4922 :     else if (needle_len == 1)
                               1227                 :                :     {
                               1228                 :                :         /* No point in using B-M-H for a one-character needle */
 2606 heikki.linnakangas@i     1229                 :            380 :         char        nchar = *needle;
                               1230                 :                : 
                               1231                 :            380 :         hptr = start_ptr;
                               1232         [ +  + ]:           2937 :         while (hptr < haystack_end)
                               1233                 :                :         {
                               1234         [ +  + ]:           2854 :             if (*hptr == nchar)
                               1235                 :            297 :                 return (char *) hptr;
                               1236                 :           2557 :             hptr++;
                               1237                 :                :         }
                               1238                 :                :     }
                               1239                 :                :     else
                               1240                 :                :     {
                               1241                 :           4542 :         const char *needle_last = &needle[needle_len - 1];
                               1242                 :                : 
                               1243                 :                :         /* Start at startpos plus the length of the needle */
                               1244                 :           4542 :         hptr = start_ptr + needle_len - 1;
                               1245         [ +  + ]:         111663 :         while (hptr < haystack_end)
                               1246                 :                :         {
                               1247                 :                :             /* Match the needle scanning *backward* */
                               1248                 :                :             const char *nptr;
                               1249                 :                :             const char *p;
                               1250                 :                : 
                               1251                 :         110857 :             nptr = needle_last;
                               1252                 :         110857 :             p = hptr;
                               1253         [ +  + ]:         166918 :             while (*nptr == *p)
                               1254                 :                :             {
                               1255                 :                :                 /* Matched it all?  If so, return 1-based position */
                               1256         [ +  + ]:          59797 :                 if (nptr == needle)
                               1257                 :           3736 :                     return (char *) p;
                               1258                 :          56061 :                 nptr--, p--;
                               1259                 :                :             }
                               1260                 :                : 
                               1261                 :                :             /*
                               1262                 :                :              * No match, so use the haystack char at hptr to decide how far to
                               1263                 :                :              * advance.  If the needle had any occurrence of that character
                               1264                 :                :              * (or more precisely, one sharing the same skiptable entry)
                               1265                 :                :              * before its last character, then we advance far enough to align
                               1266                 :                :              * the last such needle character with that haystack position.
                               1267                 :                :              * Otherwise we can advance by the whole needle length.
                               1268                 :                :              */
                               1269                 :         107121 :             hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
                               1270                 :                :         }
                               1271                 :                :     }
                               1272                 :                : 
                               1273                 :            889 :     return 0;                   /* not found */
                               1274                 :                : }
                               1275                 :                : 
                               1276                 :                : /*
                               1277                 :                :  * Return a pointer to the current match.
                               1278                 :                :  *
                               1279                 :                :  * The returned pointer points into the original haystack string.
                               1280                 :                :  */
                               1281                 :                : static char *
                               1282                 :           4048 : text_position_get_match_ptr(TextPositionState *state)
                               1283                 :                : {
                               1284                 :           4048 :     return state->last_match;
                               1285                 :                : }
                               1286                 :                : 
                               1287                 :                : /*
                               1288                 :                :  * Return the offset of the current match.
                               1289                 :                :  *
                               1290                 :                :  * The offset is in characters, 1-based.
                               1291                 :                :  */
                               1292                 :                : static int
                               1293                 :             39 : text_position_get_match_pos(TextPositionState *state)
                               1294                 :                : {
                               1295                 :                :     /* Convert the byte position to char position. */
 1549 john.naylor@postgres     1296                 :             78 :     state->refpos += pg_mbstrlen_with_len(state->refpoint,
                               1297                 :             39 :                                           state->last_match - state->refpoint);
                               1298                 :             39 :     state->refpoint = state->last_match;
                               1299                 :             39 :     return state->refpos + 1;
                               1300                 :                : }
                               1301                 :                : 
                               1302                 :                : /*
                               1303                 :                :  * Reset search state to the initial state installed by text_position_setup.
                               1304                 :                :  *
                               1305                 :                :  * The next call to text_position_next will search from the beginning
                               1306                 :                :  * of the string.
                               1307                 :                :  */
                               1308                 :                : static void
 1948 tgl@sss.pgh.pa.us        1309                 :              6 : text_position_reset(TextPositionState *state)
                               1310                 :                : {
                               1311                 :              6 :     state->last_match = NULL;
                               1312                 :              6 :     state->refpoint = state->str1;
                               1313                 :              6 :     state->refpos = 0;
                               1314                 :              6 : }
                               1315                 :                : 
                               1316                 :                : static void
 6695 bruce@momjian.us         1317                 :            991 : text_position_cleanup(TextPositionState *state)
                               1318                 :                : {
                               1319                 :                :     /* no cleanup needed */
 7099 tgl@sss.pgh.pa.us        1320                 :            991 : }
                               1321                 :                : 
                               1322                 :                : 
                               1323                 :                : static void
 2550 peter@eisentraut.org     1324                 :        9118149 : check_collation_set(Oid collid)
                               1325                 :                : {
                               1326         [ +  + ]:        9118149 :     if (!OidIsValid(collid))
                               1327                 :                :     {
                               1328                 :                :         /*
                               1329                 :                :          * This typically means that the parser could not resolve a conflict
                               1330                 :                :          * of implicit collations, so report it that way.
                               1331                 :                :          */
                               1332         [ +  - ]:             15 :         ereport(ERROR,
                               1333                 :                :                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
                               1334                 :                :                  errmsg("could not determine which collation to use for string comparison"),
                               1335                 :                :                  errhint("Use the COLLATE clause to set the collation explicitly.")));
                               1336                 :                :     }
                               1337                 :        9118134 : }
                               1338                 :                : 
                               1339                 :                : /*
                               1340                 :                :  * varstr_cmp()
                               1341                 :                :  *
                               1342                 :                :  * Comparison function for text strings with given lengths, using the
                               1343                 :                :  * appropriate locale. Returns an integer less than, equal to, or greater than
                               1344                 :                :  * zero, indicating whether arg1 is less than, equal to, or greater than arg2.
                               1345                 :                :  *
                               1346                 :                :  * Note: many functions that depend on this are marked leakproof; therefore,
                               1347                 :                :  * avoid reporting the actual contents of the input when throwing errors.
                               1348                 :                :  * All errors herein should be things that can't happen except on corrupt
                               1349                 :                :  * data, anyway; otherwise we will have trouble with indexing strings that
                               1350                 :                :  * would cause them.
                               1351                 :                :  */
                               1352                 :                : int
 3057 peter_e@gmx.net          1353                 :        5285589 : varstr_cmp(const char *arg1, int len1, const char *arg2, int len2, Oid collid)
                               1354                 :                : {
                               1355                 :                :     int         result;
                               1356                 :                :     pg_locale_t mylocale;
                               1357                 :                : 
 2550 peter@eisentraut.org     1358                 :        5285589 :     check_collation_set(collid);
                               1359                 :                : 
  557 jdavis@postgresql.or     1360                 :        5285580 :     mylocale = pg_newlocale_from_collation(collid);
                               1361                 :                : 
                               1362         [ +  + ]:        5285580 :     if (mylocale->collate_is_c)
                               1363                 :                :     {
 5563 rhaas@postgresql.org     1364                 :        1980452 :         result = memcmp(arg1, arg2, Min(len1, len2));
 7506 tgl@sss.pgh.pa.us        1365   [ +  +  +  + ]:        1980452 :         if ((result == 0) && (len1 != len2))
                               1366         [ +  + ]:          71219 :             result = (len1 < len2) ? -1 : 1;
                               1367                 :                :     }
                               1368                 :                :     else
                               1369                 :                :     {
                               1370                 :                :         /*
                               1371                 :                :          * memcmp() can't tell us which of two unequal strings sorts first,
                               1372                 :                :          * but it's a cheap way to tell if they're equal.  Testing shows that
                               1373                 :                :          * memcmp() followed by strcoll() is only trivially slower than
                               1374                 :                :          * strcoll() by itself, so we don't lose much if this doesn't work out
                               1375                 :                :          * very often, and if it does - for example, because there are many
                               1376                 :                :          * equal strings in the input - then we win big by avoiding expensive
                               1377                 :                :          * collation-aware comparisons.
                               1378                 :                :          */
 4195 rhaas@postgresql.org     1379   [ +  +  +  + ]:        3305128 :         if (len1 == len2 && memcmp(arg1, arg2, len1) == 0)
                               1380                 :         837835 :             return 0;
                               1381                 :                : 
 1116 jdavis@postgresql.or     1382                 :        2467293 :         result = pg_strncoll(arg1, len1, arg2, len2, mylocale);
                               1383                 :                : 
                               1384                 :                :         /* Break tie if necessary. */
  549                          1385   [ +  +  -  + ]:        2467293 :         if (result == 0 && mylocale->deterministic)
                               1386                 :                :         {
 1116 jdavis@postgresql.or     1387                 :UBC           0 :             result = memcmp(arg1, arg2, Min(len1, len2));
                               1388   [ #  #  #  # ]:              0 :             if ((result == 0) && (len1 != len2))
                               1389         [ #  # ]:              0 :                 result = (len1 < len2) ? -1 : 1;
                               1390                 :                :         }
                               1391                 :                :     }
                               1392                 :                : 
10057 bruce@momjian.us         1393                 :CBC     4447745 :     return result;
                               1394                 :                : }
                               1395                 :                : 
                               1396                 :                : /* text_cmp()
                               1397                 :                :  * Internal comparison function for text strings.
                               1398                 :                :  * Returns -1, 0 or 1
                               1399                 :                :  */
                               1400                 :                : static int
 5514 peter_e@gmx.net          1401                 :        4293564 : text_cmp(text *arg1, text *arg2, Oid collid)
                               1402                 :                : {
                               1403                 :                :     char       *a1p,
                               1404                 :                :                *a2p;
                               1405                 :                :     int         len1,
                               1406                 :                :                 len2;
                               1407                 :                : 
 6918 tgl@sss.pgh.pa.us        1408         [ +  + ]:        4293564 :     a1p = VARDATA_ANY(arg1);
                               1409         [ +  + ]:        4293564 :     a2p = VARDATA_ANY(arg2);
                               1410                 :                : 
                               1411   [ -  +  -  -  :        4293564 :     len1 = VARSIZE_ANY_EXHDR(arg1);
                                     -  -  -  -  +  
                                                 + ]
                               1412   [ -  +  -  -  :        4293564 :     len2 = VARSIZE_ANY_EXHDR(arg2);
                                     -  -  -  -  +  
                                                 + ]
                               1413                 :                : 
 5514 peter_e@gmx.net          1414                 :        4293564 :     return varstr_cmp(a1p, len1, a2p, len2, collid);
                               1415                 :                : }
                               1416                 :                : 
                               1417                 :                : /*
                               1418                 :                :  * Comparison functions for text strings.
                               1419                 :                :  *
                               1420                 :                :  * Note: btree indexes need these routines not to leak memory; therefore,
                               1421                 :                :  * be careful to free working copies of toasted datums.  Most places don't
                               1422                 :                :  * need to be so careful.
                               1423                 :                :  */
                               1424                 :                : 
                               1425                 :                : Datum
 9082 tgl@sss.pgh.pa.us        1426                 :        3402656 : texteq(PG_FUNCTION_ARGS)
                               1427                 :                : {
 2550 peter@eisentraut.org     1428                 :        3402656 :     Oid         collid = PG_GET_COLLATION();
 1403 tgl@sss.pgh.pa.us        1429                 :        3402656 :     pg_locale_t mylocale = 0;
                               1430                 :                :     bool        result;
                               1431                 :                : 
 2550 peter@eisentraut.org     1432                 :        3402656 :     check_collation_set(collid);
                               1433                 :                : 
  587 jdavis@postgresql.or     1434                 :        3402656 :     mylocale = pg_newlocale_from_collation(collid);
                               1435                 :                : 
  549                          1436         [ +  + ]:        3402656 :     if (mylocale->deterministic)
                               1437                 :                :     {
 2550 peter@eisentraut.org     1438                 :        3400476 :         Datum       arg1 = PG_GETARG_DATUM(0);
                               1439                 :        3400476 :         Datum       arg2 = PG_GETARG_DATUM(1);
                               1440                 :                :         Size        len1,
                               1441                 :                :                     len2;
                               1442                 :                : 
                               1443                 :                :         /*
                               1444                 :                :          * Since we only care about equality or not-equality, we can avoid all
                               1445                 :                :          * the expense of strcoll() here, and just do bitwise comparison.  In
                               1446                 :                :          * fact, we don't even have to do a bitwise comparison if we can show
                               1447                 :                :          * the lengths of the strings are unequal; which might save us from
                               1448                 :                :          * having to detoast one or both values.
                               1449                 :                :          */
                               1450                 :        3400476 :         len1 = toast_raw_datum_size(arg1);
                               1451                 :        3400476 :         len2 = toast_raw_datum_size(arg2);
                               1452         [ +  + ]:        3400476 :         if (len1 != len2)
                               1453                 :        1647746 :             result = false;
                               1454                 :                :         else
                               1455                 :                :         {
                               1456                 :        1752730 :             text       *targ1 = DatumGetTextPP(arg1);
                               1457                 :        1752730 :             text       *targ2 = DatumGetTextPP(arg2);
                               1458                 :                : 
                               1459   [ +  +  +  + ]:        1752730 :             result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
                               1460                 :                :                              len1 - VARHDRSZ) == 0);
                               1461                 :                : 
                               1462         [ +  + ]:        1752730 :             PG_FREE_IF_COPY(targ1, 0);
                               1463         [ -  + ]:        1752730 :             PG_FREE_IF_COPY(targ2, 1);
                               1464                 :                :         }
                               1465                 :                :     }
                               1466                 :                :     else
                               1467                 :                :     {
                               1468                 :           2180 :         text       *arg1 = PG_GETARG_TEXT_PP(0);
                               1469                 :           2180 :         text       *arg2 = PG_GETARG_TEXT_PP(1);
                               1470                 :                : 
                               1471                 :           2180 :         result = (text_cmp(arg1, arg2, collid) == 0);
                               1472                 :                : 
                               1473         [ -  + ]:           2180 :         PG_FREE_IF_COPY(arg1, 0);
                               1474         [ -  + ]:           2180 :         PG_FREE_IF_COPY(arg2, 1);
                               1475                 :                :     }
                               1476                 :                : 
 9082 tgl@sss.pgh.pa.us        1477                 :        3402656 :     PG_RETURN_BOOL(result);
                               1478                 :                : }
                               1479                 :                : 
                               1480                 :                : Datum
                               1481                 :         204312 : textne(PG_FUNCTION_ARGS)
                               1482                 :                : {
 2550 peter@eisentraut.org     1483                 :         204312 :     Oid         collid = PG_GET_COLLATION();
                               1484                 :                :     pg_locale_t mylocale;
                               1485                 :                :     bool        result;
                               1486                 :                : 
                               1487                 :         204312 :     check_collation_set(collid);
                               1488                 :                : 
  587 jdavis@postgresql.or     1489                 :         204312 :     mylocale = pg_newlocale_from_collation(collid);
                               1490                 :                : 
  549                          1491         [ +  + ]:         204312 :     if (mylocale->deterministic)
                               1492                 :                :     {
 2550 peter@eisentraut.org     1493                 :         204300 :         Datum       arg1 = PG_GETARG_DATUM(0);
                               1494                 :         204300 :         Datum       arg2 = PG_GETARG_DATUM(1);
                               1495                 :                :         Size        len1,
                               1496                 :                :                     len2;
                               1497                 :                : 
                               1498                 :                :         /* See comment in texteq() */
                               1499                 :         204300 :         len1 = toast_raw_datum_size(arg1);
                               1500                 :         204300 :         len2 = toast_raw_datum_size(arg2);
                               1501         [ +  + ]:         204300 :         if (len1 != len2)
                               1502                 :          11189 :             result = true;
                               1503                 :                :         else
                               1504                 :                :         {
                               1505                 :         193111 :             text       *targ1 = DatumGetTextPP(arg1);
                               1506                 :         193111 :             text       *targ2 = DatumGetTextPP(arg2);
                               1507                 :                : 
                               1508   [ +  +  +  + ]:         193111 :             result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
                               1509                 :                :                              len1 - VARHDRSZ) != 0);
                               1510                 :                : 
                               1511         [ -  + ]:         193111 :             PG_FREE_IF_COPY(targ1, 0);
                               1512         [ -  + ]:         193111 :             PG_FREE_IF_COPY(targ2, 1);
                               1513                 :                :         }
                               1514                 :                :     }
                               1515                 :                :     else
                               1516                 :                :     {
                               1517                 :             12 :         text       *arg1 = PG_GETARG_TEXT_PP(0);
                               1518                 :             12 :         text       *arg2 = PG_GETARG_TEXT_PP(1);
                               1519                 :                : 
                               1520                 :             12 :         result = (text_cmp(arg1, arg2, collid) != 0);
                               1521                 :                : 
                               1522         [ -  + ]:             12 :         PG_FREE_IF_COPY(arg1, 0);
                               1523         [ -  + ]:             12 :         PG_FREE_IF_COPY(arg2, 1);
                               1524                 :                :     }
                               1525                 :                : 
 9082 tgl@sss.pgh.pa.us        1526                 :         204312 :     PG_RETURN_BOOL(result);
                               1527                 :                : }
                               1528                 :                : 
                               1529                 :                : Datum
 9383                          1530                 :         204834 : text_lt(PG_FUNCTION_ARGS)
                               1531                 :                : {
 6918                          1532                 :         204834 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               1533                 :         204834 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               1534                 :                :     bool        result;
                               1535                 :                : 
 5514 peter_e@gmx.net          1536                 :         204834 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0);
                               1537                 :                : 
 9377 tgl@sss.pgh.pa.us        1538         [ +  + ]:         204825 :     PG_FREE_IF_COPY(arg1, 0);
                               1539         [ -  + ]:         204825 :     PG_FREE_IF_COPY(arg2, 1);
                               1540                 :                : 
                               1541                 :         204825 :     PG_RETURN_BOOL(result);
                               1542                 :                : }
                               1543                 :                : 
                               1544                 :                : Datum
 9383                          1545                 :         158888 : text_le(PG_FUNCTION_ARGS)
                               1546                 :                : {
 6918                          1547                 :         158888 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               1548                 :         158888 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               1549                 :                :     bool        result;
                               1550                 :                : 
 5514 peter_e@gmx.net          1551                 :         158888 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) <= 0);
                               1552                 :                : 
 9377 tgl@sss.pgh.pa.us        1553         [ +  + ]:         158888 :     PG_FREE_IF_COPY(arg1, 0);
                               1554         [ +  + ]:         158888 :     PG_FREE_IF_COPY(arg2, 1);
                               1555                 :                : 
                               1556                 :         158888 :     PG_RETURN_BOOL(result);
                               1557                 :                : }
                               1558                 :                : 
                               1559                 :                : Datum
 9383                          1560                 :         198054 : text_gt(PG_FUNCTION_ARGS)
                               1561                 :                : {
 6918                          1562                 :         198054 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               1563                 :         198054 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               1564                 :                :     bool        result;
                               1565                 :                : 
 5514 peter_e@gmx.net          1566                 :         198054 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0);
                               1567                 :                : 
 9377 tgl@sss.pgh.pa.us        1568         [ +  + ]:         198054 :     PG_FREE_IF_COPY(arg1, 0);
                               1569         [ -  + ]:         198054 :     PG_FREE_IF_COPY(arg2, 1);
                               1570                 :                : 
                               1571                 :         198054 :     PG_RETURN_BOOL(result);
                               1572                 :                : }
                               1573                 :                : 
                               1574                 :                : Datum
 9383                          1575                 :          88250 : text_ge(PG_FUNCTION_ARGS)
                               1576                 :                : {
 6918                          1577                 :          88250 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               1578                 :          88250 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               1579                 :                :     bool        result;
                               1580                 :                : 
 5514 peter_e@gmx.net          1581                 :          88250 :     result = (text_cmp(arg1, arg2, PG_GET_COLLATION()) >= 0);
                               1582                 :                : 
 9377 tgl@sss.pgh.pa.us        1583         [ +  + ]:          88250 :     PG_FREE_IF_COPY(arg1, 0);
                               1584         [ -  + ]:          88250 :     PG_FREE_IF_COPY(arg2, 1);
                               1585                 :                : 
                               1586                 :          88250 :     PG_RETURN_BOOL(result);
                               1587                 :                : }
                               1588                 :                : 
                               1589                 :                : Datum
 2903 teodor@sigaev.ru         1590                 :          18957 : text_starts_with(PG_FUNCTION_ARGS)
                               1591                 :                : {
                               1592                 :          18957 :     Datum       arg1 = PG_GETARG_DATUM(0);
                               1593                 :          18957 :     Datum       arg2 = PG_GETARG_DATUM(1);
 2550 peter@eisentraut.org     1594                 :          18957 :     Oid         collid = PG_GET_COLLATION();
                               1595                 :                :     pg_locale_t mylocale;
                               1596                 :                :     bool        result;
                               1597                 :                :     Size        len1,
                               1598                 :                :                 len2;
                               1599                 :                : 
                               1600                 :          18957 :     check_collation_set(collid);
                               1601                 :                : 
  587 jdavis@postgresql.or     1602                 :          18957 :     mylocale = pg_newlocale_from_collation(collid);
                               1603                 :                : 
  549                          1604         [ -  + ]:          18957 :     if (!mylocale->deterministic)
 2550 peter@eisentraut.org     1605         [ #  # ]:UBC           0 :         ereport(ERROR,
                               1606                 :                :                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                               1607                 :                :                  errmsg("nondeterministic collations are not supported for substring searches")));
                               1608                 :                : 
 2903 teodor@sigaev.ru         1609                 :CBC       18957 :     len1 = toast_raw_datum_size(arg1);
                               1610                 :          18957 :     len2 = toast_raw_datum_size(arg2);
                               1611         [ -  + ]:          18957 :     if (len2 > len1)
 2903 teodor@sigaev.ru         1612                 :UBC           0 :         result = false;
                               1613                 :                :     else
                               1614                 :                :     {
 2539 sfrost@snowman.net       1615                 :CBC       18957 :         text       *targ1 = text_substring(arg1, 1, len2, false);
 2903 teodor@sigaev.ru         1616                 :          18957 :         text       *targ2 = DatumGetTextPP(arg2);
                               1617                 :                : 
                               1618   [ -  +  -  + ]:          18957 :         result = (memcmp(VARDATA_ANY(targ1), VARDATA_ANY(targ2),
 2903 teodor@sigaev.ru         1619   [ -  +  -  -  :ECB     (18957) :                          VARSIZE_ANY_EXHDR(targ2)) == 0);
                                     -  -  -  -  -  
                                                 + ]
                               1620                 :                : 
 2903 teodor@sigaev.ru         1621         [ +  - ]:CBC       18957 :         PG_FREE_IF_COPY(targ1, 0);
                               1622         [ -  + ]:          18957 :         PG_FREE_IF_COPY(targ2, 1);
                               1623                 :                :     }
                               1624                 :                : 
                               1625                 :          18957 :     PG_RETURN_BOOL(result);
                               1626                 :                : }
                               1627                 :                : 
                               1628                 :                : Datum
 9082 tgl@sss.pgh.pa.us        1629                 :        3483528 : bttextcmp(PG_FUNCTION_ARGS)
                               1630                 :                : {
 6918                          1631                 :        3483528 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               1632                 :        3483528 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               1633                 :                :     int32       result;
                               1634                 :                : 
 5514 peter_e@gmx.net          1635                 :        3483528 :     result = text_cmp(arg1, arg2, PG_GET_COLLATION());
                               1636                 :                : 
 9082 tgl@sss.pgh.pa.us        1637         [ +  + ]:        3483528 :     PG_FREE_IF_COPY(arg1, 0);
                               1638         [ +  + ]:        3483528 :     PG_FREE_IF_COPY(arg2, 1);
                               1639                 :                : 
                               1640                 :        3483528 :     PG_RETURN_INT32(result);
                               1641                 :                : }
                               1642                 :                : 
                               1643                 :                : Datum
 4231 rhaas@postgresql.org     1644                 :          44946 : bttextsortsupport(PG_FUNCTION_ARGS)
                               1645                 :                : {
 3949 bruce@momjian.us         1646                 :          44946 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
                               1647                 :          44946 :     Oid         collid = ssup->ssup_collation;
                               1648                 :                :     MemoryContext oldcontext;
                               1649                 :                : 
 4231 rhaas@postgresql.org     1650                 :          44946 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
                               1651                 :                : 
                               1652                 :                :     /* Use generic string SortSupport */
 2643 tgl@sss.pgh.pa.us        1653                 :          44946 :     varstr_sortsupport(ssup, TEXTOID, collid);
                               1654                 :                : 
 4231 rhaas@postgresql.org     1655                 :          44940 :     MemoryContextSwitchTo(oldcontext);
                               1656                 :                : 
                               1657                 :          44940 :     PG_RETURN_VOID();
                               1658                 :                : }
                               1659                 :                : 
                               1660                 :                : /*
                               1661                 :                :  * Generic sortsupport interface for character type's operator classes.
                               1662                 :                :  * Includes locale support, and support for BpChar semantics (i.e. removing
                               1663                 :                :  * trailing spaces before comparison).
                               1664                 :                :  *
                               1665                 :                :  * Relies on the assumption that text, VarChar, and BpChar all have the
                               1666                 :                :  * same representation.
                               1667                 :                :  */
                               1668                 :                : void
 2621 tgl@sss.pgh.pa.us        1669                 :          72203 : varstr_sortsupport(SortSupport ssup, Oid typid, Oid collid)
                               1670                 :                : {
 3949 bruce@momjian.us         1671                 :          72203 :     bool        abbreviate = ssup->abbreviate;
                               1672                 :          72203 :     bool        collate_c = false;
                               1673                 :                :     VarStringSortSupport *sss;
                               1674                 :                :     pg_locale_t locale;
                               1675                 :                : 
 2550 peter@eisentraut.org     1676                 :          72203 :     check_collation_set(collid);
                               1677                 :                : 
  557 jdavis@postgresql.or     1678                 :          72197 :     locale = pg_newlocale_from_collation(collid);
                               1679                 :                : 
                               1680                 :                :     /*
                               1681                 :                :      * If possible, set ssup->comparator to a function which can be used to
                               1682                 :                :      * directly compare two datums.  If we can do this, we'll avoid the
                               1683                 :                :      * overhead of a trip through the fmgr layer for every comparison, which
                               1684                 :                :      * can be substantial.
                               1685                 :                :      *
                               1686                 :                :      * Most typically, we'll set the comparator to varlenafastcmp_locale,
                               1687                 :                :      * which uses strcoll() to perform comparisons.  We use that for the
                               1688                 :                :      * BpChar case too, but type NAME uses namefastcmp_locale. However, if
                               1689                 :                :      * LC_COLLATE = C, we can make things quite a bit faster with
                               1690                 :                :      * varstrfastcmp_c, bpcharfastcmp_c, or namefastcmp_c, all of which use
                               1691                 :                :      * memcmp() rather than strcoll().
                               1692                 :                :      */
                               1693         [ +  + ]:          72197 :     if (locale->collate_is_c)
                               1694                 :                :     {
 2621 tgl@sss.pgh.pa.us        1695         [ +  + ]:          48347 :         if (typid == BPCHAROID)
 3693 rhaas@postgresql.org     1696                 :            155 :             ssup->comparator = bpcharfastcmp_c;
 2621 tgl@sss.pgh.pa.us        1697         [ +  + ]:          48192 :         else if (typid == NAMEOID)
                               1698                 :                :         {
 2643                          1699                 :          26738 :             ssup->comparator = namefastcmp_c;
                               1700                 :                :             /* Not supporting abbreviation with type NAME, for now */
                               1701                 :          26738 :             abbreviate = false;
                               1702                 :                :         }
                               1703                 :                :         else
                               1704                 :          21454 :             ssup->comparator = varstrfastcmp_c;
                               1705                 :                : 
 4070 rhaas@postgresql.org     1706                 :          48347 :         collate_c = true;
                               1707                 :                :     }
                               1708                 :                :     else
                               1709                 :                :     {
                               1710                 :                :         /*
                               1711                 :                :          * We use varlenafastcmp_locale except for type NAME.
                               1712                 :                :          */
 2621 tgl@sss.pgh.pa.us        1713         [ -  + ]:          23850 :         if (typid == NAMEOID)
                               1714                 :                :         {
 2643 tgl@sss.pgh.pa.us        1715                 :UBC           0 :             ssup->comparator = namefastcmp_locale;
                               1716                 :                :             /* Not supporting abbreviation with type NAME, for now */
                               1717                 :              0 :             abbreviate = false;
                               1718                 :                :         }
                               1719                 :                :         else
 2643 tgl@sss.pgh.pa.us        1720                 :CBC       23850 :             ssup->comparator = varlenafastcmp_locale;
                               1721                 :                : 
                               1722                 :                :         /*
                               1723                 :                :          * Unfortunately, it seems that abbreviation for non-C collations is
                               1724                 :                :          * broken on many common platforms; see pg_strxfrm_enabled().
                               1725                 :                :          *
                               1726                 :                :          * Even apart from the risk of broken locales, it's possible that
                               1727                 :                :          * there are platforms where the use of abbreviated keys should be
                               1728                 :                :          * disabled at compile time.  For example, macOS's strxfrm()
                               1729                 :                :          * implementation is known to not effectively concentrate a
                               1730                 :                :          * significant amount of entropy from the original string in earlier
                               1731                 :                :          * transformed blobs.  It's possible that other supported platforms
                               1732                 :                :          * are similarly encumbered.  So, if we ever get past disabling this
                               1733                 :                :          * categorically, we may still want or need to disable it for
                               1734                 :                :          * particular platforms.
                               1735                 :                :          */
  572 jdavis@postgresql.or     1736         [ +  + ]:          23850 :         if (!pg_strxfrm_enabled(locale))
                               1737                 :          23452 :             abbreviate = false;
                               1738                 :                :     }
                               1739                 :                : 
                               1740                 :                :     /*
                               1741                 :                :      * If we're using abbreviated keys, or if we're using a locale-aware
                               1742                 :                :      * comparison, we need to initialize a VarStringSortSupport object. Both
                               1743                 :                :      * cases will make use of the temporary buffers we initialize here for
                               1744                 :                :      * scratch space (and to detect requirement for BpChar semantics from
                               1745                 :                :      * caller), and the abbreviation case requires additional state.
                               1746                 :                :      */
 4070 rhaas@postgresql.org     1747   [ +  +  +  + ]:          72197 :     if (abbreviate || !collate_c)
                               1748                 :                :     {
   95 michael@paquier.xyz      1749                 :GNC       35928 :         sss = palloc_object(VarStringSortSupport);
 3693 rhaas@postgresql.org     1750                 :CBC       35928 :         sss->buf1 = palloc(TEXTBUFLEN);
                               1751                 :          35928 :         sss->buflen1 = TEXTBUFLEN;
                               1752                 :          35928 :         sss->buf2 = palloc(TEXTBUFLEN);
                               1753                 :          35928 :         sss->buflen2 = TEXTBUFLEN;
                               1754                 :                :         /* Start with invalid values */
                               1755                 :          35928 :         sss->last_len1 = -1;
                               1756                 :          35928 :         sss->last_len2 = -1;
                               1757                 :                :         /* Initialize */
                               1758                 :          35928 :         sss->last_returned = 0;
  557 jdavis@postgresql.or     1759         [ +  + ]:          35928 :         if (collate_c)
                               1760                 :          12078 :             sss->locale = NULL;
                               1761                 :                :         else
                               1762                 :          23850 :             sss->locale = locale;
                               1763                 :                : 
                               1764                 :                :         /*
                               1765                 :                :          * To avoid somehow confusing a strxfrm() blob and an original string,
                               1766                 :                :          * constantly keep track of the variety of data that buf1 and buf2
                               1767                 :                :          * currently contain.
                               1768                 :                :          *
                               1769                 :                :          * Comparisons may be interleaved with conversion calls.  Frequently,
                               1770                 :                :          * conversions and comparisons are batched into two distinct phases,
                               1771                 :                :          * but the correctness of caching cannot hinge upon this.  For
                               1772                 :                :          * comparison caching, buffer state is only trusted if cache_blob is
                               1773                 :                :          * found set to false, whereas strxfrm() caching only trusts the state
                               1774                 :                :          * when cache_blob is found set to true.
                               1775                 :                :          *
                               1776                 :                :          * Arbitrarily initialize cache_blob to true.
                               1777                 :                :          */
 3693 rhaas@postgresql.org     1778                 :          35928 :         sss->cache_blob = true;
                               1779                 :          35928 :         sss->collate_c = collate_c;
 2621 tgl@sss.pgh.pa.us        1780                 :          35928 :         sss->typid = typid;
 3693 rhaas@postgresql.org     1781                 :          35928 :         ssup->ssup_extra = sss;
                               1782                 :                : 
                               1783                 :                :         /*
                               1784                 :                :          * If possible, plan to use the abbreviated keys optimization.  The
                               1785                 :                :          * core code may switch back to authoritative comparator should
                               1786                 :                :          * abbreviation be aborted.
                               1787                 :                :          */
 4070                          1788         [ +  + ]:          35928 :         if (abbreviate)
                               1789                 :                :         {
 3693                          1790                 :          12377 :             sss->prop_card = 0.20;
                               1791                 :          12377 :             initHyperLogLog(&sss->abbr_card, 10);
                               1792                 :          12377 :             initHyperLogLog(&sss->full_card, 10);
 4070                          1793                 :          12377 :             ssup->abbrev_full_comparator = ssup->comparator;
 1443 john.naylor@postgres     1794                 :          12377 :             ssup->comparator = ssup_datum_unsigned_cmp;
 3693 rhaas@postgresql.org     1795                 :          12377 :             ssup->abbrev_converter = varstr_abbrev_convert;
                               1796                 :          12377 :             ssup->abbrev_abort = varstr_abbrev_abort;
                               1797                 :                :         }
                               1798                 :                :     }
 4231                          1799                 :          72197 : }
                               1800                 :                : 
                               1801                 :                : /*
                               1802                 :                :  * sortsupport comparison func (for C locale case)
                               1803                 :                :  */
                               1804                 :                : static int
 3693                          1805                 :       24922029 : varstrfastcmp_c(Datum x, Datum y, SortSupport ssup)
                               1806                 :                : {
 3688 tgl@sss.pgh.pa.us        1807                 :       24922029 :     VarString  *arg1 = DatumGetVarStringPP(x);
                               1808                 :       24922029 :     VarString  *arg2 = DatumGetVarStringPP(y);
                               1809                 :                :     char       *a1p,
                               1810                 :                :                *a2p;
                               1811                 :                :     int         len1,
                               1812                 :                :                 len2,
                               1813                 :                :                 result;
                               1814                 :                : 
 4231 rhaas@postgresql.org     1815         [ +  + ]:       24922029 :     a1p = VARDATA_ANY(arg1);
                               1816         [ +  + ]:       24922029 :     a2p = VARDATA_ANY(arg2);
                               1817                 :                : 
                               1818   [ -  +  -  -  :       24922029 :     len1 = VARSIZE_ANY_EXHDR(arg1);
                                     -  -  -  -  +  
                                                 + ]
                               1819   [ -  +  -  -  :       24922029 :     len2 = VARSIZE_ANY_EXHDR(arg2);
                                     -  -  -  -  +  
                                                 + ]
                               1820                 :                : 
                               1821                 :       24922029 :     result = memcmp(a1p, a2p, Min(len1, len2));
                               1822   [ +  +  +  + ]:       24922029 :     if ((result == 0) && (len1 != len2))
                               1823         [ +  + ]:         770161 :         result = (len1 < len2) ? -1 : 1;
                               1824                 :                : 
                               1825                 :                :     /* We can't afford to leak memory here. */
                               1826         [ -  + ]:       24922029 :     if (PointerGetDatum(arg1) != x)
 4231 rhaas@postgresql.org     1827                 :LBC         (1) :         pfree(arg1);
 4231 rhaas@postgresql.org     1828         [ -  + ]:CBC    24922029 :     if (PointerGetDatum(arg2) != y)
 4231 rhaas@postgresql.org     1829                 :LBC         (1) :         pfree(arg2);
                               1830                 :                : 
 4231 rhaas@postgresql.org     1831                 :CBC    24922029 :     return result;
                               1832                 :                : }
                               1833                 :                : 
                               1834                 :                : /*
                               1835                 :                :  * sortsupport comparison func (for BpChar C locale case)
                               1836                 :                :  *
                               1837                 :                :  * BpChar outsources its sortsupport to this module.  Specialization for the
                               1838                 :                :  * varstr_sortsupport BpChar case, modeled on
                               1839                 :                :  * internal_bpchar_pattern_compare().
                               1840                 :                :  */
                               1841                 :                : static int
 3693                          1842                 :          31934 : bpcharfastcmp_c(Datum x, Datum y, SortSupport ssup)
                               1843                 :                : {
                               1844                 :          31934 :     BpChar     *arg1 = DatumGetBpCharPP(x);
                               1845                 :          31934 :     BpChar     *arg2 = DatumGetBpCharPP(y);
                               1846                 :                :     char       *a1p,
                               1847                 :                :                *a2p;
                               1848                 :                :     int         len1,
                               1849                 :                :                 len2,
                               1850                 :                :                 result;
                               1851                 :                : 
                               1852         [ +  + ]:          31934 :     a1p = VARDATA_ANY(arg1);
                               1853         [ +  + ]:          31934 :     a2p = VARDATA_ANY(arg2);
                               1854                 :                : 
                               1855   [ -  +  -  -  :          31934 :     len1 = bpchartruelen(a1p, VARSIZE_ANY_EXHDR(arg1));
                                     -  -  -  -  +  
                                                 + ]
                               1856   [ -  +  -  -  :          31934 :     len2 = bpchartruelen(a2p, VARSIZE_ANY_EXHDR(arg2));
                                     -  -  -  -  +  
                                                 + ]
                               1857                 :                : 
                               1858                 :          31934 :     result = memcmp(a1p, a2p, Min(len1, len2));
                               1859   [ +  +  +  + ]:          31934 :     if ((result == 0) && (len1 != len2))
                               1860         [ -  + ]:              2 :         result = (len1 < len2) ? -1 : 1;
                               1861                 :                : 
                               1862                 :                :     /* We can't afford to leak memory here. */
                               1863         [ -  + ]:          31934 :     if (PointerGetDatum(arg1) != x)
 3693 rhaas@postgresql.org     1864                 :UBC           0 :         pfree(arg1);
 3693 rhaas@postgresql.org     1865         [ -  + ]:CBC       31934 :     if (PointerGetDatum(arg2) != y)
 3693 rhaas@postgresql.org     1866                 :UBC           0 :         pfree(arg2);
                               1867                 :                : 
 3693 rhaas@postgresql.org     1868                 :CBC       31934 :     return result;
                               1869                 :                : }
                               1870                 :                : 
                               1871                 :                : /*
                               1872                 :                :  * sortsupport comparison func (for NAME C locale case)
                               1873                 :                :  */
                               1874                 :                : static int
 2643 tgl@sss.pgh.pa.us        1875                 :       24122837 : namefastcmp_c(Datum x, Datum y, SortSupport ssup)
                               1876                 :                : {
                               1877                 :       24122837 :     Name        arg1 = DatumGetName(x);
                               1878                 :       24122837 :     Name        arg2 = DatumGetName(y);
                               1879                 :                : 
                               1880                 :       24122837 :     return strncmp(NameStr(*arg1), NameStr(*arg2), NAMEDATALEN);
                               1881                 :                : }
                               1882                 :                : 
                               1883                 :                : /*
                               1884                 :                :  * sortsupport comparison func (for locale case with all varlena types)
                               1885                 :                :  */
                               1886                 :                : static int
                               1887                 :       15372356 : varlenafastcmp_locale(Datum x, Datum y, SortSupport ssup)
                               1888                 :                : {
 3688                          1889                 :       15372356 :     VarString  *arg1 = DatumGetVarStringPP(x);
                               1890                 :       15372356 :     VarString  *arg2 = DatumGetVarStringPP(y);
                               1891                 :                :     char       *a1p,
                               1892                 :                :                *a2p;
                               1893                 :                :     int         len1,
                               1894                 :                :                 len2,
                               1895                 :                :                 result;
                               1896                 :                : 
 4231 rhaas@postgresql.org     1897         [ +  + ]:       15372356 :     a1p = VARDATA_ANY(arg1);
                               1898         [ +  + ]:       15372356 :     a2p = VARDATA_ANY(arg2);
                               1899                 :                : 
                               1900   [ -  +  -  -  :       15372356 :     len1 = VARSIZE_ANY_EXHDR(arg1);
                                     -  -  -  -  +  
                                                 + ]
                               1901   [ -  +  -  -  :       15372356 :     len2 = VARSIZE_ANY_EXHDR(arg2);
                                     -  -  -  -  +  
                                                 + ]
                               1902                 :                : 
 2643 tgl@sss.pgh.pa.us        1903                 :       15372356 :     result = varstrfastcmp_locale(a1p, len1, a2p, len2, ssup);
                               1904                 :                : 
                               1905                 :                :     /* We can't afford to leak memory here. */
                               1906         [ -  + ]:       15372356 :     if (PointerGetDatum(arg1) != x)
 2643 tgl@sss.pgh.pa.us        1907                 :LBC         (2) :         pfree(arg1);
 2643 tgl@sss.pgh.pa.us        1908         [ -  + ]:CBC    15372356 :     if (PointerGetDatum(arg2) != y)
 2643 tgl@sss.pgh.pa.us        1909                 :LBC         (2) :         pfree(arg2);
                               1910                 :                : 
 2643 tgl@sss.pgh.pa.us        1911                 :CBC    15372356 :     return result;
                               1912                 :                : }
                               1913                 :                : 
                               1914                 :                : /*
                               1915                 :                :  * sortsupport comparison func (for locale case with NAME type)
                               1916                 :                :  */
                               1917                 :                : static int
 2643 tgl@sss.pgh.pa.us        1918                 :UBC           0 : namefastcmp_locale(Datum x, Datum y, SortSupport ssup)
                               1919                 :                : {
                               1920                 :              0 :     Name        arg1 = DatumGetName(x);
                               1921                 :              0 :     Name        arg2 = DatumGetName(y);
                               1922                 :                : 
                               1923                 :              0 :     return varstrfastcmp_locale(NameStr(*arg1), strlen(NameStr(*arg1)),
                               1924                 :              0 :                                 NameStr(*arg2), strlen(NameStr(*arg2)),
                               1925                 :                :                                 ssup);
                               1926                 :                : }
                               1927                 :                : 
                               1928                 :                : /*
                               1929                 :                :  * sortsupport comparison func for locale cases
                               1930                 :                :  */
                               1931                 :                : static int
 2643 tgl@sss.pgh.pa.us        1932                 :CBC    15372356 : varstrfastcmp_locale(char *a1p, int len1, char *a2p, int len2, SortSupport ssup)
                               1933                 :                : {
                               1934                 :       15372356 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
                               1935                 :                :     int         result;
                               1936                 :                :     bool        arg1_match;
                               1937                 :                : 
                               1938                 :                :     /* Fast pre-check for equality, as discussed in varstr_cmp() */
 4195 rhaas@postgresql.org     1939   [ +  +  +  + ]:       15372356 :     if (len1 == len2 && memcmp(a1p, a2p, len1) == 0)
                               1940                 :                :     {
                               1941                 :                :         /*
                               1942                 :                :          * No change in buf1 or buf2 contents, so avoid changing last_len1 or
                               1943                 :                :          * last_len2.  Existing contents of buffers might still be used by
                               1944                 :                :          * next call.
                               1945                 :                :          *
                               1946                 :                :          * It's fine to allow the comparison of BpChar padding bytes here,
                               1947                 :                :          * even though that implies that the memcmp() will usually be
                               1948                 :                :          * performed for BpChar callers (though multibyte characters could
                               1949                 :                :          * still prevent that from occurring).  The memcmp() is still very
                               1950                 :                :          * cheap, and BpChar's funny semantics have us remove trailing spaces
                               1951                 :                :          * (not limited to padding), so we need make no distinction between
                               1952                 :                :          * padding space characters and "real" space characters.
                               1953                 :                :          */
 2643 tgl@sss.pgh.pa.us        1954                 :        4823865 :         return 0;
                               1955                 :                :     }
                               1956                 :                : 
 2621                          1957         [ +  + ]:       10548491 :     if (sss->typid == BPCHAROID)
                               1958                 :                :     {
                               1959                 :                :         /* Get true number of bytes, ignoring trailing spaces */
 3693 rhaas@postgresql.org     1960                 :          17749 :         len1 = bpchartruelen(a1p, len1);
                               1961                 :          17749 :         len2 = bpchartruelen(a2p, len2);
                               1962                 :                :     }
                               1963                 :                : 
                               1964         [ +  + ]:       10548491 :     if (len1 >= sss->buflen1)
                               1965                 :                :     {
                               1966         [ +  - ]:              5 :         sss->buflen1 = Max(len1 + 1, Min(sss->buflen1 * 2, MaxAllocSize));
 1309 tgl@sss.pgh.pa.us        1967                 :              5 :         sss->buf1 = repalloc(sss->buf1, sss->buflen1);
                               1968                 :                :     }
 3693 rhaas@postgresql.org     1969         [ +  + ]:       10548491 :     if (len2 >= sss->buflen2)
                               1970                 :                :     {
                               1971         [ +  - ]:              3 :         sss->buflen2 = Max(len2 + 1, Min(sss->buflen2 * 2, MaxAllocSize));
 1309 tgl@sss.pgh.pa.us        1972                 :              3 :         sss->buf2 = repalloc(sss->buf2, sss->buflen2);
                               1973                 :                :     }
                               1974                 :                : 
                               1975                 :                :     /*
                               1976                 :                :      * We're likely to be asked to compare the same strings repeatedly, and
                               1977                 :                :      * memcmp() is so much cheaper than strcoll() that it pays to try to cache
                               1978                 :                :      * comparisons, even though in general there is no reason to think that
                               1979                 :                :      * that will work out (every string datum may be unique).  Caching does
                               1980                 :                :      * not slow things down measurably when it doesn't work out, and can speed
                               1981                 :                :      * things up by rather a lot when it does.  In part, this is because the
                               1982                 :                :      * memcmp() compares data from cachelines that are needed in L1 cache even
                               1983                 :                :      * when the last comparison's result cannot be reused.
                               1984                 :                :      */
 3810 rhaas@postgresql.org     1985                 :       10548491 :     arg1_match = true;
 3693                          1986   [ +  +  +  + ]:       10548491 :     if (len1 != sss->last_len1 || memcmp(sss->buf1, a1p, len1) != 0)
                               1987                 :                :     {
 3810                          1988                 :        9577917 :         arg1_match = false;
 3693                          1989                 :        9577917 :         memcpy(sss->buf1, a1p, len1);
                               1990                 :        9577917 :         sss->buf1[len1] = '\0';
                               1991                 :        9577917 :         sss->last_len1 = len1;
                               1992                 :                :     }
                               1993                 :                : 
                               1994                 :                :     /*
                               1995                 :                :      * If we're comparing the same two strings as last time, we can return the
                               1996                 :                :      * same answer without calling strcoll() again.  This is more likely than
                               1997                 :                :      * it seems (at least with moderate to low cardinality sets), because
                               1998                 :                :      * quicksort compares the same pivot against many values.
                               1999                 :                :      */
                               2000   [ +  +  +  + ]:       10548491 :     if (len2 != sss->last_len2 || memcmp(sss->buf2, a2p, len2) != 0)
                               2001                 :                :     {
                               2002                 :        1644030 :         memcpy(sss->buf2, a2p, len2);
                               2003                 :        1644030 :         sss->buf2[len2] = '\0';
                               2004                 :        1644030 :         sss->last_len2 = len2;
                               2005                 :                :     }
                               2006   [ +  +  +  - ]:        8904461 :     else if (arg1_match && !sss->cache_blob)
                               2007                 :                :     {
                               2008                 :                :         /* Use result cached following last actual strcoll() call */
 2643 tgl@sss.pgh.pa.us        2009                 :         804396 :         return sss->last_returned;
                               2010                 :                :     }
                               2011                 :                : 
 1116 jdavis@postgresql.or     2012                 :        9744095 :     result = pg_strcoll(sss->buf1, sss->buf2, sss->locale);
                               2013                 :                : 
                               2014                 :                :     /* Break tie if necessary. */
  549                          2015   [ +  +  -  + ]:        9744095 :     if (result == 0 && sss->locale->deterministic)
 3693 rhaas@postgresql.org     2016                 :UBC           0 :         result = strcmp(sss->buf1, sss->buf2);
                               2017                 :                : 
                               2018                 :                :     /* Cache result, perhaps saving an expensive strcoll() call next time */
 3693 rhaas@postgresql.org     2019                 :CBC     9744095 :     sss->cache_blob = false;
                               2020                 :        9744095 :     sss->last_returned = result;
 4231                          2021                 :        9744095 :     return result;
                               2022                 :                : }
                               2023                 :                : 
                               2024                 :                : /*
                               2025                 :                :  * Conversion routine for sortsupport.  Converts original to abbreviated key
                               2026                 :                :  * representation.  Our encoding strategy is simple -- pack the first 8 bytes
                               2027                 :                :  * of a strxfrm() blob into a Datum (on little-endian machines, the 8 bytes are
                               2028                 :                :  * stored in reverse order), and treat it as an unsigned integer.  When the "C"
                               2029                 :                :  * locale is used just memcpy() from original instead.
                               2030                 :                :  */
                               2031                 :                : static Datum
 3693                          2032                 :         422572 : varstr_abbrev_convert(Datum original, SortSupport ssup)
                               2033                 :                : {
 1116 jdavis@postgresql.or     2034                 :         422572 :     const size_t max_prefix_bytes = sizeof(Datum);
 3688 tgl@sss.pgh.pa.us        2035                 :         422572 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
                               2036                 :         422572 :     VarString  *authoritative = DatumGetVarStringPP(original);
                               2037         [ +  + ]:         422572 :     char       *authoritative_data = VARDATA_ANY(authoritative);
                               2038                 :                : 
                               2039                 :                :     /* working state */
                               2040                 :                :     Datum       res;
                               2041                 :                :     char       *pres;
                               2042                 :                :     int         len;
                               2043                 :                :     uint32      hash;
                               2044                 :                : 
 4073 rhaas@postgresql.org     2045                 :         422572 :     pres = (char *) &res;
                               2046                 :                :     /* memset(), so any non-overwritten bytes are NUL */
 1116 jdavis@postgresql.or     2047                 :         422572 :     memset(pres, 0, max_prefix_bytes);
 4073 rhaas@postgresql.org     2048   [ -  +  -  -  :         422572 :     len = VARSIZE_ANY_EXHDR(authoritative);
                                     -  -  -  -  +  
                                                 + ]
                               2049                 :                : 
                               2050                 :                :     /* Get number of bytes, ignoring trailing spaces */
 2621 tgl@sss.pgh.pa.us        2051         [ +  + ]:         422572 :     if (sss->typid == BPCHAROID)
 3693 rhaas@postgresql.org     2052                 :            505 :         len = bpchartruelen(authoritative_data, len);
                               2053                 :                : 
                               2054                 :                :     /*
                               2055                 :                :      * If we're using the C collation, use memcpy(), rather than strxfrm(), to
                               2056                 :                :      * abbreviate keys.  The full comparator for the C locale is also
                               2057                 :                :      * memcmp().  This should be faster than strxfrm().
                               2058                 :                :      */
                               2059         [ +  + ]:         422572 :     if (sss->collate_c)
 1116 jdavis@postgresql.or     2060                 :         421654 :         memcpy(pres, authoritative_data, Min(len, max_prefix_bytes));
                               2061                 :                :     else
                               2062                 :                :     {
                               2063                 :                :         Size        bsize;
                               2064                 :                : 
                               2065                 :                :         /*
                               2066                 :                :          * We're not using the C collation, so fall back on strxfrm or ICU
                               2067                 :                :          * analogs.
                               2068                 :                :          */
                               2069                 :                : 
                               2070                 :                :         /* By convention, we use buffer 1 to store and NUL-terminate */
 3693 rhaas@postgresql.org     2071         [ -  + ]:            918 :         if (len >= sss->buflen1)
                               2072                 :                :         {
 3693 rhaas@postgresql.org     2073         [ #  # ]:UBC           0 :             sss->buflen1 = Max(len + 1, Min(sss->buflen1 * 2, MaxAllocSize));
 1309 tgl@sss.pgh.pa.us        2074                 :              0 :             sss->buf1 = repalloc(sss->buf1, sss->buflen1);
                               2075                 :                :         }
                               2076                 :                : 
                               2077                 :                :         /* Might be able to reuse strxfrm() blob from last call */
 3693 rhaas@postgresql.org     2078   [ +  +  +  - ]:CBC         918 :         if (sss->last_len1 == len && sss->cache_blob &&
                               2079         [ +  + ]:            459 :             memcmp(sss->buf1, authoritative_data, len) == 0)
                               2080                 :                :         {
 1116 jdavis@postgresql.or     2081                 :             84 :             memcpy(pres, sss->buf2, Min(max_prefix_bytes, sss->last_len2));
                               2082                 :                :             /* No change affecting cardinality, so no hashing required */
 3810 rhaas@postgresql.org     2083                 :             84 :             goto done;
                               2084                 :                :         }
                               2085                 :                : 
 3693                          2086                 :            834 :         memcpy(sss->buf1, authoritative_data, len);
                               2087                 :                : 
                               2088                 :                :         /*
                               2089                 :                :          * pg_strxfrm() and pg_strxfrm_prefix expect NUL-terminated strings.
                               2090                 :                :          */
                               2091                 :            834 :         sss->buf1[len] = '\0';
                               2092                 :            834 :         sss->last_len1 = len;
                               2093                 :                : 
 1116 jdavis@postgresql.or     2094         [ +  - ]:            834 :         if (pg_strxfrm_prefix_enabled(sss->locale))
                               2095                 :                :         {
                               2096         [ -  + ]:            834 :             if (sss->buflen2 < max_prefix_bytes)
                               2097                 :                :             {
 1116 jdavis@postgresql.or     2098         [ #  # ]:UBC           0 :                 sss->buflen2 = Max(max_prefix_bytes,
                               2099                 :                :                                    Min(sss->buflen2 * 2, MaxAllocSize));
                               2100                 :              0 :                 sss->buf2 = repalloc(sss->buf2, sss->buflen2);
                               2101                 :                :             }
                               2102                 :                : 
 1116 jdavis@postgresql.or     2103                 :CBC         834 :             bsize = pg_strxfrm_prefix(sss->buf2, sss->buf1,
                               2104                 :                :                                       max_prefix_bytes, sss->locale);
 1086                          2105                 :            834 :             sss->last_len2 = bsize;
                               2106                 :                :         }
                               2107                 :                :         else
                               2108                 :                :         {
                               2109                 :                :             /*
                               2110                 :                :              * Loop: Call pg_strxfrm(), possibly enlarge buffer, and try
                               2111                 :                :              * again.  The pg_strxfrm() function leaves the result buffer
                               2112                 :                :              * content undefined if the result did not fit, so we need to
                               2113                 :                :              * retry until everything fits, even though we only need the first
                               2114                 :                :              * few bytes in the end.
                               2115                 :                :              */
                               2116                 :                :             for (;;)
                               2117                 :                :             {
 1116 jdavis@postgresql.or     2118                 :UBC           0 :                 bsize = pg_strxfrm(sss->buf2, sss->buf1, sss->buflen2,
                               2119                 :                :                                    sss->locale);
                               2120                 :                : 
                               2121                 :              0 :                 sss->last_len2 = bsize;
                               2122         [ #  # ]:              0 :                 if (bsize < sss->buflen2)
                               2123                 :              0 :                     break;
                               2124                 :                : 
                               2125                 :                :                 /*
                               2126                 :                :                  * Grow buffer and retry.
                               2127                 :                :                  */
                               2128         [ #  # ]:              0 :                 sss->buflen2 = Max(bsize + 1,
                               2129                 :                :                                    Min(sss->buflen2 * 2, MaxAllocSize));
                               2130                 :              0 :                 sss->buf2 = repalloc(sss->buf2, sss->buflen2);
                               2131                 :                :             }
                               2132                 :                :         }
                               2133                 :                : 
                               2134                 :                :         /*
                               2135                 :                :          * Every Datum byte is always compared.  This is safe because the
                               2136                 :                :          * strxfrm() blob is itself NUL terminated, leaving no danger of
                               2137                 :                :          * misinterpreting any NUL bytes not intended to be interpreted as
                               2138                 :                :          * logically representing termination.
                               2139                 :                :          */
 1116 jdavis@postgresql.or     2140                 :CBC         834 :         memcpy(pres, sss->buf2, Min(max_prefix_bytes, bsize));
                               2141                 :                :     }
                               2142                 :                : 
                               2143                 :                :     /*
                               2144                 :                :      * Maintain approximate cardinality of both abbreviated keys and original,
                               2145                 :                :      * authoritative keys using HyperLogLog.  Used as cheap insurance against
                               2146                 :                :      * the worst case, where we do many string transformations for no saving
                               2147                 :                :      * in full strcoll()-based comparisons.  These statistics are used by
                               2148                 :                :      * varstr_abbrev_abort().
                               2149                 :                :      *
                               2150                 :                :      * First, Hash key proper, or a significant fraction of it.  Mix in length
                               2151                 :                :      * in order to compensate for cases where differences are past
                               2152                 :                :      * PG_CACHE_LINE_SIZE bytes, so as to limit the overhead of hashing.
                               2153                 :                :      */
 4000 rhaas@postgresql.org     2154                 :         422488 :     hash = DatumGetUInt32(hash_any((unsigned char *) authoritative_data,
                               2155                 :                :                                    Min(len, PG_CACHE_LINE_SIZE)));
                               2156                 :                : 
 4073                          2157         [ +  + ]:         422488 :     if (len > PG_CACHE_LINE_SIZE)
                               2158                 :             96 :         hash ^= DatumGetUInt32(hash_uint32((uint32) len));
                               2159                 :                : 
 3693                          2160                 :         422488 :     addHyperLogLog(&sss->full_card, hash);
                               2161                 :                : 
                               2162                 :                :     /* Hash abbreviated key */
                               2163                 :                :     {
                               2164                 :                :         uint32      tmp;
                               2165                 :                : 
  214 tgl@sss.pgh.pa.us        2166                 :GNC      422488 :         tmp = DatumGetUInt32(res) ^ (uint32) (DatumGetUInt64(res) >> 32);
                               2167                 :         422488 :         hash = DatumGetUInt32(hash_uint32(tmp));
                               2168                 :                :     }
                               2169                 :                : 
 3693 rhaas@postgresql.org     2170                 :CBC      422488 :     addHyperLogLog(&sss->abbr_card, hash);
                               2171                 :                : 
                               2172                 :                :     /* Cache result, perhaps saving an expensive strxfrm() call next time */
                               2173                 :         422488 :     sss->cache_blob = true;
 3810                          2174                 :         422572 : done:
                               2175                 :                : 
                               2176                 :                :     /*
                               2177                 :                :      * Byteswap on little-endian machines.
                               2178                 :                :      *
                               2179                 :                :      * This is needed so that ssup_datum_unsigned_cmp() (an unsigned integer
                               2180                 :                :      * 3-way comparator) works correctly on all platforms.  If we didn't do
                               2181                 :                :      * this, the comparator would have to call memcmp() with a pair of
                               2182                 :                :      * pointers to the first byte of each abbreviated key, which is slower.
                               2183                 :                :      */
                               2184                 :         422572 :     res = DatumBigEndianToNative(res);
                               2185                 :                : 
                               2186                 :                :     /* Don't leak memory here */
 3912                          2187         [ +  + ]:         422572 :     if (PointerGetDatum(authoritative) != original)
                               2188                 :              1 :         pfree(authoritative);
                               2189                 :                : 
 4073                          2190                 :         422572 :     return res;
                               2191                 :                : }
                               2192                 :                : 
                               2193                 :                : /*
                               2194                 :                :  * Callback for estimating effectiveness of abbreviated key optimization, using
                               2195                 :                :  * heuristic rules.  Returns value indicating if the abbreviation optimization
                               2196                 :                :  * should be aborted, based on its projected effectiveness.
                               2197                 :                :  */
                               2198                 :                : static bool
 3693                          2199                 :           1201 : varstr_abbrev_abort(int memtupcount, SortSupport ssup)
                               2200                 :                : {
 3688 tgl@sss.pgh.pa.us        2201                 :           1201 :     VarStringSortSupport *sss = (VarStringSortSupport *) ssup->ssup_extra;
                               2202                 :                :     double      abbrev_distinct,
                               2203                 :                :                 key_distinct;
                               2204                 :                : 
 4073 rhaas@postgresql.org     2205         [ -  + ]:           1201 :     Assert(ssup->abbreviate);
                               2206                 :                : 
                               2207                 :                :     /* Have a little patience */
 3999                          2208         [ +  + ]:           1201 :     if (memtupcount < 100)
 4073                          2209                 :            702 :         return false;
                               2210                 :                : 
 3693                          2211                 :            499 :     abbrev_distinct = estimateHyperLogLog(&sss->abbr_card);
                               2212                 :            499 :     key_distinct = estimateHyperLogLog(&sss->full_card);
                               2213                 :                : 
                               2214                 :                :     /*
                               2215                 :                :      * Clamp cardinality estimates to at least one distinct value.  While
                               2216                 :                :      * NULLs are generally disregarded, if only NULL values were seen so far,
                               2217                 :                :      * that might misrepresent costs if we failed to clamp.
                               2218                 :                :      */
   89 john.naylor@postgres     2219         [ -  + ]:GNC         499 :     if (abbrev_distinct < 1.0)
 4073 rhaas@postgresql.org     2220                 :UBC           0 :         abbrev_distinct = 1.0;
                               2221                 :                : 
   89 john.naylor@postgres     2222         [ -  + ]:GNC         499 :     if (key_distinct < 1.0)
 4073 rhaas@postgresql.org     2223                 :UBC           0 :         key_distinct = 1.0;
                               2224                 :                : 
                               2225                 :                :     /*
                               2226                 :                :      * In the worst case all abbreviated keys are identical, while at the same
                               2227                 :                :      * time there are differences within full key strings not captured in
                               2228                 :                :      * abbreviations.
                               2229                 :                :      */
 3995 rhaas@postgresql.org     2230         [ -  + ]:CBC         499 :     if (trace_sort)
                               2231                 :                :     {
 3949 bruce@momjian.us         2232                 :UBC           0 :         double      norm_abbrev_card = abbrev_distinct / (double) memtupcount;
                               2233                 :                : 
 3693 rhaas@postgresql.org     2234         [ #  # ]:              0 :         elog(LOG, "varstr_abbrev: abbrev_distinct after %d: %f "
                               2235                 :                :              "(key_distinct: %f, norm_abbrev_card: %f, prop_card: %f)",
                               2236                 :                :              memtupcount, abbrev_distinct, key_distinct, norm_abbrev_card,
                               2237                 :                :              sss->prop_card);
                               2238                 :                :     }
                               2239                 :                : 
                               2240                 :                :     /*
                               2241                 :                :      * If the number of distinct abbreviated keys approximately matches the
                               2242                 :                :      * number of distinct authoritative original keys, that's reason enough to
                               2243                 :                :      * proceed.  We can win even with a very low cardinality set if most
                               2244                 :                :      * tie-breakers only memcmp().  This is by far the most important
                               2245                 :                :      * consideration.
                               2246                 :                :      *
                               2247                 :                :      * While comparisons that are resolved at the abbreviated key level are
                               2248                 :                :      * considerably cheaper than tie-breakers resolved with memcmp(), both of
                               2249                 :                :      * those two outcomes are so much cheaper than a full strcoll() once
                               2250                 :                :      * sorting is underway that it doesn't seem worth it to weigh abbreviated
                               2251                 :                :      * cardinality against the overall size of the set in order to more
                               2252                 :                :      * accurately model costs.  Assume that an abbreviated comparison, and an
                               2253                 :                :      * abbreviated comparison with a cheap memcmp()-based authoritative
                               2254                 :                :      * resolution are equivalent.
                               2255                 :                :      */
 3693 rhaas@postgresql.org     2256         [ +  - ]:CBC         499 :     if (abbrev_distinct > key_distinct * sss->prop_card)
                               2257                 :                :     {
                               2258                 :                :         /*
                               2259                 :                :          * When we have exceeded 10,000 tuples, decay required cardinality
                               2260                 :                :          * aggressively for next call.
                               2261                 :                :          *
                               2262                 :                :          * This is useful because the number of comparisons required on
                               2263                 :                :          * average increases at a linearithmic rate, and at roughly 10,000
                               2264                 :                :          * tuples that factor will start to dominate over the linear costs of
                               2265                 :                :          * string transformation (this is a conservative estimate).  The decay
                               2266                 :                :          * rate is chosen to be a little less aggressive than halving -- which
                               2267                 :                :          * (since we're called at points at which memtupcount has doubled)
                               2268                 :                :          * would never see the cost model actually abort past the first call
                               2269                 :                :          * following a decay.  This decay rate is mostly a precaution against
                               2270                 :                :          * a sudden, violent swing in how well abbreviated cardinality tracks
                               2271                 :                :          * full key cardinality.  The decay also serves to prevent a marginal
                               2272                 :                :          * case from being aborted too late, when too much has already been
                               2273                 :                :          * invested in string transformation.
                               2274                 :                :          *
                               2275                 :                :          * It's possible for sets of several million distinct strings with
                               2276                 :                :          * mere tens of thousands of distinct abbreviated keys to still
                               2277                 :                :          * benefit very significantly.  This will generally occur provided
                               2278                 :                :          * each abbreviated key is a proxy for a roughly uniform number of the
                               2279                 :                :          * set's full keys. If it isn't so, we hope to catch that early and
                               2280                 :                :          * abort.  If it isn't caught early, by the time the problem is
                               2281                 :                :          * apparent it's probably not worth aborting.
                               2282                 :                :          */
 3999                          2283         [ +  + ]:            499 :         if (memtupcount > 10000)
 3693                          2284                 :              2 :             sss->prop_card *= 0.65;
                               2285                 :                : 
 4073                          2286                 :            499 :         return false;
                               2287                 :                :     }
                               2288                 :                : 
                               2289                 :                :     /*
                               2290                 :                :      * Abort abbreviation strategy.
                               2291                 :                :      *
                               2292                 :                :      * The worst case, where all abbreviated keys are identical while all
                               2293                 :                :      * original strings differ will typically only see a regression of about
                               2294                 :                :      * 10% in execution time for small to medium sized lists of strings.
                               2295                 :                :      * Whereas on modern CPUs where cache stalls are the dominant cost, we can
                               2296                 :                :      * often expect very large improvements, particularly with sets of strings
                               2297                 :                :      * of moderately high to high abbreviated cardinality.  There is little to
                               2298                 :                :      * lose but much to gain, which our strategy reflects.
                               2299                 :                :      */
 3995 rhaas@postgresql.org     2300         [ #  # ]:UBC           0 :     if (trace_sort)
 3693                          2301         [ #  # ]:              0 :         elog(LOG, "varstr_abbrev: aborted abbreviation at %d "
                               2302                 :                :              "(abbrev_distinct: %f, key_distinct: %f, prop_card: %f)",
                               2303                 :                :              memtupcount, abbrev_distinct, key_distinct, sss->prop_card);
                               2304                 :                : 
 4073                          2305                 :              0 :     return true;
                               2306                 :                : }
                               2307                 :                : 
                               2308                 :                : /*
                               2309                 :                :  * Generic equalimage support function for character type's operator classes.
                               2310                 :                :  * Disables the use of deduplication with nondeterministic collations.
                               2311                 :                :  */
                               2312                 :                : Datum
 2209 pg@bowt.ie               2313                 :CBC        4627 : btvarstrequalimage(PG_FUNCTION_ARGS)
                               2314                 :                : {
                               2315                 :                : #ifdef NOT_USED
                               2316                 :                :     Oid         opcintype = PG_GETARG_OID(0);
                               2317                 :                : #endif
                               2318                 :           4627 :     Oid         collid = PG_GET_COLLATION();
                               2319                 :                :     pg_locale_t locale;
                               2320                 :                : 
                               2321                 :           4627 :     check_collation_set(collid);
                               2322                 :                : 
  557 jdavis@postgresql.or     2323                 :           4627 :     locale = pg_newlocale_from_collation(collid);
                               2324                 :                : 
  549                          2325                 :           4627 :     PG_RETURN_BOOL(locale->deterministic);
                               2326                 :                : }
                               2327                 :                : 
                               2328                 :                : Datum
 9383 tgl@sss.pgh.pa.us        2329                 :         114780 : text_larger(PG_FUNCTION_ARGS)
                               2330                 :                : {
 6918                          2331                 :         114780 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               2332                 :         114780 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               2333                 :                :     text       *result;
                               2334                 :                : 
 5514 peter_e@gmx.net          2335         [ +  + ]:         114780 :     result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) > 0) ? arg1 : arg2);
                               2336                 :                : 
 9383 tgl@sss.pgh.pa.us        2337                 :         114780 :     PG_RETURN_TEXT_P(result);
                               2338                 :                : }
                               2339                 :                : 
                               2340                 :                : Datum
                               2341                 :          43038 : text_smaller(PG_FUNCTION_ARGS)
                               2342                 :                : {
 6918                          2343                 :          43038 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               2344                 :          43038 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               2345                 :                :     text       *result;
                               2346                 :                : 
 5514 peter_e@gmx.net          2347         [ +  + ]:          43038 :     result = ((text_cmp(arg1, arg2, PG_GET_COLLATION()) < 0) ? arg1 : arg2);
                               2348                 :                : 
 9383 tgl@sss.pgh.pa.us        2349                 :          43038 :     PG_RETURN_TEXT_P(result);
                               2350                 :                : }
                               2351                 :                : 
                               2352                 :                : 
                               2353                 :                : /*
                               2354                 :                :  * Cross-type comparison functions for types text and name.
                               2355                 :                :  */
                               2356                 :                : 
                               2357                 :                : Datum
 2643                          2358                 :         124839 : nameeqtext(PG_FUNCTION_ARGS)
                               2359                 :                : {
                               2360                 :         124839 :     Name        arg1 = PG_GETARG_NAME(0);
                               2361                 :         124839 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               2362                 :         124839 :     size_t      len1 = strlen(NameStr(*arg1));
                               2363   [ -  +  -  -  :         124839 :     size_t      len2 = VARSIZE_ANY_EXHDR(arg2);
                                     -  -  -  -  +  
                                                 + ]
 2550 peter@eisentraut.org     2364                 :         124839 :     Oid         collid = PG_GET_COLLATION();
                               2365                 :                :     bool        result;
                               2366                 :                : 
                               2367                 :         124839 :     check_collation_set(collid);
                               2368                 :                : 
                               2369         [ +  + ]:         124839 :     if (collid == C_COLLATION_OID)
                               2370         [ +  + ]:         128788 :         result = (len1 == len2 &&
                               2371   [ +  +  +  + ]:          62264 :                   memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
                               2372                 :                :     else
                               2373                 :          58315 :         result = (varstr_cmp(NameStr(*arg1), len1,
                               2374         [ -  + ]:          58315 :                              VARDATA_ANY(arg2), len2,
                               2375                 :                :                              collid) == 0);
                               2376                 :                : 
 2643 tgl@sss.pgh.pa.us        2377         [ -  + ]:         124839 :     PG_FREE_IF_COPY(arg2, 1);
                               2378                 :                : 
                               2379                 :         124839 :     PG_RETURN_BOOL(result);
                               2380                 :                : }
                               2381                 :                : 
                               2382                 :                : Datum
                               2383                 :           3889 : texteqname(PG_FUNCTION_ARGS)
                               2384                 :                : {
                               2385                 :           3889 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               2386                 :           3889 :     Name        arg2 = PG_GETARG_NAME(1);
                               2387   [ -  +  -  -  :           3889 :     size_t      len1 = VARSIZE_ANY_EXHDR(arg1);
                                     -  -  -  -  +  
                                                 + ]
                               2388                 :           3889 :     size_t      len2 = strlen(NameStr(*arg2));
 2550 peter@eisentraut.org     2389                 :           3889 :     Oid         collid = PG_GET_COLLATION();
                               2390                 :                :     bool        result;
                               2391                 :                : 
                               2392                 :           3889 :     check_collation_set(collid);
                               2393                 :                : 
                               2394         [ +  + ]:           3889 :     if (collid == C_COLLATION_OID)
                               2395         [ +  + ]:            284 :         result = (len1 == len2 &&
                               2396   [ +  -  +  - ]:             91 :                   memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
                               2397                 :                :     else
                               2398                 :           3696 :         result = (varstr_cmp(VARDATA_ANY(arg1), len1,
                               2399         [ -  + ]:           3696 :                              NameStr(*arg2), len2,
                               2400                 :                :                              collid) == 0);
                               2401                 :                : 
 2643 tgl@sss.pgh.pa.us        2402         [ -  + ]:           3889 :     PG_FREE_IF_COPY(arg1, 0);
                               2403                 :                : 
                               2404                 :           3889 :     PG_RETURN_BOOL(result);
                               2405                 :                : }
                               2406                 :                : 
                               2407                 :                : Datum
                               2408                 :              9 : namenetext(PG_FUNCTION_ARGS)
                               2409                 :                : {
                               2410                 :              9 :     Name        arg1 = PG_GETARG_NAME(0);
                               2411                 :              9 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               2412                 :              9 :     size_t      len1 = strlen(NameStr(*arg1));
                               2413   [ -  +  -  -  :              9 :     size_t      len2 = VARSIZE_ANY_EXHDR(arg2);
                                     -  -  -  -  -  
                                                 + ]
 2550 peter@eisentraut.org     2414                 :              9 :     Oid         collid = PG_GET_COLLATION();
                               2415                 :                :     bool        result;
                               2416                 :                : 
                               2417                 :              9 :     check_collation_set(collid);
                               2418                 :                : 
                               2419         [ -  + ]:              9 :     if (collid == C_COLLATION_OID)
 2550 peter@eisentraut.org     2420         [ #  # ]:UBC           0 :         result = !(len1 == len2 &&
                               2421   [ #  #  #  # ]:              0 :                    memcmp(NameStr(*arg1), VARDATA_ANY(arg2), len1) == 0);
                               2422                 :                :     else
 2550 peter@eisentraut.org     2423                 :CBC           9 :         result = !(varstr_cmp(NameStr(*arg1), len1,
                               2424         [ -  + ]:              9 :                               VARDATA_ANY(arg2), len2,
                               2425                 :                :                               collid) == 0);
                               2426                 :                : 
 2643 tgl@sss.pgh.pa.us        2427         [ -  + ]:              9 :     PG_FREE_IF_COPY(arg2, 1);
                               2428                 :                : 
                               2429                 :              9 :     PG_RETURN_BOOL(result);
                               2430                 :                : }
                               2431                 :                : 
                                                                       /*
                                                                        * textnename
                                                                        *    Inequality test between a text value (argument 0) and a name
                                                                        *    value (argument 1), using the collation passed to the call.
                                                                        *
                                                                        * The name's length is taken with strlen().  With C_COLLATION_OID the
                                                                        * two values are unequal unless their lengths match and memcmp() finds
                                                                        * the bytes identical; with any other collation the result is whether
                                                                        * varstr_cmp() returns nonzero.
                                                                        */
                               2432                 :                : Datum
                               2433                 :              9 : textnename(PG_FUNCTION_ARGS)
                               2434                 :                : {
                               2435                 :              9 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               2436                 :              9 :     Name        arg2 = PG_GETARG_NAME(1);
                               2437   [ -  +  -  -  :              9 :     size_t      len1 = VARSIZE_ANY_EXHDR(arg1);
                                     -  -  -  -  -  
                                                 + ]
                               2438                 :              9 :     size_t      len2 = strlen(NameStr(*arg2));
 2550 peter@eisentraut.org     2439                 :              9 :     Oid         collid = PG_GET_COLLATION();
                               2440                 :                :     bool        result;
                               2441                 :                : 
                               2442                 :              9 :     check_collation_set(collid);
                               2443                 :                : 
                                                                           /*
                                                                            * The length test comes first, so memcmp() only runs when both
                                                                            * values are exactly len1 bytes long.
                                                                            */
                               2444         [ -  + ]:              9 :     if (collid == C_COLLATION_OID)
 2550 peter@eisentraut.org     2445         [ #  # ]:UBC           0 :         result = !(len1 == len2 &&
                               2446   [ #  #  #  # ]:              0 :                    memcmp(VARDATA_ANY(arg1), NameStr(*arg2), len1) == 0);
                               2447                 :                :     else
 2550 peter@eisentraut.org     2448                 :CBC           9 :         result = !(varstr_cmp(VARDATA_ANY(arg1), len1,
                               2449         [ -  + ]:              9 :                               NameStr(*arg2), len2,
                               2450                 :                :                               collid) == 0);
                               2451                 :                : 
 2643 tgl@sss.pgh.pa.us        2452         [ -  + ]:              9 :     PG_FREE_IF_COPY(arg1, 0);
                               2453                 :                : 
                               2454                 :              9 :     PG_RETURN_BOOL(result);
                               2455                 :                : }
                               2456                 :                : 
                                                                       /*
                                                                        * btnametextcmp
                                                                        *    Three-way comparison of a name value (argument 0) against a text
                                                                        *    value (argument 1).
                                                                        *
                                                                        * Both values are handed to varstr_cmp() together with the collation of
                                                                        * the call; the name's length is taken with strlen().  The varstr_cmp()
                                                                        * result is returned unchanged as an int32.
                                                                        */
                               2457                 :                : Datum
                               2458                 :          81321 : btnametextcmp(PG_FUNCTION_ARGS)
                               2459                 :                : {
                               2460                 :          81321 :     Name        arg1 = PG_GETARG_NAME(0);
                               2461                 :          81321 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               2462                 :                :     int32       result;
                               2463                 :                : 
                               2464                 :          81321 :     result = varstr_cmp(NameStr(*arg1), strlen(NameStr(*arg1)),
                               2465   [ -  +  -  -  :          81321 :                         VARDATA_ANY(arg2), VARSIZE_ANY_EXHDR(arg2),
                                     -  -  -  -  +  
                                           +  +  + ]
                               2466                 :                :                         PG_GET_COLLATION());
                               2467                 :                : 
                               2468         [ -  + ]:          81321 :     PG_FREE_IF_COPY(arg2, 1);
                               2469                 :                : 
                               2470                 :          81321 :     PG_RETURN_INT32(result);
                               2471                 :                : }
                               2472                 :                : 
                                                                       /*
                                                                        * bttextnamecmp
                                                                        *    Three-way comparison of a text value (argument 0) against a name
                                                                        *    value (argument 1).
                                                                        *
                                                                        * Mirror image of btnametextcmp: the same varstr_cmp() call with the
                                                                        * operands in the opposite order, under the collation of the call.
                                                                        */
                               2473                 :                : Datum
 2643 tgl@sss.pgh.pa.us        2474                 :GBC          22 : bttextnamecmp(PG_FUNCTION_ARGS)
                               2475                 :                : {
                               2476                 :             22 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               2477                 :             22 :     Name        arg2 = PG_GETARG_NAME(1);
                               2478                 :                :     int32       result;
                               2479                 :                : 
                               2480   [ #  #  #  #  :             22 :     result = varstr_cmp(VARDATA_ANY(arg1), VARSIZE_ANY_EXHDR(arg1),
                                     #  #  #  #  #  
                                                 # ]
                               2481         [ #  # ]:             22 :                         NameStr(*arg2), strlen(NameStr(*arg2)),
                               2482                 :                :                         PG_GET_COLLATION());
                               2483                 :                : 
                               2484         [ -  + ]:             22 :     PG_FREE_IF_COPY(arg1, 0);
                               2485                 :                : 
                               2486                 :             22 :     PG_RETURN_INT32(result);
                               2487                 :                : }
                               2488                 :                : 
                                                                       /*
                                                                        * CmpCall(cmpfunc)
                                                                        *    Call cmpfunc through DirectFunctionCall2Coll(), forwarding the
                                                                        *    current call's collation and its two argument datums unchanged,
                                                                        *    and extract the int32 result.
                                                                        *
                                                                        * The expansion uses PG_GET_COLLATION() and PG_GETARG_DATUM(), so it is
                                                                        * meant for the PG_FUNCTION_ARGS functions that follow; the macro is
                                                                        * #undef'd once they have been defined.  Each of those functions maps
                                                                        * the three-way result onto one relational operator.
                                                                        */
                               2489                 :                : #define CmpCall(cmpfunc) \
                               2490                 :                :     DatumGetInt32(DirectFunctionCall2Coll(cmpfunc, \
                               2491                 :                :                                           PG_GET_COLLATION(), \
                               2492                 :                :                                           PG_GETARG_DATUM(0), \
                               2493                 :                :                                           PG_GETARG_DATUM(1)))
                               2494                 :                : 
                                                                       /* name < text: true when btnametextcmp() is negative */
                               2495                 :                : Datum
 2643 tgl@sss.pgh.pa.us        2496                 :CBC       38885 : namelttext(PG_FUNCTION_ARGS)
                               2497                 :                : {
                               2498                 :          38885 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) < 0);
                               2499                 :                : }
                               2500                 :                : 
                                                                       /* name <= text: true when btnametextcmp() is zero or negative */
                               2501                 :                : Datum
 2643 tgl@sss.pgh.pa.us        2502                 :UBC           0 : nameletext(PG_FUNCTION_ARGS)
                               2503                 :                : {
                               2504                 :              0 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) <= 0);
                               2505                 :                : }
                               2506                 :                : 
                                                                       /* name > text: true when btnametextcmp() is positive */
                               2507                 :                : Datum
                               2508                 :              0 : namegttext(PG_FUNCTION_ARGS)
                               2509                 :                : {
                               2510                 :              0 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) > 0);
                               2511                 :                : }
                               2512                 :                : 
                                                                       /* name >= text: true when btnametextcmp() is zero or positive */
                               2513                 :                : Datum
 2643 tgl@sss.pgh.pa.us        2514                 :CBC       36143 : namegetext(PG_FUNCTION_ARGS)
                               2515                 :                : {
                               2516                 :          36143 :     PG_RETURN_BOOL(CmpCall(btnametextcmp) >= 0);
                               2517                 :                : }
                               2518                 :                : 
                                                                       /* text < name: true when bttextnamecmp() is negative */
                               2519                 :                : Datum
 2643 tgl@sss.pgh.pa.us        2520                 :UBC           0 : textltname(PG_FUNCTION_ARGS)
                               2521                 :                : {
                               2522                 :              0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) < 0);
                               2523                 :                : }
                               2524                 :                : 
                                                                       /* text <= name: true when bttextnamecmp() is zero or negative */
                               2525                 :                : Datum
                               2526                 :              0 : textlename(PG_FUNCTION_ARGS)
                               2527                 :                : {
                               2528                 :              0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) <= 0);
                               2529                 :                : }
                               2530                 :                : 
                                                                       /* text > name: true when bttextnamecmp() is positive */
                               2531                 :                : Datum
                               2532                 :              0 : textgtname(PG_FUNCTION_ARGS)
                               2533                 :                : {
                               2534                 :              0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) > 0);
                               2535                 :                : }
                               2536                 :                : 
                                                                       /* text >= name: true when bttextnamecmp() is zero or positive */
                               2537                 :                : Datum
                               2538                 :              0 : textgename(PG_FUNCTION_ARGS)
                               2539                 :                : {
                               2540                 :              0 :     PG_RETURN_BOOL(CmpCall(bttextnamecmp) >= 0);
                               2541                 :                : }
                               2542                 :                : 
                               2543                 :                : #undef CmpCall
                               2544                 :                : 
                               2545                 :                : 
                               2546                 :                : /*
                               2547                 :                :  * The following operators support character-by-character comparison
                               2548                 :                :  * of text datums, to allow building indexes suitable for LIKE clauses.
                               2549                 :                :  * Note that the regular texteq/textne comparison operators, and regular
                               2550                 :                :  * support functions 1 and 2 with "C" collation are assumed to be
                               2551                 :                :  * compatible with these!
                               2552                 :                :  */
                               2553                 :                : 
                                                                       /*
                                                                        * internal_text_pattern_compare
                                                                        *    Byte-wise three-way comparison of two text values.
                                                                        *
                                                                        * memcmp() is run over the shorter of the two lengths.  A nonzero
                                                                        * memcmp() result is returned as-is (so it is not normalized to -1/+1).
                                                                        * When the common prefix is identical, the shorter value sorts first:
                                                                        * -1 if arg1 is shorter, 1 if arg1 is longer, 0 if the lengths match.
                                                                        */
                               2554                 :                : static int
 2367 tgl@sss.pgh.pa.us        2555                 :CBC       80222 : internal_text_pattern_compare(text *arg1, text *arg2)
                               2556                 :                : {
                               2557                 :                :     int         result;
                               2558                 :                :     int         len1,
                               2559                 :                :                 len2;
                               2560                 :                : 
 6501                          2561   [ -  +  -  -  :          80222 :     len1 = VARSIZE_ANY_EXHDR(arg1);
                                     -  -  -  -  +  
                                                 + ]
                               2562   [ -  +  -  -  :          80222 :     len2 = VARSIZE_ANY_EXHDR(arg2);
                                     -  -  -  -  -  
                                                 + ]
                               2563                 :                : 
                                                                           /* Compare only the bytes both values have in common. */
 5563 rhaas@postgresql.org     2564   [ -  +  +  + ]:          80222 :     result = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
 8340 peter_e@gmx.net          2565         [ +  + ]:          80222 :     if (result != 0)
                               2566                 :          80156 :         return result;
                                                                           /* Common prefix is equal: break the tie on length. */
 6501 tgl@sss.pgh.pa.us        2567         [ -  + ]:             66 :     else if (len1 < len2)
 8340 peter_e@gmx.net          2568                 :UBC           0 :         return -1;
 6501 tgl@sss.pgh.pa.us        2569         [ +  + ]:CBC          66 :     else if (len1 > len2)
 8340 peter_e@gmx.net          2570                 :             42 :         return 1;
                               2571                 :                :     else
                               2572                 :             24 :         return 0;
                               2573                 :                : }
                               2574                 :                : 
                               2575                 :                : 
                                                                       /*
                                                                        * text_pattern_lt
                                                                        *    True when the first text argument sorts strictly before the
                                                                        *    second according to internal_text_pattern_compare().
                                                                        */
                               2576                 :                : Datum
                               2577                 :          23933 : text_pattern_lt(PG_FUNCTION_ARGS)
                               2578                 :                : {
 6918 tgl@sss.pgh.pa.us        2579                 :          23933 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               2580                 :          23933 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               2581                 :                :     int         result;
                               2582                 :                : 
 2367                          2583                 :          23933 :     result = internal_text_pattern_compare(arg1, arg2);
                               2584                 :                : 
 8340 peter_e@gmx.net          2585         [ -  + ]:          23933 :     PG_FREE_IF_COPY(arg1, 0);
                               2586         [ -  + ]:          23933 :     PG_FREE_IF_COPY(arg2, 1);
                               2587                 :                : 
                               2588                 :          23933 :     PG_RETURN_BOOL(result < 0);
                               2589                 :                : }
                               2590                 :                : 
                               2591                 :                : 
                                                                       /*
                                                                        * text_pattern_le
                                                                        *    True when the first text argument sorts before, or equal to, the
                                                                        *    second according to internal_text_pattern_compare().
                                                                        */
                               2592                 :                : Datum
                               2593                 :          18755 : text_pattern_le(PG_FUNCTION_ARGS)
                               2594                 :                : {
 6918 tgl@sss.pgh.pa.us        2595                 :          18755 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               2596                 :          18755 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               2597                 :                :     int         result;
                               2598                 :                : 
 2367                          2599                 :          18755 :     result = internal_text_pattern_compare(arg1, arg2);
                               2600                 :                : 
 8340 peter_e@gmx.net          2601         [ -  + ]:          18755 :     PG_FREE_IF_COPY(arg1, 0);
                               2602         [ -  + ]:          18755 :     PG_FREE_IF_COPY(arg2, 1);
                               2603                 :                : 
                               2604                 :          18755 :     PG_RETURN_BOOL(result <= 0);
                               2605                 :                : }
                               2606                 :                : 
                               2607                 :                : 
                                                                       /*
                                                                        * text_pattern_ge
                                                                        *    True when the first text argument sorts after, or equal to, the
                                                                        *    second according to internal_text_pattern_compare().
                                                                        */
                               2608                 :                : Datum
                               2609                 :          18767 : text_pattern_ge(PG_FUNCTION_ARGS)
                               2610                 :                : {
 6918 tgl@sss.pgh.pa.us        2611                 :          18767 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               2612                 :          18767 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               2613                 :                :     int         result;
                               2614                 :                : 
 2367                          2615                 :          18767 :     result = internal_text_pattern_compare(arg1, arg2);
                               2616                 :                : 
 8340 peter_e@gmx.net          2617         [ -  + ]:          18767 :     PG_FREE_IF_COPY(arg1, 0);
                               2618         [ -  + ]:          18767 :     PG_FREE_IF_COPY(arg2, 1);
                               2619                 :                : 
                               2620                 :          18767 :     PG_RETURN_BOOL(result >= 0);
                               2621                 :                : }
                               2622                 :                : 
                               2623                 :                : 
                                                                       /*
                                                                        * text_pattern_gt
                                                                        *    True when the first text argument sorts strictly after the
                                                                        *    second according to internal_text_pattern_compare().
                                                                        */
                               2624                 :                : Datum
 8340 peter_e@gmx.net          2625                 :GNC       18755 : text_pattern_gt(PG_FUNCTION_ARGS)
                               2626                 :                : {
 6918 tgl@sss.pgh.pa.us        2627                 :          18755 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               2628                 :          18755 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               2629                 :                :     int         result;
                               2630                 :                : 
 2367                          2631                 :          18755 :     result = internal_text_pattern_compare(arg1, arg2);
                               2632                 :                : 
 8340 peter_e@gmx.net          2633         [ -  + ]:          18755 :     PG_FREE_IF_COPY(arg1, 0);
                               2634         [ -  + ]:          18755 :     PG_FREE_IF_COPY(arg2, 1);
                               2635                 :                : 
                               2636                 :          18755 :     PG_RETURN_BOOL(result > 0);
                               2637                 :                : }
                               2638                 :                : 
                               2639                 :                : 
                                                                       /*
                                                                        * bttext_pattern_cmp
                                                                        *    Three-way comparison of two text arguments; returns the result of
                                                                        *    internal_text_pattern_compare() as an int32.
                                                                        */
                               2640                 :                : Datum
                               2641                 :             12 : bttext_pattern_cmp(PG_FUNCTION_ARGS)
                               2642                 :                : {
 6918 tgl@sss.pgh.pa.us        2643                 :             12 :     text       *arg1 = PG_GETARG_TEXT_PP(0);
                               2644                 :             12 :     text       *arg2 = PG_GETARG_TEXT_PP(1);
                               2645                 :                :     int         result;
                               2646                 :                : 
 2367                          2647                 :             12 :     result = internal_text_pattern_compare(arg1, arg2);
                               2648                 :                : 
 8340 peter_e@gmx.net          2649         [ -  + ]:             12 :     PG_FREE_IF_COPY(arg1, 0);
                               2650         [ -  + ]:             12 :     PG_FREE_IF_COPY(arg2, 1);
                               2651                 :                : 
                               2652                 :             12 :     PG_RETURN_INT32(result);
                               2653                 :                : }
                               2654                 :                : 
                               2655                 :                : 
                                                                       /*
                                                                        * bttext_pattern_sortsupport
                                                                        *    Fill in the SortSupport struct passed as argument 0 for the
                                                                        *    pattern comparison above.
                                                                        *
                                                                        * varstr_sortsupport() is called for TEXTOID with C_COLLATION_OID while
                                                                        * ssup->ssup_cxt is the current memory context; the caller's context is
                                                                        * restored before returning.  Returns void.
                                                                        */
                               2656                 :                : Datum
 3693 rhaas@postgresql.org     2657                 :             58 : bttext_pattern_sortsupport(PG_FUNCTION_ARGS)
                               2658                 :                : {
                               2659                 :             58 :     SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0);
                               2660                 :                :     MemoryContext oldcontext;
                               2661                 :                : 
                               2662                 :             58 :     oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt);
                               2663                 :                : 
                               2664                 :                :     /* Use generic string SortSupport, forcing "C" collation */
 2643 tgl@sss.pgh.pa.us        2665                 :             58 :     varstr_sortsupport(ssup, TEXTOID, C_COLLATION_OID);
                               2666                 :                : 
 3693 rhaas@postgresql.org     2667                 :             58 :     MemoryContextSwitchTo(oldcontext);
                               2668                 :                : 
                               2669                 :             58 :     PG_RETURN_VOID();
                               2670                 :                : }
                               2671                 :                : 
                               2672                 :                : 
                               2673                 :                : /* text_name()
                               2674                 :                :  * Converts a text type to a Name type.
                                                                        *
                                                                        * Input of NAMEDATALEN bytes or more is cut down with pg_mbcliplen()
                                                                        * to at most NAMEDATALEN - 1 bytes (presumably on a multibyte character
                                                                        * boundary -- confirm against pg_mbcliplen).  The result is a freshly
                                                                        * palloc0'd, zero-filled NAMEDATALEN-byte buffer holding those bytes.
                               2675                 :                :  */
                               2676                 :                : Datum
  256 michael@paquier.xyz      2677                 :CBC       15457 : text_name(PG_FUNCTION_ARGS)
                               2678                 :                : {
                               2679                 :          15457 :     text       *s = PG_GETARG_TEXT_PP(0);
                               2680                 :                :     Name        result;
                               2681                 :                :     int         len;
                               2682                 :                : 
                               2683   [ -  +  -  -  :          15457 :     len = VARSIZE_ANY_EXHDR(s);
                                     -  -  -  -  +  
                                                 + ]
                               2684                 :                : 
                               2685                 :                :     /* Truncate oversize input */
                               2686         [ +  + ]:          15457 :     if (len >= NAMEDATALEN)
                               2687         [ -  + ]:              3 :         len = pg_mbcliplen(VARDATA_ANY(s), len, NAMEDATALEN - 1);
                               2688                 :                : 
                               2689                 :                :     /* We use palloc0 here to ensure result is zero-padded */
                               2690                 :          15457 :     result = (Name) palloc0(NAMEDATALEN);
                               2691         [ +  + ]:          15457 :     memcpy(NameStr(*result), VARDATA_ANY(s), len);
                               2692                 :                : 
                               2693                 :          15457 :     PG_RETURN_NAME(result);
                               2694                 :                : }
                               2695                 :                : 
                               2696                 :                : /* name_text()
                               2697                 :                :  * Converts a Name type to a text type.
                                                                        *
                                                                        * The name's character data (NameStr) is passed to cstring_to_text()
                                                                        * and the resulting text value is returned.
                               2698                 :                :  */
                               2699                 :                : Datum
                               2700                 :         327841 : name_text(PG_FUNCTION_ARGS)
                               2701                 :                : {
                               2702                 :         327841 :     Name        s = PG_GETARG_NAME(0);
                               2703                 :                : 
                               2704                 :         327841 :     PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s)));
                               2705                 :                : }
                               2706                 :                : 
                               2707                 :                : 
                               2708                 :                : /*
                               2709                 :                :  * textToQualifiedNameList - convert a text object to list of names
                               2710                 :                :  *
                               2711                 :                :  * This implements the input parsing needed by nextval() and other
                               2712                 :                :  * functions that take a text parameter representing a qualified name.
                               2713                 :                :  * We split the name at dots, downcase if not double-quoted, and
                               2714                 :                :  * truncate names if they're too long.
                                                                        *
                                                                        * Returns a new List of String nodes, one per identifier, each holding
                                                                        * its own pstrdup'd copy.  Raises ERRCODE_INVALID_NAME ("invalid name
                                                                        * syntax") when the input fails to parse or yields no identifiers.
                               2715                 :                :  */
                               2716                 :                : List *
                               2717                 :           2716 : textToQualifiedNameList(text *textval)
                               2718                 :                : {
                               2719                 :                :     char       *rawname;
                               2720                 :           2716 :     List       *result = NIL;
                               2721                 :                :     List       *namelist;
                               2722                 :                :     ListCell   *l;
                               2723                 :                : 
                               2724                 :                :     /* Convert to C string (handles possible detoasting). */
                               2725                 :                :     /* Note we rely on being able to modify rawname below. */
                               2726                 :           2716 :     rawname = text_to_cstring(textval);
                               2727                 :                : 
                                                                           /* SplitIdentifierString() returns false on a syntax error. */
                               2728         [ -  + ]:           2716 :     if (!SplitIdentifierString(rawname, '.', &namelist))
  256 michael@paquier.xyz      2729         [ #  # ]:UBC           0 :         ereport(ERROR,
                               2730                 :                :                 (errcode(ERRCODE_INVALID_NAME),
                               2731                 :                :                  errmsg("invalid name syntax")));
                               2732                 :                : 
                                                                           /*
                                                                            * SplitIdentifierString() accepts an empty string and reports it as
                                                                            * an empty list; that is not a valid qualified name here.
                                                                            */
  256 michael@paquier.xyz      2733         [ -  + ]:CBC        2716 :     if (namelist == NIL)
  256 michael@paquier.xyz      2734         [ #  # ]:UBC           0 :         ereport(ERROR,
                               2735                 :                :                 (errcode(ERRCODE_INVALID_NAME),
                               2736                 :                :                  errmsg("invalid name syntax")));
                               2737                 :                : 
                                                                           /*
                                                                            * The namelist entries point into rawname, which is freed below, so
                                                                            * each identifier is copied before being wrapped in a String node.
                                                                            */
  256 michael@paquier.xyz      2738   [ +  -  +  +  :CBC        5490 :     foreach(l, namelist)
                                              +  + ]
                               2739                 :                :     {
                               2740                 :           2774 :         char       *curname = (char *) lfirst(l);
                               2741                 :                : 
                               2742                 :           2774 :         result = lappend(result, makeString(pstrdup(curname)));
                               2743                 :                :     }
                               2744                 :                : 
                               2745                 :           2716 :     pfree(rawname);
                               2746                 :           2716 :     list_free(namelist);
                               2747                 :                : 
 5893 tgl@sss.pgh.pa.us        2748                 :           2716 :     return result;
                               2749                 :                : }
                               2750                 :                : 
                               2751                 :                : /*
                               2752                 :                :  * SplitIdentifierString --- parse a string containing identifiers
                               2753                 :                :  *
                               2754                 :                :  * This is the guts of textToQualifiedNameList, and is exported for use in
                               2755                 :                :  * other situations such as parsing GUC variables.  In the GUC case, it's
                               2756                 :                :  * important to avoid memory leaks, so the API is designed to minimize the
                               2757                 :                :  * amount of stuff that needs to be allocated and freed.
                               2758                 :                :  *
                               2759                 :                :  * Inputs:
                               2760                 :                :  *  rawstring: the input string; must be overwritable!  On return, it's
                               2761                 :                :  *             been modified to contain the separated identifiers.
                               2762                 :                :  *  separator: the separator punctuation expected between identifiers
                               2763                 :                :  *             (typically '.' or ',').  Whitespace may also appear around
                               2764                 :                :  *             identifiers.
                               2765                 :                :  * Outputs:
                               2766                 :                :  *  namelist: filled with a palloc'd list of pointers to identifiers within
                               2767                 :                :  *            rawstring.  Caller should list_free() this even on error return.
                               2768                 :                :  *
                               2769                 :                :  * Returns true if okay, false if there is a syntax error in the string.
                               2770                 :                :  *
                               2771                 :                :  * Note that an empty string is considered okay here, though not in
                               2772                 :                :  * textToQualifiedNameList.
                               2773                 :                :  */
                               2774                 :                : bool
  256 michael@paquier.xyz      2775                 :          90967 : SplitIdentifierString(char *rawstring, char separator,
                               2776                 :                :                       List **namelist)
                               2777                 :                : {
                               2778                 :          90967 :     char       *nextp = rawstring;
                               2779                 :          90967 :     bool        done = false;
                               2780                 :                : 
                               2781                 :          90967 :     *namelist = NIL;
                               2782                 :                : 
                               2783         [ +  + ]:          90970 :     while (scanner_isspace(*nextp))
                               2784                 :              3 :         nextp++;                /* skip leading whitespace */
                               2785                 :                : 
                               2786         [ +  + ]:          90967 :     if (*nextp == '\0')
  131 tgl@sss.pgh.pa.us        2787                 :GNC       14656 :         return true;            /* empty string represents empty list */
                               2788                 :                : 
                               2789                 :                :     /* At the top of the loop, we are at start of a new identifier. */
                               2790                 :                :     do
                               2791                 :                :     {
                               2792                 :                :         char       *curname;
                               2793                 :                :         char       *endp;
                               2794                 :                : 
 3736 peter_e@gmx.net          2795         [ +  + ]:CBC      128247 :         if (*nextp == '"')
                               2796                 :                :         {
                               2797                 :                :             /* Quoted name --- collapse quote-quote pairs, no downcasing */
 8751 tgl@sss.pgh.pa.us        2798                 :          21849 :             curname = nextp + 1;
                               2799                 :                :             for (;;)
                               2800                 :                :             {
 3736 peter_e@gmx.net          2801                 :          21851 :                 endp = strchr(nextp + 1, '"');
 8751 tgl@sss.pgh.pa.us        2802         [ -  + ]:          21850 :                 if (endp == NULL)
 3189 tgl@sss.pgh.pa.us        2803                 :UBC           0 :                     return false;   /* mismatched quotes */
 3736 peter_e@gmx.net          2804         [ +  + ]:CBC       21850 :                 if (endp[1] != '"')
 8751 tgl@sss.pgh.pa.us        2805                 :          21849 :                     break;      /* found end of quoted name */
                               2806                 :                :                 /* Collapse adjacent quotes into one quote, and look again */
 8593 bruce@momjian.us         2807                 :              1 :                 memmove(endp, endp + 1, strlen(endp));
 8751 tgl@sss.pgh.pa.us        2808                 :              1 :                 nextp = endp;
                               2809                 :                :             }
                               2810                 :                :             /* endp now points at the terminating quote */
                               2811                 :          21849 :             nextp = endp + 1;
                               2812                 :                :         }
                               2813                 :                :         else
                               2814                 :                :         {
                               2815                 :                :             /* Unquoted name --- extends to separator or whitespace */
                               2816                 :                :             char       *downname;
                               2817                 :                :             int         len;
                               2818                 :                : 
                               2819                 :         106398 :             curname = nextp;
 8749                          2820   [ +  +  +  + ]:         907118 :             while (*nextp && *nextp != separator &&
 3217                          2821         [ +  + ]:         800721 :                    !scanner_isspace(*nextp))
 8749                          2822                 :         800720 :                 nextp++;
                               2823                 :         106398 :             endp = nextp;
                               2824         [ -  + ]:         106398 :             if (curname == nextp)
 8749 tgl@sss.pgh.pa.us        2825                 :UBC           0 :                 return false;   /* empty unquoted name not allowed */
                               2826                 :                : 
                               2827                 :                :             /*
                               2828                 :                :              * Downcase the identifier, using same code as main lexer does.
                               2829                 :                :              *
                               2830                 :                :              * XXX because we want to overwrite the input in-place, we cannot
                               2831                 :                :              * support a downcasing transformation that increases the string
                               2832                 :                :              * length.  This is not a problem given the current implementation
                               2833                 :                :              * of downcase_truncate_identifier, but we'll probably have to do
                               2834                 :                :              * something about this someday.
                               2835                 :                :              */
 8058 tgl@sss.pgh.pa.us        2836                 :CBC      106398 :             len = endp - curname;
                               2837                 :         106398 :             downname = downcase_truncate_identifier(curname, len, false);
                               2838         [ -  + ]:         106398 :             Assert(strlen(downname) <= len);
 4068                          2839                 :         106398 :             strncpy(curname, downname, len);    /* strncpy is required here */
 8058                          2840                 :         106398 :             pfree(downname);
                               2841                 :                :         }
                               2842                 :                : 
 3217                          2843         [ +  + ]:         128248 :         while (scanner_isspace(*nextp))
 8749                          2844                 :              1 :             nextp++;            /* skip trailing whitespace */
                               2845                 :                : 
                               2846         [ +  + ]:         128247 :         if (*nextp == separator)
                               2847                 :                :         {
                               2848                 :          51936 :             nextp++;
 3217                          2849         [ +  + ]:          90875 :             while (scanner_isspace(*nextp))
 8749                          2850                 :          38939 :                 nextp++;        /* skip leading whitespace for next */
                               2851                 :                :             /* we expect another name, so done remains false */
                               2852                 :                :         }
                               2853         [ +  + ]:          76311 :         else if (*nextp == '\0')
                               2854                 :          76310 :             done = true;
                               2855                 :                :         else
                               2856                 :              1 :             return false;       /* invalid syntax */
                               2857                 :                : 
                               2858                 :                :         /* Now safe to overwrite separator with a null */
                               2859                 :         128246 :         *endp = '\0';
                               2860                 :                : 
                               2861                 :                :         /* Truncate name if it's overlength */
 8058                          2862                 :         128246 :         truncate_identifier(curname, strlen(curname), false);
                               2863                 :                : 
                               2864                 :                :         /*
                               2865                 :                :          * Finished isolating current name --- add it to list
                               2866                 :                :          */
 8749                          2867                 :         128246 :         *namelist = lappend(*namelist, curname);
                               2868                 :                : 
                               2869                 :                :         /* Loop back if we didn't reach end of string */
                               2870         [ +  + ]:         128246 :     } while (!done);
                               2871                 :                : 
                               2872                 :          76310 :     return true;
                               2873                 :                : }
                               2874                 :                : 
                               2875                 :                : 
                               2876                 :                : /*
                               2877                 :                :  * SplitDirectoriesString --- parse a string containing file/directory names
                               2878                 :                :  *
                               2879                 :                :  * This works fine on file names too; the function name is historical.
                               2880                 :                :  *
                               2881                 :                :  * This is similar to SplitIdentifierString, except that the parsing
                               2882                 :                :  * rules are meant to handle pathnames instead of identifiers: there is
                               2883                 :                :  * no downcasing, embedded spaces are allowed, the max length is MAXPGPATH-1,
                               2884                 :                :  * and we apply canonicalize_path() to each extracted string.  Because of the
                               2885                 :                :  * last, the returned strings are separately palloc'd rather than being
                               2886                 :                :  * pointers into rawstring --- but we still scribble on rawstring.
                               2887                 :                :  *
                               2888                 :                :  * Inputs:
                               2889                 :                :  *  rawstring: the input string; must be modifiable!
                               2890                 :                :  *  separator: the separator punctuation expected between directories
                               2891                 :                :  *             (typically ',' or ';').  Whitespace may also appear around
                               2892                 :                :  *             directories.
                               2893                 :                :  * Outputs:
                               2894                 :                :  *  namelist: filled with a palloc'd list of directory names.
                               2895                 :                :  *            Caller should list_free_deep() this even on error return.
                               2896                 :                :  *
                               2897                 :                :  * Returns true if okay, false if there is a syntax error in the string.
                               2898                 :                :  *
                               2899                 :                :  * Note that an empty string is considered okay here.
                               2900                 :                :  */
                               2901                 :                : bool
 4965                          2902                 :            990 : SplitDirectoriesString(char *rawstring, char separator,
                               2903                 :                :                        List **namelist)
                               2904                 :                : {
                               2905                 :            990 :     char       *nextp = rawstring;
                               2906                 :            990 :     bool        done = false;
                               2907                 :                : 
                               2908                 :            990 :     *namelist = NIL;
                               2909                 :                : 
 3217                          2910         [ -  + ]:            990 :     while (scanner_isspace(*nextp))
 4965 tgl@sss.pgh.pa.us        2911                 :UBC           0 :         nextp++;                /* skip leading whitespace */
                               2912                 :                : 
 4965 tgl@sss.pgh.pa.us        2913         [ -  + ]:CBC         990 :     if (*nextp == '\0')
  131 tgl@sss.pgh.pa.us        2914                 :UNC           0 :         return true;            /* empty string represents empty list */
                               2915                 :                : 
                               2916                 :                :     /* At the top of the loop, we are at start of a new directory. */
                               2917                 :                :     do
                               2918                 :                :     {
                               2919                 :                :         char       *curname;
                               2920                 :                :         char       *endp;
                               2921                 :                : 
 3736 peter_e@gmx.net          2922         [ -  + ]:CBC        1000 :         if (*nextp == '"')
                               2923                 :                :         {
                               2924                 :                :             /* Quoted name --- collapse quote-quote pairs */
 4965 tgl@sss.pgh.pa.us        2925                 :UBC           0 :             curname = nextp + 1;
                               2926                 :                :             for (;;)
                               2927                 :                :             {
 3736 peter_e@gmx.net          2928                 :              0 :                 endp = strchr(nextp + 1, '"');
 4965 tgl@sss.pgh.pa.us        2929         [ #  # ]:              0 :                 if (endp == NULL)
 3189                          2930                 :              0 :                     return false;   /* mismatched quotes */
 3736 peter_e@gmx.net          2931         [ #  # ]:              0 :                 if (endp[1] != '"')
 4965 tgl@sss.pgh.pa.us        2932                 :              0 :                     break;      /* found end of quoted name */
                               2933                 :                :                 /* Collapse adjacent quotes into one quote, and look again */
                               2934                 :              0 :                 memmove(endp, endp + 1, strlen(endp));
                               2935                 :              0 :                 nextp = endp;
                               2936                 :                :             }
                               2937                 :                :             /* endp now points at the terminating quote */
                               2938                 :              0 :             nextp = endp + 1;
                               2939                 :                :         }
                               2940                 :                :         else
                               2941                 :                :         {
                               2942                 :                :             /* Unquoted name --- extends to separator or end of string */
 4938 tgl@sss.pgh.pa.us        2943                 :CBC        1000 :             curname = endp = nextp;
                               2944   [ +  +  +  + ]:          16532 :             while (*nextp && *nextp != separator)
                               2945                 :                :             {
                               2946                 :                :                 /* trailing whitespace should not be included in name */
 3217                          2947         [ +  - ]:          15532 :                 if (!scanner_isspace(*nextp))
 4938                          2948                 :          15532 :                     endp = nextp + 1;
 4965                          2949                 :          15532 :                 nextp++;
                               2950                 :                :             }
 4938                          2951         [ -  + ]:           1000 :             if (curname == endp)
 4965 tgl@sss.pgh.pa.us        2952                 :UBC           0 :                 return false;   /* empty unquoted name not allowed */
                               2953                 :                :         }
                               2954                 :                : 
 3217 tgl@sss.pgh.pa.us        2955         [ -  + ]:CBC        1000 :         while (scanner_isspace(*nextp))
 4965 tgl@sss.pgh.pa.us        2956                 :UBC           0 :             nextp++;            /* skip trailing whitespace */
                               2957                 :                : 
 4965 tgl@sss.pgh.pa.us        2958         [ +  + ]:CBC        1000 :         if (*nextp == separator)
                               2959                 :                :         {
                               2960                 :             10 :             nextp++;
 3217                          2961         [ +  + ]:             18 :             while (scanner_isspace(*nextp))
 4965                          2962                 :              8 :                 nextp++;        /* skip leading whitespace for next */
                               2963                 :                :             /* we expect another name, so done remains false */
                               2964                 :                :         }
                               2965         [ +  - ]:            990 :         else if (*nextp == '\0')
                               2966                 :            990 :             done = true;
                               2967                 :                :         else
 4965 tgl@sss.pgh.pa.us        2968                 :UBC           0 :             return false;       /* invalid syntax */
                               2969                 :                : 
                               2970                 :                :         /* Now safe to overwrite separator with a null */
 4965 tgl@sss.pgh.pa.us        2971                 :CBC        1000 :         *endp = '\0';
                               2972                 :                : 
                               2973                 :                :         /* Truncate path if it's overlength */
                               2974         [ -  + ]:           1000 :         if (strlen(curname) >= MAXPGPATH)
 4965 tgl@sss.pgh.pa.us        2975                 :UBC           0 :             curname[MAXPGPATH - 1] = '\0';
                               2976                 :                : 
                               2977                 :                :         /*
                               2978                 :                :          * Finished isolating current name --- add it to list
                               2979                 :                :          */
 4965 tgl@sss.pgh.pa.us        2980                 :CBC        1000 :         curname = pstrdup(curname);
                               2981                 :           1000 :         canonicalize_path(curname);
                               2982                 :           1000 :         *namelist = lappend(*namelist, curname);
                               2983                 :                : 
                               2984                 :                :         /* Loop back if we didn't reach end of string */
                               2985         [ +  + ]:           1000 :     } while (!done);
                               2986                 :                : 
                               2987                 :            990 :     return true;
                               2988                 :                : }
                               2989                 :                : 
                               2990                 :                : 
                               2991                 :                : /*
                               2992                 :                :  * SplitGUCList --- parse a string containing identifiers or file names
                               2993                 :                :  *
                               2994                 :                :  * This is used to split the value of a GUC_LIST_QUOTE GUC variable, without
                               2995                 :                :  * presuming whether the elements will be taken as identifiers or file names.
                               2996                 :                :  * We assume the input has already been through flatten_set_variable_args(),
                               2997                 :                :  * so that we need never downcase (if appropriate, that was done already).
                               2998                 :                :  * Nor do we ever truncate, since we don't know the correct max length.
                               2999                 :                :  * We disallow embedded whitespace for simplicity (it shouldn't matter,
                               3000                 :                :  * because any embedded whitespace should have led to double-quoting).
                               3001                 :                :  * Otherwise the API is identical to SplitIdentifierString.
                               3002                 :                :  *
                               3003                 :                :  * XXX it's annoying to have so many copies of this string-splitting logic.
                               3004                 :                :  * However, it's not clear that having one function with a bunch of option
                               3005                 :                :  * flags would be much better.
                               3006                 :                :  *
                               3007                 :                :  * XXX there is a version of this function in src/bin/pg_dump/dumputils.c.
                               3008                 :                :  * Be sure to update that if you have to change this.
                               3009                 :                :  *
                               3010                 :                :  * Inputs:
                               3011                 :                :  *  rawstring: the input string; must be overwritable!  On return, it's
                               3012                 :                :  *             been modified to contain the separated identifiers.
                               3013                 :                :  *  separator: the separator punctuation expected between identifiers
                               3014                 :                :  *             (typically '.' or ',').  Whitespace may also appear around
                               3015                 :                :  *             identifiers.
                               3016                 :                :  * Outputs:
                               3017                 :                :  *  namelist: filled with a palloc'd list of pointers to identifiers within
                               3018                 :                :  *            rawstring.  Caller should list_free() this even on error return.
                               3019                 :                :  *
                               3020                 :                :  * Returns true if okay, false if there is a syntax error in the string.
                               3021                 :                :  */
                               3022                 :                : bool
 2784                          3023                 :           3800 : SplitGUCList(char *rawstring, char separator,
                               3024                 :                :              List **namelist)
                               3025                 :                : {
                               3026                 :           3800 :     char       *nextp = rawstring;
                               3027                 :           3800 :     bool        done = false;
                               3028                 :                : 
                               3029                 :           3800 :     *namelist = NIL;
                               3030                 :                : 
                               3031         [ -  + ]:           3800 :     while (scanner_isspace(*nextp))
 2784 tgl@sss.pgh.pa.us        3032                 :UBC           0 :         nextp++;                /* skip leading whitespace */
                               3033                 :                : 
 2784 tgl@sss.pgh.pa.us        3034         [ +  + ]:CBC        3800 :     if (*nextp == '\0')
  131 tgl@sss.pgh.pa.us        3035                 :GNC        2069 :         return true;            /* empty string represents empty list */
                               3036                 :                : 
                               3037                 :                :     /* At the top of the loop, we are at start of a new identifier. */
                               3038                 :                :     do
                               3039                 :                :     {
                               3040                 :                :         char       *curname;
                               3041                 :                :         char       *endp;
                               3042                 :                : 
 2784 tgl@sss.pgh.pa.us        3043         [ +  + ]:CBC        1792 :         if (*nextp == '"')
                               3044                 :                :         {
                               3045                 :                :             /* Quoted name --- collapse quote-quote pairs */
                               3046                 :             12 :             curname = nextp + 1;
                               3047                 :                :             for (;;)
                               3048                 :                :             {
                               3049                 :             18 :                 endp = strchr(nextp + 1, '"');
                               3050         [ -  + ]:             15 :                 if (endp == NULL)
 2784 tgl@sss.pgh.pa.us        3051                 :UBC           0 :                     return false;   /* mismatched quotes */
 2784 tgl@sss.pgh.pa.us        3052         [ +  + ]:CBC          15 :                 if (endp[1] != '"')
                               3053                 :             12 :                     break;      /* found end of quoted name */
                               3054                 :                :                 /* Collapse adjacent quotes into one quote, and look again */
                               3055                 :              3 :                 memmove(endp, endp + 1, strlen(endp));
                               3056                 :              3 :                 nextp = endp;
                               3057                 :                :             }
                               3058                 :                :             /* endp now points at the terminating quote */
                               3059                 :             12 :             nextp = endp + 1;
                               3060                 :                :         }
                               3061                 :                :         else
                               3062                 :                :         {
                               3063                 :                :             /* Unquoted name --- extends to separator or whitespace */
                               3064                 :           1780 :             curname = nextp;
                               3065   [ +  +  +  + ]:          14370 :             while (*nextp && *nextp != separator &&
                               3066         [ +  - ]:          12590 :                    !scanner_isspace(*nextp))
                               3067                 :          12590 :                 nextp++;
                               3068                 :           1780 :             endp = nextp;
                               3069         [ -  + ]:           1780 :             if (curname == nextp)
 2784 tgl@sss.pgh.pa.us        3070                 :UBC           0 :                 return false;   /* empty unquoted name not allowed */
                               3071                 :                :         }
                               3072                 :                : 
 2784 tgl@sss.pgh.pa.us        3073         [ -  + ]:CBC        1792 :         while (scanner_isspace(*nextp))
 2784 tgl@sss.pgh.pa.us        3074                 :UBC           0 :             nextp++;            /* skip trailing whitespace */
                               3075                 :                : 
 2784 tgl@sss.pgh.pa.us        3076         [ +  + ]:CBC        1792 :         if (*nextp == separator)
                               3077                 :                :         {
                               3078                 :             61 :             nextp++;
                               3079         [ +  + ]:            118 :             while (scanner_isspace(*nextp))
                               3080                 :             57 :                 nextp++;        /* skip leading whitespace for next */
                               3081                 :                :             /* we expect another name, so done remains false */
                               3082                 :                :         }
                               3083         [ +  - ]:           1731 :         else if (*nextp == '\0')
                               3084                 :           1731 :             done = true;
                               3085                 :                :         else
 2784 tgl@sss.pgh.pa.us        3086                 :UBC           0 :             return false;       /* invalid syntax */
                               3087                 :                : 
                               3088                 :                :         /* Now safe to overwrite separator with a null */
 2784 tgl@sss.pgh.pa.us        3089                 :CBC        1792 :         *endp = '\0';
                               3090                 :                : 
                               3091                 :                :         /*
                               3092                 :                :          * Finished isolating current name --- add it to list
                               3093                 :                :          */
                               3094                 :           1792 :         *namelist = lappend(*namelist, curname);
                               3095                 :                : 
                               3096                 :                :         /* Loop back if we didn't reach end of string */
                               3097         [ +  + ]:           1792 :     } while (!done);
                               3098                 :                : 
                               3099                 :           1731 :     return true;
                               3100                 :                : }
                               3101                 :                : 
                               3102                 :                : /*
                               3103                 :                :  * appendStringInfoText
                               3104                 :                :  *
                               3105                 :                :  * Append a text to str.
                               3106                 :                :  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
                               3107                 :                :  */
                               3108                 :                : static void
 7559 bruce@momjian.us         3109                 :        1087956 : appendStringInfoText(StringInfo str, const text *t)
                               3110                 :                : {
 6749 tgl@sss.pgh.pa.us        3111   [ -  +  -  -  :        1087956 :     appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
                                     -  -  -  -  +  
                                           +  +  + ]
 7559 bruce@momjian.us         3112                 :        1087956 : }
                               3113                 :                : 
                               3114                 :                : /*
                               3115                 :                :  * replace_text
                               3116                 :                :  * replace all occurrences of 'old_sub_str' in 'orig_str'
                               3117                 :                :  * with 'new_sub_str' to form 'new_str'
                               3118                 :                :  *
                               3119                 :                :  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
                               3120                 :                :  * otherwise returns 'new_str'
                               3121                 :                :  */
                               3122                 :                : Datum
 8606                          3123                 :            810 : replace_text(PG_FUNCTION_ARGS)
                               3124                 :                : {
 6749 tgl@sss.pgh.pa.us        3125                 :            810 :     text       *src_text = PG_GETARG_TEXT_PP(0);
                               3126                 :            810 :     text       *from_sub_text = PG_GETARG_TEXT_PP(1);
                               3127                 :            810 :     text       *to_sub_text = PG_GETARG_TEXT_PP(2);
                               3128                 :                :     int         src_text_len;
                               3129                 :                :     int         from_sub_text_len;
                               3130                 :                :     TextPositionState state;
                               3131                 :                :     text       *ret_text;
                               3132                 :                :     int         chunk_len;
                               3133                 :                :     char       *curr_ptr;
                               3134                 :                :     char       *start_ptr;
                               3135                 :                :     StringInfoData str;
                               3136                 :                :     bool        found;
                               3137                 :                : 
 2606 heikki.linnakangas@i     3138   [ -  +  -  -  :            810 :     src_text_len = VARSIZE_ANY_EXHDR(src_text);
                                     -  -  -  -  +  
                                                 + ]
                               3139   [ -  +  -  -  :            810 :     from_sub_text_len = VARSIZE_ANY_EXHDR(from_sub_text);
                                     -  -  -  -  -  
                                                 + ]
                               3140                 :                : 
                               3141                 :                :     /* Return unmodified source string if empty source or pattern */
 6814 tgl@sss.pgh.pa.us        3142   [ +  -  -  + ]:            810 :     if (src_text_len < 1 || from_sub_text_len < 1)
                               3143                 :                :     {
 6814 tgl@sss.pgh.pa.us        3144                 :UBC           0 :         PG_RETURN_TEXT_P(src_text);
                               3145                 :                :     }
                               3146                 :                : 
 2550 peter@eisentraut.org     3147                 :CBC         810 :     text_position_setup(src_text, from_sub_text, PG_GET_COLLATION(), &state);
                               3148                 :                : 
 2606 heikki.linnakangas@i     3149                 :            810 :     found = text_position_next(&state);
                               3150                 :                : 
                               3151                 :                :     /* When the from_sub_text is not found, there is nothing to do. */
                               3152         [ +  + ]:            810 :     if (!found)
                               3153                 :                :     {
 7099 tgl@sss.pgh.pa.us        3154                 :            169 :         text_position_cleanup(&state);
 7559 bruce@momjian.us         3155                 :            169 :         PG_RETURN_TEXT_P(src_text);
                               3156                 :                :     }
 2606 heikki.linnakangas@i     3157                 :            641 :     curr_ptr = text_position_get_match_ptr(&state);
 6749 tgl@sss.pgh.pa.us        3158         [ +  + ]:            641 :     start_ptr = VARDATA_ANY(src_text);
                               3159                 :                : 
 7319 neilc@samurai.com        3160                 :            641 :     initStringInfo(&str);
                               3161                 :                : 
                               3162                 :                :     do
                               3163                 :                :     {
 6814 tgl@sss.pgh.pa.us        3164         [ -  + ]:           3744 :         CHECK_FOR_INTERRUPTS();
                               3165                 :                : 
                               3166                 :                :         /* copy the data skipped over by last text_position_next() */
 2606 heikki.linnakangas@i     3167                 :           3744 :         chunk_len = curr_ptr - start_ptr;
 7067 tgl@sss.pgh.pa.us        3168                 :           3744 :         appendBinaryStringInfo(&str, start_ptr, chunk_len);
                               3169                 :                : 
 7319 neilc@samurai.com        3170                 :           3744 :         appendStringInfoText(&str, to_sub_text);
                               3171                 :                : 
  387 peter@eisentraut.org     3172                 :           3744 :         start_ptr = curr_ptr + state.last_match_len;
                               3173                 :                : 
 2606 heikki.linnakangas@i     3174                 :           3744 :         found = text_position_next(&state);
                               3175         [ +  + ]:           3744 :         if (found)
                               3176                 :           3103 :             curr_ptr = text_position_get_match_ptr(&state);
                               3177                 :                :     }
                               3178         [ +  + ]:           3744 :     while (found);
                               3179                 :                : 
                               3180                 :                :     /* copy trailing data */
 6749 tgl@sss.pgh.pa.us        3181   [ -  +  -  -  :            641 :     chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
                                     -  -  -  -  +  
                                                 + ]
 7067                          3182                 :            641 :     appendBinaryStringInfo(&str, start_ptr, chunk_len);
                               3183                 :                : 
 7099                          3184                 :            641 :     text_position_cleanup(&state);
                               3185                 :                : 
 6564                          3186                 :            641 :     ret_text = cstring_to_text_with_len(str.data, str.len);
 7319 neilc@samurai.com        3187                 :            641 :     pfree(str.data);
                               3188                 :                : 
 8606 bruce@momjian.us         3189                 :            641 :     PG_RETURN_TEXT_P(ret_text);
                               3190                 :                : }
                               3191                 :                : 
                               3192                 :                : /*
                               3193                 :                :  * check_replace_text_has_escape
                               3194                 :                :  *
                               3195                 :                :  * Returns 0 if text contains no backslashes that need processing.
                               3196                 :                :  * Returns 1 if text contains backslashes, but not regexp submatch specifiers.
                               3197                 :                :  * Returns 2 if text contains regexp submatch specifiers (\1 .. \9).
                               3198                 :                :  */
                               3199                 :                : static int
 1679 tgl@sss.pgh.pa.us        3200                 :           9417 : check_replace_text_has_escape(const text *replace_text)
                               3201                 :                : {
                               3202                 :           9417 :     int         result = 0;
 6749                          3203         [ -  + ]:           9417 :     const char *p = VARDATA_ANY(replace_text);
                               3204   [ -  +  -  -  :           9417 :     const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
                                     -  -  -  -  -  
                                                 + ]
                               3205                 :                : 
 1679                          3206         [ +  + ]:          18856 :     while (p < p_end)
                               3207                 :                :     {
                               3208                 :                :         /* Find next escape char, if any. */
                               3209                 :           8834 :         p = memchr(p, '\\', p_end - p);
                               3210         [ +  + ]:           8834 :         if (p == NULL)
                               3211                 :           8410 :             break;
                               3212                 :            424 :         p++;
                               3213                 :                :         /* Note: a backslash at the end doesn't require extra processing. */
                               3214         [ +  - ]:            424 :         if (p < p_end)
                               3215                 :                :         {
                               3216   [ +  +  +  + ]:            424 :             if (*p >= '1' && *p <= '9')
                               3217                 :            402 :                 return 2;       /* Found a submatch specifier, so done */
                               3218                 :             22 :             result = 1;         /* Found some other sequence, keep looking */
                               3219                 :             22 :             p++;
                               3220                 :                :         }
                               3221                 :                :     }
                               3222                 :           9015 :     return result;
                               3223                 :                : }
                               3224                 :                : 
                               3225                 :                : /*
                               3226                 :                :  * appendStringInfoRegexpSubstr
                               3227                 :                :  *
                               3228                 :                :  * Append replace_text to str, substituting regexp back references for
                               3229                 :                :  * \n escapes.  start_ptr is the start of the match in the source string,
                               3230                 :                :  * at logical character position data_pos.
                               3231                 :                :  */
                               3232                 :                : static void
 7553 bruce@momjian.us         3233                 :            127 : appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
                               3234                 :                :                              regmatch_t *pmatch,
                               3235                 :                :                              char *start_ptr, int data_pos)
                               3236                 :                : {
 6749 tgl@sss.pgh.pa.us        3237         [ -  + ]:            127 :     const char *p = VARDATA_ANY(replace_text);
                               3238   [ -  +  -  -  :            127 :     const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
                                     -  -  -  -  -  
                                                 + ]
                               3239                 :                : 
 1679                          3240         [ +  + ]:            305 :     while (p < p_end)
                               3241                 :                :     {
 7453                          3242                 :            268 :         const char *chunk_start = p;
                               3243                 :                :         int         so;
                               3244                 :                :         int         eo;
                               3245                 :                : 
                               3246                 :                :         /* Find next escape char, if any. */
 1679                          3247                 :            268 :         p = memchr(p, '\\', p_end - p);
                               3248         [ +  + ]:            268 :         if (p == NULL)
                               3249                 :             87 :             p = p_end;
                               3250                 :                : 
                               3251                 :                :         /* Copy the text we just scanned over, if any. */
 7453                          3252         [ +  + ]:            268 :         if (p > chunk_start)
                               3253                 :            159 :             appendBinaryStringInfo(str, chunk_start, p - chunk_start);
                               3254                 :                : 
                               3255                 :                :         /* Done if at end of string, else advance over escape char. */
                               3256         [ +  + ]:            268 :         if (p >= p_end)
 7553 bruce@momjian.us         3257                 :             87 :             break;
                               3258                 :            181 :         p++;
                               3259                 :                : 
 7453 tgl@sss.pgh.pa.us        3260         [ +  + ]:            181 :         if (p >= p_end)
                               3261                 :                :         {
                               3262                 :                :             /* Escape at very end of input.  Treat same as unexpected char */
                               3263                 :              3 :             appendStringInfoChar(str, '\\');
                               3264                 :              3 :             break;
                               3265                 :                :         }
                               3266                 :                : 
 7553 bruce@momjian.us         3267   [ +  +  +  + ]:            178 :         if (*p >= '1' && *p <= '9')
                               3268                 :            148 :         {
                               3269                 :                :             /* Use the back reference of regexp. */
 7456                          3270                 :            148 :             int         idx = *p - '0';
                               3271                 :                : 
 7553                          3272                 :            148 :             so = pmatch[idx].rm_so;
                               3273                 :            148 :             eo = pmatch[idx].rm_eo;
                               3274                 :            148 :             p++;
                               3275                 :                :         }
                               3276         [ +  + ]:             30 :         else if (*p == '&')
                               3277                 :                :         {
                               3278                 :                :             /* Use the entire matched string. */
                               3279                 :              9 :             so = pmatch[0].rm_so;
                               3280                 :              9 :             eo = pmatch[0].rm_eo;
                               3281                 :              9 :             p++;
                               3282                 :                :         }
 7453 tgl@sss.pgh.pa.us        3283         [ +  + ]:             21 :         else if (*p == '\\')
                               3284                 :                :         {
                               3285                 :                :             /* \\ means transfer one \ to output. */
                               3286                 :             18 :             appendStringInfoChar(str, '\\');
                               3287                 :             18 :             p++;
                               3288                 :             18 :             continue;
                               3289                 :                :         }
                               3290                 :                :         else
                               3291                 :                :         {
                               3292                 :                :             /*
                               3293                 :                :              * If escape char is not followed by any expected char, just treat
                               3294                 :                :              * it as ordinary data to copy.  (XXX would it be better to throw
                               3295                 :                :              * an error?)
                               3296                 :                :              */
                               3297                 :              3 :             appendStringInfoChar(str, '\\');
                               3298                 :              3 :             continue;
                               3299                 :                :         }
                               3300                 :                : 
 1679                          3301   [ +  -  +  - ]:            157 :         if (so >= 0 && eo >= 0)
                               3302                 :                :         {
                               3303                 :                :             /*
                               3304                 :                :              * Copy the text that is back reference of regexp.  Note so and eo
                               3305                 :                :              * are counted in characters not bytes.
                               3306                 :                :              */
                               3307                 :                :             char       *chunk_start;
                               3308                 :                :             int         chunk_len;
                               3309                 :                : 
 7067                          3310         [ -  + ]:            157 :             Assert(so >= data_pos);
                               3311                 :            157 :             chunk_start = start_ptr;
                               3312                 :            157 :             chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
                               3313                 :            157 :             chunk_len = charlen_to_bytelen(chunk_start, eo - so);
                               3314                 :            157 :             appendBinaryStringInfo(str, chunk_start, chunk_len);
                               3315                 :                :         }
                               3316                 :                :     }
 7553 bruce@momjian.us         3317                 :            127 : }
                               3318                 :                : 
                               3319                 :                : /*
                               3320                 :                :  * replace_text_regexp
                               3321                 :                :  *
                               3322                 :                :  * replace substring(s) in src_text that match pattern with replace_text.
                               3323                 :                :  * The replace_text can contain backslash markers to substitute
                               3324                 :                :  * (parts of) the matched text.
                               3325                 :                :  *
                               3326                 :                :  * cflags: regexp compile flags.
                               3327                 :                :  * collation: collation to use.
                               3328                 :                :  * search_start: the character (not byte) offset in src_text at which to
                               3329                 :                :  * begin searching.
                               3330                 :                :  * n: if 0, replace all matches; if > 0, replace only the N'th match.
                               3331                 :                :  */
                               3332                 :                : text *
 1679 tgl@sss.pgh.pa.us        3333                 :           9417 : replace_text_regexp(text *src_text, text *pattern_text,
                               3334                 :                :                     text *replace_text,
                               3335                 :                :                     int cflags, Oid collation,
                               3336                 :                :                     int search_start, int n)
                               3337                 :                : {
                               3338                 :                :     text       *ret_text;
                               3339                 :                :     regex_t    *re;
 6749                          3340   [ -  +  -  -  :           9417 :     int         src_text_len = VARSIZE_ANY_EXHDR(src_text);
                                     -  -  -  -  +  
                                                 + ]
 1685                          3341                 :           9417 :     int         nmatches = 0;
                               3342                 :                :     StringInfoData buf;
                               3343                 :                :     regmatch_t  pmatch[10];     /* main match, plus \1 to \9 */
 1679                          3344                 :           9417 :     int         nmatch = lengthof(pmatch);
                               3345                 :                :     pg_wchar   *data;
                               3346                 :                :     size_t      data_len;
                               3347                 :                :     int         data_pos;
                               3348                 :                :     char       *start_ptr;
                               3349                 :                :     int         escape_status;
                               3350                 :                : 
 7319 neilc@samurai.com        3351                 :           9417 :     initStringInfo(&buf);
                               3352                 :                : 
                               3353                 :                :     /* Convert data string to wide characters. */
 7553 bruce@momjian.us         3354                 :           9417 :     data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
 6749 tgl@sss.pgh.pa.us        3355         [ +  + ]:           9417 :     data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
                               3356                 :                : 
                               3357                 :                :     /* Check whether replace_text has escapes, especially regexp submatches. */
 1679                          3358                 :           9417 :     escape_status = check_replace_text_has_escape(replace_text);
                               3359                 :                : 
                               3360                 :                :     /* If no regexp submatches, we can use REG_NOSUB. */
                               3361         [ +  + ]:           9417 :     if (escape_status < 2)
                               3362                 :                :     {
                               3363                 :           9015 :         cflags |= REG_NOSUB;
                               3364                 :                :         /* Also tell pg_regexec we only want the whole-match location. */
                               3365                 :           9015 :         nmatch = 1;
                               3366                 :                :     }
                               3367                 :                : 
                               3368                 :                :     /* Prepare the regexp. */
                               3369                 :           9417 :     re = RE_compile_and_cache(pattern_text, cflags, collation);
                               3370                 :                : 
                               3371                 :                :     /* start_ptr points to the data_pos'th character of src_text */
 6749                          3372         [ +  + ]:           9417 :     start_ptr = (char *) VARDATA_ANY(src_text);
 7067                          3373                 :           9417 :     data_pos = 0;
                               3374                 :                : 
                               3375         [ +  + ]:          12653 :     while (search_start <= data_len)
                               3376                 :                :     {
                               3377                 :                :         int         regexec_result;
                               3378                 :                : 
                               3379         [ -  + ]:          12650 :         CHECK_FOR_INTERRUPTS();
                               3380                 :                : 
 7553 bruce@momjian.us         3381                 :          12650 :         regexec_result = pg_regexec(re,
                               3382                 :                :                                     data,
                               3383                 :                :                                     data_len,
                               3384                 :                :                                     search_start,
                               3385                 :                :                                     NULL,   /* no details */
                               3386                 :                :                                     nmatch,
                               3387                 :                :                                     pmatch,
                               3388                 :                :                                     0);
                               3389                 :                : 
 7319 neilc@samurai.com        3390         [ +  + ]:          12650 :         if (regexec_result == REG_NOMATCH)
                               3391                 :           8379 :             break;
                               3392                 :                : 
                               3393         [ -  + ]:           4271 :         if (regexec_result != REG_OKAY)
                               3394                 :                :         {
                               3395                 :                :             char        errMsg[100];
                               3396                 :                : 
 7553 bruce@momjian.us         3397                 :UBC           0 :             pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
                               3398         [ #  # ]:              0 :             ereport(ERROR,
                               3399                 :                :                     (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
                               3400                 :                :                      errmsg("regular expression failed: %s", errMsg)));
                               3401                 :                :         }
                               3402                 :                : 
                               3403                 :                :         /*
                               3404                 :                :          * Count matches, and decide whether to replace this match.
                               3405                 :                :          */
 1685 tgl@sss.pgh.pa.us        3406                 :CBC        4271 :         nmatches++;
                               3407   [ +  +  +  + ]:           4271 :         if (n > 0 && nmatches != n)
                               3408                 :                :         {
                               3409                 :                :             /*
                               3410                 :                :              * No, so advance search_start, but not start_ptr/data_pos. (Thus,
                               3411                 :                :              * we treat the matched text as if it weren't matched, and copy it
                               3412                 :                :              * to the output later.)
                               3413                 :                :              */
                               3414                 :             30 :             search_start = pmatch[0].rm_eo;
                               3415         [ -  + ]:             30 :             if (pmatch[0].rm_so == pmatch[0].rm_eo)
 1685 tgl@sss.pgh.pa.us        3416                 :UBC           0 :                 search_start++;
 1685 tgl@sss.pgh.pa.us        3417                 :CBC          30 :             continue;
                               3418                 :                :         }
                               3419                 :                : 
                               3420                 :                :         /*
                               3421                 :                :          * Copy the text to the left of the match position.  Note we are given
                               3422                 :                :          * character not byte indexes.
                               3423                 :                :          */
 7553 bruce@momjian.us         3424         [ +  + ]:           4241 :         if (pmatch[0].rm_so - data_pos > 0)
                               3425                 :                :         {
                               3426                 :                :             int         chunk_len;
                               3427                 :                : 
 7067 tgl@sss.pgh.pa.us        3428                 :           4153 :             chunk_len = charlen_to_bytelen(start_ptr,
                               3429                 :           4153 :                                            pmatch[0].rm_so - data_pos);
                               3430                 :           4153 :             appendBinaryStringInfo(&buf, start_ptr, chunk_len);
                               3431                 :                : 
                               3432                 :                :             /*
                               3433                 :                :              * Advance start_ptr over that text, to avoid multiple rescans of
                               3434                 :                :              * it if the replace_text contains multiple back-references.
                               3435                 :                :              */
                               3436                 :           4153 :             start_ptr += chunk_len;
                               3437                 :           4153 :             data_pos = pmatch[0].rm_so;
                               3438                 :                :         }
                               3439                 :                : 
                               3440                 :                :         /*
                               3441                 :                :          * Copy the replace_text, processing escapes if any are present.
                               3442                 :                :          */
 1679                          3443         [ +  + ]:           4241 :         if (escape_status > 0)
 7067                          3444                 :            127 :             appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
                               3445                 :                :                                          start_ptr, data_pos);
                               3446                 :                :         else
 7319 neilc@samurai.com        3447                 :           4114 :             appendStringInfoText(&buf, replace_text);
                               3448                 :                : 
                               3449                 :                :         /* Advance start_ptr and data_pos over the matched text. */
 7067 tgl@sss.pgh.pa.us        3450                 :           8482 :         start_ptr += charlen_to_bytelen(start_ptr,
                               3451                 :           4241 :                                         pmatch[0].rm_eo - data_pos);
                               3452                 :           4241 :         data_pos = pmatch[0].rm_eo;
                               3453                 :                : 
                               3454                 :                :         /*
                               3455                 :                :          * If we only want to replace one occurrence, we're done.
                               3456                 :                :          */
 1685                          3457         [ +  + ]:           4241 :         if (n > 0)
 7553 bruce@momjian.us         3458                 :           1035 :             break;
                               3459                 :                : 
                               3460                 :                :         /*
                               3461                 :                :          * Advance search position.  Normally we start the next search at the
                               3462                 :                :          * end of the previous match; but if the match was of zero length, we
                               3463                 :                :          * have to advance by one character, or we'd just find the same match
                               3464                 :                :          * again.
                               3465                 :                :          */
 7067 tgl@sss.pgh.pa.us        3466                 :           3206 :         search_start = data_pos;
 7553 bruce@momjian.us         3467         [ +  + ]:           3206 :         if (pmatch[0].rm_so == pmatch[0].rm_eo)
                               3468                 :              6 :             search_start++;
                               3469                 :                :     }
                               3470                 :                : 
                               3471                 :                :     /*
                               3472                 :                :      * Copy the text to the right of the last match.
                               3473                 :                :      */
                               3474         [ +  + ]:           9417 :     if (data_pos < data_len)
                               3475                 :                :     {
                               3476                 :                :         int         chunk_len;
                               3477                 :                : 
 6749 tgl@sss.pgh.pa.us        3478   [ -  +  -  -  :           8970 :         chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
                                     -  -  -  -  +  
                                                 + ]
 7067                          3479                 :           8970 :         appendBinaryStringInfo(&buf, start_ptr, chunk_len);
                               3480                 :                :     }
                               3481                 :                : 
 6564                          3482                 :           9417 :     ret_text = cstring_to_text_with_len(buf.data, buf.len);
 7319 neilc@samurai.com        3483                 :           9417 :     pfree(buf.data);
 7553 bruce@momjian.us         3484                 :           9417 :     pfree(data);
                               3485                 :                : 
 7453 tgl@sss.pgh.pa.us        3486                 :           9417 :     return ret_text;
                               3487                 :                : }
                               3488                 :                : 
                               3489                 :                : /*
                               3490                 :                :  * split_part
                               3491                 :                :  * parse input string based on provided field separator
                               3492                 :                :  * return N'th item (1 based, negative counts from end)
                               3493                 :                :  */
                               3494                 :                : Datum
 2020                          3495                 :             75 : split_part(PG_FUNCTION_ARGS)
                               3496                 :                : {
 6749                          3497                 :             75 :     text       *inputstring = PG_GETARG_TEXT_PP(0);
                               3498                 :             75 :     text       *fldsep = PG_GETARG_TEXT_PP(1);
 8606 bruce@momjian.us         3499                 :             75 :     int         fldnum = PG_GETARG_INT32(2);
                               3500                 :                :     int         inputstring_len;
                               3501                 :                :     int         fldsep_len;
                               3502                 :                :     TextPositionState state;
                               3503                 :                :     char       *start_ptr;
                               3504                 :                :     char       *end_ptr;
                               3505                 :                :     text       *result_text;
                               3506                 :                :     bool        found;
                               3507                 :                : 
                               3508                 :                :     /* field number is 1 based */
 1948 tgl@sss.pgh.pa.us        3509         [ +  + ]:             75 :     if (fldnum == 0)
 8079                          3510         [ +  - ]:              3 :         ereport(ERROR,
                               3511                 :                :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               3512                 :                :                  errmsg("field position must not be zero")));
                               3513                 :                : 
 2606 heikki.linnakangas@i     3514   [ -  +  -  -  :             72 :     inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
                                     -  -  -  -  +  
                                                 + ]
                               3515   [ -  +  -  -  :             72 :     fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
                                     -  -  -  -  -  
                                                 + ]
                               3516                 :                : 
                               3517                 :                :     /* return empty string for empty input string */
 8606 bruce@momjian.us         3518         [ +  + ]:             72 :     if (inputstring_len < 1)
 6564 tgl@sss.pgh.pa.us        3519                 :              6 :         PG_RETURN_TEXT_P(cstring_to_text(""));
                               3520                 :                : 
                               3521                 :                :     /* handle empty field separator */
 8606 bruce@momjian.us         3522         [ +  + ]:             66 :     if (fldsep_len < 1)
                               3523                 :                :     {
                               3524                 :                :         /* if first or last field, return input string, else empty string */
 1948 tgl@sss.pgh.pa.us        3525   [ +  +  +  + ]:             12 :         if (fldnum == 1 || fldnum == -1)
 8606 bruce@momjian.us         3526                 :              6 :             PG_RETURN_TEXT_P(inputstring);
                               3527                 :                :         else
 6564 tgl@sss.pgh.pa.us        3528                 :              6 :             PG_RETURN_TEXT_P(cstring_to_text(""));
                               3529                 :                :     }
                               3530                 :                : 
                               3531                 :                :     /* find the first field separator */
 2550 peter@eisentraut.org     3532                 :             54 :     text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state);
                               3533                 :                : 
 2606 heikki.linnakangas@i     3534                 :             54 :     found = text_position_next(&state);
                               3535                 :                : 
                               3536                 :                :     /* special case if fldsep not found at all */
                               3537         [ +  + ]:             54 :     if (!found)
                               3538                 :                :     {
 7099 tgl@sss.pgh.pa.us        3539                 :             12 :         text_position_cleanup(&state);
                               3540                 :                :         /* if first or last field, return input string, else empty string */
 1948                          3541   [ +  +  +  + ]:             12 :         if (fldnum == 1 || fldnum == -1)
 8606 bruce@momjian.us         3542                 :              6 :             PG_RETURN_TEXT_P(inputstring);
                               3543                 :                :         else
 6564 tgl@sss.pgh.pa.us        3544                 :              6 :             PG_RETURN_TEXT_P(cstring_to_text(""));
                               3545                 :                :     }
                               3546                 :                : 
                               3547                 :                :     /*
                               3548                 :                :      * take care of a negative field number (i.e. count from the right) by
                               3549                 :                :      * converting to a positive field number; we need total number of fields
                               3550                 :                :      */
 1948                          3551         [ +  + ]:             42 :     if (fldnum < 0)
                               3552                 :                :     {
                               3553                 :                :         /* we found a fldsep, so there are at least two fields */
                               3554                 :             21 :         int         numfields = 2;
                               3555                 :                : 
                               3556         [ +  + ]:             27 :         while (text_position_next(&state))
                               3557                 :              6 :             numfields++;
                               3558                 :                : 
                               3559                 :                :         /* special case of last field does not require an extra pass */
                               3560         [ +  + ]:             21 :         if (fldnum == -1)
                               3561                 :                :         {
  387 peter@eisentraut.org     3562                 :             12 :             start_ptr = text_position_get_match_ptr(&state) + state.last_match_len;
 1948 tgl@sss.pgh.pa.us        3563         [ +  + ]:             12 :             end_ptr = VARDATA_ANY(inputstring) + inputstring_len;
                               3564                 :             12 :             text_position_cleanup(&state);
                               3565                 :             12 :             PG_RETURN_TEXT_P(cstring_to_text_with_len(start_ptr,
                               3566                 :                :                                                       end_ptr - start_ptr));
                               3567                 :                :         }
                               3568                 :                : 
                               3569                 :                :         /* else, convert fldnum to positive notation */
                               3570                 :              9 :         fldnum += numfields + 1;
                               3571                 :                : 
                               3572                 :                :         /* if nonexistent field, return empty string */
                               3573         [ +  + ]:              9 :         if (fldnum <= 0)
                               3574                 :                :         {
                               3575                 :              3 :             text_position_cleanup(&state);
                               3576                 :              3 :             PG_RETURN_TEXT_P(cstring_to_text(""));
                               3577                 :                :         }
                               3578                 :                : 
                               3579                 :                :         /* reset to pointing at first match, but now with positive fldnum */
                               3580                 :              6 :         text_position_reset(&state);
                               3581                 :              6 :         found = text_position_next(&state);
                               3582         [ -  + ]:              6 :         Assert(found);
                               3583                 :                :     }
                               3584                 :                : 
                               3585                 :                :     /* identify bounds of first field */
                               3586         [ +  + ]:             27 :     start_ptr = VARDATA_ANY(inputstring);
 2606 heikki.linnakangas@i     3587                 :             27 :     end_ptr = text_position_get_match_ptr(&state);
                               3588                 :                : 
                               3589   [ +  +  +  + ]:             51 :     while (found && --fldnum > 0)
                               3590                 :                :     {
                               3591                 :                :         /* identify bounds of next field */
  387 peter@eisentraut.org     3592                 :             24 :         start_ptr = end_ptr + state.last_match_len;
 2606 heikki.linnakangas@i     3593                 :             24 :         found = text_position_next(&state);
                               3594         [ +  + ]:             24 :         if (found)
                               3595                 :              9 :             end_ptr = text_position_get_match_ptr(&state);
                               3596                 :                :     }
                               3597                 :                : 
 7099 tgl@sss.pgh.pa.us        3598                 :             27 :     text_position_cleanup(&state);
                               3599                 :                : 
                               3600         [ +  + ]:             27 :     if (fldnum > 0)
                               3601                 :                :     {
                               3602                 :                :         /* N'th field separator not found */
                               3603                 :                :         /* if last field requested, return it, else empty string */
                               3604         [ +  + ]:             15 :         if (fldnum == 1)
                               3605                 :                :         {
 2606 heikki.linnakangas@i     3606         [ +  + ]:             12 :             int         last_len = start_ptr - VARDATA_ANY(inputstring);
                               3607                 :                : 
                               3608                 :             12 :             result_text = cstring_to_text_with_len(start_ptr,
                               3609                 :                :                                                    inputstring_len - last_len);
                               3610                 :                :         }
                               3611                 :                :         else
 6564 tgl@sss.pgh.pa.us        3612                 :              3 :             result_text = cstring_to_text("");
                               3613                 :                :     }
                               3614                 :                :     else
                               3615                 :                :     {
                               3616                 :                :         /* non-last field requested */
 2606 heikki.linnakangas@i     3617                 :             12 :         result_text = cstring_to_text_with_len(start_ptr, end_ptr - start_ptr);
                               3618                 :                :     }
                               3619                 :                : 
 7099 tgl@sss.pgh.pa.us        3620                 :             27 :     PG_RETURN_TEXT_P(result_text);
                               3621                 :                : }
                               3622                 :                : 
                               3623                 :                : /*
                               3624                 :                :  * Convenience function to return true when two text params are equal.
                               3625                 :                :  */
                               3626                 :                : static bool
 2550 peter@eisentraut.org     3627                 :            192 : text_isequal(text *txt1, text *txt2, Oid collid)
                               3628                 :                : {
                               3629                 :            192 :     return DatumGetBool(DirectFunctionCall2Coll(texteq,
                               3630                 :                :                                                 collid,
                               3631                 :                :                                                 PointerGetDatum(txt1),
                               3632                 :                :                                                 PointerGetDatum(txt2)));
                               3633                 :                : }
                               3634                 :                : 
                               3635                 :                : /*
                               3636                 :                :  * text_to_array
                               3637                 :                :  * parse input string and return text array of elements,
                               3638                 :                :  * based on provided field separator
                               3639                 :                :  */
                               3640                 :                : Datum
 8297 tgl@sss.pgh.pa.us        3641                 :             85 : text_to_array(PG_FUNCTION_ARGS)
                               3642                 :                : {
                               3643                 :                :     SplitTextOutputData tstate;
                               3644                 :                : 
                               3645                 :                :     /* For array output, tstate should start as all zeroes */
 2020                          3646                 :             85 :     memset(&tstate, 0, sizeof(tstate));
                               3647                 :                : 
                               3648         [ +  + ]:             85 :     if (!split_text(fcinfo, &tstate))
                               3649                 :              3 :         PG_RETURN_NULL();
                               3650                 :                : 
                               3651         [ +  + ]:             82 :     if (tstate.astate == NULL)
                               3652                 :              3 :         PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID));
                               3653                 :                : 
 1295 peter@eisentraut.org     3654                 :             79 :     PG_RETURN_DATUM(makeArrayResult(tstate.astate,
                               3655                 :                :                                     CurrentMemoryContext));
                               3656                 :                : }
                               3657                 :                : 
                               3658                 :                : /*
                               3659                 :                :  * text_to_array_null
                               3660                 :                :  * parse input string and return text array of elements,
                               3661                 :                :  * based on provided field separator and null string
                               3662                 :                :  *
                               3663                 :                :  * This is a separate entry point only to prevent the regression tests from
                               3664                 :                :  * complaining about different argument sets for the same internal function.
                               3665                 :                :  */
                               3666                 :                : Datum
 5696 tgl@sss.pgh.pa.us        3667                 :             30 : text_to_array_null(PG_FUNCTION_ARGS)
                               3668                 :                : {
 2020                          3669                 :             30 :     return text_to_array(fcinfo);
                               3670                 :                : }
                               3671                 :                : 
                               3672                 :                : /*
                               3673                 :                :  * text_to_table
                               3674                 :                :  * parse input string and return table of elements,
                               3675                 :                :  * based on provided field separator
                               3676                 :                :  */
                               3677                 :                : Datum
                               3678                 :             42 : text_to_table(PG_FUNCTION_ARGS)
                               3679                 :                : {
                               3680                 :             42 :     ReturnSetInfo *rsi = (ReturnSetInfo *) fcinfo->resultinfo;
                               3681                 :                :     SplitTextOutputData tstate;
                               3682                 :                : 
                               3683                 :             42 :     tstate.astate = NULL;
 1244 michael@paquier.xyz      3684                 :             42 :     InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC);
 1469                          3685                 :             42 :     tstate.tupstore = rsi->setResult;
                               3686                 :             42 :     tstate.tupdesc = rsi->setDesc;
                               3687                 :                : 
 2020 tgl@sss.pgh.pa.us        3688                 :             42 :     (void) split_text(fcinfo, &tstate);
                               3689                 :                : 
                               3690                 :             42 :     return (Datum) 0;
                               3691                 :                : }
                               3692                 :                : 
                               3693                 :                : /*
                               3694                 :                :  * text_to_table_null
                               3695                 :                :  * parse input string and return table of elements,
                               3696                 :                :  * based on provided field separator and null string
                               3697                 :                :  *
                               3698                 :                :  * This is a separate entry point only to prevent the regression tests from
                               3699                 :                :  * complaining about different argument sets for the same internal function.
                               3700                 :                :  */
                               3701                 :                : Datum
                               3702                 :             12 : text_to_table_null(PG_FUNCTION_ARGS)
                               3703                 :                : {
                               3704                 :             12 :     return text_to_table(fcinfo);
                               3705                 :                : }
                               3706                 :                : 
                               3707                 :                : /*
                               3708                 :                :  * Common code for text_to_array, text_to_array_null, text_to_table
                               3709                 :                :  * and text_to_table_null functions.
                               3710                 :                :  *
                               3711                 :                :  * These are not strict so we have to test for null inputs explicitly.
                               3712                 :                :  * Returns false if result is to be null, else returns true.
                               3713                 :                :  *
                               3714                 :                :  * Note that if the result is valid but empty (zero elements), we return
                               3715                 :                :  * without changing *tstate --- caller must handle that case, too.
                               3716                 :                :  */
                               3717                 :                : static bool
                               3718                 :            127 : split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate)
                               3719                 :                : {
                               3720                 :                :     text       *inputstring;
                               3721                 :                :     text       *fldsep;
                               3722                 :                :     text       *null_string;
                               3723                 :            127 :     Oid         collation = PG_GET_COLLATION();
                               3724                 :                :     int         inputstring_len;
                               3725                 :                :     int         fldsep_len;
                               3726                 :                :     char       *start_ptr;
                               3727                 :                :     text       *result_text;
                               3728                 :                : 
                               3729                 :                :     /* when input string is NULL, then result is NULL too */
 5696                          3730         [ +  + ]:            127 :     if (PG_ARGISNULL(0))
 2020                          3731                 :              6 :         return false;
                               3732                 :                : 
 5696                          3733                 :            121 :     inputstring = PG_GETARG_TEXT_PP(0);
                               3734                 :                : 
                               3735                 :                :     /* fldsep can be NULL */
                               3736         [ +  + ]:            121 :     if (!PG_ARGISNULL(1))
                               3737                 :            106 :         fldsep = PG_GETARG_TEXT_PP(1);
                               3738                 :                :     else
                               3739                 :             15 :         fldsep = NULL;
                               3740                 :                : 
                               3741                 :                :     /* null_string can be NULL or omitted */
                               3742   [ +  +  +  - ]:            121 :     if (PG_NARGS() > 2 && !PG_ARGISNULL(2))
                               3743                 :             42 :         null_string = PG_GETARG_TEXT_PP(2);
                               3744                 :                :     else
                               3745                 :             79 :         null_string = NULL;
                               3746                 :                : 
                               3747         [ +  + ]:            121 :     if (fldsep != NULL)
                               3748                 :                :     {
                               3749                 :                :         /*
                               3750                 :                :          * Normal case with non-null fldsep.  Use the text_position machinery
                               3751                 :                :          * to search for occurrences of fldsep.
                               3752                 :                :          */
                               3753                 :                :         TextPositionState state;
                               3754                 :                : 
 2606 heikki.linnakangas@i     3755   [ -  +  -  -  :            106 :         inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
                                     -  -  -  -  +  
                                                 + ]
                               3756   [ -  +  -  -  :            106 :         fldsep_len = VARSIZE_ANY_EXHDR(fldsep);
                                     -  -  -  -  -  
                                                 + ]
                               3757                 :                : 
                               3758                 :                :         /* return empty set for empty input string */
 5696 tgl@sss.pgh.pa.us        3759         [ +  + ]:            106 :         if (inputstring_len < 1)
 2020                          3760                 :             30 :             return true;
                               3761                 :                : 
                               3762                 :                :         /* empty field separator: return input string as a one-element set */
 5696                          3763         [ +  + ]:            100 :         if (fldsep_len < 1)
                               3764                 :                :         {
 2020                          3765                 :             24 :             split_text_accum_result(tstate, inputstring,
                               3766                 :                :                                     null_string, collation);
                               3767                 :             24 :             return true;
                               3768                 :                :         }
                               3769                 :                : 
                               3770                 :             76 :         text_position_setup(inputstring, fldsep, collation, &state);
                               3771                 :                : 
 5696                          3772         [ +  + ]:             76 :         start_ptr = VARDATA_ANY(inputstring);
                               3773                 :                : 
                               3774                 :                :         for (;;)
                               3775                 :            256 :         {
                               3776                 :                :             bool        found;
                               3777                 :                :             char       *end_ptr;
                               3778                 :                :             int         chunk_len;
                               3779                 :                : 
 2606 heikki.linnakangas@i     3780         [ -  + ]:            332 :             CHECK_FOR_INTERRUPTS();
                               3781                 :                : 
                               3782                 :            332 :             found = text_position_next(&state);
                               3783         [ +  + ]:            332 :             if (!found)
                               3784                 :                :             {
                               3785                 :                :                 /* fetch last field */
 5696 tgl@sss.pgh.pa.us        3786   [ -  +  -  -  :             76 :                 chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
                                     -  -  -  -  +  
                                                 + ]
 2606                          3787                 :             76 :                 end_ptr = NULL; /* not used, but some compilers complain */
                               3788                 :                :             }
                               3789                 :                :             else
                               3790                 :                :             {
                               3791                 :                :                 /* fetch non-last field */
      heikki.linnakangas@i     3792                 :            256 :                 end_ptr = text_position_get_match_ptr(&state);
                               3793                 :            256 :                 chunk_len = end_ptr - start_ptr;
                               3794                 :                :             }
                               3795                 :                : 
                               3796                 :                :             /* build a temp text datum to pass to split_text_accum_result */
 5696 tgl@sss.pgh.pa.us        3797                 :            332 :             result_text = cstring_to_text_with_len(start_ptr, chunk_len);
                               3798                 :                : 
                               3799                 :                :             /* stash away this field */
 2020                          3800                 :            332 :             split_text_accum_result(tstate, result_text,
                               3801                 :                :                                     null_string, collation);
                               3802                 :                : 
 5696                          3803                 :            332 :             pfree(result_text);
                               3804                 :                : 
 2606 heikki.linnakangas@i     3805         [ +  + ]:            332 :             if (!found)
 5696 tgl@sss.pgh.pa.us        3806                 :             76 :                 break;
                               3807                 :                : 
  387 peter@eisentraut.org     3808                 :            256 :             start_ptr = end_ptr + state.last_match_len;
                               3809                 :                :         }
                               3810                 :                : 
 5696 tgl@sss.pgh.pa.us        3811                 :             76 :         text_position_cleanup(&state);
                               3812                 :                :     }
                               3813                 :                :     else
                               3814                 :                :     {
                               3815                 :                :         const char *end_ptr;
                               3816                 :                : 
                               3817                 :                :         /*
                               3818                 :                :          * When fldsep is NULL, each character in the input string becomes a
                               3819                 :                :          * separate element in the result set.  The separator is effectively
                               3820                 :                :          * the space between characters.
                               3821                 :                :          */
                               3822   [ -  +  -  -  :             15 :         inputstring_len = VARSIZE_ANY_EXHDR(inputstring);
                                     -  -  -  -  -  
                                                 + ]
                               3823                 :                : 
                               3824         [ -  + ]:             15 :         start_ptr = VARDATA_ANY(inputstring);
   67 tmunro@postgresql.or     3825                 :             15 :         end_ptr = start_ptr + inputstring_len;
                               3826                 :                : 
 5696 tgl@sss.pgh.pa.us        3827         [ +  + ]:            126 :         while (inputstring_len > 0)
                               3828                 :                :         {
   67 tmunro@postgresql.or     3829                 :            111 :             int         chunk_len = pg_mblen_range(start_ptr, end_ptr);
                               3830                 :                : 
 5696 tgl@sss.pgh.pa.us        3831         [ -  + ]:            111 :             CHECK_FOR_INTERRUPTS();
                               3832                 :                : 
                               3833                 :                :             /* build a temp text datum to pass to split_text_accum_result */
                               3834                 :            111 :             result_text = cstring_to_text_with_len(start_ptr, chunk_len);
                               3835                 :                : 
                               3836                 :                :             /* stash away this field */
 2020                          3837                 :            111 :             split_text_accum_result(tstate, result_text,
                               3838                 :                :                                     null_string, collation);
                               3839                 :                : 
 5696                          3840                 :            111 :             pfree(result_text);
                               3841                 :                : 
                               3842                 :            111 :             start_ptr += chunk_len;
                               3843                 :            111 :             inputstring_len -= chunk_len;
                               3844                 :                :         }
                               3845                 :                :     }
                               3846                 :                : 
 2020                          3847                 :             91 :     return true;
                               3848                 :                : }
                               3849                 :                : 
                               3850                 :                : /*
                               3851                 :                :  * Add text item to result set (table or array).
                               3852                 :                :  *
                               3853                 :                :  * This is also responsible for checking to see if the item matches
                               3854                 :                :  * the null_string, in which case we should emit NULL instead.
                               3855                 :                :  */
                               3856                 :                : static void
                               3857                 :            467 : split_text_accum_result(SplitTextOutputData *tstate,
                               3858                 :                :                         text *field_value,
                               3859                 :                :                         text *null_string,
                               3860                 :                :                         Oid collation)
                               3861                 :                : {
                               3862                 :            467 :     bool        is_null = false;
                               3863                 :                : 
                               3864   [ +  +  +  + ]:            467 :     if (null_string && text_isequal(field_value, null_string, collation))
                               3865                 :             36 :         is_null = true;
                               3866                 :                : 
                               3867         [ +  + ]:            467 :     if (tstate->tupstore)
                               3868                 :                :     {
                               3869                 :                :         Datum       values[1];
                               3870                 :                :         bool        nulls[1];
                               3871                 :                : 
                               3872                 :            114 :         values[0] = PointerGetDatum(field_value);
                               3873                 :            114 :         nulls[0] = is_null;
                               3874                 :                : 
                               3875                 :            114 :         tuplestore_putvalues(tstate->tupstore,
                               3876                 :                :                              tstate->tupdesc,
                               3877                 :                :                              values,
                               3878                 :                :                              nulls);
                               3879                 :                :     }
                               3880                 :                :     else
                               3881                 :                :     {
                               3882                 :            353 :         tstate->astate = accumArrayResult(tstate->astate,
                               3883                 :                :                                           PointerGetDatum(field_value),
                               3884                 :                :                                           is_null,
                               3885                 :                :                                           TEXTOID,
                               3886                 :                :                                           CurrentMemoryContext);
                               3887                 :                :     }
 8297                          3888                 :            467 : }
                               3889                 :                : 
                               3890                 :                : /*
                               3891                 :                :  * array_to_text
                               3892                 :                :  * concatenate Cstring representation of input array elements
                               3893                 :                :  * using provided field separator
                               3894                 :                :  */
                               3895                 :                : Datum
                               3896                 :          39037 : array_to_text(PG_FUNCTION_ARGS)
                               3897                 :                : {
                               3898                 :          39037 :     ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
 6564                          3899                 :          39037 :     char       *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
                               3900                 :                : 
 5696                          3901                 :          39037 :     PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, NULL));
                               3902                 :                : }
                               3903                 :                : 
                               3904                 :                : /*
                               3905                 :                :  * array_to_text_null
                               3906                 :                :  * concatenate Cstring representation of input array elements
                               3907                 :                :  * using provided field separator and null string
                               3908                 :                :  *
                               3909                 :                :  * This version is not strict so we have to test for null inputs explicitly.
                               3910                 :                :  */
                               3911                 :                : Datum
                               3912                 :              6 : array_to_text_null(PG_FUNCTION_ARGS)
                               3913                 :                : {
                               3914                 :                :     ArrayType  *v;
                               3915                 :                :     char       *fldsep;
                               3916                 :                :     char       *null_string;
                               3917                 :                : 
                               3918                 :                :     /* returns NULL when first or second parameter is NULL */
                               3919   [ +  -  -  + ]:              6 :     if (PG_ARGISNULL(0) || PG_ARGISNULL(1))
 5696 tgl@sss.pgh.pa.us        3920                 :UBC           0 :         PG_RETURN_NULL();
                               3921                 :                : 
 5696 tgl@sss.pgh.pa.us        3922                 :CBC           6 :     v = PG_GETARG_ARRAYTYPE_P(0);
                               3923                 :              6 :     fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
                               3924                 :                : 
                               3925                 :                :     /* NULL null string is passed through as a null pointer */
                               3926         [ +  + ]:              6 :     if (!PG_ARGISNULL(2))
                               3927                 :              3 :         null_string = text_to_cstring(PG_GETARG_TEXT_PP(2));
                               3928                 :                :     else
                               3929                 :              3 :         null_string = NULL;
                               3930                 :                : 
                               3931                 :              6 :     PG_RETURN_TEXT_P(array_to_text_internal(fcinfo, v, fldsep, null_string));
                               3932                 :                : }
                               3933                 :                : 
                               3934                 :                : /*
                               3935                 :                :  * common code for array_to_text and array_to_text_null functions
                               3936                 :                :  */
                               3937                 :                : static text *
                               3938                 :          39052 : array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v,
                               3939                 :                :                        const char *fldsep, const char *null_string)
                               3940                 :                : {
                               3941                 :                :     text       *result;
                               3942                 :                :     int         nitems,
                               3943                 :                :                *dims,
                               3944                 :                :                 ndims;
                               3945                 :                :     Oid         element_type;
                               3946                 :                :     int         typlen;
                               3947                 :                :     bool        typbyval;
                               3948                 :                :     char        typalign;
                               3949                 :                :     uint8       typalignby;
                               3950                 :                :     StringInfoData buf;
 7422                          3951                 :          39052 :     bool        printed = false;
                               3952                 :                :     char       *p;
                               3953                 :                :     bits8      *bitmap;
                               3954                 :                :     int         bitmask;
                               3955                 :                :     int         i;
                               3956                 :                :     ArrayMetaState *my_extra;
                               3957                 :                : 
 8297                          3958                 :          39052 :     ndims = ARR_NDIM(v);
                               3959                 :          39052 :     dims = ARR_DIMS(v);
                               3960                 :          39052 :     nitems = ArrayGetNItems(ndims, dims);
                               3961                 :                : 
                               3962                 :                :     /* if there are no elements, return an empty string */
                               3963         [ +  + ]:          39052 :     if (nitems == 0)
 5696                          3964                 :          26434 :         return cstring_to_text_with_len("", 0);
                               3965                 :                : 
 8297                          3966                 :          12618 :     element_type = ARR_ELEMTYPE(v);
 7319 neilc@samurai.com        3967                 :          12618 :     initStringInfo(&buf);
                               3968                 :                : 
                               3969                 :                :     /*
                               3970                 :                :      * We arrange to look up info about element type, including its output
                               3971                 :                :      * conversion proc, only once per series of calls, assuming the element
                               3972                 :                :      * type doesn't change underneath us.
                               3973                 :                :      */
 8297 tgl@sss.pgh.pa.us        3974                 :          12618 :     my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
                               3975         [ +  + ]:          12618 :     if (my_extra == NULL)
                               3976                 :                :     {
                               3977                 :            720 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
                               3978                 :                :                                                       sizeof(ArrayMetaState));
                               3979                 :            720 :         my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
 7422                          3980                 :            720 :         my_extra->element_type = ~element_type;
                               3981                 :                :     }
                               3982                 :                : 
 8297                          3983         [ +  + ]:          12618 :     if (my_extra->element_type != element_type)
                               3984                 :                :     {
                               3985                 :                :         /*
                               3986                 :                :          * Get info about element type, including its output conversion proc
                               3987                 :                :          */
                               3988                 :            720 :         get_type_io_data(element_type, IOFunc_output,
                               3989                 :                :                          &my_extra->typlen, &my_extra->typbyval,
                               3990                 :                :                          &my_extra->typalign, &my_extra->typdelim,
                               3991                 :                :                          &my_extra->typioparam, &my_extra->typiofunc);
                               3992                 :            720 :         fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
                               3993                 :            720 :                       fcinfo->flinfo->fn_mcxt);
                               3994                 :            720 :         my_extra->element_type = element_type;
                               3995                 :                :     }
                               3996                 :          12618 :     typlen = my_extra->typlen;
                               3997                 :          12618 :     typbyval = my_extra->typbyval;
                               3998                 :          12618 :     typalign = my_extra->typalign;
   41 tgl@sss.pgh.pa.us        3999                 :GNC       12618 :     typalignby = typalign_to_alignby(typalign);
                               4000                 :                : 
 7422 tgl@sss.pgh.pa.us        4001         [ +  + ]:CBC       12618 :     p = ARR_DATA_PTR(v);
                               4002         [ +  + ]:          12618 :     bitmap = ARR_NULLBITMAP(v);
                               4003                 :          12618 :     bitmask = 1;
                               4004                 :                : 
 8297                          4005         [ +  + ]:          42904 :     for (i = 0; i < nitems; i++)
                               4006                 :                :     {
                               4007                 :                :         Datum       itemvalue;
                               4008                 :                :         char       *value;
                               4009                 :                : 
                               4010                 :                :         /* Get source element, checking for NULL */
 7422                          4011   [ +  +  +  + ]:          30286 :         if (bitmap && (*bitmap & bitmask) == 0)
                               4012                 :                :         {
                               4013                 :                :             /* if null_string is NULL, we just ignore null elements */
 5696                          4014         [ +  + ]:              9 :             if (null_string != NULL)
                               4015                 :                :             {
                               4016         [ +  - ]:              3 :                 if (printed)
                               4017                 :              3 :                     appendStringInfo(&buf, "%s%s", fldsep, null_string);
                               4018                 :                :                 else
 5696 tgl@sss.pgh.pa.us        4019                 :UBC           0 :                     appendStringInfoString(&buf, null_string);
 5696 tgl@sss.pgh.pa.us        4020                 :CBC           3 :                 printed = true;
                               4021                 :                :             }
                               4022                 :                :         }
                               4023                 :                :         else
                               4024                 :                :         {
 7422                          4025                 :          30277 :             itemvalue = fetch_att(p, typbyval, typlen);
                               4026                 :                : 
 7285                          4027                 :          30277 :             value = OutputFunctionCall(&my_extra->proc, itemvalue);
                               4028                 :                : 
 7422                          4029         [ +  + ]:          30277 :             if (printed)
 7319 neilc@samurai.com        4030                 :          17659 :                 appendStringInfo(&buf, "%s%s", fldsep, value);
                               4031                 :                :             else
                               4032                 :          12618 :                 appendStringInfoString(&buf, value);
 7422 tgl@sss.pgh.pa.us        4033                 :          30277 :             printed = true;
                               4034                 :                : 
 6918                          4035   [ +  +  +  -  :          30277 :             p = att_addlength_pointer(p, typlen, p);
                                     -  -  -  -  -  
                                     -  -  -  -  +  
                                              -  - ]
   41 tgl@sss.pgh.pa.us        4036                 :GNC       30277 :             p = (char *) att_nominal_alignby(p, typalignby);
                               4037                 :                :         }
                               4038                 :                : 
                               4039                 :                :         /* advance bitmap pointer if any */
 7422 tgl@sss.pgh.pa.us        4040         [ +  + ]:CBC       30286 :         if (bitmap)
                               4041                 :                :         {
                               4042                 :             54 :             bitmask <<= 1;
                               4043         [ -  + ]:             54 :             if (bitmask == 0x100)
                               4044                 :                :             {
 7422 tgl@sss.pgh.pa.us        4045                 :UBC           0 :                 bitmap++;
                               4046                 :              0 :                 bitmask = 1;
                               4047                 :                :             }
                               4048                 :                :         }
                               4049                 :                :     }
                               4050                 :                : 
 5696 tgl@sss.pgh.pa.us        4051                 :CBC       12618 :     result = cstring_to_text_with_len(buf.data, buf.len);
                               4052                 :          12618 :     pfree(buf.data);
                               4053                 :                : 
                               4054                 :          12618 :     return result;
                               4055                 :                : }
                               4056                 :                : 
                               4057                 :                : /*
                               4058                 :                :  * Workhorse for to_bin, to_oct, and to_hex.  Note that base must be > 1 and <=
                               4059                 :                :  * 16.
                               4060                 :                :  */
                               4061                 :                : static inline text *
  935 nathan@postgresql.or     4062                 :          19375 : convert_to_base(uint64 value, int base)
                               4063                 :                : {
 8122 tgl@sss.pgh.pa.us        4064                 :          19375 :     const char *digits = "0123456789abcdef";
                               4065                 :                : 
                               4066                 :                :     /* We size the buffer for to_bin's longest possible return value. */
                               4067                 :                :     char        buf[sizeof(uint64) * BITS_PER_BYTE];
  935 nathan@postgresql.or     4068                 :          19375 :     char       *const end = buf + sizeof(buf);
                               4069                 :          19375 :     char       *ptr = end;
                               4070                 :                : 
                               4071         [ -  + ]:          19375 :     Assert(base > 1);
                               4072         [ -  + ]:          19375 :     Assert(base <= 16);
                               4073                 :                : 
                               4074                 :                :     do
                               4075                 :                :     {
                               4076                 :          37985 :         *--ptr = digits[value % base];
                               4077                 :          37985 :         value /= base;
 8606 bruce@momjian.us         4078   [ +  +  +  + ]:          37985 :     } while (ptr > buf && value);
                               4079                 :                : 
  935 nathan@postgresql.or     4080                 :          19375 :     return cstring_to_text_with_len(ptr, end - ptr);
                               4081                 :                : }
                               4082                 :                : 
                               4083                 :                : /*
                               4084                 :                :  * Convert an integer to a string containing a base-2 (binary) representation
                               4085                 :                :  * of the number.
                               4086                 :                :  */
                               4087                 :                : Datum
                               4088                 :              6 : to_bin32(PG_FUNCTION_ARGS)
                               4089                 :                : {
                               4090                 :              6 :     uint64      value = (uint32) PG_GETARG_INT32(0);
                               4091                 :                : 
                               4092                 :              6 :     PG_RETURN_TEXT_P(convert_to_base(value, 2));
                               4093                 :                : }
                               4094                 :                : Datum
                               4095                 :              6 : to_bin64(PG_FUNCTION_ARGS)
                               4096                 :                : {
                               4097                 :              6 :     uint64      value = (uint64) PG_GETARG_INT64(0);
                               4098                 :                : 
                               4099                 :              6 :     PG_RETURN_TEXT_P(convert_to_base(value, 2));
                               4100                 :                : }
                               4101                 :                : 
                               4102                 :                : /*
                               4103                 :                :  * Convert an integer to a string containing a base-8 (oct) representation of
                               4104                 :                :  * the number.
                               4105                 :                :  */
                               4106                 :                : Datum
                               4107                 :              6 : to_oct32(PG_FUNCTION_ARGS)
                               4108                 :                : {
                               4109                 :              6 :     uint64      value = (uint32) PG_GETARG_INT32(0);
                               4110                 :                : 
                               4111                 :              6 :     PG_RETURN_TEXT_P(convert_to_base(value, 8));
                               4112                 :                : }
                               4113                 :                : Datum
                               4114                 :              6 : to_oct64(PG_FUNCTION_ARGS)
                               4115                 :                : {
 8122 tgl@sss.pgh.pa.us        4116                 :              6 :     uint64      value = (uint64) PG_GETARG_INT64(0);
                               4117                 :                : 
  935 nathan@postgresql.or     4118                 :              6 :     PG_RETURN_TEXT_P(convert_to_base(value, 8));
                               4119                 :                : }
                               4120                 :                : 
                               4121                 :                : /*
                               4122                 :                :  * Convert an integer to a string containing a base-16 (hex) representation of
                               4123                 :                :  * the number.
                               4124                 :                :  */
                               4125                 :                : Datum
                               4126                 :          19345 : to_hex32(PG_FUNCTION_ARGS)
                               4127                 :                : {
                               4128                 :          19345 :     uint64      value = (uint32) PG_GETARG_INT32(0);
                               4129                 :                : 
                               4130                 :          19345 :     PG_RETURN_TEXT_P(convert_to_base(value, 16));
                               4131                 :                : }
                               4132                 :                : Datum
                               4133                 :              6 : to_hex64(PG_FUNCTION_ARGS)
                               4134                 :                : {
                               4135                 :              6 :     uint64      value = (uint64) PG_GETARG_INT64(0);
                               4136                 :                : 
                               4137                 :              6 :     PG_RETURN_TEXT_P(convert_to_base(value, 16));
                               4138                 :                : }
                               4139                 :                : 
                               4140                 :                : /*
                               4141                 :                :  * Return the size of a datum, possibly compressed
                               4142                 :                :  *
                               4143                 :                :  * Works on any data type
                               4144                 :                :  */
                               4145                 :                : Datum
 7557 bruce@momjian.us         4146                 :             61 : pg_column_size(PG_FUNCTION_ARGS)
                               4147                 :                : {
 7530 tgl@sss.pgh.pa.us        4148                 :             61 :     Datum       value = PG_GETARG_DATUM(0);
                               4149                 :                :     int32       result;
                               4150                 :                :     int         typlen;
                               4151                 :                : 
                               4152                 :                :     /* On first call, get the input type's typlen, and save at *fn_extra */
                               4153         [ +  - ]:             61 :     if (fcinfo->flinfo->fn_extra == NULL)
                               4154                 :                :     {
                               4155                 :                :         /* Lookup the datatype of the supplied argument */
 7456 bruce@momjian.us         4156                 :             61 :         Oid         argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
                               4157                 :                : 
 7530 tgl@sss.pgh.pa.us        4158                 :             61 :         typlen = get_typlen(argtypeid);
                               4159         [ -  + ]:             61 :         if (typlen == 0)        /* should not happen */
 7556 bruce@momjian.us         4160         [ #  # ]:UBC           0 :             elog(ERROR, "cache lookup failed for type %u", argtypeid);
                               4161                 :                : 
 7557 bruce@momjian.us         4162                 :CBC          61 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
                               4163                 :                :                                                       sizeof(int));
 7530 tgl@sss.pgh.pa.us        4164                 :             61 :         *((int *) fcinfo->flinfo->fn_extra) = typlen;
                               4165                 :                :     }
                               4166                 :                :     else
 7530 tgl@sss.pgh.pa.us        4167                 :UBC           0 :         typlen = *((int *) fcinfo->flinfo->fn_extra);
                               4168                 :                : 
 7530 tgl@sss.pgh.pa.us        4169         [ +  - ]:CBC          61 :     if (typlen == -1)
                               4170                 :                :     {
                               4171                 :                :         /* varlena type, possibly toasted */
                               4172                 :             61 :         result = toast_datum_size(value);
                               4173                 :                :     }
 7530 tgl@sss.pgh.pa.us        4174         [ #  # ]:UBC           0 :     else if (typlen == -2)
                               4175                 :                :     {
                               4176                 :                :         /* cstring */
                               4177                 :              0 :         result = strlen(DatumGetCString(value)) + 1;
                               4178                 :                :     }
                               4179                 :                :     else
                               4180                 :                :     {
                               4181                 :                :         /* ordinary fixed-width type */
                               4182                 :              0 :         result = typlen;
                               4183                 :                :     }
                               4184                 :                : 
 7530 tgl@sss.pgh.pa.us        4185                 :CBC          61 :     PG_RETURN_INT32(result);
                               4186                 :                : }
                               4187                 :                : 
                               4188                 :                : /*
                               4189                 :                :  * Return the compression method stored in the compressed attribute.  Return
                               4190                 :                :  * NULL for non varlena type or uncompressed data.
                               4191                 :                :  */
                               4192                 :                : Datum
 1822 rhaas@postgresql.org     4193                 :             96 : pg_column_compression(PG_FUNCTION_ARGS)
                               4194                 :                : {
                               4195                 :                :     int         typlen;
                               4196                 :                :     char       *result;
                               4197                 :                :     ToastCompressionId cmid;
                               4198                 :                : 
                               4199                 :                :     /* On first call, get the input type's typlen, and save at *fn_extra */
                               4200         [ +  + ]:             96 :     if (fcinfo->flinfo->fn_extra == NULL)
                               4201                 :                :     {
                               4202                 :                :         /* Lookup the datatype of the supplied argument */
                               4203                 :             78 :         Oid         argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
                               4204                 :                : 
                               4205                 :             78 :         typlen = get_typlen(argtypeid);
                               4206         [ -  + ]:             78 :         if (typlen == 0)        /* should not happen */
 1822 rhaas@postgresql.org     4207         [ #  # ]:UBC           0 :             elog(ERROR, "cache lookup failed for type %u", argtypeid);
                               4208                 :                : 
 1822 rhaas@postgresql.org     4209                 :CBC          78 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
                               4210                 :                :                                                       sizeof(int));
                               4211                 :             78 :         *((int *) fcinfo->flinfo->fn_extra) = typlen;
                               4212                 :                :     }
                               4213                 :                :     else
                               4214                 :             18 :         typlen = *((int *) fcinfo->flinfo->fn_extra);
                               4215                 :                : 
                               4216         [ -  + ]:             96 :     if (typlen != -1)
 1822 rhaas@postgresql.org     4217                 :UBC           0 :         PG_RETURN_NULL();
                               4218                 :                : 
                               4219                 :                :     /* get the compression method id stored in the compressed varlena */
   32 michael@paquier.xyz      4220                 :GNC          96 :     cmid = toast_get_compression_id((varlena *)
 1822 rhaas@postgresql.org     4221                 :CBC          96 :                                     DatumGetPointer(PG_GETARG_DATUM(0)));
                               4222         [ +  + ]:             96 :     if (cmid == TOAST_INVALID_COMPRESSION_ID)
                               4223                 :             21 :         PG_RETURN_NULL();
                               4224                 :                : 
                               4225                 :                :     /* convert compression method id to compression method name */
                               4226      [ +  +  - ]:             75 :     switch (cmid)
                               4227                 :                :     {
                               4228                 :             42 :         case TOAST_PGLZ_COMPRESSION_ID:
                               4229                 :             42 :             result = "pglz";
                               4230                 :             42 :             break;
                               4231                 :             33 :         case TOAST_LZ4_COMPRESSION_ID:
                               4232                 :             33 :             result = "lz4";
                               4233                 :             33 :             break;
 1822 rhaas@postgresql.org     4234                 :UBC           0 :         default:
                               4235         [ #  # ]:              0 :             elog(ERROR, "invalid compression method id %d", cmid);
                               4236                 :                :     }
                               4237                 :                : 
 1822 rhaas@postgresql.org     4238                 :CBC          75 :     PG_RETURN_TEXT_P(cstring_to_text(result));
                               4239                 :                : }
                               4240                 :                : 
                               4241                 :                : /*
                               4242                 :                :  * Return the chunk_id of the on-disk TOASTed value.  Return NULL if the value
                               4243                 :                :  * is un-TOASTed or not on-disk.
                               4244                 :                :  */
                               4245                 :                : Datum
  731 nathan@postgresql.or     4246                 :             26 : pg_column_toast_chunk_id(PG_FUNCTION_ARGS)
                               4247                 :                : {
                               4248                 :                :     int         typlen;
                               4249                 :                :     varlena    *attr;
                               4250                 :                :     varatt_external toast_pointer;
                               4251                 :                : 
                               4252                 :                :     /* On first call, get the input type's typlen, and save at *fn_extra */
                               4253         [ +  + ]:             26 :     if (fcinfo->flinfo->fn_extra == NULL)
                               4254                 :                :     {
                               4255                 :                :         /* Lookup the datatype of the supplied argument */
                               4256                 :             20 :         Oid         argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
                               4257                 :                : 
                               4258                 :             20 :         typlen = get_typlen(argtypeid);
                               4259         [ -  + ]:             20 :         if (typlen == 0)        /* should not happen */
  731 nathan@postgresql.or     4260         [ #  # ]:UBC           0 :             elog(ERROR, "cache lookup failed for type %u", argtypeid);
                               4261                 :                : 
  731 nathan@postgresql.or     4262                 :CBC          20 :         fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
                               4263                 :                :                                                       sizeof(int));
                               4264                 :             20 :         *((int *) fcinfo->flinfo->fn_extra) = typlen;
                               4265                 :                :     }
                               4266                 :                :     else
  731 nathan@postgresql.or     4267                 :GBC           6 :         typlen = *((int *) fcinfo->flinfo->fn_extra);
                               4268                 :                : 
  731 nathan@postgresql.or     4269         [ -  + ]:CBC          26 :     if (typlen != -1)
  731 nathan@postgresql.or     4270                 :UBC           0 :         PG_RETURN_NULL();
                               4271                 :                : 
   32 michael@paquier.xyz      4272                 :GNC          26 :     attr = (varlena *) DatumGetPointer(PG_GETARG_DATUM(0));
                               4273                 :                : 
  731 nathan@postgresql.or     4274   [ +  +  -  + ]:CBC          26 :     if (!VARATT_IS_EXTERNAL_ONDISK(attr))
                               4275                 :              6 :         PG_RETURN_NULL();
                               4276                 :                : 
                               4277   [ -  +  -  +  :             20 :     VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
                                     +  -  -  +  -  
                                                 + ]
                               4278                 :                : 
                               4279                 :             20 :     PG_RETURN_OID(toast_pointer.va_valueid);
                               4280                 :                : }
                               4281                 :                : 
                               4282                 :                : /*
                               4283                 :                :  * string_agg - Concatenates values and returns string.
                               4284                 :                :  *
                               4285                 :                :  * Syntax: string_agg(value text, delimiter text) RETURNS text
                               4286                 :                :  *
                               4287                 :                :  * Note: Any NULL values are ignored. The first-call delimiter isn't
                               4288                 :                :  * actually used at all, and on subsequent calls the delimiter precedes
                               4289                 :                :  * the associated value.
                               4290                 :                :  */
                               4291                 :                : 
                               4292                 :                : /* subroutine to initialize state */
                               4293                 :                : static StringInfo
 5879 tgl@sss.pgh.pa.us        4294                 :           1217 : makeStringAggState(FunctionCallInfo fcinfo)
                               4295                 :                : {
                               4296                 :                :     StringInfo  state;
                               4297                 :                :     MemoryContext aggcontext;
                               4298                 :                :     MemoryContext oldcontext;
                               4299                 :                : 
                               4300         [ -  + ]:           1217 :     if (!AggCheckCallContext(fcinfo, &aggcontext))
                               4301                 :                :     {
                               4302                 :                :         /* cannot be called directly because of internal-type argument */
 5886 itagaki.takahiro@gma     4303         [ #  # ]:UBC           0 :         elog(ERROR, "string_agg_transfn called in non-aggregate context");
                               4304                 :                :     }
                               4305                 :                : 
                               4306                 :                :     /*
                               4307                 :                :      * Create state in aggregate context.  It'll stay there across subsequent
                               4308                 :                :      * calls.
                               4309                 :                :      */
 5886 itagaki.takahiro@gma     4310                 :CBC        1217 :     oldcontext = MemoryContextSwitchTo(aggcontext);
                               4311                 :           1217 :     state = makeStringInfo();
                               4312                 :           1217 :     MemoryContextSwitchTo(oldcontext);
                               4313                 :                : 
                               4314                 :           1217 :     return state;
                               4315                 :                : }
                               4316                 :                : 
                               4317                 :                : Datum
                               4318                 :         547573 : string_agg_transfn(PG_FUNCTION_ARGS)
                               4319                 :                : {
                               4320                 :                :     StringInfo  state;
                               4321                 :                : 
                               4322         [ +  + ]:         547573 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
                               4323                 :                : 
                               4324                 :                :     /* Append the value unless null, preceding it with the delimiter. */
                               4325         [ +  + ]:         547573 :     if (!PG_ARGISNULL(1))
                               4326                 :                :     {
 1147 drowley@postgresql.o     4327                 :         540049 :         text       *value = PG_GETARG_TEXT_PP(1);
                               4328                 :         540049 :         bool        isfirst = false;
                               4329                 :                : 
                               4330                 :                :         /*
                               4331                 :                :          * You might think we can just throw away the first delimiter, however
                               4332                 :                :          * we must keep it as we may be a parallel worker doing partial
                               4333                 :                :          * aggregation building a state to send to the main process.  We need
                               4334                 :                :          * to keep the delimiter of every aggregation so that the combine
                               4335                 :                :          * function can properly join up the strings of two separately
                               4336                 :                :          * partially aggregated results.  The first delimiter is only stripped
                               4337                 :                :          * off in the final function.  To know how much to strip off the front
                               4338                 :                :          * of the string, we store the length of the first delimiter in the
                               4339                 :                :          * StringInfo's cursor field, which we don't otherwise need here.
                               4340                 :                :          */
 5886 itagaki.takahiro@gma     4341         [ +  + ]:         540049 :         if (state == NULL)
                               4342                 :                :         {
 5879 tgl@sss.pgh.pa.us        4343                 :           1037 :             state = makeStringAggState(fcinfo);
 1147 drowley@postgresql.o     4344                 :           1037 :             isfirst = true;
                               4345                 :                :         }
                               4346                 :                : 
                               4347         [ +  - ]:         540049 :         if (!PG_ARGISNULL(2))
                               4348                 :                :         {
                               4349                 :         540049 :             text       *delim = PG_GETARG_TEXT_PP(2);
                               4350                 :                : 
                               4351                 :         540049 :             appendStringInfoText(state, delim);
                               4352         [ +  + ]:         540049 :             if (isfirst)
                               4353   [ -  +  -  -  :           1037 :                 state->cursor = VARSIZE_ANY_EXHDR(delim);
                                     -  -  -  -  +  
                                                 + ]
                               4354                 :                :         }
                               4355                 :                : 
                               4356                 :         540049 :         appendStringInfoText(state, value);
                               4357                 :                :     }
                               4358                 :                : 
                               4359                 :                :     /*
                               4360                 :                :      * The transition type for string_agg() is declared to be "internal",
                               4361                 :                :      * which is a pass-by-value type the same size as a pointer.
                               4362                 :                :      */
                               4363         [ +  + ]:         547573 :     if (state)
                               4364                 :         547526 :         PG_RETURN_POINTER(state);
                               4365                 :             47 :     PG_RETURN_NULL();
                               4366                 :                : }
                               4367                 :                : 
                               4368                 :                : /*
                               4369                 :                :  * string_agg_combine
                               4370                 :                :  *      Aggregate combine function for string_agg(text) and string_agg(bytea)
                               4371                 :                :  */
                               4372                 :                : Datum
                               4373                 :            120 : string_agg_combine(PG_FUNCTION_ARGS)
                               4374                 :                : {
                               4375                 :                :     StringInfo  state1;
                               4376                 :                :     StringInfo  state2;
                               4377                 :                :     MemoryContext agg_context;
                               4378                 :                : 
                               4379         [ -  + ]:            120 :     if (!AggCheckCallContext(fcinfo, &agg_context))
 1147 drowley@postgresql.o     4380         [ #  # ]:UBC           0 :         elog(ERROR, "aggregate function called in non-aggregate context");
                               4381                 :                : 
 1147 drowley@postgresql.o     4382         [ +  + ]:CBC         120 :     state1 = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
                               4383         [ +  - ]:            120 :     state2 = PG_ARGISNULL(1) ? NULL : (StringInfo) PG_GETARG_POINTER(1);
                               4384                 :                : 
                               4385         [ -  + ]:            120 :     if (state2 == NULL)
                               4386                 :                :     {
                               4387                 :                :         /*
                               4388                 :                :          * NULL state2 is easy, just return state1, which we know is already
                               4389                 :                :          * in the agg_context
                               4390                 :                :          */
 1147 drowley@postgresql.o     4391         [ #  # ]:UBC           0 :         if (state1 == NULL)
                               4392                 :              0 :             PG_RETURN_NULL();
                               4393                 :              0 :         PG_RETURN_POINTER(state1);
                               4394                 :                :     }
                               4395                 :                : 
 1147 drowley@postgresql.o     4396         [ +  + ]:CBC         120 :     if (state1 == NULL)
                               4397                 :                :     {
                               4398                 :                :         /* We must copy state2's data into the agg_context */
                               4399                 :                :         MemoryContext old_context;
                               4400                 :                : 
                               4401                 :             60 :         old_context = MemoryContextSwitchTo(agg_context);
                               4402                 :             60 :         state1 = makeStringAggState(fcinfo);
                               4403                 :             60 :         appendBinaryStringInfo(state1, state2->data, state2->len);
                               4404                 :             60 :         state1->cursor = state2->cursor;
                               4405                 :             60 :         MemoryContextSwitchTo(old_context);
                               4406                 :                :     }
                               4407         [ +  - ]:             60 :     else if (state2->len > 0)
                               4408                 :                :     {
                               4409                 :                :         /* Combine ... state1->cursor does not change in this case */
                               4410                 :             60 :         appendBinaryStringInfo(state1, state2->data, state2->len);
                               4411                 :                :     }
                               4412                 :                : 
                               4413                 :            120 :     PG_RETURN_POINTER(state1);
                               4414                 :                : }
                               4415                 :                : 
                               4416                 :                : /*
                               4417                 :                :  * string_agg_serialize
                               4418                 :                :  *      Aggregate serialize function for string_agg(text) and string_agg(bytea)
                               4419                 :                :  *
                               4420                 :                :  * This is strict, so we need not handle NULL input
                               4421                 :                :  */
                               4422                 :                : Datum
                               4423                 :            120 : string_agg_serialize(PG_FUNCTION_ARGS)
                               4424                 :                : {
                               4425                 :                :     StringInfo  state;
                               4426                 :                :     StringInfoData buf;
                               4427                 :                :     bytea      *result;
                               4428                 :                : 
                               4429                 :                :     /* cannot be called directly because of internal-type argument */
                               4430         [ -  + ]:            120 :     Assert(AggCheckCallContext(fcinfo, NULL));
                               4431                 :                : 
                               4432                 :            120 :     state = (StringInfo) PG_GETARG_POINTER(0);
                               4433                 :                : 
                               4434                 :            120 :     pq_begintypsend(&buf);
                               4435                 :                : 
                               4436                 :                :     /* cursor */
                               4437                 :            120 :     pq_sendint(&buf, state->cursor, 4);
                               4438                 :                : 
                               4439                 :                :     /* data */
                               4440                 :            120 :     pq_sendbytes(&buf, state->data, state->len);
                               4441                 :                : 
                               4442                 :            120 :     result = pq_endtypsend(&buf);
                               4443                 :                : 
                               4444                 :            120 :     PG_RETURN_BYTEA_P(result);
                               4445                 :                : }
                               4446                 :                : 
                               4447                 :                : /*
                               4448                 :                :  * string_agg_deserialize
                               4449                 :                :  *      Aggregate deserial function for string_agg(text) and string_agg(bytea)
                               4450                 :                :  *
                               4451                 :                :  * This is strict, so we need not handle NULL input
                               4452                 :                :  */
                               4453                 :                : Datum
                               4454                 :            120 : string_agg_deserialize(PG_FUNCTION_ARGS)
                               4455                 :                : {
                               4456                 :                :     bytea      *sstate;
                               4457                 :                :     StringInfo  result;
                               4458                 :                :     StringInfoData buf;
                               4459                 :                :     char       *data;
                               4460                 :                :     int         datalen;
                               4461                 :                : 
                               4462                 :                :     /* cannot be called directly because of internal-type argument */
                               4463         [ -  + ]:            120 :     Assert(AggCheckCallContext(fcinfo, NULL));
                               4464                 :                : 
                               4465                 :            120 :     sstate = PG_GETARG_BYTEA_PP(0);
                               4466                 :                : 
                               4467                 :                :     /*
                               4468                 :                :      * Initialize a StringInfo so that we can "receive" it using the standard
                               4469                 :                :      * recv-function infrastructure.
                               4470                 :                :      */
  870                          4471         [ -  + ]:            120 :     initReadOnlyStringInfo(&buf, VARDATA_ANY(sstate),
                               4472   [ -  +  -  -  :            120 :                            VARSIZE_ANY_EXHDR(sstate));
                                     -  -  -  -  -  
                                                 + ]
                               4473                 :                : 
 1147                          4474                 :            120 :     result = makeStringAggState(fcinfo);
                               4475                 :                : 
                               4476                 :                :     /* cursor */
                               4477                 :            120 :     result->cursor = pq_getmsgint(&buf, 4);
                               4478                 :                : 
                               4479                 :                :     /* data */
                               4480   [ -  +  -  -  :            120 :     datalen = VARSIZE_ANY_EXHDR(sstate) - 4;
                                     -  -  -  -  -  
                                                 + ]
                               4481                 :            120 :     data = (char *) pq_getmsgbytes(&buf, datalen);
                               4482                 :            120 :     appendBinaryStringInfo(result, data, datalen);
                               4483                 :                : 
                               4484                 :            120 :     pq_getmsgend(&buf);
                               4485                 :                : 
                               4486                 :            120 :     PG_RETURN_POINTER(result);
                               4487                 :                : }
                               4488                 :                : 
                               4489                 :                : Datum
 5886 itagaki.takahiro@gma     4490                 :           1049 : string_agg_finalfn(PG_FUNCTION_ARGS)
                               4491                 :                : {
                               4492                 :                :     StringInfo  state;
                               4493                 :                : 
                               4494                 :                :     /* cannot be called directly because of internal-type argument */
 5879 tgl@sss.pgh.pa.us        4495         [ -  + ]:           1049 :     Assert(AggCheckCallContext(fcinfo, NULL));
                               4496                 :                : 
                               4497         [ +  + ]:           1049 :     state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
                               4498                 :                : 
 5886 itagaki.takahiro@gma     4499         [ +  + ]:           1049 :     if (state != NULL)
                               4500                 :                :     {
                               4501                 :                :         /* As per comment in transfn, strip data before the cursor position */
 1147 drowley@postgresql.o     4502                 :           1007 :         PG_RETURN_TEXT_P(cstring_to_text_with_len(&state->data[state->cursor],
                               4503                 :                :                                                   state->len - state->cursor));
                               4504                 :                :     }
                               4505                 :                :     else
 5886 itagaki.takahiro@gma     4506                 :             42 :         PG_RETURN_NULL();
                               4507                 :                : }
                               4508                 :                : 
                               4509                 :                : /*
                               4510                 :                :  * Prepare cache with fmgr info for the output functions of the datatypes of
                               4511                 :                :  * the arguments of a concat-like function, beginning with argument "argidx".
                               4512                 :                :  * (Arguments before that will have corresponding slots in the resulting
                               4513                 :                :  * FmgrInfo array, but we don't fill those slots.)
                               4514                 :                :  */
                               4515                 :                : static FmgrInfo *
 3099 tgl@sss.pgh.pa.us        4516                 :             53 : build_concat_foutcache(FunctionCallInfo fcinfo, int argidx)
                               4517                 :                : {
                               4518                 :                :     FmgrInfo   *foutcache;
                               4519                 :                :     int         i;
                               4520                 :                : 
                               4521                 :                :     /* We keep the info in fn_mcxt so it survives across calls */
                               4522                 :             53 :     foutcache = (FmgrInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
                               4523                 :             53 :                                                 PG_NARGS() * sizeof(FmgrInfo));
                               4524                 :                : 
                               4525         [ +  + ]:            200 :     for (i = argidx; i < PG_NARGS(); i++)
                               4526                 :                :     {
                               4527                 :                :         Oid         valtype;
                               4528                 :                :         Oid         typOutput;
                               4529                 :                :         bool        typIsVarlena;
                               4530                 :                : 
                               4531                 :            147 :         valtype = get_fn_expr_argtype(fcinfo->flinfo, i);
                               4532         [ -  + ]:            147 :         if (!OidIsValid(valtype))
 3099 tgl@sss.pgh.pa.us        4533         [ #  # ]:UBC           0 :             elog(ERROR, "could not determine data type of concat() input");
                               4534                 :                : 
 3099 tgl@sss.pgh.pa.us        4535                 :CBC         147 :         getTypeOutputInfo(valtype, &typOutput, &typIsVarlena);
                               4536                 :            147 :         fmgr_info_cxt(typOutput, &foutcache[i], fcinfo->flinfo->fn_mcxt);
                               4537                 :                :     }
                               4538                 :                : 
                               4539                 :             53 :     fcinfo->flinfo->fn_extra = foutcache;
                               4540                 :                : 
                               4541                 :             53 :     return foutcache;
                               4542                 :                : }
                               4543                 :                : 
                               4544                 :                : /*
                               4545                 :                :  * Implementation of both concat() and concat_ws().
                               4546                 :                :  *
                               4547                 :                :  * sepstr is the separator string to place between values.
                               4548                 :                :  * argidx identifies the first argument to concatenate (counting from zero);
                               4549                 :                :  * note that this must be constant across any one series of calls.
                               4550                 :                :  *
                               4551                 :                :  * Returns NULL if result should be NULL, else text value.
                               4552                 :                :  */
                               4553                 :                : static text *
 4797                          4554                 :            132 : concat_internal(const char *sepstr, int argidx,
                               4555                 :                :                 FunctionCallInfo fcinfo)
                               4556                 :                : {
                               4557                 :                :     text       *result;
                               4558                 :                :     StringInfoData str;
                               4559                 :                :     FmgrInfo   *foutcache;
 5312                          4560                 :            132 :     bool        first_arg = true;
                               4561                 :                :     int         i;
                               4562                 :                : 
                               4563                 :                :     /*
                               4564                 :                :      * concat(VARIADIC some-array) is essentially equivalent to
                               4565                 :                :      * array_to_text(), ie concat the array elements with the given separator.
                               4566                 :                :      * So we just pass the case off to that code.
                               4567                 :                :      */
 4797                          4568         [ +  + ]:            132 :     if (get_fn_expr_variadic(fcinfo->flinfo))
                               4569                 :                :     {
                               4570                 :                :         ArrayType  *arr;
                               4571                 :                : 
                               4572                 :                :         /* Should have just the one argument */
                               4573         [ -  + ]:             15 :         Assert(argidx == PG_NARGS() - 1);
                               4574                 :                : 
                               4575                 :                :         /* concat(VARIADIC NULL) is defined as NULL */
                               4576         [ +  + ]:             15 :         if (PG_ARGISNULL(argidx))
                               4577                 :              6 :             return NULL;
                               4578                 :                : 
                               4579                 :                :         /*
                               4580                 :                :          * Non-null argument had better be an array.  We assume that any call
                               4581                 :                :          * context that could let get_fn_expr_variadic return true will have
                               4582                 :                :          * checked that a VARIADIC-labeled parameter actually is an array.  So
                               4583                 :                :          * it should be okay to just Assert that it's an array rather than
                               4584                 :                :          * doing a full-fledged error check.
                               4585                 :                :          */
 4364                          4586         [ -  + ]:              9 :         Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, argidx))));
                               4587                 :                : 
                               4588                 :                :         /* OK, safe to fetch the array value */
 4797                          4589                 :              9 :         arr = PG_GETARG_ARRAYTYPE_P(argidx);
                               4590                 :                : 
                               4591                 :                :         /*
                               4592                 :                :          * And serialize the array.  We tell array_to_text to ignore null
                               4593                 :                :          * elements, which matches the behavior of the loop below.
                               4594                 :                :          */
                               4595                 :              9 :         return array_to_text_internal(fcinfo, arr, sepstr, NULL);
                               4596                 :                :     }
                               4597                 :                : 
                               4598                 :                :     /* Normal case without explicit VARIADIC marker */
 5682 itagaki.takahiro@gma     4599                 :            117 :     initStringInfo(&str);
                               4600                 :                : 
                               4601                 :                :     /* Get output function info, building it if first time through */
 3099 tgl@sss.pgh.pa.us        4602                 :            117 :     foutcache = (FmgrInfo *) fcinfo->flinfo->fn_extra;
                               4603         [ +  + ]:            117 :     if (foutcache == NULL)
                               4604                 :             53 :         foutcache = build_concat_foutcache(fcinfo, argidx);
                               4605                 :                : 
 5682 itagaki.takahiro@gma     4606         [ +  + ]:            411 :     for (i = argidx; i < PG_NARGS(); i++)
                               4607                 :                :     {
                               4608         [ +  + ]:            294 :         if (!PG_ARGISNULL(i))
                               4609                 :                :         {
 5312 tgl@sss.pgh.pa.us        4610                 :            255 :             Datum       value = PG_GETARG_DATUM(i);
                               4611                 :                : 
                               4612                 :                :             /* add separator if appropriate */
                               4613         [ +  + ]:            255 :             if (first_arg)
                               4614                 :            114 :                 first_arg = false;
                               4615                 :                :             else
 4797                          4616                 :            141 :                 appendStringInfoString(&str, sepstr);
                               4617                 :                : 
                               4618                 :                :             /* call the appropriate type output function, append the result */
 5682 itagaki.takahiro@gma     4619                 :            255 :             appendStringInfoString(&str,
 3099 tgl@sss.pgh.pa.us        4620                 :            255 :                                    OutputFunctionCall(&foutcache[i], value));
                               4621                 :                :         }
                               4622                 :                :     }
                               4623                 :                : 
 5682 itagaki.takahiro@gma     4624                 :            117 :     result = cstring_to_text_with_len(str.data, str.len);
                               4625                 :            117 :     pfree(str.data);
                               4626                 :                : 
                               4627                 :            117 :     return result;
                               4628                 :                : }
                               4629                 :                : 
                               4630                 :                : /*
                               4631                 :                :  * Concatenate all arguments. NULL arguments are ignored.
                               4632                 :                :  */
                               4633                 :                : Datum
                               4634                 :             93 : text_concat(PG_FUNCTION_ARGS)
                               4635                 :                : {
                               4636                 :                :     text       *result;
                               4637                 :                : 
 4797 tgl@sss.pgh.pa.us        4638                 :             93 :     result = concat_internal("", 0, fcinfo);
                               4639         [ +  + ]:             93 :     if (result == NULL)
                               4640                 :              3 :         PG_RETURN_NULL();
                               4641                 :             90 :     PG_RETURN_TEXT_P(result);
                               4642                 :                : }
                               4643                 :                : 
                               4644                 :                : /*
                               4645                 :                :  * Concatenate all but first argument value with separators. The first
                               4646                 :                :  * parameter is used as the separator. NULL arguments are ignored.
                               4647                 :                :  */
                               4648                 :                : Datum
 5682 itagaki.takahiro@gma     4649                 :             42 : text_concat_ws(PG_FUNCTION_ARGS)
                               4650                 :                : {
                               4651                 :                :     char       *sep;
                               4652                 :                :     text       *result;
                               4653                 :                : 
                               4654                 :                :     /* return NULL when separator is NULL */
                               4655         [ +  + ]:             42 :     if (PG_ARGISNULL(0))
                               4656                 :              3 :         PG_RETURN_NULL();
 4797 tgl@sss.pgh.pa.us        4657                 :             39 :     sep = text_to_cstring(PG_GETARG_TEXT_PP(0));
                               4658                 :                : 
                               4659                 :             39 :     result = concat_internal(sep, 1, fcinfo);
                               4660         [ +  + ]:             39 :     if (result == NULL)
                               4661                 :              3 :         PG_RETURN_NULL();
                               4662                 :             36 :     PG_RETURN_TEXT_P(result);
                               4663                 :                : }
                               4664                 :                : 
                               4665                 :                : /*
                               4666                 :                :  * Return first n characters in the string. When n is negative,
                               4667                 :                :  * return all but last |n| characters.
                               4668                 :                :  */
                               4669                 :                : Datum
 5682 itagaki.takahiro@gma     4670                 :           1074 : text_left(PG_FUNCTION_ARGS)
                               4671                 :                : {
 2489 tgl@sss.pgh.pa.us        4672                 :           1074 :     int         n = PG_GETARG_INT32(1);
                               4673                 :                : 
 5682 itagaki.takahiro@gma     4674         [ +  + ]:           1074 :     if (n < 0)
                               4675                 :                :     {
 2539 sfrost@snowman.net       4676                 :             15 :         text       *str = PG_GETARG_TEXT_PP(0);
                               4677         [ -  + ]:             15 :         const char *p = VARDATA_ANY(str);
                               4678   [ -  +  -  -  :             15 :         int         len = VARSIZE_ANY_EXHDR(str);
                                     -  -  -  -  -  
                                                 + ]
                               4679                 :                :         int         rlen;
                               4680                 :                : 
                               4681                 :             15 :         n = pg_mbstrlen_with_len(p, len) + n;
                               4682                 :             15 :         rlen = pg_mbcharcliplen(p, len, n);
                               4683                 :             15 :         PG_RETURN_TEXT_P(cstring_to_text_with_len(p, rlen));
                               4684                 :                :     }
                               4685                 :                :     else
                               4686                 :           1059 :         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0), 1, n, false));
                               4687                 :                : }
                               4688                 :                : 
                               4689                 :                : /*
                               4690                 :                :  * Return last n characters in the string. When n is negative,
                               4691                 :                :  * return all but first |n| characters.
                               4692                 :                :  */
                               4693                 :                : Datum
 5682 itagaki.takahiro@gma     4694                 :             33 : text_right(PG_FUNCTION_ARGS)
                               4695                 :                : {
                               4696                 :             33 :     text       *str = PG_GETARG_TEXT_PP(0);
                               4697         [ -  + ]:             33 :     const char *p = VARDATA_ANY(str);
                               4698   [ -  +  -  -  :             33 :     int         len = VARSIZE_ANY_EXHDR(str);
                                     -  -  -  -  -  
                                                 + ]
                               4699                 :             33 :     int         n = PG_GETARG_INT32(1);
                               4700                 :                :     int         off;
                               4701                 :                : 
                               4702         [ +  + ]:             33 :     if (n < 0)
                               4703                 :             15 :         n = -n;
                               4704                 :                :     else
                               4705                 :             18 :         n = pg_mbstrlen_with_len(p, len) - n;
                               4706                 :             33 :     off = pg_mbcharcliplen(p, len, n);
                               4707                 :                : 
                               4708                 :             33 :     PG_RETURN_TEXT_P(cstring_to_text_with_len(p + off, len - off));
                               4709                 :                : }
                               4710                 :                : 
                               4711                 :                : /*
                               4712                 :                :  * Return reversed string
                               4713                 :                :  */
                               4714                 :                : Datum
                               4715                 :             21 : text_reverse(PG_FUNCTION_ARGS)
                               4716                 :                : {
 5453 bruce@momjian.us         4717                 :             21 :     text       *str = PG_GETARG_TEXT_PP(0);
                               4718         [ +  + ]:             21 :     const char *p = VARDATA_ANY(str);
                               4719   [ -  +  -  -  :             21 :     int         len = VARSIZE_ANY_EXHDR(str);
                                     -  -  -  -  +  
                                                 + ]
                               4720                 :             21 :     const char *endp = p + len;
                               4721                 :                :     text       *result;
                               4722                 :                :     char       *dst;
                               4723                 :                : 
 5682 itagaki.takahiro@gma     4724                 :             21 :     result = palloc(len + VARHDRSZ);
 5453 bruce@momjian.us         4725                 :             21 :     dst = (char *) VARDATA(result) + len;
 5682 itagaki.takahiro@gma     4726                 :             21 :     SET_VARSIZE(result, len + VARHDRSZ);
                               4727                 :                : 
                               4728         [ +  - ]:             21 :     if (pg_database_encoding_max_length() > 1)
                               4729                 :                :     {
                               4730                 :                :         /* multibyte version */
                               4731         [ +  + ]:            162 :         while (p < endp)
                               4732                 :                :         {
                               4733                 :                :             int         sz;
                               4734                 :                : 
   67 tmunro@postgresql.or     4735                 :            144 :             sz = pg_mblen_range(p, endp);
 5682 itagaki.takahiro@gma     4736                 :            141 :             dst -= sz;
                               4737                 :            141 :             memcpy(dst, p, sz);
                               4738                 :            141 :             p += sz;
                               4739                 :                :         }
                               4740                 :                :     }
                               4741                 :                :     else
                               4742                 :                :     {
                               4743                 :                :         /* single byte version */
 5682 itagaki.takahiro@gma     4744         [ #  # ]:UBC           0 :         while (p < endp)
                               4745                 :              0 :             *(--dst) = *p++;
                               4746                 :                :     }
                               4747                 :                : 
 5682 itagaki.takahiro@gma     4748                 :CBC          18 :     PG_RETURN_TEXT_P(result);
                               4749                 :                : }
                               4750                 :                : 
                               4751                 :                : 
                               4752                 :                : /*
                               4753                 :                :  * Support macros for text_format()
                               4754                 :                :  */
                               4755                 :                : #define TEXT_FORMAT_FLAG_MINUS  0x0001  /* is minus flag present? */
                               4756                 :                : 
                               4757                 :                : #define ADVANCE_PARSE_POINTER(ptr,end_ptr) \
                               4758                 :                :     do { \
                               4759                 :                :         if (++(ptr) >= (end_ptr)) \
                               4760                 :                :             ereport(ERROR, \
                               4761                 :                :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE), \
                               4762                 :                :                      errmsg("unterminated format() type specifier"), \
                               4763                 :                :                      errhint("For a single \"%%\" use \"%%%%\"."))); \
                               4764                 :                :     } while (0)
                               4765                 :                : 
                               4766                 :                : /*
                               4767                 :                :  * Returns a formatted string
                               4768                 :                :  */
                               4769                 :                : Datum
 5594 rhaas@postgresql.org     4770                 :          16600 : text_format(PG_FUNCTION_ARGS)
                               4771                 :                : {
                               4772                 :                :     text       *fmt;
                               4773                 :                :     StringInfoData str;
                               4774                 :                :     const char *cp;
                               4775                 :                :     const char *start_ptr;
                               4776                 :                :     const char *end_ptr;
                               4777                 :                :     text       *result;
                               4778                 :                :     int         arg;
                               4779                 :                :     bool        funcvariadic;
                               4780                 :                :     int         nargs;
 4797 tgl@sss.pgh.pa.us        4781                 :          16600 :     Datum      *elements = NULL;
                               4782                 :          16600 :     bool       *nulls = NULL;
                               4783                 :          16600 :     Oid         element_type = InvalidOid;
                               4784                 :          16600 :     Oid         prev_type = InvalidOid;
 4749                          4785                 :          16600 :     Oid         prev_width_type = InvalidOid;
                               4786                 :                :     FmgrInfo    typoutputfinfo;
                               4787                 :                :     FmgrInfo    typoutputinfo_width;
                               4788                 :                : 
                               4789                 :                :     /* When format string is null, immediately return null */
 5594 rhaas@postgresql.org     4790         [ +  + ]:          16600 :     if (PG_ARGISNULL(0))
                               4791                 :              3 :         PG_RETURN_NULL();
                               4792                 :                : 
                               4793                 :                :     /* If argument is marked VARIADIC, expand array into elements */
 4797 tgl@sss.pgh.pa.us        4794         [ +  + ]:          16597 :     if (get_fn_expr_variadic(fcinfo->flinfo))
                               4795                 :                :     {
                               4796                 :                :         ArrayType  *arr;
                               4797                 :                :         int16       elmlen;
                               4798                 :                :         bool        elmbyval;
                               4799                 :                :         char        elmalign;
                               4800                 :                :         int         nitems;
                               4801                 :                : 
                               4802                 :                :         /* Should have just the one argument */
                               4803         [ -  + ]:             24 :         Assert(PG_NARGS() == 2);
                               4804                 :                : 
                               4805                 :                :         /* If argument is NULL, we treat it as zero-length array */
                               4806         [ +  + ]:             24 :         if (PG_ARGISNULL(1))
                               4807                 :              3 :             nitems = 0;
                               4808                 :                :         else
                               4809                 :                :         {
                               4810                 :                :             /*
                               4811                 :                :              * Non-null argument had better be an array.  We assume that any
                               4812                 :                :              * call context that could let get_fn_expr_variadic return true
                               4813                 :                :              * will have checked that a VARIADIC-labeled parameter actually is
                               4814                 :                :              * an array.  So it should be okay to just Assert that it's an
                               4815                 :                :              * array rather than doing a full-fledged error check.
                               4816                 :                :              */
 4364                          4817         [ -  + ]:             21 :             Assert(OidIsValid(get_base_element_type(get_fn_expr_argtype(fcinfo->flinfo, 1))));
                               4818                 :                : 
                               4819                 :                :             /* OK, safe to fetch the array value */
 4797                          4820                 :             21 :             arr = PG_GETARG_ARRAYTYPE_P(1);
                               4821                 :                : 
                               4822                 :                :             /* Get info about array element type */
                               4823                 :             21 :             element_type = ARR_ELEMTYPE(arr);
                               4824                 :             21 :             get_typlenbyvalalign(element_type,
                               4825                 :                :                                  &elmlen, &elmbyval, &elmalign);
                               4826                 :                : 
                               4827                 :                :             /* Extract all array elements */
                               4828                 :             21 :             deconstruct_array(arr, element_type, elmlen, elmbyval, elmalign,
                               4829                 :                :                               &elements, &nulls, &nitems);
                               4830                 :                :         }
                               4831                 :                : 
                               4832                 :             24 :         nargs = nitems + 1;
                               4833                 :             24 :         funcvariadic = true;
                               4834                 :                :     }
                               4835                 :                :     else
                               4836                 :                :     {
                               4837                 :                :         /* Non-variadic case, we'll process the arguments individually */
                               4838                 :          16573 :         nargs = PG_NARGS();
                               4839                 :          16573 :         funcvariadic = false;
                               4840                 :                :     }
                               4841                 :                : 
                               4842                 :                :     /* Setup for main loop. */
 5594 rhaas@postgresql.org     4843                 :          16597 :     fmt = PG_GETARG_TEXT_PP(0);
                               4844         [ -  + ]:          16597 :     start_ptr = VARDATA_ANY(fmt);
                               4845   [ -  +  -  -  :          16597 :     end_ptr = start_ptr + VARSIZE_ANY_EXHDR(fmt);
                                     -  -  -  -  -  
                                                 + ]
                               4846                 :          16597 :     initStringInfo(&str);
 4749 tgl@sss.pgh.pa.us        4847                 :          16597 :     arg = 1;                    /* next argument position to print */
                               4848                 :                : 
                               4849                 :                :     /* Scan format string, looking for conversion specifiers. */
 5594 rhaas@postgresql.org     4850         [ +  + ]:         506500 :     for (cp = start_ptr; cp < end_ptr; cp++)
                               4851                 :                :     {
                               4852                 :                :         int         argpos;
                               4853                 :                :         int         widthpos;
                               4854                 :                :         int         flags;
                               4855                 :                :         int         width;
                               4856                 :                :         Datum       value;
                               4857                 :                :         bool        isNull;
                               4858                 :                :         Oid         typid;
                               4859                 :                : 
                               4860                 :                :         /*
                               4861                 :                :          * If it's not the start of a conversion specifier, just copy it to
                               4862                 :                :          * the output buffer.
                               4863                 :                :          */
                               4864         [ +  + ]:         489933 :         if (*cp != '%')
                               4865                 :                :         {
                               4866         [ -  + ]:         457007 :             appendStringInfoCharMacro(&str, *cp);
                               4867                 :         457016 :             continue;
                               4868                 :                :         }
                               4869                 :                : 
 4749 tgl@sss.pgh.pa.us        4870   [ -  +  -  - ]:          32926 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
                               4871                 :                : 
                               4872                 :                :         /* Easy case: %% outputs a single % */
 5594 rhaas@postgresql.org     4873         [ +  + ]:          32926 :         if (*cp == '%')
                               4874                 :                :         {
                               4875         [ -  + ]:              9 :             appendStringInfoCharMacro(&str, *cp);
                               4876                 :              9 :             continue;
                               4877                 :                :         }
                               4878                 :                : 
                               4879                 :                :         /* Parse the optional portions of the format specifier */
 4749 tgl@sss.pgh.pa.us        4880                 :          32917 :         cp = text_format_parse_format(cp, end_ptr,
                               4881                 :                :                                       &argpos, &widthpos,
                               4882                 :                :                                       &flags, &width);
                               4883                 :                : 
                               4884                 :                :         /*
                               4885                 :                :          * Next we should see the main conversion specifier.  Whether or not
                               4886                 :                :          * an argument position was present, it's known that at least one
                               4887                 :                :          * character remains in the string at this point.  Experience suggests
                               4888                 :                :          * that it's worth checking that that character is one of the expected
                               4889                 :                :          * ones before we try to fetch arguments, so as to produce the least
                               4890                 :                :          * confusing response to a mis-formatted specifier.
                               4891                 :                :          */
                               4892         [ +  + ]:          32905 :         if (strchr("sIL", *cp) == NULL)
                               4893         [ +  - ]:              3 :             ereport(ERROR,
                               4894                 :                :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               4895                 :                :                      errmsg("unrecognized format() type specifier \"%.*s\"",
                               4896                 :                :                             pg_mblen_range(cp, end_ptr), cp),
                               4897                 :                :                      errhint("For a single \"%%\" use \"%%%%\".")));
                               4898                 :                : 
                               4899                 :                :         /* If indirect width was specified, get its value */
                               4900         [ +  + ]:          32902 :         if (widthpos >= 0)
                               4901                 :                :         {
                               4902                 :                :             /* Collect the specified or next argument position */
                               4903         [ +  + ]:             21 :             if (widthpos > 0)
                               4904                 :             18 :                 arg = widthpos;
                               4905         [ -  + ]:             21 :             if (arg >= nargs)
 5410 heikki.linnakangas@i     4906         [ #  # ]:UBC           0 :                 ereport(ERROR,
                               4907                 :                :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               4908                 :                :                          errmsg("too few arguments for format()")));
                               4909                 :                : 
                               4910                 :                :             /* Get the value and type of the selected argument */
 4749 tgl@sss.pgh.pa.us        4911         [ +  - ]:CBC          21 :             if (!funcvariadic)
                               4912                 :                :             {
                               4913                 :             21 :                 value = PG_GETARG_DATUM(arg);
                               4914                 :             21 :                 isNull = PG_ARGISNULL(arg);
                               4915                 :             21 :                 typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
                               4916                 :                :             }
                               4917                 :                :             else
                               4918                 :                :             {
 4749 tgl@sss.pgh.pa.us        4919                 :UBC           0 :                 value = elements[arg - 1];
                               4920                 :              0 :                 isNull = nulls[arg - 1];
                               4921                 :              0 :                 typid = element_type;
                               4922                 :                :             }
 4749 tgl@sss.pgh.pa.us        4923         [ -  + ]:CBC          21 :             if (!OidIsValid(typid))
 4749 tgl@sss.pgh.pa.us        4924         [ #  # ]:UBC           0 :                 elog(ERROR, "could not determine data type of format() input");
                               4925                 :                : 
 4749 tgl@sss.pgh.pa.us        4926                 :CBC          21 :             arg++;
                               4927                 :                : 
                               4928                 :                :             /* We can treat NULL width the same as zero */
                               4929         [ +  + ]:             21 :             if (isNull)
                               4930                 :              3 :                 width = 0;
                               4931         [ +  - ]:             18 :             else if (typid == INT4OID)
                               4932                 :             18 :                 width = DatumGetInt32(value);
 4749 tgl@sss.pgh.pa.us        4933         [ #  # ]:UBC           0 :             else if (typid == INT2OID)
                               4934                 :              0 :                 width = DatumGetInt16(value);
                               4935                 :                :             else
                               4936                 :                :             {
                               4937                 :                :                 /* For less-usual datatypes, convert to text then to int */
                               4938                 :                :                 char       *str;
                               4939                 :                : 
                               4940         [ #  # ]:              0 :                 if (typid != prev_width_type)
                               4941                 :                :                 {
                               4942                 :                :                     Oid         typoutputfunc;
                               4943                 :                :                     bool        typIsVarlena;
                               4944                 :                : 
                               4945                 :              0 :                     getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
                               4946                 :              0 :                     fmgr_info(typoutputfunc, &typoutputinfo_width);
                               4947                 :              0 :                     prev_width_type = typid;
                               4948                 :                :                 }
                               4949                 :                : 
                               4950                 :              0 :                 str = OutputFunctionCall(&typoutputinfo_width, value);
                               4951                 :                : 
                               4952                 :                :                 /* pg_strtoint32 will complain about bad data or overflow */
 2793 andres@anarazel.de       4953                 :              0 :                 width = pg_strtoint32(str);
                               4954                 :                : 
 4749 tgl@sss.pgh.pa.us        4955                 :              0 :                 pfree(str);
                               4956                 :                :             }
                               4957                 :                :         }
                               4958                 :                : 
                               4959                 :                :         /* Collect the specified or next argument position */
 4749 tgl@sss.pgh.pa.us        4960         [ +  + ]:CBC       32902 :         if (argpos > 0)
                               4961                 :             66 :             arg = argpos;
                               4962         [ +  + ]:          32902 :         if (arg >= nargs)
 5594 rhaas@postgresql.org     4963         [ +  - ]:             12 :             ereport(ERROR,
                               4964                 :                :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               4965                 :                :                      errmsg("too few arguments for format()")));
                               4966                 :                : 
                               4967                 :                :         /* Get the value and type of the selected argument */
 4797 tgl@sss.pgh.pa.us        4968         [ +  + ]:          32890 :         if (!funcvariadic)
                               4969                 :                :         {
                               4970                 :          32254 :             value = PG_GETARG_DATUM(arg);
                               4971                 :          32254 :             isNull = PG_ARGISNULL(arg);
                               4972                 :          32254 :             typid = get_fn_expr_argtype(fcinfo->flinfo, arg);
                               4973                 :                :         }
                               4974                 :                :         else
                               4975                 :                :         {
                               4976                 :            636 :             value = elements[arg - 1];
                               4977                 :            636 :             isNull = nulls[arg - 1];
                               4978                 :            636 :             typid = element_type;
                               4979                 :                :         }
                               4980         [ -  + ]:          32890 :         if (!OidIsValid(typid))
 4797 tgl@sss.pgh.pa.us        4981         [ #  # ]:UBC           0 :             elog(ERROR, "could not determine data type of format() input");
                               4982                 :                : 
 4749 tgl@sss.pgh.pa.us        4983                 :CBC       32890 :         arg++;
                               4984                 :                : 
                               4985                 :                :         /*
                               4986                 :                :          * Get the appropriate typOutput function, reusing previous one if
                               4987                 :                :          * same type as previous argument.  That's particularly useful in the
                               4988                 :                :          * variadic-array case, but often saves work even for ordinary calls.
                               4989                 :                :          */
 4797                          4990         [ +  + ]:          32890 :         if (typid != prev_type)
                               4991                 :                :         {
                               4992                 :                :             Oid         typoutputfunc;
                               4993                 :                :             bool        typIsVarlena;
                               4994                 :                : 
                               4995                 :          17131 :             getTypeOutputInfo(typid, &typoutputfunc, &typIsVarlena);
                               4996                 :          17131 :             fmgr_info(typoutputfunc, &typoutputfinfo);
                               4997                 :          17131 :             prev_type = typid;
                               4998                 :                :         }
                               4999                 :                : 
                               5000                 :                :         /*
                               5001                 :                :          * And now we can format the value.
                               5002                 :                :          */
 5594 rhaas@postgresql.org     5003         [ +  - ]:          32890 :         switch (*cp)
                               5004                 :                :         {
                               5005                 :          32890 :             case 's':
                               5006                 :                :             case 'I':
                               5007                 :                :             case 'L':
 4797 tgl@sss.pgh.pa.us        5008                 :          32890 :                 text_format_string_conversion(&str, *cp, &typoutputfinfo,
                               5009                 :                :                                               value, isNull,
                               5010                 :                :                                               flags, width);
 5594 rhaas@postgresql.org     5011                 :          32887 :                 break;
 5594 rhaas@postgresql.org     5012                 :UBC           0 :             default:
                               5013                 :                :                 /* should not get here, because of previous check */
                               5014         [ #  # ]:              0 :                 ereport(ERROR,
                               5015                 :                :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               5016                 :                :                          errmsg("unrecognized format() type specifier \"%.*s\"",
                               5017                 :                :                                 pg_mblen_range(cp, end_ptr), cp),
                               5018                 :                :                          errhint("For a single \"%%\" use \"%%%%\".")));
                               5019                 :                :                 break;
                               5020                 :                :         }
                               5021                 :                :     }
                               5022                 :                : 
                               5023                 :                :     /* Don't need deconstruct_array results anymore. */
 4797 tgl@sss.pgh.pa.us        5024         [ +  + ]:CBC       16567 :     if (elements != NULL)
                               5025                 :             21 :         pfree(elements);
                               5026         [ +  + ]:          16567 :     if (nulls != NULL)
                               5027                 :             21 :         pfree(nulls);
                               5028                 :                : 
                               5029                 :                :     /* Generate results. */
 5594 rhaas@postgresql.org     5030                 :          16567 :     result = cstring_to_text_with_len(str.data, str.len);
                               5031                 :          16567 :     pfree(str.data);
                               5032                 :                : 
                               5033                 :          16567 :     PG_RETURN_TEXT_P(result);
                               5034                 :                : }
                               5035                 :                : 
                               5036                 :                : /*
                               5037                 :                :  * Parse contiguous digits as a decimal number.
                               5038                 :                :  *
                               5039                 :                :  * Returns true if some digits could be parsed.
                               5040                 :                :  * The value is returned into *value, and *ptr is advanced to the next
                               5041                 :                :  * character to be parsed.
                               5042                 :                :  *
                               5043                 :                :  * Note parsing invariant: at least one character is known available before
                               5044                 :                :  * string end (end_ptr) at entry, and this is still true at exit.
                               5045                 :                :  */
                               5046                 :                : static bool
 4749 tgl@sss.pgh.pa.us        5047                 :          65816 : text_format_parse_digits(const char **ptr, const char *end_ptr, int *value)
                               5048                 :                : {
                               5049                 :          65816 :     bool        found = false;
                               5050                 :          65816 :     const char *cp = *ptr;
                               5051                 :          65816 :     int         val = 0;
                               5052                 :                : 
                               5053   [ +  +  +  + ]:          65972 :     while (*cp >= '0' && *cp <= '9')
                               5054                 :                :     {
 3015 andres@anarazel.de       5055                 :            159 :         int8        digit = (*cp - '0');
                               5056                 :                : 
                               5057         [ +  - ]:            159 :         if (unlikely(pg_mul_s32_overflow(val, 10, &val)) ||
                               5058         [ -  + ]:            159 :             unlikely(pg_add_s32_overflow(val, digit, &val)))
 4749 tgl@sss.pgh.pa.us        5059         [ #  # ]:UBC           0 :             ereport(ERROR,
                               5060                 :                :                     (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
                               5061                 :                :                      errmsg("number is out of range")));
 4749 tgl@sss.pgh.pa.us        5062   [ +  +  +  - ]:CBC         159 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
                               5063                 :            156 :         found = true;
                               5064                 :                :     }
                               5065                 :                : 
                               5066                 :          65813 :     *ptr = cp;
                               5067                 :          65813 :     *value = val;
                               5068                 :                : 
                               5069                 :          65813 :     return found;
                               5070                 :                : }
                               5071                 :                : 
                               5072                 :                : /*
                               5073                 :                :  * Parse a format specifier (generally following the SUS printf spec).
                               5074                 :                :  *
                               5075                 :                :  * We have already advanced over the initial '%', and we are looking for
                               5076                 :                :  * [argpos][flags][width]type (but the type character is not consumed here).
                               5077                 :                :  *
                               5078                 :                :  * Inputs are start_ptr (the position after '%') and end_ptr (string end + 1).
                               5079                 :                :  * Output parameters:
                               5080                 :                :  *  argpos: argument position for value to be printed.  -1 means unspecified.
                               5081                 :                :  *  widthpos: argument position for width.  Zero means the argument position
                               5082                 :                :  *          was unspecified (ie, take the next arg) and -1 means no width
                               5083                 :                :  *          argument (width was omitted or specified as a constant).
                               5084                 :                :  *  flags: bitmask of flags.
                               5085                 :                :  *  width: directly-specified width value.  Zero means the width was omitted
                               5086                 :                :  *          (note it's not necessary to distinguish this case from an explicit
                               5087                 :                :  *          zero width value).
                               5088                 :                :  *
                               5089                 :                :  * The function result is the next character position to be parsed, ie, the
                               5090                 :                :  * location where the type character is/should be.
                               5091                 :                :  *
                               5092                 :                :  * Note parsing invariant: at least one character is known available before
                               5093                 :                :  * string end (end_ptr) at entry, and this is still true at exit.
                               5094                 :                :  */
                               5095                 :                : static const char *
                               5096                 :          32917 : text_format_parse_format(const char *start_ptr, const char *end_ptr,
                               5097                 :                :                          int *argpos, int *widthpos,
                               5098                 :                :                          int *flags, int *width)
                               5099                 :                : {
                               5100                 :          32917 :     const char *cp = start_ptr;
                               5101                 :                :     int         n;
                               5102                 :                : 
                               5103                 :                :     /* set defaults for output parameters */
                               5104                 :          32917 :     *argpos = -1;
                               5105                 :          32917 :     *widthpos = -1;
                               5106                 :          32917 :     *flags = 0;
                               5107                 :          32917 :     *width = 0;
                               5108                 :                : 
                               5109                 :                :     /* try to identify first number */
                               5110         [ +  + ]:          32917 :     if (text_format_parse_digits(&cp, end_ptr, &n))
                               5111                 :                :     {
                               5112         [ +  + ]:             87 :         if (*cp != '$')
                               5113                 :                :         {
                               5114                 :                :             /* Must be just a width and a type, so we're done */
                               5115                 :             12 :             *width = n;
                               5116                 :             12 :             return cp;
                               5117                 :                :         }
                               5118                 :                :         /* The number was argument position */
                               5119                 :             75 :         *argpos = n;
                               5120                 :                :         /* Explicit 0 for argument index is immediately refused */
                               5121         [ +  + ]:             75 :         if (n == 0)
                               5122         [ +  - ]:              3 :             ereport(ERROR,
                               5123                 :                :                     (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               5124                 :                :                      errmsg("format specifies argument 0, but arguments are numbered from 1")));
                               5125   [ +  +  +  - ]:             72 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
                               5126                 :                :     }
                               5127                 :                : 
                               5128                 :                :     /* Handle flags (only minus is supported now) */
                               5129         [ +  + ]:          32914 :     while (*cp == '-')
                               5130                 :                :     {
                               5131                 :             15 :         *flags |= TEXT_FORMAT_FLAG_MINUS;
                               5132   [ -  +  -  - ]:             15 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
                               5133                 :                :     }
                               5134                 :                : 
                               5135         [ +  + ]:          32899 :     if (*cp == '*')
                               5136                 :                :     {
                               5137                 :                :         /* Handle indirect width */
                               5138   [ -  +  -  - ]:             24 :         ADVANCE_PARSE_POINTER(cp, end_ptr);
                               5139         [ +  + ]:             24 :         if (text_format_parse_digits(&cp, end_ptr, &n))
                               5140                 :                :         {
                               5141                 :                :             /* number in this position must be closed by $ */
                               5142         [ -  + ]:             21 :             if (*cp != '$')
 4749 tgl@sss.pgh.pa.us        5143         [ #  # ]:UBC           0 :                 ereport(ERROR,
                               5144                 :                :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               5145                 :                :                          errmsg("width argument position must be ended by \"$\"")));
                               5146                 :                :             /* The number was width argument position */
 4749 tgl@sss.pgh.pa.us        5147                 :CBC          21 :             *widthpos = n;
                               5148                 :                :             /* Explicit 0 for argument index is immediately refused */
                               5149         [ +  + ]:             21 :             if (n == 0)
                               5150         [ +  - ]:              3 :                 ereport(ERROR,
                               5151                 :                :                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               5152                 :                :                          errmsg("format specifies argument 0, but arguments are numbered from 1")));
                               5153   [ -  +  -  - ]:             18 :             ADVANCE_PARSE_POINTER(cp, end_ptr);
                               5154                 :                :         }
                               5155                 :                :         else
                               5156                 :              3 :             *widthpos = 0;      /* width's argument position is unspecified */
                               5157                 :                :     }
                               5158                 :                :     else
                               5159                 :                :     {
                               5160                 :                :         /* Check for direct width specification */
                               5161         [ +  + ]:          32875 :         if (text_format_parse_digits(&cp, end_ptr, &n))
                               5162                 :             15 :             *width = n;
                               5163                 :                :     }
                               5164                 :                : 
                               5165                 :                :     /* cp should now be pointing at type character */
                               5166                 :          32893 :     return cp;
                               5167                 :                : }
                               5168                 :                : 
                               5169                 :                : /*
                               5170                 :                :  * Format a %s, %I, or %L conversion
                               5171                 :                :  */
                               5172                 :                : static void
 5594 rhaas@postgresql.org     5173                 :          32890 : text_format_string_conversion(StringInfo buf, char conversion,
                               5174                 :                :                               FmgrInfo *typOutputInfo,
                               5175                 :                :                               Datum value, bool isNull,
                               5176                 :                :                               int flags, int width)
                               5177                 :                : {
                               5178                 :                :     char       *str;
                               5179                 :                : 
                               5180                 :                :     /* Handle NULL arguments before trying to stringify the value. */
                               5181         [ +  + ]:          32890 :     if (isNull)
                               5182                 :                :     {
 4749 tgl@sss.pgh.pa.us        5183         [ +  + ]:            171 :         if (conversion == 's')
                               5184                 :            135 :             text_format_append_string(buf, "", flags, width);
                               5185         [ +  + ]:             36 :         else if (conversion == 'L')
                               5186                 :             33 :             text_format_append_string(buf, "NULL", flags, width);
 5594 rhaas@postgresql.org     5187         [ +  - ]:              3 :         else if (conversion == 'I')
 5453 bruce@momjian.us         5188         [ +  - ]:              3 :             ereport(ERROR,
                               5189                 :                :                     (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
                               5190                 :                :                      errmsg("null values cannot be formatted as an SQL identifier")));
 5594 rhaas@postgresql.org     5191                 :            168 :         return;
                               5192                 :                :     }
                               5193                 :                : 
                               5194                 :                :     /* Stringify. */
 4797 tgl@sss.pgh.pa.us        5195                 :          32719 :     str = OutputFunctionCall(typOutputInfo, value);
                               5196                 :                : 
                               5197                 :                :     /* Escape. */
 5594 rhaas@postgresql.org     5198         [ +  + ]:          32719 :     if (conversion == 'I')
                               5199                 :                :     {
                               5200                 :                :         /* quote_identifier may or may not allocate a new string. */
 4749 tgl@sss.pgh.pa.us        5201                 :           2453 :         text_format_append_string(buf, quote_identifier(str), flags, width);
                               5202                 :                :     }
 5594 rhaas@postgresql.org     5203         [ +  + ]:          30266 :     else if (conversion == 'L')
                               5204                 :                :     {
 5453 bruce@momjian.us         5205                 :           1620 :         char       *qstr = quote_literal_cstr(str);
                               5206                 :                : 
 4749 tgl@sss.pgh.pa.us        5207                 :           1620 :         text_format_append_string(buf, qstr, flags, width);
                               5208                 :                :         /* quote_literal_cstr() always allocates a new string */
 5594 rhaas@postgresql.org     5209                 :           1620 :         pfree(qstr);
                               5210                 :                :     }
                               5211                 :                :     else
 4749 tgl@sss.pgh.pa.us        5212                 :          28646 :         text_format_append_string(buf, str, flags, width);
                               5213                 :                : 
                               5214                 :                :     /* Cleanup. */
 5594 rhaas@postgresql.org     5215                 :          32719 :     pfree(str);
                               5216                 :                : }
                               5217                 :                : 
                               5218                 :                : /*
                               5219                 :                :  * Append str to buf, padding as directed by flags/width
                               5220                 :                :  */
                               5221                 :                : static void
 4749 tgl@sss.pgh.pa.us        5222                 :          32887 : text_format_append_string(StringInfo buf, const char *str,
                               5223                 :                :                           int flags, int width)
                               5224                 :                : {
                               5225                 :          32887 :     bool        align_to_left = false;
                               5226                 :                :     int         len;
                               5227                 :                : 
                               5228                 :                :     /* fast path for typical easy case */
                               5229         [ +  + ]:          32887 :     if (width == 0)
                               5230                 :                :     {
                               5231                 :          32845 :         appendStringInfoString(buf, str);
                               5232                 :          32845 :         return;
                               5233                 :                :     }
                               5234                 :                : 
                               5235         [ +  + ]:             42 :     if (width < 0)
                               5236                 :                :     {
                               5237                 :                :         /* Negative width: implicit '-' flag, then take absolute value */
                               5238                 :              3 :         align_to_left = true;
                               5239                 :                :         /* -INT_MIN is undefined */
                               5240         [ -  + ]:              3 :         if (width <= INT_MIN)
 4749 tgl@sss.pgh.pa.us        5241         [ #  # ]:UBC           0 :             ereport(ERROR,
                               5242                 :                :                     (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
                               5243                 :                :                      errmsg("number is out of range")));
 4749 tgl@sss.pgh.pa.us        5244                 :CBC           3 :         width = -width;
                               5245                 :                :     }
                               5246         [ +  + ]:             39 :     else if (flags & TEXT_FORMAT_FLAG_MINUS)
                               5247                 :             12 :         align_to_left = true;
                               5248                 :                : 
                               5249                 :             42 :     len = pg_mbstrlen(str);
                               5250         [ +  + ]:             42 :     if (align_to_left)
                               5251                 :                :     {
                               5252                 :                :         /* left justify */
                               5253                 :             15 :         appendStringInfoString(buf, str);
                               5254         [ +  - ]:             15 :         if (len < width)
                               5255                 :             15 :             appendStringInfoSpaces(buf, width - len);
                               5256                 :                :     }
                               5257                 :                :     else
                               5258                 :                :     {
                               5259                 :                :         /* right justify */
                               5260         [ +  - ]:             27 :         if (len < width)
                               5261                 :             27 :             appendStringInfoSpaces(buf, width - len);
                               5262                 :             27 :         appendStringInfoString(buf, str);
                               5263                 :                :     }
                               5264                 :                : }
                               5265                 :                : 
                               5266                 :                : /*
                               5267                 :                :  * text_format_nv - nonvariadic wrapper for text_format function.
                               5268                 :                :  *
                               5269                 :                :  * note: this wrapper is necessary to pass the sanity check in opr_sanity,
                               5270                 :                :  * which checks that all built-in functions that share the implementing C
                               5271                 :                :  * function take the same number of arguments.
                               5272                 :                :  */
                               5273                 :                : Datum
 5594 rhaas@postgresql.org     5274                 :           1905 : text_format_nv(PG_FUNCTION_ARGS)
                               5275                 :                : {
                               5276                 :           1905 :     return text_format(fcinfo);
                               5277                 :                : }
                               5278                 :                : 
                               5279                 :                : /*
                               5280                 :                :  * Helper function for Levenshtein distance functions. Faster than memcmp(),
                               5281                 :                :  * for this use case.
                               5282                 :                :  */
                               5283                 :                : static inline bool
 4140 rhaas@postgresql.org     5284                 :UBC           0 : rest_of_char_same(const char *s1, const char *s2, int len)
                               5285                 :                : {
                               5286         [ #  # ]:              0 :     while (len > 0)
                               5287                 :                :     {
                               5288                 :              0 :         len--;
                               5289         [ #  # ]:              0 :         if (s1[len] != s2[len])
                               5290                 :              0 :             return false;
                               5291                 :                :     }
                               5292                 :              0 :     return true;
                               5293                 :                : }
                               5294                 :                : 
                               5295                 :                : /* Expand each Levenshtein distance variant */
                               5296                 :                : #include "levenshtein.c"
                               5297                 :                : #define LEVENSHTEIN_LESS_EQUAL
                               5298                 :                : #include "levenshtein.c"
                               5299                 :                : 
                               5300                 :                : 
                               5301                 :                : /*
                               5302                 :                :  * The following *ClosestMatch() functions can be used to determine whether a
                               5303                 :                :  * user-provided string resembles any known valid values, which is useful for
                               5304                 :                :  * providing hints in log messages, among other things.  Use these functions
                               5305                 :                :  * like so:
                               5306                 :                :  *
                               5307                 :                :  *      initClosestMatch(&state, source_string, max_distance);
                               5308                 :                :  *
                               5309                 :                :  *      for (int i = 0; i < num_valid_strings; i++)
                               5310                 :                :  *          updateClosestMatch(&state, valid_strings[i]);
                               5311                 :                :  *
                               5312                 :                :  *      closestMatch = getClosestMatch(&state);
                               5313                 :                :  */
                               5314                 :                : 
                               5315                 :                : /*
                               5316                 :                :  * Initialize the given state with the source string and maximum Levenshtein
                               5317                 :                :  * distance to consider.
                               5318                 :                :  */
                               5319                 :                : void
 1276 peter@eisentraut.org     5320                 :CBC          39 : initClosestMatch(ClosestMatchState *state, const char *source, int max_d)
                               5321                 :                : {
                               5322         [ -  + ]:             39 :     Assert(state);
                               5323         [ -  + ]:             39 :     Assert(max_d >= 0);
                               5324                 :                : 
                               5325                 :             39 :     state->source = source;
                               5326                 :             39 :     state->min_d = -1;
                               5327                 :             39 :     state->max_d = max_d;
                               5328                 :             39 :     state->match = NULL;
                               5329                 :             39 : }
                               5330                 :                : 
                               5331                 :                : /*
                               5332                 :                :  * If the candidate string is a closer match than the current one saved (or
                               5333                 :                :  * there is no match saved), save it as the closest match.
                               5334                 :                :  *
                               5335                 :                :  * If the source or candidate string is NULL, empty, or too long, this function
                               5336                 :                :  * takes no action.  Likewise, if the Levenshtein distance exceeds the maximum
                               5337                 :                :  * allowed or more than half the characters are different, no action is taken.
                               5338                 :                :  */
                               5339                 :                : void
                               5340                 :            402 : updateClosestMatch(ClosestMatchState *state, const char *candidate)
                               5341                 :                : {
                               5342                 :                :     int         dist;
                               5343                 :                : 
                               5344         [ -  + ]:            402 :     Assert(state);
                               5345                 :                : 
                               5346   [ +  -  +  -  :            402 :     if (state->source == NULL || state->source[0] == '\0' ||
                                              +  - ]
                               5347         [ -  + ]:            402 :         candidate == NULL || candidate[0] == '\0')
 1276 peter@eisentraut.org     5348                 :UBC           0 :         return;
                               5349                 :                : 
                               5350                 :                :     /*
                               5351                 :                :      * To avoid ERROR-ing, we check the lengths here instead of setting
                               5352                 :                :      * 'trusted' to false in the call to varstr_levenshtein_less_equal().
                               5353                 :                :      */
 1276 peter@eisentraut.org     5354         [ +  - ]:CBC         402 :     if (strlen(state->source) > MAX_LEVENSHTEIN_STRLEN ||
                               5355         [ -  + ]:            402 :         strlen(candidate) > MAX_LEVENSHTEIN_STRLEN)
 1276 peter@eisentraut.org     5356                 :UBC           0 :         return;
                               5357                 :                : 
 1276 peter@eisentraut.org     5358                 :CBC         402 :     dist = varstr_levenshtein_less_equal(state->source, strlen(state->source),
                               5359                 :            402 :                                          candidate, strlen(candidate), 1, 1, 1,
                               5360                 :                :                                          state->max_d, true);
                               5361         [ +  + ]:            402 :     if (dist <= state->max_d &&
                               5362         [ +  + ]:             31 :         dist <= strlen(state->source) / 2 &&
                               5363   [ -  +  -  - ]:              7 :         (state->min_d == -1 || dist < state->min_d))
                               5364                 :                :     {
                               5365                 :              7 :         state->min_d = dist;
                               5366                 :              7 :         state->match = candidate;
                               5367                 :                :     }
                               5368                 :                : }
                               5369                 :                : 
                               5370                 :                : /*
                               5371                 :                :  * Return the closest match.  If no suitable candidates were provided via
                               5372                 :                :  * updateClosestMatch(), return NULL.
                               5373                 :                :  */
                               5374                 :                : const char *
                               5375                 :             39 : getClosestMatch(ClosestMatchState *state)
                               5376                 :                : {
                               5377         [ -  + ]:             39 :     Assert(state);
                               5378                 :                : 
                               5379                 :             39 :     return state->match;
                               5380                 :                : }
                               5381                 :                : 
                               5382                 :                : 
                               5383                 :                : /*
                               5384                 :                :  * Unicode support
                               5385                 :                :  */
                               5386                 :                : 
                               5387                 :                : static UnicodeNormalizationForm
 2180                          5388                 :            105 : unicode_norm_form_from_string(const char *formstr)
                               5389                 :                : {
                               5390                 :            105 :     UnicodeNormalizationForm form = -1;
                               5391                 :                : 
                               5392                 :                :     /*
                               5393                 :                :      * Might as well check this while we're here.
                               5394                 :                :      */
                               5395         [ -  + ]:            105 :     if (GetDatabaseEncoding() != PG_UTF8)
 2180 peter@eisentraut.org     5396         [ #  # ]:UBC           0 :         ereport(ERROR,
                               5397                 :                :                 (errcode(ERRCODE_SYNTAX_ERROR),
                               5398                 :                :                  errmsg("Unicode normalization can only be performed if server encoding is UTF8")));
                               5399                 :                : 
 2180 peter@eisentraut.org     5400         [ +  + ]:CBC         105 :     if (pg_strcasecmp(formstr, "NFC") == 0)
                               5401                 :             33 :         form = UNICODE_NFC;
                               5402         [ +  + ]:             72 :     else if (pg_strcasecmp(formstr, "NFD") == 0)
                               5403                 :             30 :         form = UNICODE_NFD;
                               5404         [ +  + ]:             42 :     else if (pg_strcasecmp(formstr, "NFKC") == 0)
                               5405                 :             18 :         form = UNICODE_NFKC;
                               5406         [ +  + ]:             24 :     else if (pg_strcasecmp(formstr, "NFKD") == 0)
                               5407                 :             18 :         form = UNICODE_NFKD;
                               5408                 :                :     else
                               5409         [ +  - ]:              6 :         ereport(ERROR,
                               5410                 :                :                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               5411                 :                :                  errmsg("invalid normalization form: %s", formstr)));
                               5412                 :                : 
                               5413                 :             99 :     return form;
                               5414                 :                : }
                               5415                 :                : 
                               5416                 :                : /*
                               5417                 :                :  * Returns version of Unicode used by Postgres in "major.minor" format (the
                               5418                 :                :  * same format as the Unicode version reported by ICU). The third component
                               5419                 :                :  * ("update version") never involves additions to the character repertoire and
                               5420                 :                :  * is unimportant for most purposes.
                               5421                 :                :  *
                               5422                 :                :  * See: https://unicode.org/versions/
                               5423                 :                :  */
                               5424                 :                : Datum
  865 jdavis@postgresql.or     5425                 :             17 : unicode_version(PG_FUNCTION_ARGS)
                               5426                 :                : {
                               5427                 :             17 :     PG_RETURN_TEXT_P(cstring_to_text(PG_UNICODE_VERSION));
                               5428                 :                : }
                               5429                 :                : 
                               5430                 :                : /*
                               5431                 :                :  * Returns version of Unicode used by ICU, if enabled; otherwise NULL.
                               5432                 :                :  */
                               5433                 :                : Datum
                               5434                 :              1 : icu_unicode_version(PG_FUNCTION_ARGS)
                               5435                 :                : {
   68 jdavis@postgresql.or     5436                 :GNC           1 :     const char *version = pg_icu_unicode_version();
                               5437                 :                : 
                               5438         [ +  - ]:              1 :     if (version)
                               5439                 :              1 :         PG_RETURN_TEXT_P(cstring_to_text(version));
                               5440                 :                :     else
   68 jdavis@postgresql.or     5441                 :UNC           0 :         PG_RETURN_NULL();
                               5442                 :                : }
                               5443                 :                : 
                               5444                 :                : /*
                               5445                 :                :  * Check whether the string contains only assigned Unicode code
                               5446                 :                :  * points. Requires that the database encoding is UTF-8.
                               5447                 :                :  */
                               5448                 :                : Datum
  865 jdavis@postgresql.or     5449                 :CBC           6 : unicode_assigned(PG_FUNCTION_ARGS)
                               5450                 :                : {
                               5451                 :              6 :     text       *input = PG_GETARG_TEXT_PP(0);
                               5452                 :                :     unsigned char *p;
                               5453                 :                :     int         size;
                               5454                 :                : 
                               5455         [ -  + ]:              6 :     if (GetDatabaseEncoding() != PG_UTF8)
  865 jdavis@postgresql.or     5456         [ #  # ]:UBC           0 :         ereport(ERROR,
                               5457                 :                :                 (errmsg("Unicode categorization can only be performed if server encoding is UTF8")));
                               5458                 :                : 
                               5459                 :                :     /* convert to char32_t */
  865 jdavis@postgresql.or     5460   [ -  +  -  -  :CBC           6 :     size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
                                     -  -  -  -  -  
                                           +  -  + ]
                               5461         [ -  + ]:              6 :     p = (unsigned char *) VARDATA_ANY(input);
                               5462         [ +  + ]:             24 :     for (int i = 0; i < size; i++)
                               5463                 :                :     {
  137 jdavis@postgresql.or     5464                 :GNC          21 :         char32_t    uchar = utf8_to_unicode(p);
  865 jdavis@postgresql.or     5465                 :CBC          21 :         int         category = unicode_category(uchar);
                               5466                 :                : 
                               5467         [ +  + ]:             21 :         if (category == PG_U_UNASSIGNED)
                               5468                 :              3 :             PG_RETURN_BOOL(false);
                               5469                 :                : 
                               5470                 :             18 :         p += pg_utf_mblen(p);
                               5471                 :                :     }
                               5472                 :                : 
                               5473                 :              3 :     PG_RETURN_BOOL(true);
                               5474                 :                : }
                               5475                 :                : 
                               5476                 :                : Datum
 2180 peter@eisentraut.org     5477                 :             36 : unicode_normalize_func(PG_FUNCTION_ARGS)
                               5478                 :                : {
                               5479                 :             36 :     text       *input = PG_GETARG_TEXT_PP(0);
                               5480                 :             36 :     char       *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
                               5481                 :                :     UnicodeNormalizationForm form;
                               5482                 :                :     int         size;
                               5483                 :                :     char32_t   *input_chars;
                               5484                 :                :     char32_t   *output_chars;
                               5485                 :                :     unsigned char *p;
                               5486                 :                :     text       *result;
                               5487                 :                :     int         i;
                               5488                 :                : 
                               5489                 :             36 :     form = unicode_norm_form_from_string(formstr);
                               5490                 :                : 
                               5491                 :                :     /* convert to char32_t */
                               5492   [ -  +  -  -  :             33 :     size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
                                     -  -  -  -  +  
                                           +  +  + ]
  137 jdavis@postgresql.or     5493                 :GNC          33 :     input_chars = palloc((size + 1) * sizeof(char32_t));
 2180 peter@eisentraut.org     5494         [ +  + ]:CBC          33 :     p = (unsigned char *) VARDATA_ANY(input);
                               5495         [ +  + ]:            144 :     for (i = 0; i < size; i++)
                               5496                 :                :     {
                               5497                 :            111 :         input_chars[i] = utf8_to_unicode(p);
                               5498                 :            111 :         p += pg_utf_mblen(p);
                               5499                 :                :     }
  137 jdavis@postgresql.or     5500                 :GNC          33 :     input_chars[i] = (char32_t) '\0';
 2180 peter@eisentraut.org     5501   [ -  +  -  +  :CBC          33 :     Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
                                     -  -  -  -  -  
                                        -  +  +  -  
                                                 + ]
                               5502                 :                : 
                               5503                 :                :     /* action */
                               5504                 :             33 :     output_chars = unicode_normalize(form, input_chars);
                               5505                 :                : 
                               5506                 :                :     /* convert back to UTF-8 string */
                               5507                 :             33 :     size = 0;
  137 jdavis@postgresql.or     5508         [ +  + ]:GNC         153 :     for (char32_t *wp = output_chars; *wp; wp++)
                               5509                 :                :     {
                               5510                 :                :         unsigned char buf[4];
                               5511                 :                : 
 2180 peter@eisentraut.org     5512                 :CBC         120 :         unicode_to_utf8(*wp, buf);
                               5513                 :            120 :         size += pg_utf_mblen(buf);
                               5514                 :                :     }
                               5515                 :                : 
                               5516                 :             33 :     result = palloc(size + VARHDRSZ);
                               5517                 :             33 :     SET_VARSIZE(result, size + VARHDRSZ);
                               5518                 :                : 
                               5519         [ -  + ]:             33 :     p = (unsigned char *) VARDATA_ANY(result);
  137 jdavis@postgresql.or     5520         [ +  + ]:GNC         153 :     for (char32_t *wp = output_chars; *wp; wp++)
                               5521                 :                :     {
 2180 peter@eisentraut.org     5522                 :CBC         120 :         unicode_to_utf8(*wp, p);
                               5523                 :            120 :         p += pg_utf_mblen(p);
                               5524                 :                :     }
                               5525         [ -  + ]:             33 :     Assert((char *) p == (char *) result + size + VARHDRSZ);
                               5526                 :                : 
                               5527                 :             33 :     PG_RETURN_TEXT_P(result);
                               5528                 :                : }
                               5529                 :                : 
                               5530                 :                : /*
                               5531                 :                :  * Check whether the string is in the specified Unicode normalization form.
                               5532                 :                :  *
                               5533                 :                :  * This is done by converting the string to the specified normal form and then
                               5534                 :                :  * comparing that to the original string.  To speed that up, we also apply the
                               5535                 :                :  * "quick check" algorithm specified in UAX #15, which can give a yes or no
                               5536                 :                :  * answer for many strings by just scanning the string once.
                               5537                 :                :  *
                               5538                 :                :  * This function should generally be optimized for the case where the string
                               5539                 :                :  * is in fact normalized.  In that case, we'll end up looking at the entire
                               5540                 :                :  * string, so it's probably not worth doing any incremental conversion etc.
                               5541                 :                :  */
                               5542                 :                : Datum
                               5543                 :             69 : unicode_is_normalized(PG_FUNCTION_ARGS)
                               5544                 :                : {
                               5545                 :             69 :     text       *input = PG_GETARG_TEXT_PP(0);
                               5546                 :             69 :     char       *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
                               5547                 :                :     UnicodeNormalizationForm form;
                               5548                 :                :     int         size;
                               5549                 :                :     char32_t   *input_chars;
                               5550                 :                :     char32_t   *output_chars;
                               5551                 :                :     unsigned char *p;
                               5552                 :                :     int         i;
                               5553                 :                :     UnicodeNormalizationQC quickcheck;
                               5554                 :                :     int         output_size;
                               5555                 :                :     bool        result;
                               5556                 :                : 
                               5557                 :             69 :     form = unicode_norm_form_from_string(formstr);
                               5558                 :                : 
                               5559                 :                :     /* convert to char32_t */
                               5560   [ -  +  -  -  :             66 :     size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
                                     -  -  -  -  -  
                                           +  -  + ]
  137 jdavis@postgresql.or     5561                 :GNC          66 :     input_chars = palloc((size + 1) * sizeof(char32_t));
 2180 peter@eisentraut.org     5562         [ -  + ]:CBC          66 :     p = (unsigned char *) VARDATA_ANY(input);
                               5563         [ +  + ]:            252 :     for (i = 0; i < size; i++)
                               5564                 :                :     {
                               5565                 :            186 :         input_chars[i] = utf8_to_unicode(p);
                               5566                 :            186 :         p += pg_utf_mblen(p);
                               5567                 :                :     }
  137 jdavis@postgresql.or     5568                 :GNC          66 :     input_chars[i] = (char32_t) '\0';
 2180 peter@eisentraut.org     5569   [ -  +  -  +  :CBC          66 :     Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
                                     -  -  -  -  -  
                                        -  -  +  -  
                                                 + ]
                               5570                 :                : 
                               5571                 :                :     /* quick check (see UAX #15) */
                               5572                 :             66 :     quickcheck = unicode_is_normalized_quickcheck(form, input_chars);
                               5573         [ +  + ]:             66 :     if (quickcheck == UNICODE_NORM_QC_YES)
                               5574                 :             21 :         PG_RETURN_BOOL(true);
                               5575         [ +  + ]:             45 :     else if (quickcheck == UNICODE_NORM_QC_NO)
                               5576                 :              6 :         PG_RETURN_BOOL(false);
                               5577                 :                : 
                               5578                 :                :     /* normalize and compare with original */
                               5579                 :             39 :     output_chars = unicode_normalize(form, input_chars);
                               5580                 :                : 
                               5581                 :             39 :     output_size = 0;
  137 jdavis@postgresql.or     5582         [ +  + ]:GNC         162 :     for (char32_t *wp = output_chars; *wp; wp++)
 2180 peter@eisentraut.org     5583                 :CBC         123 :         output_size++;
                               5584                 :                : 
                               5585         [ +  + ]:             57 :     result = (size == output_size) &&
  137 jdavis@postgresql.or     5586         [ +  + ]:GNC          18 :         (memcmp(input_chars, output_chars, size * sizeof(char32_t)) == 0);
                               5587                 :                : 
 2180 peter@eisentraut.org     5588                 :CBC          39 :     PG_RETURN_BOOL(result);
                               5589                 :                : }
                               5590                 :                : 
                               5591                 :                : /*
                               5592                 :                :  * Check if first n chars are hexadecimal digits
                               5593                 :                :  */
                               5594                 :                : static bool
 1813                          5595                 :             78 : isxdigits_n(const char *instr, size_t n)
                               5596                 :                : {
                               5597         [ +  + ]:            330 :     for (size_t i = 0; i < n; i++)
                               5598         [ +  + ]:            285 :         if (!isxdigit((unsigned char) instr[i]))
                               5599                 :             33 :             return false;
                               5600                 :                : 
                               5601                 :             45 :     return true;
                               5602                 :                : }
                               5603                 :                : 
                               5604                 :                : static unsigned int
                               5605                 :            252 : hexval(unsigned char c)
                               5606                 :                : {
                               5607   [ +  -  +  + ]:            252 :     if (c >= '0' && c <= '9')
                               5608                 :            192 :         return c - '0';
                               5609   [ +  +  +  - ]:             60 :     if (c >= 'a' && c <= 'f')
                               5610                 :             30 :         return c - 'a' + 0xA;
                               5611   [ +  -  +  - ]:             30 :     if (c >= 'A' && c <= 'F')
                               5612                 :             30 :         return c - 'A' + 0xA;
 1813 peter@eisentraut.org     5613         [ #  # ]:UBC           0 :     elog(ERROR, "invalid hexadecimal digit");
                               5614                 :                :     return 0;                   /* not reached */
                               5615                 :                : }
                               5616                 :                : 
                               5617                 :                : /*
                               5618                 :                :  * Translate string with hexadecimal digits to number
                               5619                 :                :  */
                               5620                 :                : static unsigned int
 1813 peter@eisentraut.org     5621                 :CBC          45 : hexval_n(const char *instr, size_t n)
                               5622                 :                : {
                               5623                 :             45 :     unsigned int result = 0;
                               5624                 :                : 
                               5625         [ +  + ]:            297 :     for (size_t i = 0; i < n; i++)
                               5626                 :            252 :         result += hexval(instr[i]) << (4 * (n - i - 1));
                               5627                 :                : 
                               5628                 :             45 :     return result;
                               5629                 :                : }
                               5630                 :                : 
                               5631                 :                : /*
                               5632                 :                :  * Replaces Unicode escape sequences by Unicode characters
                               5633                 :                :  */
                               5634                 :                : Datum
                               5635                 :             33 : unistr(PG_FUNCTION_ARGS)
                               5636                 :                : {
                               5637                 :             33 :     text       *input_text = PG_GETARG_TEXT_PP(0);
                               5638                 :                :     char       *instr;
                               5639                 :                :     int         len;
                               5640                 :                :     StringInfoData str;
                               5641                 :                :     text       *result;
  137 jdavis@postgresql.or     5642                 :GNC          33 :     char16_t    pair_first = 0;
                               5643                 :                :     char        cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
                               5644                 :                : 
 1813 peter@eisentraut.org     5645         [ -  + ]:CBC          33 :     instr = VARDATA_ANY(input_text);
                               5646   [ -  +  -  -  :             33 :     len = VARSIZE_ANY_EXHDR(input_text);
                                     -  -  -  -  -  
                                                 + ]
                               5647                 :                : 
                               5648                 :             33 :     initStringInfo(&str);
                               5649                 :                : 
                               5650         [ +  + ]:            255 :     while (len > 0)
                               5651                 :                :     {
                               5652         [ +  + ]:            243 :         if (instr[0] == '\\')
                               5653                 :                :         {
                               5654         [ +  - ]:             51 :             if (len >= 2 &&
                               5655         [ +  + ]:             51 :                 instr[1] == '\\')
                               5656                 :                :             {
                               5657         [ -  + ]:              3 :                 if (pair_first)
 1813 peter@eisentraut.org     5658                 :UBC           0 :                     goto invalid_pair;
 1813 peter@eisentraut.org     5659                 :CBC           3 :                 appendStringInfoChar(&str, '\\');
                               5660                 :              3 :                 instr += 2;
                               5661                 :              3 :                 len -= 2;
                               5662                 :                :             }
                               5663   [ +  +  +  +  :             48 :             else if ((len >= 5 && isxdigits_n(instr + 1, 4)) ||
                                              +  + ]
                               5664   [ +  +  +  - ]:             33 :                      (len >= 6 && instr[1] == 'u' && isxdigits_n(instr + 2, 4)))
                               5665                 :             15 :             {
                               5666                 :                :                 char32_t    unicode;
                               5667         [ +  + ]:             21 :                 int         offset = instr[1] == 'u' ? 2 : 1;
                               5668                 :                : 
                               5669                 :             21 :                 unicode = hexval_n(instr + offset, 4);
                               5670                 :                : 
                               5671         [ -  + ]:             21 :                 if (!is_valid_unicode_codepoint(unicode))
 1813 peter@eisentraut.org     5672         [ #  # ]:UBC           0 :                     ereport(ERROR,
                               5673                 :                :                             errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               5674                 :                :                             errmsg("invalid Unicode code point: %04X", unicode));
                               5675                 :                : 
 1813 peter@eisentraut.org     5676         [ +  + ]:CBC          21 :                 if (pair_first)
                               5677                 :                :                 {
                               5678         [ -  + ]:              6 :                     if (is_utf16_surrogate_second(unicode))
                               5679                 :                :                     {
 1813 peter@eisentraut.org     5680                 :UBC           0 :                         unicode = surrogate_pair_to_codepoint(pair_first, unicode);
                               5681                 :              0 :                         pair_first = 0;
                               5682                 :                :                     }
                               5683                 :                :                     else
 1813 peter@eisentraut.org     5684                 :CBC           6 :                         goto invalid_pair;
                               5685                 :                :                 }
                               5686         [ -  + ]:             15 :                 else if (is_utf16_surrogate_second(unicode))
 1813 peter@eisentraut.org     5687                 :UBC           0 :                     goto invalid_pair;
                               5688                 :                : 
 1813 peter@eisentraut.org     5689         [ +  + ]:CBC          15 :                 if (is_utf16_surrogate_first(unicode))
                               5690                 :              9 :                     pair_first = unicode;
                               5691                 :                :                 else
                               5692                 :                :                 {
                               5693                 :              6 :                     pg_unicode_to_server(unicode, (unsigned char *) cbuf);
                               5694                 :              6 :                     appendStringInfoString(&str, cbuf);
                               5695                 :                :                 }
                               5696                 :                : 
                               5697                 :             15 :                 instr += 4 + offset;
                               5698                 :             15 :                 len -= 4 + offset;
                               5699                 :                :             }
                               5700   [ +  +  +  +  :             27 :             else if (len >= 8 && instr[1] == '+' && isxdigits_n(instr + 2, 6))
                                              +  - ]
                               5701                 :              6 :             {
                               5702                 :                :                 char32_t    unicode;
                               5703                 :                : 
                               5704                 :             12 :                 unicode = hexval_n(instr + 2, 6);
                               5705                 :                : 
                               5706         [ +  + ]:             12 :                 if (!is_valid_unicode_codepoint(unicode))
                               5707         [ +  - ]:              3 :                     ereport(ERROR,
                               5708                 :                :                             errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               5709                 :                :                             errmsg("invalid Unicode code point: %04X", unicode));
                               5710                 :                : 
                               5711         [ +  + ]:              9 :                 if (pair_first)
                               5712                 :                :                 {
                               5713         [ -  + ]:              3 :                     if (is_utf16_surrogate_second(unicode))
                               5714                 :                :                     {
 1813 peter@eisentraut.org     5715                 :UBC           0 :                         unicode = surrogate_pair_to_codepoint(pair_first, unicode);
                               5716                 :              0 :                         pair_first = 0;
                               5717                 :                :                     }
                               5718                 :                :                     else
 1813 peter@eisentraut.org     5719                 :CBC           3 :                         goto invalid_pair;
                               5720                 :                :                 }
                               5721         [ -  + ]:              6 :                 else if (is_utf16_surrogate_second(unicode))
 1813 peter@eisentraut.org     5722                 :UBC           0 :                     goto invalid_pair;
                               5723                 :                : 
 1813 peter@eisentraut.org     5724         [ +  + ]:CBC           6 :                 if (is_utf16_surrogate_first(unicode))
                               5725                 :              3 :                     pair_first = unicode;
                               5726                 :                :                 else
                               5727                 :                :                 {
                               5728                 :              3 :                     pg_unicode_to_server(unicode, (unsigned char *) cbuf);
                               5729                 :              3 :                     appendStringInfoString(&str, cbuf);
                               5730                 :                :                 }
                               5731                 :                : 
                               5732                 :              6 :                 instr += 8;
                               5733                 :              6 :                 len -= 8;
                               5734                 :                :             }
                               5735   [ +  +  +  -  :             15 :             else if (len >= 10 && instr[1] == 'U' && isxdigits_n(instr + 2, 8))
                                              +  - ]
                               5736                 :              6 :             {
                               5737                 :                :                 char32_t    unicode;
                               5738                 :                : 
                               5739                 :             12 :                 unicode = hexval_n(instr + 2, 8);
                               5740                 :                : 
                               5741         [ +  + ]:             12 :                 if (!is_valid_unicode_codepoint(unicode))
                               5742         [ +  - ]:              3 :                     ereport(ERROR,
                               5743                 :                :                             errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                               5744                 :                :                             errmsg("invalid Unicode code point: %04X", unicode));
                               5745                 :                : 
                               5746         [ +  + ]:              9 :                 if (pair_first)
                               5747                 :                :                 {
                               5748         [ -  + ]:              3 :                     if (is_utf16_surrogate_second(unicode))
                               5749                 :                :                     {
 1813 peter@eisentraut.org     5750                 :UBC           0 :                         unicode = surrogate_pair_to_codepoint(pair_first, unicode);
                               5751                 :              0 :                         pair_first = 0;
                               5752                 :                :                     }
                               5753                 :                :                     else
 1813 peter@eisentraut.org     5754                 :CBC           3 :                         goto invalid_pair;
                               5755                 :                :                 }
                               5756         [ -  + ]:              6 :                 else if (is_utf16_surrogate_second(unicode))
 1813 peter@eisentraut.org     5757                 :UBC           0 :                     goto invalid_pair;
                               5758                 :                : 
 1813 peter@eisentraut.org     5759         [ +  + ]:CBC           6 :                 if (is_utf16_surrogate_first(unicode))
                               5760                 :              3 :                     pair_first = unicode;
                               5761                 :                :                 else
                               5762                 :                :                 {
                               5763                 :              3 :                     pg_unicode_to_server(unicode, (unsigned char *) cbuf);
                               5764                 :              3 :                     appendStringInfoString(&str, cbuf);
                               5765                 :                :                 }
                               5766                 :                : 
                               5767                 :              6 :                 instr += 10;
                               5768                 :              6 :                 len -= 10;
                               5769                 :                :             }
                               5770                 :                :             else
                               5771         [ +  - ]:              3 :                 ereport(ERROR,
                               5772                 :                :                         (errcode(ERRCODE_SYNTAX_ERROR),
                               5773                 :                :                          errmsg("invalid Unicode escape"),
                               5774                 :                :                          errhint("Unicode escapes must be \\XXXX, \\+XXXXXX, \\uXXXX, or \\UXXXXXXXX.")));
                               5775                 :                :         }
                               5776                 :                :         else
                               5777                 :                :         {
                               5778         [ -  + ]:            192 :             if (pair_first)
 1813 peter@eisentraut.org     5779                 :UBC           0 :                 goto invalid_pair;
                               5780                 :                : 
 1813 peter@eisentraut.org     5781                 :CBC         192 :             appendStringInfoChar(&str, *instr++);
                               5782                 :            192 :             len--;
                               5783                 :                :         }
                               5784                 :                :     }
                               5785                 :                : 
                               5786                 :                :     /* unfinished surrogate pair? */
                               5787         [ +  + ]:             12 :     if (pair_first)
                               5788                 :              3 :         goto invalid_pair;
                               5789                 :                : 
                               5790                 :              9 :     result = cstring_to_text_with_len(str.data, str.len);
                               5791                 :              9 :     pfree(str.data);
                               5792                 :                : 
                               5793                 :              9 :     PG_RETURN_TEXT_P(result);
                               5794                 :                : 
                               5795                 :             15 : invalid_pair:
                               5796         [ +  - ]:             15 :     ereport(ERROR,
                               5797                 :                :             (errcode(ERRCODE_SYNTAX_ERROR),
                               5798                 :                :              errmsg("invalid Unicode surrogate pair")));
                               5799                 :                :     PG_RETURN_NULL();           /* keep compiler quiet */
                               5800                 :                : }

Generated by: LCOV version 2.4-beta